diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-01-06 12:21:06 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-01-06 17:16:24 +1300 |
commit | a78feb46aefaf8e8950e9b029984e9ff98fe69b0 (patch) | |
tree | d524c0656416e27484f8c0ae709f71558ea69bb6 /src/lib.rs | |
parent | 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (diff) | |
download | markdown-a78feb46aefaf8e8950e9b029984e9ff98fe69b0.zip |
Rewrite the library a second time (v2.0.0)
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- | src/lib.rs | 166 |
1 files changed, 154 insertions, 12 deletions
@@ -1,13 +1,155 @@ -mod document; -mod elements; - -pub use document::*; -pub use elements::*; - -pub(crate) fn is_whitespace(c: &char) -> bool { - c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) } -pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool { - s.chars().any(|c| !non_content_chars.contains(&c)) - && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false) - && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false) +#![feature(never_type)] + +mod block; pub use block::{Block, Level}; +mod line; pub use line::Line; +mod token; pub use token::Token; +mod table; pub use table::{Table, Column, Alignment}; + +pub struct MarkdownDocument { + pub blocks: Vec<Block>, +} + +impl MarkdownDocument { + pub fn from_str(raw_markdown: &str) -> Self { + let mut blocks = Vec::new(); + let mut current_block = None; + + // Chain a blank line to the end to ensure the final block is flushed. + for line in raw_markdown.lines().chain(std::iter::once("")) { + let line_raw = line; + let line = line.trim(); + + // Handle a fragment block separately, because fragment lines are not prefixed. + if let Some(BlockMultiline::Fragment { language, mut content }) = current_block { + if line == "```" { + let language = language.to_string(); + let content = content.join("\n"); + blocks.push(Block::Fragment { language, content }); + current_block = None; + } else { + content.push(line_raw); + current_block = Some(BlockMultiline::Fragment { language, content }); + } + continue; + } + + // Determine line type from prefix. 
+ let line = { + if let Some(("", tail)) = line.split_once("# ") { + BlockLine::Heading { level: Level::Heading1, line: tail.trim() } + } else if let Some(("", tail)) = line.split_once("## ") { + BlockLine::Heading { level: Level::Heading2, line: tail.trim() } + } else if let Some(("", tail)) = line.split_once("### ") { + BlockLine::Heading { level: Level::Heading3, line: tail.trim() } + } else if let Some(("", tail)) = line.split_once("- ") { + BlockLine::List(tail.trim()) + } else if let Some(("", tail)) = line.split_once("> ") { + BlockLine::Note(tail.trim()) + } else if line == ">" { + BlockLine::Note("") + } else if let Some(("", tail)) = line.split_once("```") { + BlockLine::FragmentHeader(tail.trim()) + } else if line.starts_with("|") { + BlockLine::Table(line) + } else if line.len() >= 3 && line.chars().all(|c| c=='-') { + BlockLine::Break + } else if line.is_empty() { + BlockLine::BlankLine + } else { + BlockLine::Paragraph(line) + } + }; + + // If line has the same type as the current block, append and continue. + if let Some(ref mut block) = current_block { + match (&line, block) { + (BlockLine::List(line), BlockMultiline::List(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Note(line), BlockMultiline::Note(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Table(line), BlockMultiline::Table(ref mut lines)) => { + lines.push(line); continue; } + _ => (), + }; + } + + // Otherwise commit the current block before handling the new line. 
+ if let Some(current_block) = std::mem::take(&mut current_block) { + match current_block { + BlockMultiline::List(raw_lines) => { + let lines = raw_lines.into_iter().map(Line::from_str).collect(); + blocks.push(Block::List(lines)); } + BlockMultiline::Note(raw_lines) => { + let lines = raw_lines.into_iter().map(Line::from_str).collect(); + blocks.push(Block::Note(lines)); } + BlockMultiline::Table(raw_lines) => { + if let Some(table) = Table::from_strs(&raw_lines) { + blocks.push(Block::Table(table)) } + else { + for raw_line in raw_lines { + blocks.push(Block::Paragraph(Line::from_str(&raw_line))) + } + }} + BlockMultiline::Fragment {..} => unreachable!(), + } + } + + // Handle the new line. + match line { + BlockLine::List(line) => current_block = Some( + BlockMultiline::List(vec![line])), + BlockLine::Note(line) => current_block = Some( + BlockMultiline::Note(vec![line])), + BlockLine::Table(line) => current_block = Some( + BlockMultiline::Table(vec![line])), + BlockLine::FragmentHeader(language) => current_block = Some( + BlockMultiline::Fragment { language, content: Vec::new() }), + BlockLine::Heading {level, line} => blocks.push( + Block::Heading { level, line: Line::from_str(&line) }), + BlockLine::Break => blocks.push(Block::Break), + BlockLine::BlankLine => (), + BlockLine::Paragraph(line) => match parse_embedded(&line) { + Some(embedded) => blocks.push(embedded), + None => blocks.push(Block::Paragraph(Line::from_str(&line))), + } + } + } + + Self { blocks } + } +} + + + +enum BlockLine<'a> { + Heading { level: Level, line: &'a str }, + Paragraph(&'a str), + List(&'a str), + Note(&'a str), + Table(&'a str), + FragmentHeader(&'a str), + Break, + BlankLine, +} + +enum BlockMultiline<'a> { + List(Vec<&'a str>), + Note(Vec<&'a str>), + Table(Vec<&'a str>), + Fragment { language: &'a str, content: Vec<&'a str> }, +} + +fn parse_embedded(line: &str) -> Option<Block> { + let line = line.trim(); + if let Some(("", line)) = line.split_once(".collect(); + if 
parts.len() == 2 { + let label = parts[0].to_string(); + let path = parts[1].to_string(); + return Some(Block::Embedded { label, path }) + } + } + } + return None; } |