diff options
Diffstat (limited to 'src/document.rs')
-rw-r--r-- | src/document.rs | 172 |
1 files changed, 172 insertions, 0 deletions
diff --git a/src/document.rs b/src/document.rs new file mode 100644 index 0000000..fbfea00 --- /dev/null +++ b/src/document.rs @@ -0,0 +1,172 @@ +use crate::*; + +pub struct MarkdownDocument { + pub block_elements: Vec<BlockElement>, +} + +impl MarkdownDocument { + pub fn from_str(raw_markdown: &str) -> Self { + let mut block_elements = Vec::new(); + let mut current_multiline_block = None; + // Chain a blank line to the end to ensure that the final multi-line block is flushed. + let lines = raw_markdown.lines().chain(std::iter::once("")); + + for incoming_line in lines { + let incoming_line_untrimmed = incoming_line; + let incoming_line = incoming_line.trim(); + // Handle an in-progress subdocument block. + if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block { + if incoming_line == "```" { + let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") }; + block_elements.push(BlockElement::Subdocument(subdocument)); + current_multiline_block = None; + } else { + lines.push(incoming_line_untrimmed); + current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines)); + } + continue; + } + + // Parse the incoming line. + let incoming_line_block = { + if let Some(("", tail)) = incoming_line.split_once("# ") { + BlockLine::DocumentHeading(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("## ") { + BlockLine::SectionHeading(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("### ") { + BlockLine::ArticleHeading(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("- ") { + BlockLine::List(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("> ") { + BlockLine::Aside(tail.trim()) + } else if incoming_line == ">" { + BlockLine::Aside("") + } else if let Some(("", tail)) = incoming_line.split_once("```") { + BlockLine::SubdocumentHeader(tail.trim()) + } else if incoming_line.starts_with("|") { + BlockLine::Table(incoming_line) + } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') { + BlockLine::Break + } else if incoming_line.is_empty() { + BlockLine::BlankLine + } else { + BlockLine::Paragraph(incoming_line) } + }; + + // If the incoming line is of the same type as the current multiline + // block, append it to the end of that current block and continue. + if let Some(ref mut current_block) = current_multiline_block { + match (&incoming_line_block, current_block) { + (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => { + lines.push(line); continue; } + _ => (), + }; + } + + // Otherwise, commit the current block, then handle the incoming line. + if let Some(current_block) = current_multiline_block { + match current_block { + MultiLineBlock::List(raw_lines) => { + let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); + block_elements.push(BlockElement::List(lines)); } + MultiLineBlock::Aside(raw_lines) => { + let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); + block_elements.push(BlockElement::Aside(lines)); } + MultiLineBlock::Table(raw_lines) => { + if let Some(table) = Table::try_from_strs(&raw_lines) { + block_elements.push(BlockElement::Table(table)) } + else { for raw_line in raw_lines { + block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}} + MultiLineBlock::Subdocument(..) => unreachable!(), + } + current_multiline_block = None; + } + + // Handle the incoming line. + match incoming_line_block { + BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))), + BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))), + BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))), + BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])), + BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])), + BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])), + BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())), + BlockLine::Paragraph(s) => { + if let Some(embedded_file) = parse_embedded_file(&s) { + block_elements.push(BlockElement::EmbeddedFile(embedded_file)) + } else if let Some(math) = parse_math_block(&s) { + block_elements.push(BlockElement::Math(math)) + } else { + block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) } + }, + BlockLine::Break => block_elements.push(BlockElement::Break), + BlockLine::BlankLine => (), + } + } + + Self { block_elements } + } +} + +fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> { + let chars: Vec<char> = text.trim().chars().collect(); + let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); + + if starts_with(0, " { break }; + label_end += 1; } + let label: String = chars[label_start..label_end].iter().collect(); + if label.is_empty() || !is_contentful(&label, &['[', ']']) { + return None } + // Try to parse the target. + let target_start = label_end + 2; + let target_end = chars.len() - 1; + if let Some(')') = chars.get(target_end) { + let target: String = chars[target_start..target_end].iter().collect(); + if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) { + return None } + return Some(EmbeddedFile { label, target }) + } + } + return None; +} + +fn parse_math_block(text: &str) -> Option<String> { + if let Some(("", trailing)) = text.split_once("$$") { + if let Some((math, "")) = trailing.rsplit_once("$$") { + return Some(math.trim().to_string()); + } + } + return None; +} + +/// When parsing, is a single line for a one-line block element. +enum BlockLine<'a> { + DocumentHeading(&'a str), + SectionHeading(&'a str), + ArticleHeading(&'a str), + Paragraph(&'a str), + List(&'a str), + Aside(&'a str), + Table(&'a str), + SubdocumentHeader(&'a str), + Break, + BlankLine, +} + +/// When parsing, is the gathered string lines of a multiline block element. +enum MultiLineBlock<'a> { + List(Vec<&'a str>), + Aside(Vec<&'a str>), + Table(Vec<&'a str>), + Subdocument(&'a str, Vec<&'a str>), +} + |