use crate::*; pub struct MarkdownDocument { pub block_elements: Vec, } impl MarkdownDocument { pub fn from_str(raw_markdown: &str) -> Self { let mut block_elements = Vec::new(); let mut current_multiline_block = None; // Chain a blank line to the end to ensure that the final multi-line block is flushed. let lines = raw_markdown.lines().chain(std::iter::once("")); for incoming_line in lines { let incoming_line_untrimmed = incoming_line; let incoming_line = incoming_line.trim(); // Handle an in-progress subdocument block. if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block { if incoming_line == "```" { let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") }; block_elements.push(BlockElement::Subdocument(subdocument)); current_multiline_block = None; } else { lines.push(incoming_line_untrimmed); current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines)); } continue; } // Parse the incoming line. let incoming_line_block = { if let Some(("", tail)) = incoming_line.split_once("# ") { BlockLine::DocumentHeading(tail.trim()) } else if let Some(("", tail)) = incoming_line.split_once("## ") { BlockLine::SectionHeading(tail.trim()) } else if let Some(("", tail)) = incoming_line.split_once("### ") { BlockLine::ArticleHeading(tail.trim()) } else if let Some(("", tail)) = incoming_line.split_once("- ") { BlockLine::List(tail.trim()) } else if let Some(("", tail)) = incoming_line.split_once("> ") { BlockLine::Aside(tail.trim()) } else if incoming_line == ">" { BlockLine::Aside("") } else if let Some(("", tail)) = incoming_line.split_once("```") { BlockLine::SubdocumentHeader(tail.trim()) } else if incoming_line.starts_with("|") { BlockLine::Table(incoming_line) } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') { BlockLine::Break } else if incoming_line.is_empty() { BlockLine::BlankLine } else { BlockLine::Paragraph(incoming_line) } }; // If the incoming line is of the same type as the current multiline // block, append it to the end of that current block and continue. if let Some(ref mut current_block) = current_multiline_block { match (&incoming_line_block, current_block) { (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => { lines.push(line); continue; } (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => { lines.push(line); continue; } (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => { lines.push(line); continue; } _ => (), }; } // Otherwise, commit the current block, then handle the incoming line. if let Some(current_block) = current_multiline_block { match current_block { MultiLineBlock::List(raw_lines) => { let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); block_elements.push(BlockElement::List(lines)); } MultiLineBlock::Aside(raw_lines) => { let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); block_elements.push(BlockElement::Aside(lines)); } MultiLineBlock::Table(raw_lines) => { if let Some(table) = Table::try_from_strs(&raw_lines) { block_elements.push(BlockElement::Table(table)) } else { for raw_line in raw_lines { block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}} MultiLineBlock::Subdocument(..) => unreachable!(), } current_multiline_block = None; } // Handle the incoming line. match incoming_line_block { BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))), BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))), BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))), BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])), BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])), BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])), BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())), BlockLine::Paragraph(s) => { if let Some(embedded_file) = parse_embedded_file(&s) { block_elements.push(BlockElement::EmbeddedFile(embedded_file)) } else if let Some(math) = parse_math_block(&s) { block_elements.push(BlockElement::Math(math)) } else { block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) } }, BlockLine::Break => block_elements.push(BlockElement::Break), BlockLine::BlankLine => (), } } Self { block_elements } } } fn parse_embedded_file(text: &str) -> Option { let chars: Vec = text.trim().chars().collect(); let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); if starts_with(0, "![") { let label_start = 2; let mut label_end = label_start; while label_end <= chars.len() { if label_end == chars.len() { return None } if starts_with(label_end, "](") { break }; label_end += 1; } let label: String = chars[label_start..label_end].iter().collect(); if label.is_empty() || !is_contentful(&label, &['[', ']']) { return None } // Try to parse the target. let target_start = label_end + 2; let target_end = chars.len() - 1; if let Some(')') = chars.get(target_end) { let target: String = chars[target_start..target_end].iter().collect(); if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) { return None } return Some(EmbeddedFile { label, target }) } } return None; } fn parse_math_block(text: &str) -> Option { if let Some(("", trailing)) = text.split_once("$$") { if let Some((math, "")) = trailing.rsplit_once("$$") { return Some(math.trim().to_string()); } } return None; } /// When parsing, is a single line for a one-line block element. enum BlockLine<'a> { DocumentHeading(&'a str), SectionHeading(&'a str), ArticleHeading(&'a str), Paragraph(&'a str), List(&'a str), Aside(&'a str), Table(&'a str), SubdocumentHeader(&'a str), Break, BlankLine, } /// When parsing, is the gathered string lines of a multiline block element. enum MultiLineBlock<'a> { List(Vec<&'a str>), Aside(Vec<&'a str>), Table(Vec<&'a str>), Subdocument(&'a str, Vec<&'a str>), }