summary refs log tree commit diff
path: root/src/document.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/document.rs')
-rw-r--r-- src/document.rs 172
1 files changed, 172 insertions, 0 deletions
diff --git a/src/document.rs b/src/document.rs
new file mode 100644
index 0000000..fbfea00
--- /dev/null
+++ b/src/document.rs
@@ -0,0 +1,172 @@
+use crate::*;
+
+/// A parsed markdown document, held as a flat sequence of block elements.
+///
+/// Built by `MarkdownDocument::from_str`, which pushes elements in the order
+/// they are encountered in the source text.
+pub struct MarkdownDocument {
+ /// The block-level elements of the document, in source order.
+ pub block_elements: Vec<BlockElement>,
+}
+
+impl MarkdownDocument {
+    /// Parses raw markdown text into a flat list of block elements.
+    ///
+    /// The text is processed line by line. Each line is trimmed and classified
+    /// by its leading marker (see `classify_line`):
+    ///
+    /// - Headings (`# `, `## `, `### `), breaks (three or more `-`) and
+    ///   paragraphs each produce one block element per line.
+    /// - Consecutive list (`- `), aside (`>`) or table (`|`) lines are gathered
+    ///   into a single multi-line block, which is committed as soon as a line
+    ///   of any other kind arrives.
+    /// - A line starting with three backticks opens a subdocument. Every
+    ///   following line is stored untrimmed until a line that trims to exactly
+    ///   three backticks closes it. A subdocument that is still open at the end
+    ///   of the input is closed implicitly, so its content is not lost.
+    /// - A paragraph line that parses as an embedded file or a math block is
+    ///   emitted as that element instead of a paragraph.
+    /// - Blank lines produce nothing; they only end the current multi-line block.
+    pub fn from_str(raw_markdown: &str) -> Self {
+        let mut block_elements = Vec::new();
+        let mut current_multiline_block: Option<MultiLineBlock<'_>> = None;
+        // Chain a blank line to the end so that a trailing list, aside or table
+        // is flushed by the ordinary commit logic below.
+        let lines = raw_markdown.lines().chain(std::iter::once(""));
+
+        for incoming_line_untrimmed in lines {
+            let incoming_line = incoming_line_untrimmed.trim();
+
+            // Handle an in-progress subdocument block. Its lines are stored
+            // untrimmed and are never classified as markdown.
+            if let Some(MultiLineBlock::Subdocument(language, content_lines)) =
+                &mut current_multiline_block
+            {
+                if incoming_line == "```" {
+                    let subdocument = Subdocument {
+                        language: language.to_string(),
+                        content: content_lines.join("\n"),
+                    };
+                    block_elements.push(BlockElement::Subdocument(subdocument));
+                    current_multiline_block = None;
+                } else {
+                    content_lines.push(incoming_line_untrimmed);
+                }
+                continue;
+            }
+
+            // Parse the incoming line.
+            let incoming_line_block = classify_line(incoming_line);
+
+            // If the incoming line is of the same type as the current multiline
+            // block, append it to the end of that block and move on.
+            if let Some(current_block) = &mut current_multiline_block {
+                match (&incoming_line_block, current_block) {
+                    (BlockLine::List(line), MultiLineBlock::List(lines))
+                    | (BlockLine::Aside(line), MultiLineBlock::Aside(lines))
+                    | (BlockLine::Table(line), MultiLineBlock::Table(lines)) => {
+                        lines.push(*line);
+                        continue;
+                    }
+                    _ => (),
+                }
+            }
+
+            // Otherwise, commit the current block, then handle the incoming line.
+            if let Some(finished_block) = current_multiline_block.take() {
+                commit_multiline_block(finished_block, &mut block_elements);
+            }
+
+            // Handle the incoming line.
+            match incoming_line_block {
+                BlockLine::DocumentHeading(s) => {
+                    block_elements.push(BlockElement::DocumentHeading(Line::from_str(s)))
+                }
+                BlockLine::SectionHeading(s) => {
+                    block_elements.push(BlockElement::SectionHeading(Line::from_str(s)))
+                }
+                BlockLine::ArticleHeading(s) => {
+                    block_elements.push(BlockElement::ArticleHeading(Line::from_str(s)))
+                }
+                BlockLine::List(s) => {
+                    current_multiline_block = Some(MultiLineBlock::List(vec![s]))
+                }
+                BlockLine::Aside(s) => {
+                    current_multiline_block = Some(MultiLineBlock::Aside(vec![s]))
+                }
+                BlockLine::Table(s) => {
+                    current_multiline_block = Some(MultiLineBlock::Table(vec![s]))
+                }
+                BlockLine::SubdocumentHeader(s) => {
+                    current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new()))
+                }
+                BlockLine::Paragraph(s) => {
+                    if let Some(embedded_file) = parse_embedded_file(s) {
+                        block_elements.push(BlockElement::EmbeddedFile(embedded_file))
+                    } else if let Some(math) = parse_math_block(s) {
+                        block_elements.push(BlockElement::Math(math))
+                    } else {
+                        block_elements.push(BlockElement::Paragraph(Line::from_str(s)))
+                    }
+                }
+                BlockLine::Break => block_elements.push(BlockElement::Break),
+                BlockLine::BlankLine => (),
+            }
+        }
+
+        // The chained blank line flushes lists, asides and tables, but a
+        // subdocument swallows it as content. If the closing fence is missing,
+        // emit the subdocument here rather than silently dropping it.
+        if let Some(MultiLineBlock::Subdocument(language, mut content_lines)) =
+            current_multiline_block
+        {
+            // The last stored line is the synthetic blank line, not source text.
+            content_lines.pop();
+            let subdocument = Subdocument {
+                language: language.to_string(),
+                content: content_lines.join("\n"),
+            };
+            block_elements.push(BlockElement::Subdocument(subdocument));
+        }
+
+        Self { block_elements }
+    }
+}
+
+/// Classifies a single trimmed line by its leading marker.
+///
+/// The checks are ordered: the first matching rule wins, and anything that
+/// matches no rule and is not empty is a paragraph.
+fn classify_line(line: &str) -> BlockLine<'_> {
+    if let Some(tail) = line.strip_prefix("# ") {
+        BlockLine::DocumentHeading(tail.trim())
+    } else if let Some(tail) = line.strip_prefix("## ") {
+        BlockLine::SectionHeading(tail.trim())
+    } else if let Some(tail) = line.strip_prefix("### ") {
+        BlockLine::ArticleHeading(tail.trim())
+    } else if let Some(tail) = line.strip_prefix("- ") {
+        BlockLine::List(tail.trim())
+    } else if let Some(tail) = line.strip_prefix("> ") {
+        BlockLine::Aside(tail.trim())
+    } else if line == ">" {
+        // A bare marker is an empty line inside an aside.
+        BlockLine::Aside("")
+    } else if let Some(tail) = line.strip_prefix("```") {
+        BlockLine::SubdocumentHeader(tail.trim())
+    } else if line.starts_with('|') {
+        BlockLine::Table(line)
+    } else if line.len() >= 3 && line.chars().all(|c| c == '-') {
+        BlockLine::Break
+    } else if line.is_empty() {
+        BlockLine::BlankLine
+    } else {
+        BlockLine::Paragraph(line)
+    }
+}
+
+/// Converts a finished list, aside or table block into block elements and
+/// appends them to `block_elements`.
+///
+/// Table lines that `Table::try_from_strs` rejects are emitted as one
+/// paragraph per line instead.
+fn commit_multiline_block(block: MultiLineBlock<'_>, block_elements: &mut Vec<BlockElement>) {
+    match block {
+        MultiLineBlock::List(raw_lines) => {
+            let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
+            block_elements.push(BlockElement::List(lines));
+        }
+        MultiLineBlock::Aside(raw_lines) => {
+            let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
+            block_elements.push(BlockElement::Aside(lines));
+        }
+        MultiLineBlock::Table(raw_lines) => {
+            if let Some(table) = Table::try_from_strs(&raw_lines) {
+                block_elements.push(BlockElement::Table(table))
+            } else {
+                for raw_line in raw_lines {
+                    block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line)))
+                }
+            }
+        }
+        // Subdocuments are closed by their fence (or at end of input) inside
+        // `from_str` and never reach the generic commit path.
+        MultiLineBlock::Subdocument(..) => unreachable!(),
+    }
+}
+
+/// Parses a line that consists solely of an embedded file, written as
+/// `![label](target)`.
+///
+/// Surrounding whitespace is ignored. The label is the text between `![` and
+/// the first `](`; the target is everything after that up to a closing `)`,
+/// which must be the final character of the line.
+///
+/// Returns `None` when the line does not have this shape, when the label or
+/// target is empty, when the target contains a `)`, or when `is_contentful`
+/// rejects either part. Inputs too short to hold the `![` prefix (including
+/// the empty string) return `None` rather than panicking.
+fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> {
+    let rest = text.trim().strip_prefix("![")?;
+
+    // The label runs up to the first `](`.
+    let (label, rest) = rest.split_once("](")?;
+    let label = label.to_string();
+    if label.is_empty() || !is_contentful(&label, &['[', ']']) {
+        return None;
+    }
+
+    // Try to parse the target: it must be closed by the last character.
+    let target = rest.strip_suffix(')')?.to_string();
+    if target.is_empty() || target.contains(')') || !is_contentful(&target, &['(', ')']) {
+        return None;
+    }
+
+    Some(EmbeddedFile { label, target })
+}
+
+/// Extracts the body of a one-line math block written as `$$ ... $$`.
+///
+/// The line must begin and end with `$$`, and the two delimiters may not
+/// overlap. The text between them is returned with surrounding whitespace
+/// removed; any other line yields `None`.
+fn parse_math_block(text: &str) -> Option<String> {
+    let inner = text.strip_prefix("$$")?.strip_suffix("$$")?;
+    Some(inner.trim().to_string())
+}
+
+/// The classification of a single trimmed source line during parsing.
+///
+/// Heading, paragraph and break lines become a block element on their own;
+/// list, aside and table lines are gathered into a `MultiLineBlock` first.
+/// String payloads borrow from the markdown text being parsed.
+enum BlockLine<'a> {
+ /// A line starting with `# `; holds the trimmed text after the marker.
+ DocumentHeading(&'a str),
+ /// A line starting with `## `; holds the trimmed text after the marker.
+ SectionHeading(&'a str),
+ /// A line starting with `### `; holds the trimmed text after the marker.
+ ArticleHeading(&'a str),
+ /// Any non-empty line that matches no other rule; holds the trimmed line.
+ Paragraph(&'a str),
+ /// A line starting with `- `; holds the trimmed text after the marker.
+ List(&'a str),
+ /// A line starting with `> `, or a bare `>`; holds the trimmed text after
+ /// the marker (empty for a bare `>`).
+ Aside(&'a str),
+ /// A line starting with `|`; holds the whole trimmed line.
+ Table(&'a str),
+ /// A line starting with three backticks; holds the trimmed text after them.
+ SubdocumentHeader(&'a str),
+ /// A line of three or more `-` characters and nothing else.
+ Break,
+ /// A line that is empty after trimming.
+ BlankLine,
+}
+
+/// The source lines gathered so far for a block element that spans several
+/// lines, held while parsing until the block is complete.
+enum MultiLineBlock<'a> {
+ /// The item text of each consecutive list line.
+ List(Vec<&'a str>),
+ /// The text of each consecutive aside line.
+ Aside(Vec<&'a str>),
+ /// Each consecutive table line, kept whole.
+ Table(Vec<&'a str>),
+ /// The text that followed the opening fence, then the untrimmed lines
+ /// collected since that fence.
+ Subdocument(&'a str, Vec<&'a str>),
+}
+