Initial commit

author: Ben Bridle <bridle.benjamin@gmail.com> 2022-08-25 21:09:25 +1200
committer: Ben Bridle <bridle.benjamin@gmail.com> 2022-08-25 21:09:25 +1200
commit: 54f5e9fd883e207931baa9c87b6181ca724d6bab (patch)
tree: 17111a1da036dbc061ae4062ea0716373e16e23d /src/parse.rs
download: markdown-54f5e9fd883e207931baa9c87b6181ca724d6bab.zip
1 files changed, 283 insertions, 0 deletions
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..6e4cdd9
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,283 @@
+use crate::*;
+
+/// Parses a markdown document into a flat list of blocks.
+///
+/// Leading whitespace is trimmed from every line, and each line is then
+/// classified by trying, in order: blank line (skipped), heading, quote
+/// (lines starting with `>`), list (lines starting with `- `), table (lines
+/// starting with `|`), fenced code block (a line starting with three
+/// backticks), and finally paragraph. Quotes, lists and tables consume the
+/// whole run of consecutive lines that share their prefix; every other block
+/// type consumes a single line, apart from code blocks, which run up to the
+/// closing fence or the end of the input.
+///
+/// A run of lines that is rejected by its block parser (for example a run of
+/// `|` lines that does not form a valid table) is not consumed as a unit.
+/// Instead each of its lines is classified again by the remaining rules, so
+/// no input line is ever dropped.
+pub fn parse(markdown: &str) -> Vec<Block> {
+    let mut document = Vec::new();
+    let lines: Vec<&str> = markdown.lines().map(|l| l.trim_start()).collect();
+    let mut i = 0;
+
+    // Gather all consecutive lines, starting at line `i`, that begin with a
+    // given substring and run a function over them. The function must be
+    // `fn(&[&str])->Result<Block,()>`. The cursor `i` is only moved past the
+    // gathered lines if the function succeeds; on failure it is left where it
+    // was so that a later branch can still claim the lines.
+    macro_rules! gather {
+        ($prefix:expr, $func:ident) => {{
+            let start = i;
+            let count = lines[start..]
+                .iter()
+                .take_while(|line| line.starts_with($prefix))
+                .count();
+            let result = match count {
+                0 => Err(()),
+                _ => $func(&lines[start..start + count]),
+            };
+            if result.is_ok() {
+                i = start + count;
+            }
+            result
+        }};
+    }
+
+    while let Some(line) = lines.get(i) {
+        if line.is_empty() {
+            i += 1;
+        } else if let Ok(heading) = parse_heading(line) {
+            document.push(heading);
+            i += 1;
+        } else if let Ok(quote) = gather!(">", parse_quote) {
+            document.push(quote);
+        } else if let Ok(list) = gather!("- ", parse_list) {
+            document.push(list);
+        } else if let Ok(table) = gather!("|", parse_table) {
+            document.push(table);
+        } else if line.starts_with("```") {
+            // Whatever follows the opening fence on the same line is kept as
+            // the language string
+            let language = line[3..].to_string();
+            i += 1;
+            // The code runs until a line holding only the closing fence, or
+            // until the end of the input if the fence is never closed. The
+            // code lines are taken from `lines`, so their leading whitespace
+            // has already been trimmed.
+            let body_len = lines[i..]
+                .iter()
+                .take_while(|line| line.trim() != "```")
+                .count();
+            let code_lines = lines[i..i + body_len]
+                .iter()
+                .map(|line| line.to_string())
+                .collect();
+            document.push(Block::Code(language, code_lines));
+            // Step over the code lines and the closing fence
+            i += body_len + 1;
+        } else {
+            document.push(parse_paragraph(line));
+            i += 1;
+        }
+    }
+    document
+}
+
+/// Returns the substring from `chars` that is between the `start` and `end`
+/// delimiters. Returns None if `chars` does not start with `start`, or if a
+/// usable occurrence of `end` cannot be found after it within `chars`. There
+/// must not be a space after the occurrence of `start` or before the
+/// occurrence of `end`; an occurrence of `end` that follows a space is
+/// skipped and the search continues. The content is never empty. If `start`
+/// and `end` consist of just one or more of the same character, the content
+/// must contain at least one other character than that one.
+///
+/// All positions are counted in characters rather than bytes, so both the
+/// delimiters and the content may contain non-ASCII text.
+fn capture(chars: &[char], start: &str, end: &str) -> Option<String> {
+    /// Returns true if `chars` begins with every character of `pattern`.
+    fn starts_with_pattern(chars: &[char], pattern: &str) -> bool {
+        let mut remaining = chars.iter();
+        pattern.chars().all(|p| remaining.next() == Some(&p))
+    }
+
+    // If `start` and `end` are built from one repeated character, remember
+    // which character that is
+    let single_char_in_pattern = start
+        .chars()
+        .next()
+        .filter(|&first| start.chars().chain(end.chars()).all(|c| c == first));
+    let is_space = |i: usize| chars.get(i) == Some(&' ');
+
+    if !starts_with_pattern(chars, start) {
+        return None;
+    }
+    // Index of the first content character. `chars` is indexed by character,
+    // so the delimiter has to be measured in characters as well.
+    let text_start = start.chars().count();
+    if is_space(text_start) {
+        return None;
+    }
+    // The search begins one past `text_start`, so the content always holds
+    // at least one character
+    let mut i = text_start;
+    loop {
+        i += 1;
+        if i >= chars.len() {
+            return None;
+        }
+        if !starts_with_pattern(&chars[i..], end) || is_space(i - 1) {
+            continue;
+        }
+        let text_content: String = chars[text_start..i].iter().collect();
+        match single_char_in_pattern {
+            // Content made only of the delimiter character: keep searching
+            Some(c) if text_content.chars().all(|e| e == c) => continue,
+            _ => return Some(text_content),
+        }
+    }
+}
+
+/// Splits one line of text into a sequence of inline elements.
+///
+/// The line is scanned from left to right. At each position the emphasis and
+/// code patterns are tried first (longer delimiters before shorter ones),
+/// then a wiki-style link (`[[target]]`), then a long-form hyperlink
+/// (`[label](target)`). A character that starts none of these is appended to
+/// a run of normal text, which is flushed as a `Text::Normal` just before
+/// the next non-normal element and at the end of the line. An empty line
+/// yields an empty result.
+///
+/// The cursor is advanced in characters, not bytes, so non-ASCII content
+/// inside any element is handled correctly.
+fn parse_text(line: &str) -> Line {
+    /// Length of `s` in characters, the unit that `chars` is indexed by.
+    fn char_len(s: &str) -> usize {
+        s.chars().count()
+    }
+
+    let mut block_content: Line = Vec::new();
+    let chars: Vec<char> = line.chars().collect();
+    let mut normal = String::new();
+    let mut i = 0;
+
+    // Flush the pending run of normal text, if any, into `block_content`
+    macro_rules! commit_normal {
+        () => {
+            if !normal.is_empty() {
+                let normal_text = Text::Normal(std::mem::take(&mut normal));
+                block_content.push(normal_text);
+            }
+        };
+    }
+    let patterns: [(&str, &str, fn(String) -> Text); 7] = [
+        ("***", "***", Text::BoldItalic),
+        ("**", "**", Text::Bold),
+        ("*", "*", Text::Italic),
+        ("___", "___", Text::BoldItalic),
+        ("__", "__", Text::Bold),
+        ("_", "_", Text::Italic),
+        ("`", "`", Text::Code),
+    ];
+
+    'outer: loop {
+        // Check if a simple, non-Normal text type starts at this character
+        for (start, end, text_type) in patterns.iter() {
+            if let Some(string) = capture(&chars[i..], start, end) {
+                i += char_len(start) + char_len(&string) + char_len(end);
+                commit_normal!();
+                block_content.push(text_type(string));
+                continue 'outer;
+            }
+        }
+        // Check if a wiki-style hyperlink starts at this character
+        if let Some(content) = capture(&chars[i..], "[[", "]]") {
+            // Skip the content and both two-character delimiters
+            i += char_len(&content) + 4;
+            commit_normal!();
+            block_content.push(Text::WikiLink(content));
+            continue 'outer;
+        }
+
+        // Check if a long-form hyperlink starts at this character
+        if let Some(label) = capture(&chars[i..], "[", "]") {
+            // The target must begin directly after the label's closing ']'
+            let target_start = i + char_len(&label) + 2;
+            if let Some(target) = capture(&chars[target_start..], "(", ")") {
+                i = target_start + char_len(&target) + 2;
+                commit_normal!();
+                block_content.push(Text::Hyperlink(Hyperlink { label, target }));
+                // Restart the scan so that the character following the link
+                // is checked for the start of another element
+                continue 'outer;
+            }
+        }
+
+        // No new text type started here, this must just be normal text
+        match chars.get(i) {
+            Some(c) => {
+                normal.push(*c);
+                i += 1;
+            }
+            None => {
+                commit_normal!();
+                break;
+            }
+        }
+    }
+    block_content
+}
+
+/// Parses a single line as a level one, two or three heading.
+///
+/// The line must begin with one to three `#` characters followed by a space,
+/// and something must follow that marker. Returns `Err(())` if the line has
+/// no such marker or if nothing follows it.
+fn parse_heading(line: &str) -> Result<Block, ()> {
+    // Each marker paired with the constructor for its heading level. No line
+    // can start with more than one of these markers, so the order in which
+    // they are tried does not matter.
+    let levels: [(&str, fn(Line) -> Block); 3] = [
+        ("# ", Block::Heading1),
+        ("## ", Block::Heading2),
+        ("### ", Block::Heading3),
+    ];
+    for &(marker, heading_type) in levels.iter() {
+        if let Some(content) = line.strip_prefix(marker) {
+            return match content.is_empty() {
+                true => Err(()),
+                false => Ok(heading_type(parse_text(content))),
+            };
+        }
+    }
+    Err(())
+}
+
+/// Accepts a slice of lines that begin with '>' and builds a quote block
+/// from them.
+///
+/// The leading '>' and at most one space following it are removed from each
+/// line, and the remainder is parsed as inline text. A line with nothing
+/// after the marker becomes an empty line of text. The space after the
+/// marker is optional, so `>text` keeps all of `text`.
+///
+/// This function never returns `Err`; the `Result` return type matches the
+/// other block parsers.
+fn parse_quote(lines: &[&str]) -> Result<Block, ()> {
+    let content: Vec<Line> = lines
+        .iter()
+        .map(|&line| {
+            // Strip by prefix rather than by byte offset, so that a missing
+            // space or a multi-byte first character cannot cut into the text
+            let rest = line.strip_prefix('>').unwrap_or(line);
+            let rest = rest.strip_prefix(' ').unwrap_or(rest);
+            if rest.is_empty() {
+                Vec::new()
+            } else {
+                parse_text(rest)
+            }
+        })
+        .collect();
+    Ok(Block::Quote(content))
+}
+
+/// Builds a list block with one item per line.
+///
+/// The first two bytes of every line are skipped before the rest is parsed
+/// as inline text; the caller in this file only passes lines that start with
+/// the "- " marker. This function never returns `Err`.
+fn parse_list(lines: &[&str]) -> Result<Block, ()> {
+    let items = lines
+        .iter()
+        .map(|line| &line[2..])
+        .map(parse_text)
+        .collect();
+    Ok(Block::List(items))
+}
+
+/// Parses a single line as inline text and wraps it in a paragraph block.
+fn parse_paragraph(line: &str) -> Block {
+    let content = parse_text(line);
+    Block::Paragraph(content)
+}
+
+/// Builds a table block from a run of lines.
+///
+/// The first line holds the column names, the second line holds one divider
+/// per column from which the column alignment is read, and every further
+/// line is a data row. Returns `Err(())` if there are fewer than three
+/// lines, if any line cannot be split into columns, if the divider count
+/// differs from the name count, if a divider is not a valid alignment, or if
+/// a data row does not have exactly one cell per column.
+fn parse_table(lines: &[&str]) -> Result<Block, ()> {
+    // A table needs a name row, a divider row and at least one data row
+    let (name_row, divider_row, data_rows) = match lines {
+        [name_row, divider_row, data_rows @ ..] if !data_rows.is_empty() => {
+            (name_row, divider_row, data_rows)
+        }
+        _ => return Err(()),
+    };
+    let names = split_columns(name_row)?;
+    let dividers = split_columns(divider_row)?;
+    if names.len() != dividers.len() {
+        return Err(());
+    }
+    let columns = names
+        .iter()
+        .zip(dividers.iter())
+        .map(|(name, divider)| -> Result<Column, ()> {
+            let alignment = Alignment::from_str(divider)?;
+            Ok(Column {
+                name: parse_text(name),
+                alignment,
+            })
+        })
+        .collect::<Result<Vec<_>, ()>>()?;
+    let rows = data_rows
+        .iter()
+        .map(|row| -> Result<Vec<Line>, ()> {
+            let cells: Vec<Line> = split_columns(row)?.iter().map(|s| parse_text(s)).collect();
+            // Every data row must supply exactly one cell per column
+            if cells.len() != columns.len() {
+                return Err(());
+            }
+            Ok(cells)
+        })
+        .collect::<Result<Vec<_>, ()>>()?;
+    Ok(Block::Table(Table { columns, rows }))
+}
+
+/// Splits a table row into its cells.
+///
+/// Only the text between the first and the last `|` on the line is
+/// considered; anything outside that span is ignored. The span is split on
+/// `|` and each piece is trimmed of surrounding whitespace. Returns
+/// `Err(())` if the line contains fewer than two `|` characters.
+fn split_columns(line: &str) -> Result<Vec<String>, ()> {
+    // Byte offsets of the outermost pipes. A pipe is one byte long, so
+    // slicing directly after the first and directly before the last always
+    // lands on a character boundary.
+    let first = line.find('|').ok_or(())?;
+    let last = line.rfind('|').ok_or(())?;
+    if first == last {
+        // Only a single pipe on the line, so nothing is enclosed
+        return Err(());
+    }
+    let inner = &line[first + 1..last];
+    Ok(inner.split('|').map(|cell| cell.trim().to_string()).collect())
+}
author	Ben Bridle <bridle.benjamin@gmail.com>	2022-08-25 21:09:25 +1200
committer	Ben Bridle <bridle.benjamin@gmail.com>	2022-08-25 21:09:25 +1200
commit	54f5e9fd883e207931baa9c87b6181ca724d6bab (patch)
tree	17111a1da036dbc061ae4062ea0716373e16e23d /src/parse.rs
download	markdown-54f5e9fd883e207931baa9c87b6181ca724d6bab.zip