diff options
author | Ben Bridle <bridle.benjamin@gmail.com> | 2022-08-25 21:09:25 +1200 |
---|---|---|
committer | Ben Bridle <bridle.benjamin@gmail.com> | 2022-08-25 21:09:25 +1200 |
commit | 54f5e9fd883e207931baa9c87b6181ca724d6bab (patch) | |
tree | 17111a1da036dbc061ae4062ea0716373e16e23d /src/parse.rs | |
download | markdown-54f5e9fd883e207931baa9c87b6181ca724d6bab.zip |
Initial commit
Diffstat (limited to 'src/parse.rs')
-rw-r--r-- | src/parse.rs | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..6e4cdd9 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,283 @@ +use crate::*; + +pub fn parse(markdown: &str) -> Vec<Block> { + let mut document = Vec::new(); + let lines: Vec<&str> = markdown.lines().map(|l| l.trim_start()).collect(); + let mut i = 0; + + // Gather all consecutive lines that begin with a given substring and run a + // function over them. The function must be `fn(&[&str])->Result<Block,()>`. + macro_rules! gather { + ($prefix:expr, $func:ident) => {{ + let start = i; + for line in &lines[i..] { + if line.starts_with($prefix) { + i += 1; + continue; + } + break; + } + let gathered_lines = &lines[start..i]; + match gathered_lines.is_empty() { + false => $func(gathered_lines), + true => Err(()), + } + }}; + } + + loop { + let line = match lines.get(i) { + Some(line) => line, + None => return document, + }; + if line.is_empty() { + i += 1; + continue; + } else if let Ok(heading) = parse_heading(line) { + document.push(heading); + i += 1; + } else if let Ok(quote) = gather!(">", parse_quote) { + document.push(quote); + } else if let Ok(list) = gather!("- ", parse_list) { + document.push(list); + } else if let Ok(table) = gather!("|", parse_table) { + document.push(table); + } else if line.starts_with("```") { + let language = line[3..].to_string(); + let mut code_lines = Vec::new(); + i += 1; + for line in &lines[i..] { + match line.trim() == "```" { + true => break, + false => { + code_lines.push(line.to_string()); + i += 1 + } + } + } + document.push(Block::Code(language, code_lines)); + i += 1; + } else { + document.push(parse_paragraph(line)); + i += 1; + }; + } +} + +/// Returns the substring from `chars` that is between the `start` and `end` +/// delimiters. Returns None if `chars` does not start with `start`, or if an +/// occurance of `start` and `end` cannot be found within `chars`. There must +/// not be a space after the occurance of `start` or before the occurance of +/// `end`. If `start` and `end` consist of just one or more of the same +/// character, the content must contain at least one other character than +/// that one. +fn capture(chars: &[char], start: &str, end: &str) -> Option<String> { + // Determine if `pattern` contains only a single unique character + let single_char_in_pattern = match start.chars().next() { + Some(first_char) => { + let start_and_end = start.chars().chain(end.chars()); + start_and_end.fold(Some(first_char), |accum, elem| match accum { + Some(c) if c == elem => accum, + _ => None, + }) + } + None => None, + }; + let is_space = |i: usize| chars.get(i) == Some(&' '); + fn starts_with_pattern(chars: &[char], pattern: &str) -> bool { + let mut i = 0; + for ref c in pattern.chars() { + match chars.get(i) { + Some(v) if v == c => i += 1, + _ => return false, + } + } + true + } + if !starts_with_pattern(chars, start) { + return None; + } + let text_start = start.len(); + if is_space(text_start) { + return None; + }; + let mut i = text_start; + loop { + i += 1; + if chars.get(i).is_none() { + return None; + } + if starts_with_pattern(&chars[i..], end) { + if is_space(i - 1) { + continue; + } + let text_content: String = chars[text_start..i].iter().collect(); + match single_char_in_pattern { + None => return Some(text_content), + Some(c) => { + if text_content.chars().any(|e| e != c) { + return Some(text_content); + } + } + }; + } + } +} + +fn parse_text(line: &str) -> Line { + let mut block_content: Line = Vec::new(); + let chars: Vec<char> = line.chars().collect(); + let mut normal = String::new(); + let mut i = 0; + + macro_rules! commit_normal { + () => { + if !normal.is_empty() { + let normal_text = Text::Normal(std::mem::take(&mut normal)); + block_content.push(normal_text); + } + }; + } + let patterns: [(&str, &str, fn(String) -> Text); 7] = [ + ("***", "***", Text::BoldItalic), + ("**", "**", Text::Bold), + ("*", "*", Text::Italic), + ("___", "___", Text::BoldItalic), + ("__", "__", Text::Bold), + ("_", "_", Text::Italic), + ("`", "`", Text::Code), + ]; + + 'outer: loop { + // Check if a simple, non-Normal text type starts at this character + for (start, end, text_type) in patterns.iter() { + if let Some(string) = capture(&chars[i..], start, end) { + i += string.len() + start.len() + end.len(); + commit_normal!(); + block_content.push(text_type(string)); + continue 'outer; + } + } + // Check if a wiki-style hyperlink starts at this character + if let Some(content) = capture(&chars[i..], "[[", "]]") { + i += content.len() + 4; + commit_normal!(); + block_content.push(Text::WikiLink(content)); + continue 'outer; + } + + // Check if a long-form hyperlink starts at this character + if let Some(label) = capture(&chars[i..], "[", "]") { + let target_len = label.len() + 2; + if let Some(target) = capture(&chars[i + target_len..], "(", ")") { + i += target_len + target.len() + 2; + commit_normal!(); + block_content.push(Text::Hyperlink(Hyperlink { label, target })) + } + } + + // No new text type started here, this must just be normal text + match chars.get(i) { + Some(c) => { + normal.push(*c); + i += 1; + } + None => { + commit_normal!(); + break; + } + } + } + return block_content; +} + +fn parse_heading(line: &str) -> Result<Block, ()> { + let (heading_type, content): (fn(Line) -> Block, &str) = if line.starts_with("# ") { + (Block::Heading1, &line[2..]) + } else if line.starts_with("## ") { + (Block::Heading2, &line[3..]) + } else if line.starts_with("### ") { + (Block::Heading3, &line[4..]) + } else { + return Err(()); + }; + if content.is_empty() { + return Err(()); + }; + Ok(heading_type(parse_text(content))) +} + +/// Accepts a slice of lines that begin with '>' +fn parse_quote(lines: &[&str]) -> Result<Block, ()> { + let mut content = Vec::new(); + for line in lines { + content.push(if *line == ">" { + Vec::new() + } else { + parse_text(&line[2..]) + }); + } + Ok(Block::Quote(content)) +} + +fn parse_list(lines: &[&str]) -> Result<Block, ()> { + Ok(Block::List( + lines.iter().map(|l| parse_text(&l[2..])).collect(), + )) +} + +fn parse_paragraph(line: &str) -> Block { + Block::Paragraph(parse_text(line)) +} + +fn parse_table(lines: &[&str]) -> Result<Block, ()> { + if lines.len() < 3 { + return Err(()); + } + let names = split_columns(lines[0])?; + let dividers = split_columns(lines[1])?; + if names.len() != dividers.len() { + return Err(()); + } + let mut columns = Vec::new(); + for (name, divider) in std::iter::zip(names.iter(), dividers.iter()) { + let alignment = Alignment::from_str(divider)?; + columns.push(Column { + name: parse_text(name), + alignment, + }) + } + let mut rows = Vec::new(); + for row in &lines[2..] { + let split_row: Vec<Line> = split_columns(row)?.iter().map(|s| parse_text(s)).collect(); + if split_row.len() != columns.len() { + return Err(()); + } + rows.push(split_row); + } + Ok(Block::Table(Table { columns, rows })) +} + +fn split_columns(line: &str) -> Result<Vec<String>, ()> { + // Find the index after the first |, and before the last | + let mut start = None; + let mut end = None; + for (i, c) in line.chars().enumerate() { + if c == '|' { + if start.is_none() { + start = Some(i + 1); + } else { + end = Some(i); + } + } + } + match (start, end) { + (Some(s), Some(e)) => { + let chars: Vec<char> = line.chars().collect(); + let string: String = chars[s..e].iter().collect(); + let split = string.split('|'); + Ok(split.map(|s| s.trim().to_string()).collect()) + } + _ => Err(()), + } +} |