use crate::*; pub fn parse(markdown: &str) -> Vec { let mut document = Vec::new(); let lines: Vec<&str> = markdown.lines().map(|l| l.trim_start()).collect(); let mut i = 0; // Gather all consecutive lines that begin with a given substring and run a // function over them. The function must be `fn(&[&str])->Result`. macro_rules! gather { ($prefix:expr, $func:ident) => {{ let start = i; for line in &lines[i..] { if line.starts_with($prefix) { i += 1; continue; } break; } let gathered_lines = &lines[start..i]; match gathered_lines.is_empty() { false => $func(gathered_lines), true => Err(()), } }}; } loop { let line = match lines.get(i) { Some(line) => line, None => return document, }; if line.is_empty() { i += 1; continue; } else if let Ok(heading) = parse_heading(line) { document.push(heading); i += 1; } else if let Ok(quote) = gather!(">", parse_quote) { document.push(quote); } else if let Ok(list) = gather!("- ", parse_list) { document.push(list); } else if let Ok(table) = gather!("|", parse_table) { document.push(table); } else if line.starts_with("```") { let language = line[3..].to_string(); let mut code_lines = Vec::new(); i += 1; for line in &lines[i..] { match line.trim() == "```" { true => break, false => { code_lines.push(line.to_string()); i += 1 } } } document.push(Block::Code(language, code_lines)); i += 1; } else { document.push(parse_paragraph(line)); i += 1; }; } } /// Returns the substring from `chars` that is between the `start` and `end` /// delimiters. Returns None if `chars` does not start with `start`, or if an /// occurance of `start` and `end` cannot be found within `chars`. There must /// not be a space after the occurance of `start` or before the occurance of /// `end`. If `start` and `end` consist of just one or more of the same /// character, the content must contain at least one other character than /// that one. fn capture(chars: &[char], start: &str, end: &str) -> Option { // Determine if `pattern` contains only a single unique character let single_char_in_pattern = match start.chars().next() { Some(first_char) => { let start_and_end = start.chars().chain(end.chars()); start_and_end.fold(Some(first_char), |accum, elem| match accum { Some(c) if c == elem => accum, _ => None, }) } None => None, }; let is_space = |i: usize| chars.get(i) == Some(&' '); fn starts_with_pattern(chars: &[char], pattern: &str) -> bool { let mut i = 0; for ref c in pattern.chars() { match chars.get(i) { Some(v) if v == c => i += 1, _ => return false, } } true } if !starts_with_pattern(chars, start) { return None; } let text_start = start.len(); if is_space(text_start) { return None; }; let mut i = text_start; loop { i += 1; if chars.get(i).is_none() { return None; } if starts_with_pattern(&chars[i..], end) { if is_space(i - 1) { continue; } let text_content: String = chars[text_start..i].iter().collect(); match single_char_in_pattern { None => return Some(text_content), Some(c) => { if text_content.chars().any(|e| e != c) { return Some(text_content); } } }; } } } fn parse_text(line: &str) -> Line { let mut block_content: Line = Vec::new(); let chars: Vec = line.chars().collect(); let mut normal = String::new(); let mut i = 0; macro_rules! commit_normal { () => { if !normal.is_empty() { let normal_text = Text::Normal(std::mem::take(&mut normal)); block_content.push(normal_text); } }; } let patterns: [(&str, &str, fn(String) -> Text); 7] = [ ("***", "***", Text::BoldItalic), ("**", "**", Text::Bold), ("*", "*", Text::Italic), ("___", "___", Text::BoldItalic), ("__", "__", Text::Bold), ("_", "_", Text::Italic), ("`", "`", Text::Code), ]; 'outer: loop { // Check if a simple, non-Normal text type starts at this character for (start, end, text_type) in patterns.iter() { if let Some(string) = capture(&chars[i..], start, end) { i += string.len() + start.len() + end.len(); commit_normal!(); block_content.push(text_type(string)); continue 'outer; } } // Check if a wiki-style hyperlink starts at this character if let Some(content) = capture(&chars[i..], "[[", "]]") { i += content.len() + 4; commit_normal!(); block_content.push(Text::WikiLink(content)); continue 'outer; } // Check if a long-form hyperlink starts at this character if let Some(label) = capture(&chars[i..], "[", "]") { let target_len = label.len() + 2; if let Some(target) = capture(&chars[i + target_len..], "(", ")") { i += target_len + target.len() + 2; commit_normal!(); block_content.push(Text::Hyperlink(Hyperlink { label, target })) } } // No new text type started here, this must just be normal text match chars.get(i) { Some(c) => { normal.push(*c); i += 1; } None => { commit_normal!(); break; } } } return block_content; } fn parse_heading(line: &str) -> Result { let (heading_type, content): (fn(Line) -> Block, &str) = if line.starts_with("# ") { (Block::Heading1, &line[2..]) } else if line.starts_with("## ") { (Block::Heading2, &line[3..]) } else if line.starts_with("### ") { (Block::Heading3, &line[4..]) } else { return Err(()); }; if content.is_empty() { return Err(()); }; Ok(heading_type(parse_text(content))) } /// Accepts a slice of lines that begin with '>' fn parse_quote(lines: &[&str]) -> Result { let mut content = Vec::new(); for line in lines { content.push(if *line == ">" { Vec::new() } else { parse_text(&line[2..]) }); } Ok(Block::Quote(content)) } fn parse_list(lines: &[&str]) -> Result { Ok(Block::List( lines.iter().map(|l| parse_text(&l[2..])).collect(), )) } fn parse_paragraph(line: &str) -> Block { Block::Paragraph(parse_text(line)) } fn parse_table(lines: &[&str]) -> Result { if lines.len() < 3 { return Err(()); } let names = split_columns(lines[0])?; let dividers = split_columns(lines[1])?; if names.len() != dividers.len() { return Err(()); } let mut columns = Vec::new(); for (name, divider) in std::iter::zip(names.iter(), dividers.iter()) { let alignment = Alignment::from_str(divider)?; columns.push(Column { name: parse_text(name), alignment, }) } let mut rows = Vec::new(); for row in &lines[2..] { let split_row: Vec = split_columns(row)?.iter().map(|s| parse_text(s)).collect(); if split_row.len() != columns.len() { return Err(()); } rows.push(split_row); } Ok(Block::Table(Table { columns, rows })) } fn split_columns(line: &str) -> Result, ()> { // Find the index after the first |, and before the last | let mut start = None; let mut end = None; for (i, c) in line.chars().enumerate() { if c == '|' { if start.is_none() { start = Some(i + 1); } else { end = Some(i); } } } match (start, end) { (Some(s), Some(e)) => { let chars: Vec = line.chars().collect(); let string: String = chars[s..e].iter().collect(); let split = string.split('|'); Ok(split.map(|s| s.trim().to_string()).collect()) } _ => Err(()), } }