summaryrefslogtreecommitdiff
path: root/src/parse.rs
diff options
context:
space:
mode:
authorBen Bridle <bridle.benjamin@gmail.com>2022-08-25 21:09:25 +1200
committerBen Bridle <bridle.benjamin@gmail.com>2022-08-25 21:09:25 +1200
commit54f5e9fd883e207931baa9c87b6181ca724d6bab (patch)
tree17111a1da036dbc061ae4062ea0716373e16e23d /src/parse.rs
downloadmarkdown-54f5e9fd883e207931baa9c87b6181ca724d6bab.zip
Initial commit
Diffstat (limited to 'src/parse.rs')
-rw-r--r--src/parse.rs283
1 files changed, 283 insertions, 0 deletions
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..6e4cdd9
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,283 @@
+use crate::*;
+
+/// Parses a Markdown document into a list of top-level blocks.
+///
+/// Leading whitespace is ignored when deciding what kind of block a line
+/// starts. Blank lines produce no output. Every other line starts a heading,
+/// a quote, a list, a table or a fenced code block, and otherwise becomes a
+/// paragraph of its own. A fenced code block that is never closed runs to
+/// the end of the document.
+pub fn parse(markdown: &str) -> Vec<Block> {
+    let mut document = Vec::new();
+    // The untrimmed lines are kept so that fenced code keeps its indentation.
+    let raw_lines: Vec<&str> = markdown.lines().collect();
+    let lines: Vec<&str> = raw_lines.iter().map(|l| l.trim_start()).collect();
+    let mut i = 0;
+
+    // Gather all consecutive lines, starting at line `i`, that begin with a
+    // given substring and run a function over them. The function must be
+    // `fn(&[&str])->Result<Block,()>`. The cursor `i` only moves past the
+    // gathered lines if the function succeeds; on failure it is left alone
+    // so that the lines can still be handled by a later branch.
+    macro_rules! gather {
+        ($prefix:expr, $func:ident) => {{
+            let start = i;
+            let count = lines[start..]
+                .iter()
+                .take_while(|l| l.starts_with($prefix))
+                .count();
+            match count {
+                0 => Err(()),
+                _ => {
+                    let result = $func(&lines[start..start + count]);
+                    if result.is_ok() {
+                        i = start + count;
+                    }
+                    result
+                }
+            }
+        }};
+    }
+
+    loop {
+        let line = match lines.get(i) {
+            Some(&line) => line,
+            None => return document,
+        };
+        if line.is_empty() {
+            i += 1;
+            continue;
+        } else if let Ok(heading) = parse_heading(line) {
+            document.push(heading);
+            i += 1;
+        } else if let Ok(quote) = gather!(">", parse_quote) {
+            document.push(quote);
+        } else if let Ok(list) = gather!("- ", parse_list) {
+            document.push(list);
+        } else if let Ok(table) = gather!("|", parse_table) {
+            document.push(table);
+        } else if line.starts_with("```") {
+            let language = line[3..].to_string();
+            // Number of whitespace characters in front of the opening fence.
+            let fence_indent = raw_lines[i].chars().count() - line.chars().count();
+            let mut code_lines = Vec::new();
+            i += 1;
+            while let Some(&raw) = raw_lines.get(i) {
+                if raw.trim() == "```" {
+                    break;
+                }
+                // Remove at most the fence's own indentation; anything deeper
+                // is part of the code and must be preserved.
+                let skipped: usize = raw
+                    .chars()
+                    .take(fence_indent)
+                    .take_while(|c| c.is_whitespace())
+                    .map(char::len_utf8)
+                    .sum();
+                code_lines.push(raw[skipped..].to_string());
+                i += 1;
+            }
+            document.push(Block::Code(language, code_lines));
+            // Step over the closing fence. If the fence is missing, `i` is
+            // already past the last line and the loop ends on the next pass.
+            i += 1;
+        } else {
+            document.push(parse_paragraph(line));
+            i += 1;
+        };
+    }
+}
+
+/// Returns the text found between the `start` and `end` delimiters at the
+/// beginning of `chars`.
+///
+/// Returns `None` if `chars` does not begin with `start`, or if no acceptable
+/// occurrence of `end` follows it. The captured text must contain at least
+/// one character, must not begin with a space and must not end with a space.
+/// If `start` and `end` together consist only of repetitions of one single
+/// character, the captured text must contain at least one character other
+/// than that one.
+fn capture(chars: &[char], start: &str, end: &str) -> Option<String> {
+    // `Some(c)` when `c` is the only character used by both delimiters
+    let single_char_in_pattern = start
+        .chars()
+        .next()
+        .filter(|&c| start.chars().chain(end.chars()).all(|e| e == c));
+
+    /// Returns true if `chars` begins with every character of `pattern`.
+    fn starts_with_pattern(chars: &[char], pattern: &str) -> bool {
+        let mut remaining = chars.iter();
+        pattern.chars().all(|c| remaining.next() == Some(&c))
+    }
+    let is_space = |i: usize| chars.get(i) == Some(&' ');
+
+    if !starts_with_pattern(chars, start) {
+        return None;
+    }
+    // `chars` is indexed by character, so the delimiter has to be measured in
+    // characters too; its byte length is wrong for non-ASCII delimiters.
+    let text_start = start.chars().count();
+    if is_space(text_start) {
+        return None;
+    }
+    // Try every position after the first content character as the place
+    // where the closing delimiter begins.
+    for i in text_start + 1..chars.len() {
+        if !starts_with_pattern(&chars[i..], end) || is_space(i - 1) {
+            continue;
+        }
+        let text_content: String = chars[text_start..i].iter().collect();
+        match single_char_in_pattern {
+            // The content so far is made of delimiter characters only, so
+            // this is not the real closing delimiter yet; keep looking.
+            Some(c) if text_content.chars().all(|e| e == c) => continue,
+            _ => return Some(text_content),
+        }
+    }
+    None
+}
+
+/// Parses the inline markup of a single line into a sequence of text spans.
+///
+/// Recognises bold-italic, bold, italic and code spans, wiki-style links
+/// (`[[target]]`) and long-form links (`[label](target)`). Every character
+/// that does not begin one of these is accumulated into a `Text::Normal`
+/// span.
+fn parse_text(line: &str) -> Line {
+    let mut block_content: Line = Vec::new();
+    let chars: Vec<char> = line.chars().collect();
+    let mut normal = String::new();
+    // Cursor into `chars`. It counts characters, never bytes.
+    let mut i = 0;
+
+    // Push the pending run of normal text, if any, onto the output.
+    macro_rules! commit_normal {
+        () => {
+            if !normal.is_empty() {
+                let normal_text = Text::Normal(std::mem::take(&mut normal));
+                block_content.push(normal_text);
+            }
+        };
+    }
+    // Longer delimiters come first so that `***` is not read as `*`.
+    let patterns: [(&str, &str, fn(String) -> Text); 7] = [
+        ("***", "***", Text::BoldItalic),
+        ("**", "**", Text::Bold),
+        ("*", "*", Text::Italic),
+        ("___", "___", Text::BoldItalic),
+        ("__", "__", Text::Bold),
+        ("_", "_", Text::Italic),
+        ("`", "`", Text::Code),
+    ];
+
+    'outer: while i < chars.len() {
+        // Check if a simple, non-Normal text type starts at this character
+        for (start, end, text_type) in patterns.iter() {
+            if let Some(string) = capture(&chars[i..], start, end) {
+                // Advance by characters: the byte length of the captured
+                // text would overshoot for non-ASCII content.
+                i += start.chars().count() + string.chars().count() + end.chars().count();
+                commit_normal!();
+                block_content.push(text_type(string));
+                continue 'outer;
+            }
+        }
+
+        // Check if a wiki-style hyperlink starts at this character
+        if let Some(content) = capture(&chars[i..], "[[", "]]") {
+            i += content.chars().count() + 4;
+            commit_normal!();
+            block_content.push(Text::WikiLink(content));
+            continue 'outer;
+        }
+
+        // Check if a long-form hyperlink starts at this character
+        if let Some(label) = capture(&chars[i..], "[", "]") {
+            // Index of the character right after the closing `]`
+            let target_start = i + label.chars().count() + 2;
+            if let Some(target) = capture(&chars[target_start..], "(", ")") {
+                i = target_start + target.chars().count() + 2;
+                commit_normal!();
+                block_content.push(Text::Hyperlink(Hyperlink { label, target }));
+                // Resume scanning right after the link, so that markup
+                // directly following it is still recognised.
+                continue 'outer;
+            }
+        }
+
+        // No new text type started here, this must just be normal text
+        normal.push(chars[i]);
+        i += 1;
+    }
+    commit_normal!();
+    block_content
+}
+
+/// Parses a line as a first, second or third level heading.
+///
+/// The line must begin with one to three `#` characters followed by a space
+/// and some content. Returns `Err(())` for any other line.
+fn parse_heading(line: &str) -> Result<Block, ()> {
+    // The markers are mutually exclusive, so at most one entry can match.
+    let levels: [(&str, fn(Line) -> Block); 3] = [
+        ("# ", Block::Heading1),
+        ("## ", Block::Heading2),
+        ("### ", Block::Heading3),
+    ];
+    let matched = levels
+        .iter()
+        .find_map(|&(marker, heading_type)| Some((heading_type, line.strip_prefix(marker)?)));
+    match matched {
+        Some((heading_type, content)) if !content.is_empty() => {
+            Ok(heading_type(parse_text(content)))
+        }
+        _ => Err(()),
+    }
+}
+
+/// Builds a quote block from a slice of lines that each begin with '>'.
+///
+/// The '>' marker and at most one space following it are removed from every
+/// line before the rest is parsed as text. A line holding only the marker
+/// becomes an empty line of the quote. Always returns `Ok`.
+fn parse_quote(lines: &[&str]) -> Result<Block, ()> {
+    let content = lines
+        .iter()
+        .map(|&line| {
+            // Strip by prefix rather than by a fixed byte offset, so that
+            // `>text` keeps its first character and a multi-byte character
+            // directly after the marker cannot cause a slicing panic.
+            let text = line.strip_prefix('>').unwrap_or(line);
+            let text = text.strip_prefix(' ').unwrap_or(text);
+            if text.is_empty() {
+                Vec::new()
+            } else {
+                parse_text(text)
+            }
+        })
+        .collect();
+    Ok(Block::Quote(content))
+}
+
+/// Builds a list block from a slice of lines that each begin with "- ".
+///
+/// The first two bytes of every line are skipped and the remainder is parsed
+/// as text, giving one list item per line. Always returns `Ok`.
+fn parse_list(lines: &[&str]) -> Result<Block, ()> {
+    let items = lines.iter().map(|line| {
+        let item_text = &line[2..];
+        parse_text(item_text)
+    });
+    Ok(Block::List(items.collect()))
+}
+
+/// Parses one line of text for inline markup and wraps it in a paragraph block.
+fn parse_paragraph(line: &str) -> Block {
+    let content = parse_text(line);
+    Block::Paragraph(content)
+}
+
+/// Builds a table block from a slice of lines that each begin with '|'.
+///
+/// The first line holds the column names, the second line holds one divider
+/// cell per column (handed to `Alignment::from_str`), and every further line
+/// is a row. Returns `Err(())` if there are fewer than three lines, if any
+/// line cannot be split into cells, if a divider is rejected by
+/// `Alignment::from_str`, or if the divider line or any row has a different
+/// number of cells than the header.
+fn parse_table(lines: &[&str]) -> Result<Block, ()> {
+    // A table needs a header, a divider line and at least one row
+    let (header, divider_line, body) = match lines {
+        [header, divider_line, body @ ..] if !body.is_empty() => (*header, *divider_line, body),
+        _ => return Err(()),
+    };
+    let names = split_columns(header)?;
+    let dividers = split_columns(divider_line)?;
+    if names.len() != dividers.len() {
+        return Err(());
+    }
+    let columns = names
+        .iter()
+        .zip(dividers.iter())
+        .map(|(name, divider)| -> Result<Column, ()> {
+            let alignment = Alignment::from_str(divider)?;
+            Ok(Column {
+                name: parse_text(name),
+                alignment,
+            })
+        })
+        .collect::<Result<Vec<_>, ()>>()?;
+
+    let mut rows = Vec::new();
+    for row in body {
+        let cells = split_columns(row)?;
+        // Every row must supply exactly one cell per column
+        if cells.len() != columns.len() {
+            return Err(());
+        }
+        let parsed_cells: Vec<Line> = cells.iter().map(|cell| parse_text(cell)).collect();
+        rows.push(parsed_cells);
+    }
+    Ok(Block::Table(Table { columns, rows }))
+}
+
+/// Splits one table line into its trimmed cell strings.
+///
+/// Only the text between the first and the last '|' of the line is used; it
+/// is split at every '|' in between and each piece is trimmed. Returns
+/// `Err(())` if the line contains fewer than two '|' characters.
+fn split_columns(line: &str) -> Result<Vec<String>, ()> {
+    let first_pipe = line.find('|').ok_or(())?;
+    let last_pipe = line.rfind('|').ok_or(())?;
+    // A single '|' is both the first and the last one: there is nothing
+    // enclosed between two delimiters.
+    if first_pipe == last_pipe {
+        return Err(());
+    }
+    // '|' is one byte long, so both bounds fall on character boundaries
+    let inner = &line[first_pipe + 1..last_pipe];
+    let cells = inner.split('|').map(|cell| cell.trim().to_string());
+    Ok(cells.collect())
+}