diff options
author | Ben Bridle <bridle.benjamin@gmail.com> | 2024-04-21 13:57:03 +1200 |
---|---|---|
committer | Ben Bridle <bridle.benjamin@gmail.com> | 2024-04-21 13:57:36 +1200 |
commit | 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (patch) | |
tree | e9e52ed33d5ed5a4d68a1161c3db5c2d8c38dd42 | |
parent | 54f5e9fd883e207931baa9c87b6181ca724d6bab (diff) | |
download | markdown-13cb719b87bcef41c4dd398f5a651ddb2b561e0d.zip |
Completely rewrite the library (v1.0.0)
-rw-r--r-- | src/block.rs | 26 | ||||
-rw-r--r-- | src/document.rs | 172 | ||||
-rw-r--r-- | src/elements.rs | 9 | ||||
-rw-r--r-- | src/elements/block_element.rs | 53 | ||||
-rw-r--r-- | src/elements/line.rs | 117 | ||||
-rw-r--r-- | src/elements/line_element.rs | 61 | ||||
-rw-r--r-- | src/elements/table.rs | 96 | ||||
-rw-r--r-- | src/lib.rs | 44 | ||||
-rw-r--r-- | src/main.rs | 37 | ||||
-rw-r--r-- | src/parse.rs | 283 | ||||
-rw-r--r-- | src/parse_heirarchical.rs | 137 | ||||
-rw-r--r-- | src/table.rs | 60 | ||||
-rw-r--r-- | src/text.rs | 30 |
13 files changed, 520 insertions, 605 deletions
diff --git a/src/block.rs b/src/block.rs deleted file mode 100644 index 2a34fcf..0000000 --- a/src/block.rs +++ /dev/null @@ -1,26 +0,0 @@ -use crate::{Line, Table}; - -pub enum Block { - Heading1(Line), - Heading2(Line), - Heading3(Line), - Paragraph(Line), - List(Vec<Line>), - Quote(Vec<Line>), - Code(String, Vec<String>), - Table(Table), -} -impl std::fmt::Debug for Block { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - f.write_str(match self { - Self::Heading1(_) => "Heading1", - Self::Heading2(_) => "Heading2", - Self::Heading3(_) => "Heading3", - Self::Paragraph(_) => "Paragraph", - Self::List(_) => "List", - Self::Quote(_) => "Quote", - Self::Code(_, _) => "Code", - Self::Table(_) => "Table", - }) - } -} diff --git a/src/document.rs b/src/document.rs new file mode 100644 index 0000000..fbfea00 --- /dev/null +++ b/src/document.rs @@ -0,0 +1,172 @@ +use crate::*; + +pub struct MarkdownDocument { + pub block_elements: Vec<BlockElement>, +} + +impl MarkdownDocument { + pub fn from_str(raw_markdown: &str) -> Self { + let mut block_elements = Vec::new(); + let mut current_multiline_block = None; + // Chain a blank line to the end to ensure that the final multi-line block is flushed. + let lines = raw_markdown.lines().chain(std::iter::once("")); + + for incoming_line in lines { + let incoming_line_untrimmed = incoming_line; + let incoming_line = incoming_line.trim(); + // Handle an in-progress subdocument block. + if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block { + if incoming_line == "```" { + let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") }; + block_elements.push(BlockElement::Subdocument(subdocument)); + current_multiline_block = None; + } else { + lines.push(incoming_line_untrimmed); + current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines)); + } + continue; + } + + // Parse the incoming line. 
+ let incoming_line_block = { + if let Some(("", tail)) = incoming_line.split_once("# ") { + BlockLine::DocumentHeading(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("## ") { + BlockLine::SectionHeading(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("### ") { + BlockLine::ArticleHeading(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("- ") { + BlockLine::List(tail.trim()) + } else if let Some(("", tail)) = incoming_line.split_once("> ") { + BlockLine::Aside(tail.trim()) + } else if incoming_line == ">" { + BlockLine::Aside("") + } else if let Some(("", tail)) = incoming_line.split_once("```") { + BlockLine::SubdocumentHeader(tail.trim()) + } else if incoming_line.starts_with("|") { + BlockLine::Table(incoming_line) + } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') { + BlockLine::Break + } else if incoming_line.is_empty() { + BlockLine::BlankLine + } else { + BlockLine::Paragraph(incoming_line) } + }; + + // If the incoming line is of the same type as the current multiline + // block, append it to the end of that current block and continue. + if let Some(ref mut current_block) = current_multiline_block { + match (&incoming_line_block, current_block) { + (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => { + lines.push(line); continue; } + _ => (), + }; + } + + // Otherwise, commit the current block, then handle the incoming line. 
+ if let Some(current_block) = current_multiline_block { + match current_block { + MultiLineBlock::List(raw_lines) => { + let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); + block_elements.push(BlockElement::List(lines)); } + MultiLineBlock::Aside(raw_lines) => { + let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); + block_elements.push(BlockElement::Aside(lines)); } + MultiLineBlock::Table(raw_lines) => { + if let Some(table) = Table::try_from_strs(&raw_lines) { + block_elements.push(BlockElement::Table(table)) } + else { for raw_line in raw_lines { + block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}} + MultiLineBlock::Subdocument(..) => unreachable!(), + } + current_multiline_block = None; + } + + // Handle the incoming line. + match incoming_line_block { + BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))), + BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))), + BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))), + BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])), + BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])), + BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])), + BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())), + BlockLine::Paragraph(s) => { + if let Some(embedded_file) = parse_embedded_file(&s) { + block_elements.push(BlockElement::EmbeddedFile(embedded_file)) + } else if let Some(math) = parse_math_block(&s) { + block_elements.push(BlockElement::Math(math)) + } else { + block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) } + }, + BlockLine::Break => block_elements.push(BlockElement::Break), + BlockLine::BlankLine => (), + } + } + + Self { block_elements } 
+ } +} + +fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> { + let chars: Vec<char> = text.trim().chars().collect(); + let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); + + if starts_with(0, " { break }; + label_end += 1; } + let label: String = chars[label_start..label_end].iter().collect(); + if label.is_empty() || !is_contentful(&label, &['[', ']']) { + return None } + // Try to parse the target. + let target_start = label_end + 2; + let target_end = chars.len() - 1; + if let Some(')') = chars.get(target_end) { + let target: String = chars[target_start..target_end].iter().collect(); + if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) { + return None } + return Some(EmbeddedFile { label, target }) + } + } + return None; +} + +fn parse_math_block(text: &str) -> Option<String> { + if let Some(("", trailing)) = text.split_once("$$") { + if let Some((math, "")) = trailing.rsplit_once("$$") { + return Some(math.trim().to_string()); + } + } + return None; +} + +/// When parsing, is a single line for a one-line block element. +enum BlockLine<'a> { + DocumentHeading(&'a str), + SectionHeading(&'a str), + ArticleHeading(&'a str), + Paragraph(&'a str), + List(&'a str), + Aside(&'a str), + Table(&'a str), + SubdocumentHeader(&'a str), + Break, + BlankLine, +} + +/// When parsing, is the gathered string lines of a multiline block element. 
+enum MultiLineBlock<'a> { + List(Vec<&'a str>), + Aside(Vec<&'a str>), + Table(Vec<&'a str>), + Subdocument(&'a str, Vec<&'a str>), +} + diff --git a/src/elements.rs b/src/elements.rs new file mode 100644 index 0000000..a4a9783 --- /dev/null +++ b/src/elements.rs @@ -0,0 +1,9 @@ +mod block_element; +mod line; +mod line_element; +mod table; + +pub use block_element::*; +pub use line::*; +pub use line_element::*; +pub use table::*; diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs new file mode 100644 index 0000000..cdb7a71 --- /dev/null +++ b/src/elements/block_element.rs @@ -0,0 +1,53 @@ +use crate::*; + +pub enum BlockElement { + /// A first-level heading. + DocumentHeading(Line), + /// A second-level heading. + SectionHeading(Line), + /// A third-level heading. + ArticleHeading(Line), + Paragraph(Line), + /// A bullet-list. + List(Vec<Line>), + /// A paragraph separate from the main text. + Aside(Vec<Line>), + Table(Table), + EmbeddedFile(EmbeddedFile), + /// A non-markdown sub-document within this document. 
+ Subdocument(Subdocument), + /// A KaTeX block + Math(String), + Break, +} + +pub struct EmbeddedFile { + pub label: String, + pub target: String, +} + +pub struct Subdocument { + pub language: String, + pub content: String, +} + +impl std::fmt::Debug for BlockElement { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + BlockElement::DocumentHeading(line) => format!("DocumentHeading ('{line}')"), + BlockElement::SectionHeading(line) => format!("SectionHeading ('{line}')"), + BlockElement::ArticleHeading(line) => format!("ArticleHeading ('{line}')"), + BlockElement::Paragraph(line) => format!("Paragraph ('{line}')"), + BlockElement::List(lines) => format!("List (len: {})", lines.len()), + BlockElement::Aside(_) => format!("Aside"), + BlockElement::Table(_) => format!("Table"), + BlockElement::EmbeddedFile(EmbeddedFile {label, target}) => + format!("EmbeddedFile (label:'{label}', target:'{target}')"), + BlockElement::Subdocument(Subdocument {language, ..}) => + format!("Subdocument ('{language}')"), + BlockElement::Math(string) => format!("Math ('{string}')"), + BlockElement::Break => format!("Break"), + }; + f.write_str(&string) + } +} diff --git a/src/elements/line.rs b/src/elements/line.rs new file mode 100644 index 0000000..d5c078e --- /dev/null +++ b/src/elements/line.rs @@ -0,0 +1,117 @@ +use crate::*; + +macro_rules! 
opt { + ($v:expr) => {|s| Some($v(s)) }; +} + +pub struct Line { + pub elements: Vec<LineElement>, +} + +impl Line { + pub fn from_str(raw_string: &str) -> Self { + fn unlabeled_extern_link(target: String) -> Option<LineElement> { + target.contains("/").then( || + LineElement::ExternalLink(ExternalLink { target, label:String::new() }) + ) + } + fn labelled_extern_link(s: String) -> Option<LineElement> { + let (label, target) = match s.split_once("](") { + Some((l, t)) => (l.to_string(), t.to_string()), + None => return None }; + if label.contains("]") || target.contains("]") { return None } + Some(LineElement::ExternalLink(ExternalLink { label, target })) } + const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [ + ( opt!(LineElement::Bold), "**", "**", "*" ), + ( opt!(LineElement::Italic), "_", "_", "_" ), + ( opt!(LineElement::Monospace), "`", "`", "`" ), + ( opt!(LineElement::Math), "$", "$", "$" ), + ( opt!(LineElement::InternalLink), "[[", "]]", "[]" ), + ( labelled_extern_link, "[", ")", "[]()" ), + ( unlabeled_extern_link, "[", "]", "[]" ), + ]; + let chars: Vec<char> = raw_string.chars().collect(); + let mut elements = Vec::new(); + let mut cached_chars = String::new(); + let mut i = 0; + + let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); + + 'outer: while let Some(c) = chars.get(i) { + // Only check for opening delimiters that directly follow a whitespace character. + let follows_whitespace = match chars.get(i.wrapping_sub(1)) { + Some(w) => is_whitespace(w), + None => true, + }; + if follows_whitespace { + // Try to parse an opening delimiter. + for (variant, start_delim, end_delim, delim_chars) in DELIMITERS { + // Try to match an opening delimiter with a terminating delimiter. 
+ if starts_with(i, start_delim) { + let s_end = i + start_delim.chars().count(); + let mut e_start = s_end; + let mut e_end = e_start + end_delim.chars().count(); + while e_end <= chars.len() { + e_start += 1; e_end += 1; + let end_is_whitespace = + if let Some(end_char) = chars.get(e_end) { + is_whitespace(end_char) + } else { + e_end == chars.len() + }; + // If the terminating delimiter is found, store the normal + // text and the styled text, and continue to the next character. + if end_is_whitespace && starts_with(e_start, end_delim) { + // Check that there is content within the styled string. + let styled_string: String = chars[s_end..e_start].iter().collect(); + let non_content_chars: Vec<_> = delim_chars.chars().collect(); + if !is_contentful(&styled_string, &non_content_chars) { continue } + if styled_string.len() != styled_string.trim().len() { continue } + let line_element = match variant(styled_string) { + Some(e) => e, + None => continue, + }; + // Commit the normal and styled strings. + if !cached_chars.is_empty() { + let normal_string = std::mem::take(&mut cached_chars); + elements.push(LineElement::Normal(normal_string)); } + elements.push(line_element); + i = e_end; + continue 'outer; + } + } + } + } + } + cached_chars.push(*c); i += 1; + } + if !cached_chars.is_empty() { + let normal_string = std::mem::take(&mut cached_chars); + elements.push(LineElement::Normal(normal_string)); } + Self { elements } + } + + /// Return only the character content, with none of the styling information. 
+ pub fn as_plain_text(&self) -> String { + let mut string = String::new(); + for line_element in &self.elements { + string.push_str(line_element.as_plain_text()) } + return string; + } +} + +impl std::fmt::Display for Line { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + for line_element in &self.elements { + write!(f, "{line_element}")?; } + Ok(()) + } +} + +impl std::fmt::Debug for Line { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + for line_element in &self.elements { + write!(f, "{line_element:?}\n")?; } + Ok(()) + } +} diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs new file mode 100644 index 0000000..cc47b4b --- /dev/null +++ b/src/elements/line_element.rs @@ -0,0 +1,61 @@ +pub enum LineElement { + Normal(String), + Bold(String), + Italic(String), + Monospace(String), + Math(String), + InternalLink(String), + ExternalLink(ExternalLink), +} + +impl LineElement { + /// Return only the character content, with none of the styling information. 
+ pub fn as_plain_text(&self) -> &str { + match self { + LineElement::Normal(text) => text, + LineElement::Bold(text) => text, + LineElement::Italic(text) => text, + LineElement::Monospace(text) => text, + LineElement::Math(text) => text, + LineElement::InternalLink(label) => label, + LineElement::ExternalLink(ExternalLink { label, ..}) => label, + } + } +} + +pub struct ExternalLink { + pub label: String, + pub target: String, +} + +impl std::fmt::Display for LineElement { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + LineElement::Normal(text) => format!("{text}"), + LineElement::Bold(text) => format!("**{text}**"), + LineElement::Italic(text) => format!("_{text}_"), + LineElement::Monospace(text) => format!("`{text}`"), + LineElement::Math(text) => format!("${text}$"), + LineElement::InternalLink(text) => format!("[[{text}]]"), + LineElement::ExternalLink(ExternalLink { label, target }) => { + format!("[{label}]({target})") } + }; + f.write_str(&string) + } +} + +impl std::fmt::Debug for LineElement { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + LineElement::Normal(text) => format!("Normal ('{text}')"), + LineElement::Bold(text) => format!("Bold ('{text}')"), + LineElement::Italic(text) => format!("Italic ('{text}')"), + LineElement::Monospace(text) => format!("Monospace ('{text}')"), + LineElement::Math(text) => format!("Math ('{text}')"), + LineElement::InternalLink(text) => format!("InternalLink ('{text}')"), + LineElement::ExternalLink(ExternalLink { label, target }) => { + format!("ExternalLink (label:'{label}', target:'{target}')") } + }; + f.write_str(&string) + } +} diff --git a/src/elements/table.rs b/src/elements/table.rs new file mode 100644 index 0000000..5b354c1 --- /dev/null +++ b/src/elements/table.rs @@ -0,0 +1,96 @@ +use crate::*; + +pub struct Table { + /// The column definitions for this table. 
+ pub column_definitions: Vec<ColumnDefinition>, + /// The content contained in the rows of the table. An individual [Line] is + /// the contents of a single table cell, a group of cells forms a table row, + /// a group of rows forms a vertical section of the table, with a separator + /// intending to be drawn between each section, and a group of sections forms + /// the table itself. + /// Each row in the table is guaranteed to have the same number of columns + /// as the table header. + pub sections: Vec<Vec<Vec<Line>>>, +} + +impl Table { + pub fn try_from_strs(lines: &[&str]) -> Option<Self> { + let mut lines = lines.into_iter(); + let column_definitions: Vec<ColumnDefinition> = { + let names = split_trimmed_columns(lines.next()?)? + .into_iter().map(|l| Line::from_str(l)); + let alignments = parse_alignments(lines.next()?)?; + if names.len() != alignments.len() { return None } + std::iter::zip(names, alignments).map( + |(name, alignment)| ColumnDefinition { name, alignment } ).collect() + }; + + let mut sections = Vec::new(); + let mut current_section = Vec::new(); + + for line in lines { + if let Some(alignments) = parse_alignments(line) { + if alignments.len() != column_definitions.len() { return None } + sections.push(std::mem::take(&mut current_section)) + } else { + let row: Vec<Line> = split_trimmed_columns(line)? + .into_iter().map(|c| Line::from_str(c)).collect(); + if row.len() != column_definitions.len() { return None } + current_section.push(row); + } + } + + if !current_section.is_empty() { + sections.push(std::mem::take(&mut current_section)); } + Some( Self { column_definitions, sections }) + } +} + +pub struct ColumnDefinition { + /// The name of this column, shown in the header row of the table. + pub name: Line, + /// The alignment of the content in this column. 
+ pub alignment: ColumnAlignment, +} + +pub enum ColumnAlignment { + Left, + Center, + Right, +} + +impl ColumnAlignment { + pub fn from_str(cell: &str) -> Option<Self> { + if !cell.chars().all(|c| c == ':' || c == '-') { + return None } + match (cell.starts_with(':'), cell.ends_with(':')) { + (false, false) => Some(ColumnAlignment::Left), + (false, true) => Some(ColumnAlignment::Right), + (true, false) => Some(ColumnAlignment::Left), + (true, true) => Some(ColumnAlignment::Center), + } + } +} + + +fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> { + Some(split_columns(line)?.into_iter().map(|s| s.trim()).collect()) +} + +fn split_columns(line: &str) -> Option<Vec<&str>> { + if let Some(("", tail)) = line.split_once('|') { + if let Some((head, "")) = tail.rsplit_once('|') { + return Some(head.split('|').collect()); + } + } + return None; +} + +fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> { + let mut alignments = Vec::new(); + for cell in split_columns(line)? { + alignments.push(ColumnAlignment::from_str(cell)?); + } + Some(alignments) +} + @@ -1,33 +1,13 @@ -#![feature(iter_zip)] - -mod block; -mod parse; -mod parse_heirarchical; -mod table; -mod text; - -pub use block::Block; -pub use parse::parse; -pub use parse_heirarchical::parse_heirarchical; -pub use table::{Alignment, Column, Table}; -pub use text::{Hyperlink, Text}; - -pub type Line = Vec<Text>; - -pub fn line_to_string(line: &[Text]) -> String { - let mut output = String::new(); - for text in line { - match text { - Text::Normal(content) => output.push_str(&content), - Text::Bold(content) => output.push_str(&format!("**{}**", content)), - Text::Italic(content) => output.push_str(&format!("_{}_", content)), - Text::BoldItalic(content) => output.push_str(&format!("**_{}_**", content)), - Text::Code(content) => output.push_str(&format!("`{}`", content)), - Text::WikiLink(content) => output.push_str(&format!("[[{}]]", content)), - Text::Hyperlink(Hyperlink { label, target }) => { - 
output.push_str(&format!("[{}]({})", label, target)) - } - } - } - return output; +mod document; +mod elements; + +pub use document::*; +pub use elements::*; + +pub(crate) fn is_whitespace(c: &char) -> bool { + c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) } +pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool { + s.chars().any(|c| !non_content_chars.contains(&c)) + && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false) + && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false) } diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 8cc2bfa..0000000 --- a/src/main.rs +++ /dev/null @@ -1,37 +0,0 @@ -use markdown_parser::parse; - -pub fn main() { - // let markdown = std::fs::read_to_string("/home/ben/markdown_test.md").unwrap(); - let markdown = " -This _is_ a **test** paragraph. -[This](http://www.google.com) is a regular full-length link. -|A|this is **middle** col|CC| -|-|:-:|---:| -|A||| -"; - let document = parse(&markdown); - for node in document { - println!("{:?}", node); - if let markdown_parser::Block::Paragraph(blocks) = node { - for block in blocks { - println!(" {:?}", block); - } - } else if let markdown_parser::Block::List(lines) = node { - for line in lines { - println!("-"); - for block in line { - println!(" {:?}", block); - } - } - } else if let markdown_parser::Block::Table(table) = node { - for column in table.columns { - print!(" {:?}: ", column.alignment); - for block in column.name { - print!("{:?} ", block); - } - println!(); - } - println!(); - } - } -} diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index 6e4cdd9..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,283 +0,0 @@ -use crate::*; - -pub fn parse(markdown: &str) -> Vec<Block> { - let mut document = Vec::new(); - let lines: Vec<&str> = markdown.lines().map(|l| l.trim_start()).collect(); - let mut i = 0; - - // Gather all consecutive lines that begin with a given substring and 
run a - // function over them. The function must be `fn(&[&str])->Result<Block,()>`. - macro_rules! gather { - ($prefix:expr, $func:ident) => {{ - let start = i; - for line in &lines[i..] { - if line.starts_with($prefix) { - i += 1; - continue; - } - break; - } - let gathered_lines = &lines[start..i]; - match gathered_lines.is_empty() { - false => $func(gathered_lines), - true => Err(()), - } - }}; - } - - loop { - let line = match lines.get(i) { - Some(line) => line, - None => return document, - }; - if line.is_empty() { - i += 1; - continue; - } else if let Ok(heading) = parse_heading(line) { - document.push(heading); - i += 1; - } else if let Ok(quote) = gather!(">", parse_quote) { - document.push(quote); - } else if let Ok(list) = gather!("- ", parse_list) { - document.push(list); - } else if let Ok(table) = gather!("|", parse_table) { - document.push(table); - } else if line.starts_with("```") { - let language = line[3..].to_string(); - let mut code_lines = Vec::new(); - i += 1; - for line in &lines[i..] { - match line.trim() == "```" { - true => break, - false => { - code_lines.push(line.to_string()); - i += 1 - } - } - } - document.push(Block::Code(language, code_lines)); - i += 1; - } else { - document.push(parse_paragraph(line)); - i += 1; - }; - } -} - -/// Returns the substring from `chars` that is between the `start` and `end` -/// delimiters. Returns None if `chars` does not start with `start`, or if an -/// occurance of `start` and `end` cannot be found within `chars`. There must -/// not be a space after the occurance of `start` or before the occurance of -/// `end`. If `start` and `end` consist of just one or more of the same -/// character, the content must contain at least one other character than -/// that one. 
-fn capture(chars: &[char], start: &str, end: &str) -> Option<String> { - // Determine if `pattern` contains only a single unique character - let single_char_in_pattern = match start.chars().next() { - Some(first_char) => { - let start_and_end = start.chars().chain(end.chars()); - start_and_end.fold(Some(first_char), |accum, elem| match accum { - Some(c) if c == elem => accum, - _ => None, - }) - } - None => None, - }; - let is_space = |i: usize| chars.get(i) == Some(&' '); - fn starts_with_pattern(chars: &[char], pattern: &str) -> bool { - let mut i = 0; - for ref c in pattern.chars() { - match chars.get(i) { - Some(v) if v == c => i += 1, - _ => return false, - } - } - true - } - if !starts_with_pattern(chars, start) { - return None; - } - let text_start = start.len(); - if is_space(text_start) { - return None; - }; - let mut i = text_start; - loop { - i += 1; - if chars.get(i).is_none() { - return None; - } - if starts_with_pattern(&chars[i..], end) { - if is_space(i - 1) { - continue; - } - let text_content: String = chars[text_start..i].iter().collect(); - match single_char_in_pattern { - None => return Some(text_content), - Some(c) => { - if text_content.chars().any(|e| e != c) { - return Some(text_content); - } - } - }; - } - } -} - -fn parse_text(line: &str) -> Line { - let mut block_content: Line = Vec::new(); - let chars: Vec<char> = line.chars().collect(); - let mut normal = String::new(); - let mut i = 0; - - macro_rules! 
commit_normal { - () => { - if !normal.is_empty() { - let normal_text = Text::Normal(std::mem::take(&mut normal)); - block_content.push(normal_text); - } - }; - } - let patterns: [(&str, &str, fn(String) -> Text); 7] = [ - ("***", "***", Text::BoldItalic), - ("**", "**", Text::Bold), - ("*", "*", Text::Italic), - ("___", "___", Text::BoldItalic), - ("__", "__", Text::Bold), - ("_", "_", Text::Italic), - ("`", "`", Text::Code), - ]; - - 'outer: loop { - // Check if a simple, non-Normal text type starts at this character - for (start, end, text_type) in patterns.iter() { - if let Some(string) = capture(&chars[i..], start, end) { - i += string.len() + start.len() + end.len(); - commit_normal!(); - block_content.push(text_type(string)); - continue 'outer; - } - } - // Check if a wiki-style hyperlink starts at this character - if let Some(content) = capture(&chars[i..], "[[", "]]") { - i += content.len() + 4; - commit_normal!(); - block_content.push(Text::WikiLink(content)); - continue 'outer; - } - - // Check if a long-form hyperlink starts at this character - if let Some(label) = capture(&chars[i..], "[", "]") { - let target_len = label.len() + 2; - if let Some(target) = capture(&chars[i + target_len..], "(", ")") { - i += target_len + target.len() + 2; - commit_normal!(); - block_content.push(Text::Hyperlink(Hyperlink { label, target })) - } - } - - // No new text type started here, this must just be normal text - match chars.get(i) { - Some(c) => { - normal.push(*c); - i += 1; - } - None => { - commit_normal!(); - break; - } - } - } - return block_content; -} - -fn parse_heading(line: &str) -> Result<Block, ()> { - let (heading_type, content): (fn(Line) -> Block, &str) = if line.starts_with("# ") { - (Block::Heading1, &line[2..]) - } else if line.starts_with("## ") { - (Block::Heading2, &line[3..]) - } else if line.starts_with("### ") { - (Block::Heading3, &line[4..]) - } else { - return Err(()); - }; - if content.is_empty() { - return Err(()); - }; - 
Ok(heading_type(parse_text(content))) -} - -/// Accepts a slice of lines that begin with '>' -fn parse_quote(lines: &[&str]) -> Result<Block, ()> { - let mut content = Vec::new(); - for line in lines { - content.push(if *line == ">" { - Vec::new() - } else { - parse_text(&line[2..]) - }); - } - Ok(Block::Quote(content)) -} - -fn parse_list(lines: &[&str]) -> Result<Block, ()> { - Ok(Block::List( - lines.iter().map(|l| parse_text(&l[2..])).collect(), - )) -} - -fn parse_paragraph(line: &str) -> Block { - Block::Paragraph(parse_text(line)) -} - -fn parse_table(lines: &[&str]) -> Result<Block, ()> { - if lines.len() < 3 { - return Err(()); - } - let names = split_columns(lines[0])?; - let dividers = split_columns(lines[1])?; - if names.len() != dividers.len() { - return Err(()); - } - let mut columns = Vec::new(); - for (name, divider) in std::iter::zip(names.iter(), dividers.iter()) { - let alignment = Alignment::from_str(divider)?; - columns.push(Column { - name: parse_text(name), - alignment, - }) - } - let mut rows = Vec::new(); - for row in &lines[2..] 
{ - let split_row: Vec<Line> = split_columns(row)?.iter().map(|s| parse_text(s)).collect(); - if split_row.len() != columns.len() { - return Err(()); - } - rows.push(split_row); - } - Ok(Block::Table(Table { columns, rows })) -} - -fn split_columns(line: &str) -> Result<Vec<String>, ()> { - // Find the index after the first |, and before the last | - let mut start = None; - let mut end = None; - for (i, c) in line.chars().enumerate() { - if c == '|' { - if start.is_none() { - start = Some(i + 1); - } else { - end = Some(i); - } - } - } - match (start, end) { - (Some(s), Some(e)) => { - let chars: Vec<char> = line.chars().collect(); - let string: String = chars[s..e].iter().collect(); - let split = string.split('|'); - Ok(split.map(|s| s.trim().to_string()).collect()) - } - _ => Err(()), - } -} diff --git a/src/parse_heirarchical.rs b/src/parse_heirarchical.rs deleted file mode 100644 index 75c2bec..0000000 --- a/src/parse_heirarchical.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::*; - -macro_rules! get_subsection { - ($t:ident) => { - pub fn get_subsection(&self, name: &str) -> Option<&$t> { - for section in &self.sections { - if line_to_string(&section.title) == name { - return Some(section); - } - } - return None; - } - }; -} - -#[derive(Default)] -pub struct Document { - pub preamble: Vec<Block>, - pub sections: Vec<TopLevelSection>, -} -impl Document { - get_subsection! {TopLevelSection} -} - -#[derive(Default)] -pub struct TopLevelSection { - pub title: Line, - pub content: Vec<Block>, - pub sections: Vec<MidLevelSection>, -} -impl TopLevelSection { - get_subsection! {MidLevelSection} -} - -#[derive(Default)] -pub struct MidLevelSection { - pub title: Line, - pub content: Vec<Block>, - pub sections: Vec<LowLevelSection>, -} -impl MidLevelSection { - get_subsection! 
{LowLevelSection} -} - -#[derive(Default)] -pub struct LowLevelSection { - pub title: Line, - pub content: Vec<Block>, -} - -pub fn parse_heirarchical(markdown: &str) -> Result<Document, ()> { - macro_rules! push_section { - ($from:ident => $to:ident) => { - $to.sections.push(std::mem::take(&mut $from)) - }; - } - let mut document = Document::default(); - let mut h1_buffer = TopLevelSection::default(); - let mut h2_buffer = MidLevelSection::default(); - let mut h3_buffer = LowLevelSection::default(); - let mut level = 0; - - let blocks = parse(markdown); - for block in blocks { - match (level, block) { - (0, Block::Heading1(title)) => { - h1_buffer.title = title; - level = 1; - } - (0, Block::Heading2(_)) => return Err(()), - (0, Block::Heading3(_)) => return Err(()), - (0, block) => document.preamble.push(block), - (1, Block::Heading1(title)) => { - push_section!(h1_buffer => document); - h1_buffer.title = title; - } - (1, Block::Heading2(title)) => { - h2_buffer.title = title; - level = 2; - } - (1, Block::Heading3(_)) => return Err(()), - (1, block) => h1_buffer.content.push(block), - (2, Block::Heading1(title)) => { - push_section!(h2_buffer => h1_buffer); - push_section!(h1_buffer => document); - h1_buffer.title = title; - level = 1; - } - (2, Block::Heading2(title)) => { - push_section!(h2_buffer => h1_buffer); - h2_buffer.title = title; - } - (2, Block::Heading3(title)) => { - h3_buffer.title = title; - level = 3; - } - (2, block) => h2_buffer.content.push(block), - (3, Block::Heading1(title)) => { - push_section!(h3_buffer => h2_buffer); - push_section!(h2_buffer => h1_buffer); - push_section!(h1_buffer => document); - h1_buffer.title = title; - level = 1; - } - (3, Block::Heading2(title)) => { - push_section!(h3_buffer => h2_buffer); - push_section!(h2_buffer => h1_buffer); - h2_buffer.title = title; - level = 2; - } - (3, Block::Heading3(title)) => { - push_section!(h3_buffer => h2_buffer); - h3_buffer.title = title; - } - (3, block) => 
h3_buffer.content.push(block), - _ => unreachable!(), - } - } - - // Push all in-progress sections - match level { - 3 => { - push_section!(h3_buffer => h2_buffer); - push_section!(h2_buffer => h1_buffer); - push_section!(h1_buffer => document); - } - 2 => { - push_section!(h2_buffer => h1_buffer); - push_section!(h1_buffer => document); - } - 1 => { - push_section!(h1_buffer => document); - } - _ => (), - } - Ok(document) -} diff --git a/src/table.rs b/src/table.rs deleted file mode 100644 index cc01ffc..0000000 --- a/src/table.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::Line; - -pub struct Table { - pub columns: Vec<Column>, - pub rows: Vec<Vec<Line>>, -} - -pub struct Column { - pub name: Line, - pub alignment: Alignment, -} - -pub enum Alignment { - Left, - Center, - Right, -} -impl Alignment { - pub fn from_str(s: &str) -> Result<Self, ()> { - let mut start = false; - let mut end = false; - for (i, c) in s.chars().enumerate() { - if c == ':' { - if i == 0 { - start = true; - } else if i == s.len() - 1 { - end = true; - } else { - return Err(()); - } - } else if c != '-' { - return Err(()); - } - } - Ok(match (start, end) { - (false, false) => Self::Left, - (true, false) => Self::Left, - (false, true) => Self::Right, - (true, true) => Self::Center, - }) - } -} -impl std::fmt::Display for Alignment { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - f.write_str(match self { - Self::Left => "left", - Self::Center => "center", - Self::Right => "right", - }) - } -} -impl std::fmt::Debug for Alignment { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - f.write_str(match self { - Self::Left => "Left", - Self::Center => "Center", - Self::Right => "Right", - }) - } -} diff --git a/src/text.rs b/src/text.rs deleted file mode 100644 index e9dbdeb..0000000 --- a/src/text.rs +++ /dev/null @@ -1,30 +0,0 @@ -pub enum Text { - Normal(String), - Bold(String), - Italic(String), - BoldItalic(String), - Code(String), - 
WikiLink(String), - Hyperlink(Hyperlink), -} -impl std::fmt::Debug for Text { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let string = match self { - Text::Normal(text) => format!("Normal ('{}')", text), - Text::Bold(text) => format!("Bold ('{}')", text), - Text::Italic(text) => format!("Italic ('{}')", text), - Text::BoldItalic(text) => format!("BoldItalic ('{}')", text), - Text::Code(text) => format!("Code ('{}')", text), - Text::WikiLink(text) => format!("WikiLink ('{}')", text), - Text::Hyperlink(Hyperlink { label, target }) => { - format!("Hyperlink (label:'{}', target:'{}')", label, target) - } - }; - f.write_str(&string) - } -} - -pub struct Hyperlink { - pub label: String, - pub target: String, -} |