diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/block.rs | 19 | ||||
-rw-r--r-- | src/document.rs | 172 | ||||
-rw-r--r-- | src/elements.rs | 9 | ||||
-rw-r--r-- | src/elements/block_element.rs | 53 | ||||
-rw-r--r-- | src/elements/line.rs | 117 | ||||
-rw-r--r-- | src/elements/line_element.rs | 61 | ||||
-rw-r--r-- | src/elements/table.rs | 96 | ||||
-rw-r--r-- | src/lib.rs | 166 | ||||
-rw-r--r-- | src/line.rs | 123 | ||||
-rw-r--r-- | src/table.rs | 85 | ||||
-rw-r--r-- | src/token.rs | 24 |
11 files changed, 405 insertions, 520 deletions
diff --git a/src/block.rs b/src/block.rs new file mode 100644 index 0000000..aa56135 --- /dev/null +++ b/src/block.rs @@ -0,0 +1,19 @@ +use crate::*; + +/// Heading level. +pub enum Level { + Heading1, + Heading2, + Heading3, +} + +pub enum Block { + Heading { level: Level, line: Line }, + Paragraph(Line), + List(Vec<Line>), + Note(Vec<Line>), + Table(Table), + Break, + Embedded { label: String, path: String }, + Fragment { language: String, content: String }, +} diff --git a/src/document.rs b/src/document.rs deleted file mode 100644 index fbfea00..0000000 --- a/src/document.rs +++ /dev/null @@ -1,172 +0,0 @@ -use crate::*; - -pub struct MarkdownDocument { - pub block_elements: Vec<BlockElement>, -} - -impl MarkdownDocument { - pub fn from_str(raw_markdown: &str) -> Self { - let mut block_elements = Vec::new(); - let mut current_multiline_block = None; - // Chain a blank line to the end to ensure that the final multi-line block is flushed. - let lines = raw_markdown.lines().chain(std::iter::once("")); - - for incoming_line in lines { - let incoming_line_untrimmed = incoming_line; - let incoming_line = incoming_line.trim(); - // Handle an in-progress subdocument block. - if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block { - if incoming_line == "```" { - let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") }; - block_elements.push(BlockElement::Subdocument(subdocument)); - current_multiline_block = None; - } else { - lines.push(incoming_line_untrimmed); - current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines)); - } - continue; - } - - // Parse the incoming line. - let incoming_line_block = { - if let Some(("", tail)) = incoming_line.split_once("# ") { - BlockLine::DocumentHeading(tail.trim()) - } else if let Some(("", tail)) = incoming_line.split_once("## ") { - BlockLine::SectionHeading(tail.trim()) - } else if let Some(("", tail)) = incoming_line.split_once("### ") { - BlockLine::ArticleHeading(tail.trim()) - } else if let Some(("", tail)) = incoming_line.split_once("- ") { - BlockLine::List(tail.trim()) - } else if let Some(("", tail)) = incoming_line.split_once("> ") { - BlockLine::Aside(tail.trim()) - } else if incoming_line == ">" { - BlockLine::Aside("") - } else if let Some(("", tail)) = incoming_line.split_once("```") { - BlockLine::SubdocumentHeader(tail.trim()) - } else if incoming_line.starts_with("|") { - BlockLine::Table(incoming_line) - } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') { - BlockLine::Break - } else if incoming_line.is_empty() { - BlockLine::BlankLine - } else { - BlockLine::Paragraph(incoming_line) } - }; - - // If the incoming line is of the same type as the current multiline - // block, append it to the end of that current block and continue. - if let Some(ref mut current_block) = current_multiline_block { - match (&incoming_line_block, current_block) { - (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => { - lines.push(line); continue; } - (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => { - lines.push(line); continue; } - (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => { - lines.push(line); continue; } - _ => (), - }; - } - - // Otherwise, commit the current block, then handle the incoming line. - if let Some(current_block) = current_multiline_block { - match current_block { - MultiLineBlock::List(raw_lines) => { - let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); - block_elements.push(BlockElement::List(lines)); } - MultiLineBlock::Aside(raw_lines) => { - let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); - block_elements.push(BlockElement::Aside(lines)); } - MultiLineBlock::Table(raw_lines) => { - if let Some(table) = Table::try_from_strs(&raw_lines) { - block_elements.push(BlockElement::Table(table)) } - else { for raw_line in raw_lines { - block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}} - MultiLineBlock::Subdocument(..) => unreachable!(), - } - current_multiline_block = None; - } - - // Handle the incoming line. - match incoming_line_block { - BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))), - BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))), - BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))), - BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])), - BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])), - BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])), - BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())), - BlockLine::Paragraph(s) => { - if let Some(embedded_file) = parse_embedded_file(&s) { - block_elements.push(BlockElement::EmbeddedFile(embedded_file)) - } else if let Some(math) = parse_math_block(&s) { - block_elements.push(BlockElement::Math(math)) - } else { - block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) } - }, - BlockLine::Break => block_elements.push(BlockElement::Break), - BlockLine::BlankLine => (), - } - } - - Self { block_elements } - } -} - -fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> { - let chars: Vec<char> = text.trim().chars().collect(); - let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); - - if starts_with(0, " { break }; - label_end += 1; } - let label: String = chars[label_start..label_end].iter().collect(); - if label.is_empty() || !is_contentful(&label, &['[', ']']) { - return None } - // Try to parse the target. - let target_start = label_end + 2; - let target_end = chars.len() - 1; - if let Some(')') = chars.get(target_end) { - let target: String = chars[target_start..target_end].iter().collect(); - if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) { - return None } - return Some(EmbeddedFile { label, target }) - } - } - return None; -} - -fn parse_math_block(text: &str) -> Option<String> { - if let Some(("", trailing)) = text.split_once("$$") { - if let Some((math, "")) = trailing.rsplit_once("$$") { - return Some(math.trim().to_string()); - } - } - return None; -} - -/// When parsing, is a single line for a one-line block element. -enum BlockLine<'a> { - DocumentHeading(&'a str), - SectionHeading(&'a str), - ArticleHeading(&'a str), - Paragraph(&'a str), - List(&'a str), - Aside(&'a str), - Table(&'a str), - SubdocumentHeader(&'a str), - Break, - BlankLine, -} - -/// When parsing, is the gathered string lines of a multiline block element. -enum MultiLineBlock<'a> { - List(Vec<&'a str>), - Aside(Vec<&'a str>), - Table(Vec<&'a str>), - Subdocument(&'a str, Vec<&'a str>), -} - diff --git a/src/elements.rs b/src/elements.rs deleted file mode 100644 index a4a9783..0000000 --- a/src/elements.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod block_element; -mod line; -mod line_element; -mod table; - -pub use block_element::*; -pub use line::*; -pub use line_element::*; -pub use table::*; diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs deleted file mode 100644 index cdb7a71..0000000 --- a/src/elements/block_element.rs +++ /dev/null @@ -1,53 +0,0 @@ -use crate::*; - -pub enum BlockElement { - /// A first-level heading. - DocumentHeading(Line), - /// A second-level heading. - SectionHeading(Line), - /// A third-level heading. - ArticleHeading(Line), - Paragraph(Line), - /// A bullet-list. - List(Vec<Line>), - /// A paragraph separate from the main text. - Aside(Vec<Line>), - Table(Table), - EmbeddedFile(EmbeddedFile), - /// A non-markdown sub-document within this document. - Subdocument(Subdocument), - /// A KaTeX block - Math(String), - Break, -} - -pub struct EmbeddedFile { - pub label: String, - pub target: String, -} - -pub struct Subdocument { - pub language: String, - pub content: String, -} - -impl std::fmt::Debug for BlockElement { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let string = match self { - BlockElement::DocumentHeading(line) => format!("DocumentHeading ('{line}')"), - BlockElement::SectionHeading(line) => format!("SectionHeading ('{line}')"), - BlockElement::ArticleHeading(line) => format!("ArticleHeading ('{line}')"), - BlockElement::Paragraph(line) => format!("Paragraph ('{line}')"), - BlockElement::List(lines) => format!("List (len: {})", lines.len()), - BlockElement::Aside(_) => format!("Aside"), - BlockElement::Table(_) => format!("Table"), - BlockElement::EmbeddedFile(EmbeddedFile {label, target}) => - format!("EmbeddedFile (label:'{label}', target:'{target}')"), - BlockElement::Subdocument(Subdocument {language, ..}) => - format!("Subdocument ('{language}')"), - BlockElement::Math(string) => format!("Math ('{string}')"), - BlockElement::Break => format!("Break"), - }; - f.write_str(&string) - } -} diff --git a/src/elements/line.rs b/src/elements/line.rs deleted file mode 100644 index d5c078e..0000000 --- a/src/elements/line.rs +++ /dev/null @@ -1,117 +0,0 @@ -use crate::*; - -macro_rules! opt { - ($v:expr) => {|s| Some($v(s)) }; -} - -pub struct Line { - pub elements: Vec<LineElement>, -} - -impl Line { - pub fn from_str(raw_string: &str) -> Self { - fn unlabeled_extern_link(target: String) -> Option<LineElement> { - target.contains("/").then( || - LineElement::ExternalLink(ExternalLink { target, label:String::new() }) - ) - } - fn labelled_extern_link(s: String) -> Option<LineElement> { - let (label, target) = match s.split_once("](") { - Some((l, t)) => (l.to_string(), t.to_string()), - None => return None }; - if label.contains("]") || target.contains("]") { return None } - Some(LineElement::ExternalLink(ExternalLink { label, target })) } - const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [ - ( opt!(LineElement::Bold), "**", "**", "*" ), - ( opt!(LineElement::Italic), "_", "_", "_" ), - ( opt!(LineElement::Monospace), "`", "`", "`" ), - ( opt!(LineElement::Math), "$", "$", "$" ), - ( opt!(LineElement::InternalLink), "[[", "]]", "[]" ), - ( labelled_extern_link, "[", ")", "[]()" ), - ( unlabeled_extern_link, "[", "]", "[]" ), - ]; - let chars: Vec<char> = raw_string.chars().collect(); - let mut elements = Vec::new(); - let mut cached_chars = String::new(); - let mut i = 0; - - let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); - - 'outer: while let Some(c) = chars.get(i) { - // Only check for opening delimiters that directly follow a whitespace character. - let follows_whitespace = match chars.get(i.wrapping_sub(1)) { - Some(w) => is_whitespace(w), - None => true, - }; - if follows_whitespace { - // Try to parse an opening delimiter. - for (variant, start_delim, end_delim, delim_chars) in DELIMITERS { - // Try to match an opening delimiter with a terminating delimiter. - if starts_with(i, start_delim) { - let s_end = i + start_delim.chars().count(); - let mut e_start = s_end; - let mut e_end = e_start + end_delim.chars().count(); - while e_end <= chars.len() { - e_start += 1; e_end += 1; - let end_is_whitespace = - if let Some(end_char) = chars.get(e_end) { - is_whitespace(end_char) - } else { - e_end == chars.len() - }; - // If the terminating delimiter is found, store the normal - // text and the styled text, and continue to the next character. - if end_is_whitespace && starts_with(e_start, end_delim) { - // Check that there is content within the styled string. - let styled_string: String = chars[s_end..e_start].iter().collect(); - let non_content_chars: Vec<_> = delim_chars.chars().collect(); - if !is_contentful(&styled_string, &non_content_chars) { continue } - if styled_string.len() != styled_string.trim().len() { continue } - let line_element = match variant(styled_string) { - Some(e) => e, - None => continue, - }; - // Commit the normal and styled strings. - if !cached_chars.is_empty() { - let normal_string = std::mem::take(&mut cached_chars); - elements.push(LineElement::Normal(normal_string)); } - elements.push(line_element); - i = e_end; - continue 'outer; - } - } - } - } - } - cached_chars.push(*c); i += 1; - } - if !cached_chars.is_empty() { - let normal_string = std::mem::take(&mut cached_chars); - elements.push(LineElement::Normal(normal_string)); } - Self { elements } - } - - /// Return only the character content, with none of the styling information. - pub fn as_plain_text(&self) -> String { - let mut string = String::new(); - for line_element in &self.elements { - string.push_str(line_element.as_plain_text()) } - return string; - } -} - -impl std::fmt::Display for Line { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - for line_element in &self.elements { - write!(f, "{line_element}")?; } - Ok(()) - } -} - -impl std::fmt::Debug for Line { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - for line_element in &self.elements { - write!(f, "{line_element:?}\n")?; } - Ok(()) - } -} diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs deleted file mode 100644 index cc47b4b..0000000 --- a/src/elements/line_element.rs +++ /dev/null @@ -1,61 +0,0 @@ -pub enum LineElement { - Normal(String), - Bold(String), - Italic(String), - Monospace(String), - Math(String), - InternalLink(String), - ExternalLink(ExternalLink), -} - -impl LineElement { - /// Return only the character content, with none of the styling information. - pub fn as_plain_text(&self) -> &str { - match self { - LineElement::Normal(text) => text, - LineElement::Bold(text) => text, - LineElement::Italic(text) => text, - LineElement::Monospace(text) => text, - LineElement::Math(text) => text, - LineElement::InternalLink(label) => label, - LineElement::ExternalLink(ExternalLink { label, ..}) => label, - } - } -} - -pub struct ExternalLink { - pub label: String, - pub target: String, -} - -impl std::fmt::Display for LineElement { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let string = match self { - LineElement::Normal(text) => format!("{text}"), - LineElement::Bold(text) => format!("**{text}**"), - LineElement::Italic(text) => format!("_{text}_"), - LineElement::Monospace(text) => format!("`{text}`"), - LineElement::Math(text) => format!("${text}$"), - LineElement::InternalLink(text) => format!("[[{text}]]"), - LineElement::ExternalLink(ExternalLink { label, target }) => { - format!("[{label}]({target})") } - }; - f.write_str(&string) - } -} - -impl std::fmt::Debug for LineElement { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let string = match self { - LineElement::Normal(text) => format!("Normal ('{text}')"), - LineElement::Bold(text) => format!("Bold ('{text}')"), - LineElement::Italic(text) => format!("Italic ('{text}')"), - LineElement::Monospace(text) => format!("Monospace ('{text}')"), - LineElement::Math(text) => format!("Math ('{text}')"), - LineElement::InternalLink(text) => format!("InternalLink ('{text}')"), - LineElement::ExternalLink(ExternalLink { label, target }) => { - format!("ExternalLink (label:'{label}', target:'{target}')") } - }; - f.write_str(&string) - } -} diff --git a/src/elements/table.rs b/src/elements/table.rs deleted file mode 100644 index 5b354c1..0000000 --- a/src/elements/table.rs +++ /dev/null @@ -1,96 +0,0 @@ -use crate::*; - -pub struct Table { - /// The column definitions for this table. - pub column_definitions: Vec<ColumnDefinition>, - /// The content contained in the rows of the table. An individual [Line] is - /// the contents of a single table cell, a group of cells forms a table row, - /// a group of rows forms a vertical section of the table, with a separator - /// intending to be drawn between each section, and a group of sections forms - /// the table itself. - /// Each row in the table is guaranteed to have the same number of columns - /// as the table header. - pub sections: Vec<Vec<Vec<Line>>>, -} - -impl Table { - pub fn try_from_strs(lines: &[&str]) -> Option<Self> { - let mut lines = lines.into_iter(); - let column_definitions: Vec<ColumnDefinition> = { - let names = split_trimmed_columns(lines.next()?)? - .into_iter().map(|l| Line::from_str(l)); - let alignments = parse_alignments(lines.next()?)?; - if names.len() != alignments.len() { return None } - std::iter::zip(names, alignments).map( - |(name, alignment)| ColumnDefinition { name, alignment } ).collect() - }; - - let mut sections = Vec::new(); - let mut current_section = Vec::new(); - - for line in lines { - if let Some(alignments) = parse_alignments(line) { - if alignments.len() != column_definitions.len() { return None } - sections.push(std::mem::take(&mut current_section)) - } else { - let row: Vec<Line> = split_trimmed_columns(line)? - .into_iter().map(|c| Line::from_str(c)).collect(); - if row.len() != column_definitions.len() { return None } - current_section.push(row); - } - } - - if !current_section.is_empty() { - sections.push(std::mem::take(&mut current_section)); } - Some( Self { column_definitions, sections }) - } -} - -pub struct ColumnDefinition { - /// The name of this column, shown in the header row of the table. - pub name: Line, - /// The alignment of the content in this column. - pub alignment: ColumnAlignment, -} - -pub enum ColumnAlignment { - Left, - Center, - Right, -} - -impl ColumnAlignment { - pub fn from_str(cell: &str) -> Option<Self> { - if !cell.chars().all(|c| c == ':' || c == '-') { - return None } - match (cell.starts_with(':'), cell.ends_with(':')) { - (false, false) => Some(ColumnAlignment::Left), - (false, true) => Some(ColumnAlignment::Right), - (true, false) => Some(ColumnAlignment::Left), - (true, true) => Some(ColumnAlignment::Center), - } - } -} - - -fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> { - Some(split_columns(line)?.into_iter().map(|s| s.trim()).collect()) -} - -fn split_columns(line: &str) -> Option<Vec<&str>> { - if let Some(("", tail)) = line.split_once('|') { - if let Some((head, "")) = tail.rsplit_once('|') { - return Some(head.split('|').collect()); - } - } - return None; -} - -fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> { - let mut alignments = Vec::new(); - for cell in split_columns(line)? { - alignments.push(ColumnAlignment::from_str(cell)?); - } - Some(alignments) -} - @@ -1,13 +1,155 @@ -mod document; -mod elements; - -pub use document::*; -pub use elements::*; - -pub(crate) fn is_whitespace(c: &char) -> bool { - c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) } -pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool { - s.chars().any(|c| !non_content_chars.contains(&c)) - && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false) - && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false) +#![feature(never_type)] + +mod block; pub use block::{Block, Level}; +mod line; pub use line::Line; +mod token; pub use token::Token; +mod table; pub use table::{Table, Column, Alignment}; + +pub struct MarkdownDocument { + pub blocks: Vec<Block>, +} + +impl MarkdownDocument { + pub fn from_str(raw_markdown: &str) -> Self { + let mut blocks = Vec::new(); + let mut current_block = None; + + // Chain a blank line to the end to ensure the final block is flushed. + for line in raw_markdown.lines().chain(std::iter::once("")) { + let line_raw = line; + let line = line.trim(); + + // Handle a fragment block separately, because fragment lines are not prefixed. + if let Some(BlockMultiline::Fragment { language, mut content }) = current_block { + if line == "```" { + let language = language.to_string(); + let content = content.join("\n"); + blocks.push(Block::Fragment { language, content }); + current_block = None; + } else { + content.push(line_raw); + current_block = Some(BlockMultiline::Fragment { language, content }); + } + continue; + } + + // Determine line type from prefix. + let line = { + if let Some(("", tail)) = line.split_once("# ") { + BlockLine::Heading { level: Level::Heading1, line: tail.trim() } + } else if let Some(("", tail)) = line.split_once("## ") { + BlockLine::Heading { level: Level::Heading2, line: tail.trim() } + } else if let Some(("", tail)) = line.split_once("### ") { + BlockLine::Heading { level: Level::Heading3, line: tail.trim() } + } else if let Some(("", tail)) = line.split_once("- ") { + BlockLine::List(tail.trim()) + } else if let Some(("", tail)) = line.split_once("> ") { + BlockLine::Note(tail.trim()) + } else if line == ">" { + BlockLine::Note("") + } else if let Some(("", tail)) = line.split_once("```") { + BlockLine::FragmentHeader(tail.trim()) + } else if line.starts_with("|") { + BlockLine::Table(line) + } else if line.len() >= 3 && line.chars().all(|c| c=='-') { + BlockLine::Break + } else if line.is_empty() { + BlockLine::BlankLine + } else { + BlockLine::Paragraph(line) + } + }; + + // If line has the same type as the current block, append and continue. + if let Some(ref mut block) = current_block { + match (&line, block) { + (BlockLine::List(line), BlockMultiline::List(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Note(line), BlockMultiline::Note(ref mut lines)) => { + lines.push(line); continue; } + (BlockLine::Table(line), BlockMultiline::Table(ref mut lines)) => { + lines.push(line); continue; } + _ => (), + }; + } + + // Otherwise commit the current block before handling the new line. + if let Some(current_block) = std::mem::take(&mut current_block) { + match current_block { + BlockMultiline::List(raw_lines) => { + let lines = raw_lines.into_iter().map(Line::from_str).collect(); + blocks.push(Block::List(lines)); } + BlockMultiline::Note(raw_lines) => { + let lines = raw_lines.into_iter().map(Line::from_str).collect(); + blocks.push(Block::Note(lines)); } + BlockMultiline::Table(raw_lines) => { + if let Some(table) = Table::from_strs(&raw_lines) { + blocks.push(Block::Table(table)) } + else { + for raw_line in raw_lines { + blocks.push(Block::Paragraph(Line::from_str(&raw_line))) + } + }} + BlockMultiline::Fragment {..} => unreachable!(), + } + } + + // Handle the new line. + match line { + BlockLine::List(line) => current_block = Some( + BlockMultiline::List(vec![line])), + BlockLine::Note(line) => current_block = Some( + BlockMultiline::Note(vec![line])), + BlockLine::Table(line) => current_block = Some( + BlockMultiline::Table(vec![line])), + BlockLine::FragmentHeader(language) => current_block = Some( + BlockMultiline::Fragment { language, content: Vec::new() }), + BlockLine::Heading {level, line} => blocks.push( + Block::Heading { level, line: Line::from_str(&line) }), + BlockLine::Break => blocks.push(Block::Break), + BlockLine::BlankLine => (), + BlockLine::Paragraph(line) => match parse_embedded(&line) { + Some(embedded) => blocks.push(embedded), + None => blocks.push(Block::Paragraph(Line::from_str(&line))), + } + } + } + + Self { blocks } + } +} + + + +enum BlockLine<'a> { + Heading { level: Level, line: &'a str }, + Paragraph(&'a str), + List(&'a str), + Note(&'a str), + Table(&'a str), + FragmentHeader(&'a str), + Break, + BlankLine, +} + +enum BlockMultiline<'a> { + List(Vec<&'a str>), + Note(Vec<&'a str>), + Table(Vec<&'a str>), + Fragment { language: &'a str, content: Vec<&'a str> }, +} + +fn parse_embedded(line: &str) -> Option<Block> { + let line = line.trim(); + if let Some(("", line)) = line.split_once(".collect(); + if parts.len() == 2 { + let label = parts[0].to_string(); + let path = parts[1].to_string(); + return Some(Block::Embedded { label, path }) + } + } + } + return None; } diff --git a/src/line.rs b/src/line.rs new file mode 100644 index 0000000..fce628c --- /dev/null +++ b/src/line.rs @@ -0,0 +1,123 @@ +use crate::*; + +#[derive(Clone)] +pub struct Line { + pub tokens: Vec<Token>, +} + +impl Line { + pub fn from_str(raw_line: &str) -> Self { + let chars: Vec<char> = raw_line.chars().collect(); + let mut tokens = Vec::new(); + let mut normal_chars = String::new(); + let mut i = 0; + + // Compare chars from i to a delimiter string. + let compare = |i, p:&str| std::iter::zip(&chars[i..], p.chars()) + .all(|(a, b)| *a == b); + + 'find_token: while let Some(c) = chars.get(i) { + let char_follows_whitespace = match chars.get(i.wrapping_sub(1)) { + Some(w) => is_whitespace(w), + None => true, + }; + if char_follows_whitespace { + // Try to parse an opening delimiter. + for (variant, start_delim, end_delim, delim_chars) in DELIMITERS { + let delim_chars: Vec<char> = delim_chars.chars().collect(); + // Try to match an opening delimiter with a terminating delimiter. + if compare(i, start_delim) { + let s_end = i + start_delim.chars().count(); + let mut e_start = s_end; + let mut e_end = e_start + end_delim.chars().count(); + // Scan along chars to find matching end delimiter. + while e_end <= chars.len() { + e_start += 1; e_end += 1; + let followed_by_whitespace = match chars.get(e_end) { + Some(end_char) => is_whitespace(end_char), + None => e_end == chars.len(), + }; + // If end delimiter is found, store the token and continue. + if followed_by_whitespace && compare(e_start, end_delim) { + // Check if captured string contains non-delimiter characters. + let captured: String = chars[s_end..e_start].iter().collect(); + let no_content = !has_content(&captured, &delim_chars); + let air_bubbles = captured.len() != captured.trim().len(); + let token = variant(captured); + if no_content || air_bubbles || token.is_none() { continue } + // Commit the preceding normal token, if any. + if !normal_chars.is_empty() { + let normal = std::mem::take(&mut normal_chars); + tokens.push(Token::Normal(normal)); + } + tokens.push(token.unwrap()); + i = e_end; + continue 'find_token; + } + } + } + } + } + normal_chars.push(*c); + i += 1; + } + + if !normal_chars.is_empty() { + let normal = std::mem::take(&mut normal_chars); + tokens.push(Token::Normal(normal)); + } + Self { tokens } + } +} + + +impl ToString for Line { + fn to_string(&self) -> String { + let mut string = String::new(); + for token in &self.tokens { + string.push_str(token.as_ref()) + } + return string; + } +} + + +fn unlabeled_extern_link(path: String) -> Option<Token> { + Some( Token::ExternalLink { path, label:String::new() } ) +} + +fn labelled_extern_link(s: String) -> Option<Token> { + let (label, path) = match s.split_once("](") { + Some((l, t)) => (l.to_string(), t.to_string()), + None => return None, + }; + if label.contains("]") || path.contains("]") { return None } + Some( Token::ExternalLink { label, path } ) +} + +macro_rules! con { + ($v:expr) => {|s| Some($v(s)) }; +} + +const DELIMITERS: [(fn(String)->Option<Token>, &str, &str, &str); 7] = [ + ( con!(Token::Bold), "**", "**", "*" ), + ( con!(Token::Italic), "_", "_", "_" ), + ( con!(Token::Monospace), "`", "`", "`" ), + ( con!(Token::Math), "$", "$", "$" ), + ( con!(Token::InternalLink), "[[", "]]", "[]" ), + ( labelled_extern_link, "[", ")", "[]()" ), + ( unlabeled_extern_link, "<", ">", "<>" ), +]; + +fn is_whitespace(c: &char) -> bool { + c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) +} + +/// Check that first and last characters of a string are not delimiters. +fn has_content(s: &str, delimiter_chars: &[char]) -> bool { + let not_delim = |c| match c { + Some(c) => !delimiter_chars.contains(&c), + None => false, + }; + not_delim(s.chars().nth(0)) && not_delim(s.chars().last()) +} diff --git a/src/table.rs b/src/table.rs new file mode 100644 index 0000000..071bd1a --- /dev/null +++ b/src/table.rs @@ -0,0 +1,85 @@ +use crate::*; + +pub struct Table { + /// A [Line] is the content of a cell, a group of cells forms a table row, + /// a group of rows forms a separated section of the table, and a group of + /// sections forms the table itself. + /// Each row in the table has the same number of columns as the table header. + pub sections: Vec<Vec<Vec<Line>>>, + pub columns: Vec<Column>, +} + +impl Table { + pub fn from_strs(lines: &[&str]) -> Option<Self> { + let mut lines = lines.into_iter(); + let columns: Vec<Column> = { + let names = split_cells(lines.next()?)?; + let alignments = parse_alignments(lines.next()?)?; + if names.len() != alignments.len() { return None } + let make_column = |(n, a)| Column { name: n, alignment: a }; + std::iter::zip(names, alignments).map(make_column).collect() + }; + let mut sections = Vec::new(); + let mut rows = Vec::new(); + + for line in lines { + if let Some(alignments) = parse_alignments(line) { + if alignments.len() != columns.len() { return None } + sections.push(std::mem::take(&mut rows)) + } else { + let row: Vec<Line> = split_cells(line)?; + if row.len() != columns.len() { return None } + rows.push(row); + } + } + if !rows.is_empty() { + sections.push(std::mem::take(&mut rows)); + } + return Some( Self { columns, sections } ); + } +} + +pub struct Column { + pub name: Line, + pub alignment: Alignment, +} + +pub enum Alignment { + Left, + Center, + Right, +} + +impl Alignment { + pub fn from_str(cell: &str) -> Option<Self> { + if !cell.chars().all(|c| c == ':' || c == '-') { + return None } + match (cell.starts_with(':'), cell.ends_with(':')) { + (false, false) => Some(Alignment::Left), + (false, true ) => Some(Alignment::Right), + (true, false) => Some(Alignment::Left), + (true, true ) => Some(Alignment::Center), + } + } +} + +fn split_columns(line: &str) -> Option<Vec<&str>> { + if let Some(("", tail)) = line.split_once('|') { + if let Some((head, "")) = tail.rsplit_once('|') { + return Some(head.split('|').map(str::trim).collect()); + } + } + return None; +} + +fn split_cells(line: &str) -> Option<Vec<Line>> { + Some(split_columns(line)?.into_iter().map(Line::from_str).collect()) +} + +fn parse_alignments(line: &str) -> Option<Vec<Alignment>> { + let mut alignments = Vec::new(); + for cell in split_columns(line)? { + alignments.push(Alignment::from_str(cell)?); + } + Some(alignments) +} diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 0000000..c2b1179 --- /dev/null +++ b/src/token.rs @@ -0,0 +1,24 @@ +#[derive(Clone)] +pub enum Token { + Normal(String), + Bold(String), + Italic(String), + Monospace(String), + Math(String), + InternalLink(String), + ExternalLink { label: String, path: String }, +} + +impl AsRef<str> for Token { + fn as_ref(&self) -> &str { + match self { + Token::Normal(text) => text, + Token::Bold(text) => text, + Token::Italic(text) => text, + Token::Monospace(text) => text, + Token::Math(text) => text, + Token::InternalLink(label) => label, + Token::ExternalLink { label, ..} => label, + } + } +} |