From 13cb719b87bcef41c4dd398f5a651ddb2b561e0d Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Sun, 21 Apr 2024 13:57:03 +1200 Subject: Completely rewrite the library --- src/elements/block_element.rs | 53 +++++++++++++++++++ src/elements/line.rs | 117 ++++++++++++++++++++++++++++++++++++++++++ src/elements/line_element.rs | 61 ++++++++++++++++++++++ src/elements/table.rs | 96 ++++++++++++++++++++++++++++++++++ 4 files changed, 327 insertions(+) create mode 100644 src/elements/block_element.rs create mode 100644 src/elements/line.rs create mode 100644 src/elements/line_element.rs create mode 100644 src/elements/table.rs (limited to 'src/elements') diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs new file mode 100644 index 0000000..cdb7a71 --- /dev/null +++ b/src/elements/block_element.rs @@ -0,0 +1,53 @@ +use crate::*; + +pub enum BlockElement { + /// A first-level heading. + DocumentHeading(Line), + /// A second-level heading. + SectionHeading(Line), + /// A third-level heading. + ArticleHeading(Line), + Paragraph(Line), + /// A bullet-list. + List(Vec), + /// A paragraph separate from the main text. + Aside(Vec), + Table(Table), + EmbeddedFile(EmbeddedFile), + /// A non-markdown sub-document within this document. + Subdocument(Subdocument), + /// A KaTeX block + Math(String), + Break, +} + +pub struct EmbeddedFile { + pub label: String, + pub target: String, +} + +pub struct Subdocument { + pub language: String, + pub content: String, +} + +impl std::fmt::Debug for BlockElement { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + BlockElement::DocumentHeading(line) => format!("DocumentHeading ('{line}')"), + BlockElement::SectionHeading(line) => format!("SectionHeading ('{line}')"), + BlockElement::ArticleHeading(line) => format!("ArticleHeading ('{line}')"), + BlockElement::Paragraph(line) => format!("Paragraph ('{line}')"), + BlockElement::List(lines) => format!("List (len: {})", lines.len()), + BlockElement::Aside(_) => format!("Aside"), + BlockElement::Table(_) => format!("Table"), + BlockElement::EmbeddedFile(EmbeddedFile {label, target}) => + format!("EmbeddedFile (label:'{label}', target:'{target}')"), + BlockElement::Subdocument(Subdocument {language, ..}) => + format!("Subdocument ('{language}')"), + BlockElement::Math(string) => format!("Math ('{string}')"), + BlockElement::Break => format!("Break"), + }; + f.write_str(&string) + } +} diff --git a/src/elements/line.rs b/src/elements/line.rs new file mode 100644 index 0000000..d5c078e --- /dev/null +++ b/src/elements/line.rs @@ -0,0 +1,117 @@ +use crate::*; + +macro_rules! opt { + ($v:expr) => {|s| Some($v(s)) }; +} + +pub struct Line { + pub elements: Vec, +} + +impl Line { + pub fn from_str(raw_string: &str) -> Self { + fn unlabeled_extern_link(target: String) -> Option { + target.contains("/").then( || + LineElement::ExternalLink(ExternalLink { target, label:String::new() }) + ) + } + fn labelled_extern_link(s: String) -> Option { + let (label, target) = match s.split_once("](") { + Some((l, t)) => (l.to_string(), t.to_string()), + None => return None }; + if label.contains("]") || target.contains("]") { return None } + Some(LineElement::ExternalLink(ExternalLink { label, target })) } + const DELIMITERS: [(fn(String)->Option, &str, &str, &str); 7] = [ + ( opt!(LineElement::Bold), "**", "**", "*" ), + ( opt!(LineElement::Italic), "_", "_", "_" ), + ( opt!(LineElement::Monospace), "`", "`", "`" ), + ( opt!(LineElement::Math), "$", "$", "$" ), + ( opt!(LineElement::InternalLink), "[[", "]]", "[]" ), + ( labelled_extern_link, "[", ")", "[]()" ), + ( unlabeled_extern_link, "[", "]", "[]" ), + ]; + let chars: Vec = raw_string.chars().collect(); + let mut elements = Vec::new(); + let mut cached_chars = String::new(); + let mut i = 0; + + let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); + + 'outer: while let Some(c) = chars.get(i) { + // Only check for opening delimiters that directly follow a whitespace character. + let follows_whitespace = match chars.get(i.wrapping_sub(1)) { + Some(w) => is_whitespace(w), + None => true, + }; + if follows_whitespace { + // Try to parse an opening delimiter. + for (variant, start_delim, end_delim, delim_chars) in DELIMITERS { + // Try to match an opening delimiter with a terminating delimiter. + if starts_with(i, start_delim) { + let s_end = i + start_delim.chars().count(); + let mut e_start = s_end; + let mut e_end = e_start + end_delim.chars().count(); + while e_end <= chars.len() { + e_start += 1; e_end += 1; + let end_is_whitespace = + if let Some(end_char) = chars.get(e_end) { + is_whitespace(end_char) + } else { + e_end == chars.len() + }; + // If the terminating delimiter is found, store the normal + // text and the styled text, and continue to the next character. + if end_is_whitespace && starts_with(e_start, end_delim) { + // Check that there is content within the styled string. + let styled_string: String = chars[s_end..e_start].iter().collect(); + let non_content_chars: Vec<_> = delim_chars.chars().collect(); + if !is_contentful(&styled_string, &non_content_chars) { continue } + if styled_string.len() != styled_string.trim().len() { continue } + let line_element = match variant(styled_string) { + Some(e) => e, + None => continue, + }; + // Commit the normal and styled strings. + if !cached_chars.is_empty() { + let normal_string = std::mem::take(&mut cached_chars); + elements.push(LineElement::Normal(normal_string)); } + elements.push(line_element); + i = e_end; + continue 'outer; + } + } + } + } + } + cached_chars.push(*c); i += 1; + } + if !cached_chars.is_empty() { + let normal_string = std::mem::take(&mut cached_chars); + elements.push(LineElement::Normal(normal_string)); } + Self { elements } + } + + /// Return only the character content, with none of the styling information. + pub fn as_plain_text(&self) -> String { + let mut string = String::new(); + for line_element in &self.elements { + string.push_str(line_element.as_plain_text()) } + return string; + } +} + +impl std::fmt::Display for Line { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + for line_element in &self.elements { + write!(f, "{line_element}")?; } + Ok(()) + } +} + +impl std::fmt::Debug for Line { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + for line_element in &self.elements { + write!(f, "{line_element:?}\n")?; } + Ok(()) + } +} diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs new file mode 100644 index 0000000..cc47b4b --- /dev/null +++ b/src/elements/line_element.rs @@ -0,0 +1,61 @@ +pub enum LineElement { + Normal(String), + Bold(String), + Italic(String), + Monospace(String), + Math(String), + InternalLink(String), + ExternalLink(ExternalLink), +} + +impl LineElement { + /// Return only the character content, with none of the styling information. + pub fn as_plain_text(&self) -> &str { + match self { + LineElement::Normal(text) => text, + LineElement::Bold(text) => text, + LineElement::Italic(text) => text, + LineElement::Monospace(text) => text, + LineElement::Math(text) => text, + LineElement::InternalLink(label) => label, + LineElement::ExternalLink(ExternalLink { label, ..}) => label, + } + } +} + +pub struct ExternalLink { + pub label: String, + pub target: String, +} + +impl std::fmt::Display for LineElement { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + LineElement::Normal(text) => format!("{text}"), + LineElement::Bold(text) => format!("**{text}**"), + LineElement::Italic(text) => format!("_{text}_"), + LineElement::Monospace(text) => format!("`{text}`"), + LineElement::Math(text) => format!("${text}$"), + LineElement::InternalLink(text) => format!("[[{text}]]"), + LineElement::ExternalLink(ExternalLink { label, target }) => { + format!("[{label}]({target})") } + }; + f.write_str(&string) + } +} + +impl std::fmt::Debug for LineElement { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + LineElement::Normal(text) => format!("Normal ('{text}')"), + LineElement::Bold(text) => format!("Bold ('{text}')"), + LineElement::Italic(text) => format!("Italic ('{text}')"), + LineElement::Monospace(text) => format!("Monospace ('{text}')"), + LineElement::Math(text) => format!("Math ('{text}')"), + LineElement::InternalLink(text) => format!("InternalLink ('{text}')"), + LineElement::ExternalLink(ExternalLink { label, target }) => { + format!("ExternalLink (label:'{label}', target:'{target}')") } + }; + f.write_str(&string) + } +} diff --git a/src/elements/table.rs b/src/elements/table.rs new file mode 100644 index 0000000..5b354c1 --- /dev/null +++ b/src/elements/table.rs @@ -0,0 +1,96 @@ +use crate::*; + +pub struct Table { + /// The column definitions for this table. + pub column_definitions: Vec, + /// The content contained in the rows of the table. An individual [Line] is + /// the contents of a single table cell, a group of cells forms a table row, + /// a group of rows forms a vertical section of the table, with a separator + /// intending to be drawn between each section, and a group of sections forms + /// the table itself. + /// Each row in the table is guaranteed to have the same number of columns + /// as the table header. + pub sections: Vec>>, +} + +impl Table { + pub fn try_from_strs(lines: &[&str]) -> Option { + let mut lines = lines.into_iter(); + let column_definitions: Vec = { + let names = split_trimmed_columns(lines.next()?)? + .into_iter().map(|l| Line::from_str(l)); + let alignments = parse_alignments(lines.next()?)?; + if names.len() != alignments.len() { return None } + std::iter::zip(names, alignments).map( + |(name, alignment)| ColumnDefinition { name, alignment } ).collect() + }; + + let mut sections = Vec::new(); + let mut current_section = Vec::new(); + + for line in lines { + if let Some(alignments) = parse_alignments(line) { + if alignments.len() != column_definitions.len() { return None } + sections.push(std::mem::take(&mut current_section)) + } else { + let row: Vec = split_trimmed_columns(line)? + .into_iter().map(|c| Line::from_str(c)).collect(); + if row.len() != column_definitions.len() { return None } + current_section.push(row); + } + } + + if !current_section.is_empty() { + sections.push(std::mem::take(&mut current_section)); } + Some( Self { column_definitions, sections }) + } +} + +pub struct ColumnDefinition { + /// The name of this column, shown in the header row of the table. + pub name: Line, + /// The alignment of the content in this column. + pub alignment: ColumnAlignment, +} + +pub enum ColumnAlignment { + Left, + Center, + Right, +} + +impl ColumnAlignment { + pub fn from_str(cell: &str) -> Option { + if !cell.chars().all(|c| c == ':' || c == '-') { + return None } + match (cell.starts_with(':'), cell.ends_with(':')) { + (false, false) => Some(ColumnAlignment::Left), + (false, true) => Some(ColumnAlignment::Right), + (true, false) => Some(ColumnAlignment::Left), + (true, true) => Some(ColumnAlignment::Center), + } + } +} + + +fn split_trimmed_columns(line: &str) -> Option> { + Some(split_columns(line)?.into_iter().map(|s| s.trim()).collect()) +} + +fn split_columns(line: &str) -> Option> { + if let Some(("", tail)) = line.split_once('|') { + if let Some((head, "")) = tail.rsplit_once('|') { + return Some(head.split('|').collect()); + } + } + return None; +} + +fn parse_alignments(line: &str) -> Option> { + let mut alignments = Vec::new(); + for cell in split_columns(line)? { + alignments.push(ColumnAlignment::from_str(cell)?); + } + Some(alignments) +} + -- cgit v1.2.3-70-g09d2