summary | refs | log | tree | commit | diff
path: root/src/elements
diff options
context:
space:
mode:
Diffstat (limited to 'src/elements')
-rw-r--r-- src/elements/block_element.rs 53
-rw-r--r-- src/elements/line.rs 117
-rw-r--r-- src/elements/line_element.rs 61
-rw-r--r-- src/elements/table.rs 96
4 files changed, 327 insertions, 0 deletions
diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs
new file mode 100644
index 0000000..cdb7a71
--- /dev/null
+++ b/src/elements/block_element.rs
@@ -0,0 +1,53 @@
+use crate::*;
+
+/// A block-level element of a document. Each variant carries the content
+/// that belongs to that kind of block.
+pub enum BlockElement {
+ /// A first-level heading.
+ DocumentHeading(Line),
+ /// A second-level heading.
+ SectionHeading(Line),
+ /// A third-level heading.
+ ArticleHeading(Line),
+ /// A paragraph, held as a single [Line].
+ Paragraph(Line),
+ /// A bullet-list.
+ List(Vec<Line>),
+ /// A paragraph separate from the main text.
+ Aside(Vec<Line>),
+ /// A table; see [Table].
+ Table(Table),
+ /// A file embedded in the document; see [EmbeddedFile].
+ EmbeddedFile(EmbeddedFile),
+ /// A non-markdown sub-document within this document.
+ Subdocument(Subdocument),
+ /// A KaTeX block
+ Math(String),
+ /// A break; carries no content.
+ /// NOTE(review): presumably a thematic break between blocks - confirm against the parser.
+ Break,
+}
+
+/// The payload of [BlockElement::EmbeddedFile].
+pub struct EmbeddedFile {
+ /// The label attached to the embedded file.
+ pub label: String,
+ /// The target of the embed.
+ /// NOTE(review): presumably a path or URL - confirm against the parser.
+ pub target: String,
+}
+
+/// The payload of [BlockElement::Subdocument]: a non-markdown sub-document.
+pub struct Subdocument {
+ /// The language the sub-document is written in.
+ /// NOTE(review): presumably a free-form tag taken from the source - confirm against the parser.
+ pub language: String,
+ /// The text of the sub-document.
+ pub content: String,
+}
+
+/// One-line debug summary of a block: the variant name, followed by its
+/// most identifying content where it has any.
+impl std::fmt::Debug for BlockElement {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        match self {
+            // Headings and paragraphs show their full text.
+            Self::DocumentHeading(line) => write!(f, "DocumentHeading ('{line}')"),
+            Self::SectionHeading(line) => write!(f, "SectionHeading ('{line}')"),
+            Self::ArticleHeading(line) => write!(f, "ArticleHeading ('{line}')"),
+            Self::Paragraph(line) => write!(f, "Paragraph ('{line}')"),
+            // Multi-line blocks are summarised rather than dumped.
+            Self::List(items) => write!(f, "List (len: {})", items.len()),
+            Self::Aside(_) => f.write_str("Aside"),
+            Self::Table(_) => f.write_str("Table"),
+            Self::EmbeddedFile(file) => write!(
+                f,
+                "EmbeddedFile (label:'{}', target:'{}')",
+                file.label, file.target,
+            ),
+            // Only the language is shown; the content is omitted.
+            Self::Subdocument(subdocument) => {
+                write!(f, "Subdocument ('{}')", subdocument.language)
+            }
+            Self::Math(source) => write!(f, "Math ('{source}')"),
+            Self::Break => f.write_str("Break"),
+        }
+    }
+}
diff --git a/src/elements/line.rs b/src/elements/line.rs
new file mode 100644
index 0000000..d5c078e
--- /dev/null
+++ b/src/elements/line.rs
@@ -0,0 +1,117 @@
+use crate::*;
+
+/// Wrap a one-argument constructor in a closure that always returns
+/// `Some(..)`, so infallible constructors fit the same
+/// `fn(String) -> Option<LineElement>` slot as the fallible link parsers in
+/// the delimiter table of `Line::from_str`.
+macro_rules! opt {
+    ($constructor:expr) => {
+        |content| Some($constructor(content))
+    };
+}
+
+/// A single line of text, stored as a sequence of [LineElement]s.
+pub struct Line {
+ /// The elements of the line, in the order they appear in the source text.
+ pub elements: Vec<LineElement>,
+}
+
+impl Line {
+ /// Parse a raw string into a sequence of [LineElement]s.
+ ///
+ /// The characters of `raw_string` are scanned left to right. Wherever an
+ /// opening delimiter from `DELIMITERS` sits at the start of the string or
+ /// directly after a whitespace character, a matching closing delimiter is
+ /// searched for; the closing delimiter must be followed by whitespace or
+ /// by the end of the string. On success the enclosed text becomes a styled
+ /// or link element. Every other character is collected into
+ /// [LineElement::Normal] runs.
+ pub fn from_str(raw_string: &str) -> Self {
+ // Build an external link with an empty label. Only accepted when the
+ // target contains a '/'.
+ fn unlabeled_extern_link(target: String) -> Option<LineElement> {
+ target.contains("/").then( ||
+ LineElement::ExternalLink(ExternalLink { target, label:String::new() })
+ )
+ }
+ // Build an external link from the text found between an opening "[" and
+ // a closing ")", e.g. `label](target`. The text is split at the first
+ // "](" and rejected if either half still contains a "]".
+ fn labelled_extern_link(s: String) -> Option<LineElement> {
+ let (label, target) = match s.split_once("](") {
+ Some((l, t)) => (l.to_string(), t.to_string()),
+ None => return None };
+ if label.contains("]") || target.contains("]") { return None }
+ Some(LineElement::ExternalLink(ExternalLink { label, target })) }
+ // Each entry is (constructor, opening delimiter, closing delimiter,
+ // characters handed to `is_contentful` as non-content characters).
+ // Entries are tried in this order and the first one to produce an
+ // element wins, so "[[" is tried before "[", and labelled links before
+ // unlabelled ones.
+ const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [
+ ( opt!(LineElement::Bold), "**", "**", "*" ),
+ ( opt!(LineElement::Italic), "_", "_", "_" ),
+ ( opt!(LineElement::Monospace), "`", "`", "`" ),
+ ( opt!(LineElement::Math), "$", "$", "$" ),
+ ( opt!(LineElement::InternalLink), "[[", "]]", "[]" ),
+ ( labelled_extern_link, "[", ")", "[]()" ),
+ ( unlabeled_extern_link, "[", "]", "[]" ),
+ ];
+ let chars: Vec<char> = raw_string.chars().collect();
+ let mut elements = Vec::new();
+ // Normal text collected since the last committed element.
+ let mut cached_chars = String::new();
+ let mut i = 0;
+
+ // True if the characters from index `i` onward begin with `p`. Note that
+ // `zip` stops at the shorter side, so a pattern that runs past the end of
+ // `chars` is only compared against the characters that exist.
+ let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b);
+
+ 'outer: while let Some(c) = chars.get(i) {
+ // Only check for opening delimiters that directly follow a whitespace character.
+ // At i == 0 the index wraps to usize::MAX, `get` returns None, and the
+ // start of the string is treated as following whitespace.
+ let follows_whitespace = match chars.get(i.wrapping_sub(1)) {
+ Some(w) => is_whitespace(w),
+ None => true,
+ };
+ if follows_whitespace {
+ // Try to parse an opening delimiter.
+ for (variant, start_delim, end_delim, delim_chars) in DELIMITERS {
+ // Try to match an opening delimiter with a terminating delimiter.
+ if starts_with(i, start_delim) {
+ // [s_end, e_start) is the enclosed text and [e_start, e_end) is the
+ // candidate closing delimiter. The window is advanced before the
+ // first check, so the enclosed text is never empty.
+ let s_end = i + start_delim.chars().count();
+ let mut e_start = s_end;
+ let mut e_end = e_start + end_delim.chars().count();
+ while e_end <= chars.len() {
+ e_start += 1; e_end += 1;
+ // The closing delimiter must be followed by whitespace, or must
+ // end exactly at the end of the string.
+ let end_is_whitespace =
+ if let Some(end_char) = chars.get(e_end) {
+ is_whitespace(end_char)
+ } else {
+ e_end == chars.len()
+ };
+ // If the terminating delimiter is found, store the normal
+ // text and the styled text, and continue to the next character.
+ if end_is_whitespace && starts_with(e_start, end_delim) {
+ // Check that there is content within the styled string.
+ // Each rejection below continues the inner `while`, i.e. keeps
+ // looking for a later closing delimiter.
+ let styled_string: String = chars[s_end..e_start].iter().collect();
+ let non_content_chars: Vec<_> = delim_chars.chars().collect();
+ if !is_contentful(&styled_string, &non_content_chars) { continue }
+ // Reject text with leading or trailing whitespace.
+ if styled_string.len() != styled_string.trim().len() { continue }
+ let line_element = match variant(styled_string) {
+ Some(e) => e,
+ None => continue,
+ };
+ // Commit the normal and styled strings.
+ if !cached_chars.is_empty() {
+ let normal_string = std::mem::take(&mut cached_chars);
+ elements.push(LineElement::Normal(normal_string)); }
+ elements.push(line_element);
+ // Resume scanning just past the closing delimiter.
+ i = e_end;
+ continue 'outer;
+ }
+ }
+ }
+ }
+ }
+ // No element starts here: keep the character as normal text.
+ cached_chars.push(*c); i += 1;
+ }
+ // Flush any trailing normal text.
+ if !cached_chars.is_empty() {
+ let normal_string = std::mem::take(&mut cached_chars);
+ elements.push(LineElement::Normal(normal_string)); }
+ Self { elements }
+ }
+
+ /// Return only the character content, with none of the styling information.
+ /// This is the concatenation of `as_plain_text` of every element, in order.
+ pub fn as_plain_text(&self) -> String {
+ let mut string = String::new();
+ for line_element in &self.elements {
+ string.push_str(line_element.as_plain_text()) }
+ return string;
+ }
+}
+
+/// Renders the line by writing the `Display` form of every element in order,
+/// with nothing in between.
+impl std::fmt::Display for Line {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        self.elements
+            .iter()
+            .try_for_each(|element| write!(f, "{element}"))
+    }
+}
+
+/// Debug listing of the line: the `Debug` form of every element in order,
+/// one element per output line.
+impl std::fmt::Debug for Line {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        self.elements
+            .iter()
+            .try_for_each(|element| writeln!(f, "{element:?}"))
+    }
+}
diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs
new file mode 100644
index 0000000..cc47b4b
--- /dev/null
+++ b/src/elements/line_element.rs
@@ -0,0 +1,61 @@
+/// One run of text within a line: either plain text, or text carrying a
+/// single style or link.
+pub enum LineElement {
+    /// Unstyled text.
+    Normal(String),
+    /// Text written between `**` delimiters.
+    Bold(String),
+    /// Text written between `_` delimiters.
+    Italic(String),
+    /// Text written between backtick delimiters.
+    Monospace(String),
+    /// Text written between `$` delimiters.
+    Math(String),
+    /// Text written between `[[` and `]]`.
+    InternalLink(String),
+    /// A link with a separate label and target; see [ExternalLink].
+    ExternalLink(ExternalLink),
+}
+
+impl LineElement {
+    /// Return only the character content, with none of the styling information.
+    /// For an external link this is the label, not the target.
+    pub fn as_plain_text(&self) -> &str {
+        match self {
+            Self::Normal(text)
+            | Self::Bold(text)
+            | Self::Italic(text)
+            | Self::Monospace(text)
+            | Self::Math(text)
+            | Self::InternalLink(text) => text.as_str(),
+            Self::ExternalLink(link) => link.label.as_str(),
+        }
+    }
+}
+
+/// The payload of [LineElement::ExternalLink].
+pub struct ExternalLink {
+    /// The text shown for the link.
+    pub label: String,
+    /// Where the link points.
+    pub target: String,
+}
+
+/// Renders the element back into delimiter syntax, e.g. `**text**`.
+impl std::fmt::Display for LineElement {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        // Every single-string variant is "opening delimiter, text, closing
+        // delimiter"; external links have their own two-part shape.
+        let (open, text, close) = match self {
+            Self::Normal(text) => ("", text, ""),
+            Self::Bold(text) => ("**", text, "**"),
+            Self::Italic(text) => ("_", text, "_"),
+            Self::Monospace(text) => ("`", text, "`"),
+            Self::Math(text) => ("$", text, "$"),
+            Self::InternalLink(text) => ("[[", text, "]]"),
+            Self::ExternalLink(link) => {
+                return write!(f, "[{}]({})", link.label, link.target);
+            }
+        };
+        write!(f, "{open}{text}{close}")
+    }
+}
+
+/// Renders the variant name followed by its quoted content.
+impl std::fmt::Debug for LineElement {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let (variant_name, text) = match self {
+            Self::Normal(text) => ("Normal", text),
+            Self::Bold(text) => ("Bold", text),
+            Self::Italic(text) => ("Italic", text),
+            Self::Monospace(text) => ("Monospace", text),
+            Self::Math(text) => ("Math", text),
+            Self::InternalLink(text) => ("InternalLink", text),
+            Self::ExternalLink(link) => {
+                return write!(
+                    f,
+                    "ExternalLink (label:'{}', target:'{}')",
+                    link.label, link.target,
+                );
+            }
+        };
+        write!(f, "{variant_name} ('{text}')")
+    }
+}
diff --git a/src/elements/table.rs b/src/elements/table.rs
new file mode 100644
index 0000000..5b354c1
--- /dev/null
+++ b/src/elements/table.rs
@@ -0,0 +1,96 @@
+use crate::*;
+
+/// A table: a list of column definitions plus the body rows, grouped into
+/// sections.
+pub struct Table {
+ /// The column definitions for this table.
+ pub column_definitions: Vec<ColumnDefinition>,
+ /// The content contained in the rows of the table. An individual [Line] is
+ /// the contents of a single table cell, a group of cells forms a table row,
+ /// a group of rows forms a vertical section of the table, with a separator
+ /// intended to be drawn between each section, and a group of sections forms
+ /// the table itself.
+ /// Each row in the table is guaranteed to have the same number of columns
+ /// as the table header.
+ pub sections: Vec<Vec<Vec<Line>>>,
+}
+
+impl Table {
+    /// Try to parse a table from its source lines.
+    ///
+    /// The first line is the header row and the second line is the alignment
+    /// row; together they define the columns. Each remaining line is either
+    /// another alignment-style row, which closes the current section, or a
+    /// data row, which is added to the current section.
+    ///
+    /// Returns `None` if there are fewer than two lines, if any line is not
+    /// wrapped in `|` characters, or if any row does not have exactly as many
+    /// cells as the header.
+    pub fn try_from_strs(lines: &[&str]) -> Option<Self> {
+        let (header_line, rest) = lines.split_first()?;
+        let header_cells = split_trimmed_columns(header_line)?;
+        let (alignment_line, body_lines) = rest.split_first()?;
+        let alignments = parse_alignments(alignment_line)?;
+        if header_cells.len() != alignments.len() {
+            return None;
+        }
+        let column_definitions: Vec<ColumnDefinition> = header_cells
+            .into_iter()
+            .zip(alignments)
+            .map(|(cell, alignment)| ColumnDefinition { name: Line::from_str(cell), alignment })
+            .collect();
+        let column_count = column_definitions.len();
+
+        let mut sections = Vec::new();
+        let mut open_section = Vec::new();
+        for line in body_lines {
+            match parse_alignments(line) {
+                // A separator row closes the section collected so far,
+                // even if that section is empty.
+                Some(separator) => {
+                    if separator.len() != column_count {
+                        return None;
+                    }
+                    sections.push(std::mem::take(&mut open_section));
+                }
+                // Anything else must be a data row of the right width.
+                None => {
+                    let cells = split_trimmed_columns(line)?;
+                    let row: Vec<Line> =
+                        cells.into_iter().map(|cell| Line::from_str(cell)).collect();
+                    if row.len() != column_count {
+                        return None;
+                    }
+                    open_section.push(row);
+                }
+            }
+        }
+
+        // Rows after the last separator form the final section.
+        if !open_section.is_empty() {
+            sections.push(open_section);
+        }
+        Some(Self { column_definitions, sections })
+    }
+}
+
+/// The definition of one table column: its header name and its alignment.
+pub struct ColumnDefinition {
+ /// The name of this column, shown in the header row of the table.
+ pub name: Line,
+ /// The alignment of the content in this column.
+ pub alignment: ColumnAlignment,
+}
+
+/// The horizontal alignment of the content in a table column.
+pub enum ColumnAlignment {
+    Left,
+    Center,
+    Right,
+}
+
+impl ColumnAlignment {
+    /// Parse one cell of an alignment row.
+    ///
+    /// The cell must be non-empty and consist only of `-` and `:` characters,
+    /// with no surrounding whitespace. A colon on both ends gives `Center`, a
+    /// colon only on the right end gives `Right`, and everything else gives
+    /// `Left`.
+    ///
+    /// Returns `None` for an empty cell or a cell containing any other
+    /// character.
+    pub fn from_str(cell: &str) -> Option<Self> {
+        // `all` is vacuously true on an empty string. Without the explicit
+        // emptiness check an empty cell would parse as `Left`, and a row of
+        // empty cells such as `|||` would be taken for an alignment row.
+        if cell.is_empty() || !cell.chars().all(|c| matches!(c, ':' | '-')) {
+            return None;
+        }
+        let alignment = match (cell.starts_with(':'), cell.ends_with(':')) {
+            (true, true) => ColumnAlignment::Center,
+            (false, true) => ColumnAlignment::Right,
+            (_, false) => ColumnAlignment::Left,
+        };
+        Some(alignment)
+    }
+}
+
+
+/// Split a table row into its cells, as `split_columns` does, then trim the
+/// whitespace around each cell. Returns `None` whenever `split_columns` does.
+fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> {
+    let cells = split_columns(line)?;
+    Some(cells.into_iter().map(str::trim).collect())
+}
+
+/// Split a table row into its raw, untrimmed cells.
+///
+/// The line must begin and end with a `|`, and the two must be distinct
+/// characters. The text between them is split at every `|`. Returns `None`
+/// if the line is not wrapped that way.
+fn split_columns(line: &str) -> Option<Vec<&str>> {
+    let inner = line.strip_prefix('|')?.strip_suffix('|')?;
+    Some(inner.split('|').collect())
+}
+
+/// Parse a line as an alignment row: split it into raw cells and parse every
+/// cell as a [ColumnAlignment]. Returns `None` if the line is not wrapped in
+/// `|` characters or if any cell is not a valid alignment.
+fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> {
+    // Collecting into `Option<Vec<_>>` stops at the first cell that fails.
+    split_columns(line)?
+        .into_iter()
+        .map(ColumnAlignment::from_str)
+        .collect()
+}
+