Completely rewrite the library v1.0.0

author: Ben Bridle <bridle.benjamin@gmail.com> 2024-04-21 13:57:03 +1200
committer: Ben Bridle <bridle.benjamin@gmail.com> 2024-04-21 13:57:36 +1200
commit: 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (patch)
tree: e9e52ed33d5ed5a4d68a1161c3db5c2d8c38dd42
parent: 54f5e9fd883e207931baa9c87b6181ca724d6bab (diff)
download: markdown-13cb719b87bcef41c4dd398f5a651ddb2b561e0d.zip
13 files changed, 520 insertions, 605 deletions
diff --git a/src/block.rs b/src/block.rs
deleted file mode 100644
index 2a34fcf..0000000
--- a/src/block.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-use crate::{Line, Table};
-
-pub enum Block {
-    Heading1(Line),
-    Heading2(Line),
-    Heading3(Line),
-    Paragraph(Line),
-    List(Vec<Line>),
-    Quote(Vec<Line>),
-    Code(String, Vec<String>),
-    Table(Table),
-}
-impl std::fmt::Debug for Block {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
-        f.write_str(match self {
-            Self::Heading1(_) => "Heading1",
-            Self::Heading2(_) => "Heading2",
-            Self::Heading3(_) => "Heading3",
-            Self::Paragraph(_) => "Paragraph",
-            Self::List(_) => "List",
-            Self::Quote(_) => "Quote",
-            Self::Code(_, _) => "Code",
-            Self::Table(_) => "Table",
-        })
-    }
-}
diff --git a/src/document.rs b/src/document.rs
new file mode 100644
index 0000000..fbfea00
--- /dev/null
+++ b/src/document.rs
@@ -0,0 +1,172 @@
+use crate::*;
+
+pub struct MarkdownDocument {
+    pub block_elements: Vec<BlockElement>,
+}
+
+impl MarkdownDocument {
+    pub fn from_str(raw_markdown: &str) -> Self {
+        let mut block_elements = Vec::new();
+        let mut current_multiline_block = None;
+        // Chain a blank line to the end to ensure that the final multi-line block is flushed.
+        let lines = raw_markdown.lines().chain(std::iter::once(""));
+
+        for incoming_line in lines {
+            let incoming_line_untrimmed = incoming_line;
+            let incoming_line = incoming_line.trim();
+            // Handle an in-progress subdocument block.
+            if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block {
+                if incoming_line == "```" {
+                    let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") };
+                    block_elements.push(BlockElement::Subdocument(subdocument));
+                    current_multiline_block = None;
+                } else {
+                    lines.push(incoming_line_untrimmed);
+                    current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines));
+                }
+                continue;
+            }
+
+            // Parse the incoming line.
+            let incoming_line_block = {
+                if let Some(("", tail)) = incoming_line.split_once("# ") {
+                    BlockLine::DocumentHeading(tail.trim())
+                } else if let Some(("", tail)) = incoming_line.split_once("## ") {
+                    BlockLine::SectionHeading(tail.trim())
+                } else if let Some(("", tail)) = incoming_line.split_once("### ") {
+                    BlockLine::ArticleHeading(tail.trim())
+                } else if let Some(("", tail)) = incoming_line.split_once("- ") {
+                    BlockLine::List(tail.trim())
+                } else if let Some(("", tail)) = incoming_line.split_once("> ") {
+                    BlockLine::Aside(tail.trim())
+                } else if incoming_line == ">" {
+                    BlockLine::Aside("")
+                } else if let Some(("", tail)) = incoming_line.split_once("```") {
+                    BlockLine::SubdocumentHeader(tail.trim())
+                } else if incoming_line.starts_with("|") {
+                    BlockLine::Table(incoming_line)
+                } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') {
+                    BlockLine::Break
+                } else if incoming_line.is_empty() {
+                    BlockLine::BlankLine
+                } else {
+                    BlockLine::Paragraph(incoming_line) }
+            };
+
+            // If the incoming line is of the same type as the current multiline
+            // block, append it to the end of that current block and continue.
+            if let Some(ref mut current_block) = current_multiline_block {
+                match (&incoming_line_block, current_block)  {
+                    (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => {
+                        lines.push(line); continue; }
+                    (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => {
+                        lines.push(line); continue; }
+                    (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => {
+                        lines.push(line); continue; }
+                    _ => (),
+                };
+            }
+
+            // Otherwise, commit the current block, then handle the incoming line.
+            if let Some(current_block) = current_multiline_block {
+                match current_block {
+                    MultiLineBlock::List(raw_lines) => {
+                        let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
+                        block_elements.push(BlockElement::List(lines)); }
+                    MultiLineBlock::Aside(raw_lines) => {
+                        let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
+                        block_elements.push(BlockElement::Aside(lines)); }
+                    MultiLineBlock::Table(raw_lines) => {
+                        if let Some(table) = Table::try_from_strs(&raw_lines) {
+                            block_elements.push(BlockElement::Table(table)) }
+                        else { for raw_line in raw_lines {
+                            block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}}
+                    MultiLineBlock::Subdocument(..) => unreachable!(),
+                }
+                current_multiline_block = None;
+            }
+
+            // Handle the incoming line.
+            match incoming_line_block {
+                BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))),
+                BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))),
+                BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))),
+                BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])),
+                BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])),
+                BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])),
+                BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())),
+                BlockLine::Paragraph(s) => {
+                    if let Some(embedded_file) = parse_embedded_file(&s) {
+                        block_elements.push(BlockElement::EmbeddedFile(embedded_file))
+                    } else if let Some(math) = parse_math_block(&s) {
+                        block_elements.push(BlockElement::Math(math))
+                    } else {
+                        block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) }
+                },
+                BlockLine::Break => block_elements.push(BlockElement::Break),
+                BlockLine::BlankLine => (),
+            }
+        }
+
+        Self { block_elements }
+    }
+}
+
+/// Parse a whole line of the form `![label](target)` into an [EmbeddedFile].
+/// Returns `None` if the line has any other shape, if the label or target is
+/// empty or fails the `is_contentful` check, or if the target contains a `)`.
+fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> {
+    let chars: Vec<char> = text.trim().chars().collect();
+    // True only if all of `p` is present at index `i`. Zipping alone stops at the
+    // shorter side, so a line cut short (like `!`) would match and later panic.
+    let starts_with = |i: usize, p: &str| chars.get(i..).map_or(false, |tail|
+        tail.len() >= p.chars().count() && std::iter::zip(tail, p.chars()).all(|(a, b)| *a == b));
+
+    if !starts_with(0, "![") { return None }
+    // The label runs from after the `![` up to the first `](`.
+    let label_start = 2;
+    let label_end = (label_start..chars.len()).find(|i| starts_with(*i, "]("))?;
+    let label: String = chars[label_start..label_end].iter().collect();
+    if label.is_empty() || !is_contentful(&label, &['[', ']']) {
+        return None }
+    // The target runs from after the `](` up to a `)` that ends the line.
+    let target_start = label_end + 2;
+    let target_end = chars.len() - 1;
+    if chars[target_end] != ')' { return None }
+    let target: String = chars[target_start..target_end].iter().collect();
+    if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) {
+        return None }
+    Some(EmbeddedFile { label, target })
+}
+
+/// Parse a whole line of the form `$$ math $$`, returning the math content with
+/// the surrounding whitespace trimmed off. Any other line yields `None`.
+fn parse_math_block(text: &str) -> Option<String> {
+    let (leading, rest) = text.split_once("$$")?;
+    let (math, trailing) = rest.rsplit_once("$$")?;
+    let is_wrapped = leading.is_empty() && trailing.is_empty();
+    is_wrapped.then(|| math.trim().to_string())
+}
+
+/// When parsing, is a single line for a one-line block element.
+enum BlockLine<'a> {
+    DocumentHeading(&'a str),
+    SectionHeading(&'a str),
+    ArticleHeading(&'a str),
+    Paragraph(&'a str),
+    List(&'a str),
+    Aside(&'a str),
+    Table(&'a str),
+    SubdocumentHeader(&'a str),
+    Break,
+    BlankLine,
+}
+
+/// When parsing, is the gathered string lines of a multiline block element.
+enum MultiLineBlock<'a> {
+    List(Vec<&'a str>),
+    Aside(Vec<&'a str>),
+    Table(Vec<&'a str>),
+    Subdocument(&'a str, Vec<&'a str>),
+}
+
diff --git a/src/elements.rs b/src/elements.rs
new file mode 100644
index 0000000..a4a9783
--- /dev/null
+++ b/src/elements.rs
@@ -0,0 +1,9 @@
+mod block_element;
+mod line;
+mod line_element;
+mod table;
+
+pub use block_element::*;
+pub use line::*;
+pub use line_element::*;
+pub use table::*;
diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs
new file mode 100644
index 0000000..cdb7a71
--- /dev/null
+++ b/src/elements/block_element.rs
@@ -0,0 +1,53 @@
+use crate::*;
+
+pub enum BlockElement {
+    /// A first-level heading.
+    DocumentHeading(Line),
+    /// A second-level heading.
+    SectionHeading(Line),
+    /// A third-level heading.
+    ArticleHeading(Line),
+    Paragraph(Line),
+    /// A bullet-list.
+    List(Vec<Line>),
+    /// A paragraph separate from the main text.
+    Aside(Vec<Line>),
+    Table(Table),
+    EmbeddedFile(EmbeddedFile),
+    /// A non-markdown sub-document within this document.
+    Subdocument(Subdocument),
+    /// A KaTeX block
+    Math(String),
+    Break,
+}
+
+pub struct EmbeddedFile {
+    pub label: String,
+    pub target: String,
+}
+
+pub struct Subdocument {
+    pub language: String,
+    pub content: String,
+}
+
+impl std::fmt::Debug for BlockElement {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let string = match self {
+            BlockElement::DocumentHeading(line) => format!("DocumentHeading ('{line}')"),
+            BlockElement::SectionHeading(line) => format!("SectionHeading ('{line}')"),
+            BlockElement::ArticleHeading(line) => format!("ArticleHeading ('{line}')"),
+            BlockElement::Paragraph(line) => format!("Paragraph ('{line}')"),
+            BlockElement::List(lines) => format!("List (len: {})", lines.len()),
+            BlockElement::Aside(_) => format!("Aside"),
+            BlockElement::Table(_) => format!("Table"),
+            BlockElement::EmbeddedFile(EmbeddedFile {label, target}) =>
+                format!("EmbeddedFile (label:'{label}', target:'{target}')"),
+            BlockElement::Subdocument(Subdocument {language, ..}) =>
+                format!("Subdocument ('{language}')"),
+            BlockElement::Math(string) => format!("Math ('{string}')"),
+            BlockElement::Break => format!("Break"),
+        };
+        f.write_str(&string)
+    }
+}
diff --git a/src/elements/line.rs b/src/elements/line.rs
new file mode 100644
index 0000000..d5c078e
--- /dev/null
+++ b/src/elements/line.rs
@@ -0,0 +1,117 @@
+use crate::*;
+
+macro_rules! opt {
+    ($v:expr) => {|s| Some($v(s)) };
+}
+
+pub struct Line {
+    pub elements: Vec<LineElement>,
+}
+
+impl Line {
+    pub fn from_str(raw_string: &str) -> Self {
+        fn unlabeled_extern_link(target: String) -> Option<LineElement> {
+            target.contains("/").then( ||
+                LineElement::ExternalLink(ExternalLink { target, label:String::new() })
+            )
+        }
+        fn labelled_extern_link(s: String) -> Option<LineElement> {
+            let (label, target) = match s.split_once("](") {
+                Some((l, t)) => (l.to_string(), t.to_string()),
+                None => return None };
+            if label.contains("]") || target.contains("]") { return None }
+            Some(LineElement::ExternalLink(ExternalLink { label, target })) }
+        const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [
+            ( opt!(LineElement::Bold),          "**", "**", "*" ),
+            ( opt!(LineElement::Italic),        "_",  "_",  "_" ),
+            ( opt!(LineElement::Monospace),     "`",  "`",  "`" ),
+            ( opt!(LineElement::Math),          "$",  "$",  "$" ),
+            ( opt!(LineElement::InternalLink),  "[[", "]]", "[]" ),
+            ( labelled_extern_link,             "[",  ")",  "[]()" ),
+            ( unlabeled_extern_link,            "[",  "]",  "[]" ),
+        ];
+        let chars: Vec<char> = raw_string.chars().collect();
+        let mut elements = Vec::new();
+        let mut cached_chars = String::new();
+        let mut i = 0;
+
+        let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b);
+
+        'outer: while let Some(c) = chars.get(i) {
+            // Only check for opening delimiters that directly follow a whitespace character.
+            let follows_whitespace = match chars.get(i.wrapping_sub(1)) {
+                Some(w) => is_whitespace(w),
+                None => true,
+            };
+            if follows_whitespace {
+                // Try to parse an opening delimiter.
+                for (variant, start_delim, end_delim, delim_chars) in DELIMITERS {
+                    // Try to match an opening delimiter with a terminating delimiter.
+                    if starts_with(i, start_delim) {
+                        let s_end = i + start_delim.chars().count();
+                        let mut e_start = s_end;
+                        let mut e_end = e_start + end_delim.chars().count();
+                        while e_end <= chars.len() {
+                            e_start += 1; e_end += 1;
+                            let end_is_whitespace =
+                                if let Some(end_char) = chars.get(e_end) {
+                                    is_whitespace(end_char)
+                                } else {
+                                    e_end == chars.len()
+                                };
+                            // If the terminating delimiter is found, store the normal
+                            // text and the styled text, and continue to the next character.
+                            if end_is_whitespace && starts_with(e_start, end_delim) {
+                                // Check that there is content within the styled string.
+                                let styled_string: String = chars[s_end..e_start].iter().collect();
+                                let non_content_chars: Vec<_> = delim_chars.chars().collect();
+                                if !is_contentful(&styled_string, &non_content_chars) { continue }
+                                if styled_string.len() != styled_string.trim().len() { continue }
+                                let line_element = match variant(styled_string) {
+                                    Some(e) => e,
+                                    None => continue,
+                                };
+                                // Commit the normal and styled strings.
+                                if !cached_chars.is_empty() {
+                                    let normal_string = std::mem::take(&mut cached_chars);
+                                    elements.push(LineElement::Normal(normal_string)); }
+                                elements.push(line_element);
+                                i = e_end;
+                                continue 'outer;
+                            }
+                        }
+                    }
+                }
+            }
+            cached_chars.push(*c); i += 1;
+        }
+        if !cached_chars.is_empty() {
+            let normal_string = std::mem::take(&mut cached_chars);
+            elements.push(LineElement::Normal(normal_string)); }
+        Self { elements }
+    }
+
+    /// Return only the character content, with none of the styling information.
+    pub fn as_plain_text(&self) -> String {
+        let mut string = String::new();
+        for line_element in &self.elements {
+            string.push_str(line_element.as_plain_text()) }
+        return string;
+    }
+}
+
+impl std::fmt::Display for Line {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        for line_element in &self.elements {
+            write!(f, "{line_element}")?; }
+        Ok(())
+    }
+}
+
+impl std::fmt::Debug for Line {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        for line_element in &self.elements {
+            write!(f, "{line_element:?}\n")?; }
+        Ok(())
+    }
+}
diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs
new file mode 100644
index 0000000..cc47b4b
--- /dev/null
+++ b/src/elements/line_element.rs
@@ -0,0 +1,61 @@
+pub enum LineElement {
+    Normal(String),
+    Bold(String),
+    Italic(String),
+    Monospace(String),
+    Math(String),
+    InternalLink(String),
+    ExternalLink(ExternalLink),
+}
+
+impl LineElement {
+    /// Return only the character content, with none of the styling information.
+    pub fn as_plain_text(&self) -> &str {
+        match self {
+            LineElement::Normal(text) => text,
+            LineElement::Bold(text) => text,
+            LineElement::Italic(text) => text,
+            LineElement::Monospace(text) => text,
+            LineElement::Math(text) => text,
+            LineElement::InternalLink(label) => label,
+            LineElement::ExternalLink(ExternalLink { label, ..}) => label,
+        }
+    }
+}
+
+pub struct ExternalLink {
+    pub label: String,
+    pub target: String,
+}
+
+impl std::fmt::Display for LineElement {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let string = match self {
+            LineElement::Normal(text) => format!("{text}"),
+            LineElement::Bold(text) => format!("**{text}**"),
+            LineElement::Italic(text) => format!("_{text}_"),
+            LineElement::Monospace(text) => format!("`{text}`"),
+            LineElement::Math(text) => format!("${text}$"),
+            LineElement::InternalLink(text) => format!("[[{text}]]"),
+            LineElement::ExternalLink(ExternalLink { label, target }) => {
+                format!("[{label}]({target})") }
+        };
+        f.write_str(&string)
+    }
+}
+
+impl std::fmt::Debug for LineElement {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let string = match self {
+            LineElement::Normal(text) => format!("Normal ('{text}')"),
+            LineElement::Bold(text) => format!("Bold ('{text}')"),
+            LineElement::Italic(text) => format!("Italic ('{text}')"),
+            LineElement::Monospace(text) => format!("Monospace ('{text}')"),
+            LineElement::Math(text) => format!("Math ('{text}')"),
+            LineElement::InternalLink(text) => format!("InternalLink ('{text}')"),
+            LineElement::ExternalLink(ExternalLink { label, target }) => {
+                format!("ExternalLink (label:'{label}',  target:'{target}')") }
+        };
+        f.write_str(&string)
+    }
+}
diff --git a/src/elements/table.rs b/src/elements/table.rs
new file mode 100644
index 0000000..5b354c1
--- /dev/null
+++ b/src/elements/table.rs
@@ -0,0 +1,96 @@
+use crate::*;
+
+pub struct Table {
+    /// The column definitions for this table.
+    pub column_definitions: Vec<ColumnDefinition>,
+    /// The content contained in the rows of the table. An individual [Line] is
+    /// the contents of a single table cell, a group of cells forms a table row,
+    /// a group of rows forms a vertical section of the table, with a separator
+    /// intending to be drawn between each section, and a group of sections forms
+    /// the table itself.
+    /// Each row in the table is guaranteed to have the same number of columns
+    /// as the table header.
+    pub sections: Vec<Vec<Vec<Line>>>,
+}
+
+impl Table {
+    pub fn try_from_strs(lines: &[&str]) -> Option<Self> {
+        let mut lines = lines.into_iter();
+        let column_definitions: Vec<ColumnDefinition> = {
+            let names = split_trimmed_columns(lines.next()?)?
+                .into_iter().map(|l| Line::from_str(l));
+            let alignments = parse_alignments(lines.next()?)?;
+            if names.len() != alignments.len() { return None }
+            std::iter::zip(names, alignments).map(
+                |(name, alignment)| ColumnDefinition { name, alignment } ).collect()
+        };
+
+        let mut sections = Vec::new();
+        let mut current_section = Vec::new();
+
+        for line in lines {
+            if let Some(alignments) = parse_alignments(line) {
+                if alignments.len() != column_definitions.len() { return None }
+                sections.push(std::mem::take(&mut current_section))
+            } else {
+                let row: Vec<Line> = split_trimmed_columns(line)?
+                    .into_iter().map(|c| Line::from_str(c)).collect();
+                if row.len() != column_definitions.len() { return None }
+                current_section.push(row);
+            }
+        }
+
+        if !current_section.is_empty() {
+            sections.push(std::mem::take(&mut current_section)); }
+        Some( Self { column_definitions, sections })
+    }
+}
+
+pub struct ColumnDefinition {
+    /// The name of this column, shown in the header row of the table.
+    pub name: Line,
+    /// The alignment of the content in this column.
+    pub alignment: ColumnAlignment,
+}
+
+pub enum ColumnAlignment {
+    Left,
+    Center,
+    Right,
+}
+
+impl ColumnAlignment {
+    /// Parse a separator-row cell made only of `-` and `:`, where the colons mark
+    /// the aligned side(s). Empty cells are rejected so `|||` stays a normal row.
+    pub fn from_str(cell: &str) -> Option<Self> {
+        if cell.is_empty() || !cell.chars().all(|c| c == ':' || c == '-') { return None }
+        Some(match (cell.starts_with(':'), cell.ends_with(':')) {
+            (true, true) => ColumnAlignment::Center,
+            (false, true) => ColumnAlignment::Right,
+            (_, false) => ColumnAlignment::Left,
+        })
+    }
+}
+
+
+/// Split a table line into its cells, trimming the whitespace around each cell.
+fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> {
+    split_columns(line).map(|cells| cells.into_iter().map(str::trim).collect()) }
+
+/// Split a `|a|b|` table line into its raw cells, without trimming them.
+/// Returns `None` unless the line both starts and ends with a pipe.
+fn split_columns(line: &str) -> Option<Vec<&str>> {
+    let inner = line
+        .strip_prefix('|')?
+        .strip_suffix('|')?;
+    Some(inner.split('|').collect())
+}
+
+/// Parse every cell of a separator line, or `None` if any cell is not one.
+fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> {
+    split_columns(line)?
+        .into_iter()
+        .map(ColumnAlignment::from_str)
+        .collect()
+}
+
diff --git a/src/lib.rs b/src/lib.rs
index c0b8c84..c70ce77 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,33 +1,13 @@
-#![feature(iter_zip)]
-
-mod block;
-mod parse;
-mod parse_heirarchical;
-mod table;
-mod text;
-
-pub use block::Block;
-pub use parse::parse;
-pub use parse_heirarchical::parse_heirarchical;
-pub use table::{Alignment, Column, Table};
-pub use text::{Hyperlink, Text};
-
-pub type Line = Vec<Text>;
-
-pub fn line_to_string(line: &[Text]) -> String {
-    let mut output = String::new();
-    for text in line {
-        match text {
-            Text::Normal(content) => output.push_str(&content),
-            Text::Bold(content) => output.push_str(&format!("**{}**", content)),
-            Text::Italic(content) => output.push_str(&format!("_{}_", content)),
-            Text::BoldItalic(content) => output.push_str(&format!("**_{}_**", content)),
-            Text::Code(content) => output.push_str(&format!("`{}`", content)),
-            Text::WikiLink(content) => output.push_str(&format!("[[{}]]", content)),
-            Text::Hyperlink(Hyperlink { label, target }) => {
-                output.push_str(&format!("[{}]({})", label, target))
-            }
-        }
-    }
-    return output;
+mod document;
+mod elements;
+
+pub use document::*;
+pub use elements::*;
+
+pub(crate) fn is_whitespace(c: &char) -> bool {
+    c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) }
+pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool {
+    s.chars().any(|c| !non_content_chars.contains(&c))
+    && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false)
+    && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false)
 }
diff --git a/src/main.rs b/src/main.rs
deleted file mode 100644
index 8cc2bfa..0000000
--- a/src/main.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-use markdown_parser::parse;
-
-pub fn main() {
-    // let markdown = std::fs::read_to_string("/home/ben/markdown_test.md").unwrap();
-    let markdown = "
-This _is_ a **test** paragraph.
-[This](http://www.google.com) is a regular full-length link.
-|A|this is **middle** col|CC|
-|-|:-:|---:|
-|A|||
-";
-    let document = parse(&markdown);
-    for node in document {
-        println!("{:?}", node);
-        if let markdown_parser::Block::Paragraph(blocks) = node {
-            for block in blocks {
-                println!("  {:?}", block);
-            }
-        } else if let markdown_parser::Block::List(lines) = node {
-            for line in lines {
-                println!("-");
-                for block in line {
-                    println!("  {:?}", block);
-                }
-            }
-        } else if let markdown_parser::Block::Table(table) = node {
-            for column in table.columns {
-                print!("  {:?}: ", column.alignment);
-                for block in column.name {
-                    print!("{:?} ", block);
-                }
-                println!();
-            }
-            println!();
-        }
-    }
-}
diff --git a/src/parse.rs b/src/parse.rs
deleted file mode 100644
index 6e4cdd9..0000000
--- a/src/parse.rs
+++ /dev/null
@@ -1,283 +0,0 @@
-use crate::*;
-
-pub fn parse(markdown: &str) -> Vec<Block> {
-    let mut document = Vec::new();
-    let lines: Vec<&str> = markdown.lines().map(|l| l.trim_start()).collect();
-    let mut i = 0;
-
-    // Gather all consecutive lines that begin with a given substring and run a
-    // function over them. The function must be `fn(&[&str])->Result<Block,()>`.
-    macro_rules! gather {
-        ($prefix:expr, $func:ident) => {{
-            let start = i;
-            for line in &lines[i..] {
-                if line.starts_with($prefix) {
-                    i += 1;
-                    continue;
-                }
-                break;
-            }
-            let gathered_lines = &lines[start..i];
-            match gathered_lines.is_empty() {
-                false => $func(gathered_lines),
-                true => Err(()),
-            }
-        }};
-    }
-
-    loop {
-        let line = match lines.get(i) {
-            Some(line) => line,
-            None => return document,
-        };
-        if line.is_empty() {
-            i += 1;
-            continue;
-        } else if let Ok(heading) = parse_heading(line) {
-            document.push(heading);
-            i += 1;
-        } else if let Ok(quote) = gather!(">", parse_quote) {
-            document.push(quote);
-        } else if let Ok(list) = gather!("- ", parse_list) {
-            document.push(list);
-        } else if let Ok(table) = gather!("|", parse_table) {
-            document.push(table);
-        } else if line.starts_with("```") {
-            let language = line[3..].to_string();
-            let mut code_lines = Vec::new();
-            i += 1;
-            for line in &lines[i..] {
-                match line.trim() == "```" {
-                    true => break,
-                    false => {
-                        code_lines.push(line.to_string());
-                        i += 1
-                    }
-                }
-            }
-            document.push(Block::Code(language, code_lines));
-            i += 1;
-        } else {
-            document.push(parse_paragraph(line));
-            i += 1;
-        };
-    }
-}
-
-/// Returns the substring from `chars` that is between the `start` and `end`
-/// delimiters. Returns None if `chars` does not start with `start`, or if an
-/// occurance of `start` and `end` cannot be found within `chars`. There must
-/// not be a space after the occurance of `start` or before the occurance of
-/// `end`. If `start` and `end` consist of just one or more of the same
-/// character, the content must contain at least one other character than
-/// that one.
-fn capture(chars: &[char], start: &str, end: &str) -> Option<String> {
-    // Determine if `pattern` contains only a single unique character
-    let single_char_in_pattern = match start.chars().next() {
-        Some(first_char) => {
-            let start_and_end = start.chars().chain(end.chars());
-            start_and_end.fold(Some(first_char), |accum, elem| match accum {
-                Some(c) if c == elem => accum,
-                _ => None,
-            })
-        }
-        None => None,
-    };
-    let is_space = |i: usize| chars.get(i) == Some(&' ');
-    fn starts_with_pattern(chars: &[char], pattern: &str) -> bool {
-        let mut i = 0;
-        for ref c in pattern.chars() {
-            match chars.get(i) {
-                Some(v) if v == c => i += 1,
-                _ => return false,
-            }
-        }
-        true
-    }
-    if !starts_with_pattern(chars, start) {
-        return None;
-    }
-    let text_start = start.len();
-    if is_space(text_start) {
-        return None;
-    };
-    let mut i = text_start;
-    loop {
-        i += 1;
-        if chars.get(i).is_none() {
-            return None;
-        }
-        if starts_with_pattern(&chars[i..], end) {
-            if is_space(i - 1) {
-                continue;
-            }
-            let text_content: String = chars[text_start..i].iter().collect();
-            match single_char_in_pattern {
-                None => return Some(text_content),
-                Some(c) => {
-                    if text_content.chars().any(|e| e != c) {
-                        return Some(text_content);
-                    }
-                }
-            };
-        }
-    }
-}
-
-fn parse_text(line: &str) -> Line {
-    let mut block_content: Line = Vec::new();
-    let chars: Vec<char> = line.chars().collect();
-    let mut normal = String::new();
-    let mut i = 0;
-
-    macro_rules! commit_normal {
-        () => {
-            if !normal.is_empty() {
-                let normal_text = Text::Normal(std::mem::take(&mut normal));
-                block_content.push(normal_text);
-            }
-        };
-    }
-    let patterns: [(&str, &str, fn(String) -> Text); 7] = [
-        ("***", "***", Text::BoldItalic),
-        ("**", "**", Text::Bold),
-        ("*", "*", Text::Italic),
-        ("___", "___", Text::BoldItalic),
-        ("__", "__", Text::Bold),
-        ("_", "_", Text::Italic),
-        ("`", "`", Text::Code),
-    ];
-
-    'outer: loop {
-        // Check if a simple, non-Normal text type starts at this character
-        for (start, end, text_type) in patterns.iter() {
-            if let Some(string) = capture(&chars[i..], start, end) {
-                i += string.len() + start.len() + end.len();
-                commit_normal!();
-                block_content.push(text_type(string));
-                continue 'outer;
-            }
-        }
-        // Check if a wiki-style hyperlink starts at this character
-        if let Some(content) = capture(&chars[i..], "[[", "]]") {
-            i += content.len() + 4;
-            commit_normal!();
-            block_content.push(Text::WikiLink(content));
-            continue 'outer;
-        }
-
-        // Check if a long-form hyperlink starts at this character
-        if let Some(label) = capture(&chars[i..], "[", "]") {
-            let target_len = label.len() + 2;
-            if let Some(target) = capture(&chars[i + target_len..], "(", ")") {
-                i += target_len + target.len() + 2;
-                commit_normal!();
-                block_content.push(Text::Hyperlink(Hyperlink { label, target }))
-            }
-        }
-
-        // No new text type started here, this must just be normal text
-        match chars.get(i) {
-            Some(c) => {
-                normal.push(*c);
-                i += 1;
-            }
-            None => {
-                commit_normal!();
-                break;
-            }
-        }
-    }
-    return block_content;
-}
-
-fn parse_heading(line: &str) -> Result<Block, ()> {
-    let (heading_type, content): (fn(Line) -> Block, &str) = if line.starts_with("# ") {
-        (Block::Heading1, &line[2..])
-    } else if line.starts_with("## ") {
-        (Block::Heading2, &line[3..])
-    } else if line.starts_with("### ") {
-        (Block::Heading3, &line[4..])
-    } else {
-        return Err(());
-    };
-    if content.is_empty() {
-        return Err(());
-    };
-    Ok(heading_type(parse_text(content)))
-}
-
-/// Accepts a slice of lines that begin with '>'
-fn parse_quote(lines: &[&str]) -> Result<Block, ()> {
-    let mut content = Vec::new();
-    for line in lines {
-        content.push(if *line == ">" {
-            Vec::new()
-        } else {
-            parse_text(&line[2..])
-        });
-    }
-    Ok(Block::Quote(content))
-}
-
-fn parse_list(lines: &[&str]) -> Result<Block, ()> {
-    Ok(Block::List(
-        lines.iter().map(|l| parse_text(&l[2..])).collect(),
-    ))
-}
-
-fn parse_paragraph(line: &str) -> Block {
-    Block::Paragraph(parse_text(line))
-}
-
-fn parse_table(lines: &[&str]) -> Result<Block, ()> {
-    if lines.len() < 3 {
-        return Err(());
-    }
-    let names = split_columns(lines[0])?;
-    let dividers = split_columns(lines[1])?;
-    if names.len() != dividers.len() {
-        return Err(());
-    }
-    let mut columns = Vec::new();
-    for (name, divider) in std::iter::zip(names.iter(), dividers.iter()) {
-        let alignment = Alignment::from_str(divider)?;
-        columns.push(Column {
-            name: parse_text(name),
-            alignment,
-        })
-    }
-    let mut rows = Vec::new();
-    for row in &lines[2..] {
-        let split_row: Vec<Line> = split_columns(row)?.iter().map(|s| parse_text(s)).collect();
-        if split_row.len() != columns.len() {
-            return Err(());
-        }
-        rows.push(split_row);
-    }
-    Ok(Block::Table(Table { columns, rows }))
-}
-
-fn split_columns(line: &str) -> Result<Vec<String>, ()> {
-    // Find the index after the first |, and before the last |
-    let mut start = None;
-    let mut end = None;
-    for (i, c) in line.chars().enumerate() {
-        if c == '|' {
-            if start.is_none() {
-                start = Some(i + 1);
-            } else {
-                end = Some(i);
-            }
-        }
-    }
-    match (start, end) {
-        (Some(s), Some(e)) => {
-            let chars: Vec<char> = line.chars().collect();
-            let string: String = chars[s..e].iter().collect();
-            let split = string.split('|');
-            Ok(split.map(|s| s.trim().to_string()).collect())
-        }
-        _ => Err(()),
-    }
-}
diff --git a/src/parse_heirarchical.rs b/src/parse_heirarchical.rs
deleted file mode 100644
index 75c2bec..0000000
--- a/src/parse_heirarchical.rs
+++ /dev/null
@@ -1,137 +0,0 @@
-use crate::*;
-
-macro_rules! get_subsection {
-    ($t:ident) => {
-        pub fn get_subsection(&self, name: &str) -> Option<&$t> {
-            for section in &self.sections {
-                if line_to_string(&section.title) == name {
-                    return Some(section);
-                }
-            }
-            return None;
-        }
-    };
-}
-
-#[derive(Default)]
-pub struct Document {
-    pub preamble: Vec<Block>,
-    pub sections: Vec<TopLevelSection>,
-}
-impl Document {
-    get_subsection! {TopLevelSection}
-}
-
-#[derive(Default)]
-pub struct TopLevelSection {
-    pub title: Line,
-    pub content: Vec<Block>,
-    pub sections: Vec<MidLevelSection>,
-}
-impl TopLevelSection {
-    get_subsection! {MidLevelSection}
-}
-
-#[derive(Default)]
-pub struct MidLevelSection {
-    pub title: Line,
-    pub content: Vec<Block>,
-    pub sections: Vec<LowLevelSection>,
-}
-impl MidLevelSection {
-    get_subsection! {LowLevelSection}
-}
-
-#[derive(Default)]
-pub struct LowLevelSection {
-    pub title: Line,
-    pub content: Vec<Block>,
-}
-
-pub fn parse_heirarchical(markdown: &str) -> Result<Document, ()> {
-    macro_rules! push_section {
-        ($from:ident => $to:ident) => {
-            $to.sections.push(std::mem::take(&mut $from))
-        };
-    }
-    let mut document = Document::default();
-    let mut h1_buffer = TopLevelSection::default();
-    let mut h2_buffer = MidLevelSection::default();
-    let mut h3_buffer = LowLevelSection::default();
-    let mut level = 0;
-
-    let blocks = parse(markdown);
-    for block in blocks {
-        match (level, block) {
-            (0, Block::Heading1(title)) => {
-                h1_buffer.title = title;
-                level = 1;
-            }
-            (0, Block::Heading2(_)) => return Err(()),
-            (0, Block::Heading3(_)) => return Err(()),
-            (0, block) => document.preamble.push(block),
-            (1, Block::Heading1(title)) => {
-                push_section!(h1_buffer => document);
-                h1_buffer.title = title;
-            }
-            (1, Block::Heading2(title)) => {
-                h2_buffer.title = title;
-                level = 2;
-            }
-            (1, Block::Heading3(_)) => return Err(()),
-            (1, block) => h1_buffer.content.push(block),
-            (2, Block::Heading1(title)) => {
-                push_section!(h2_buffer => h1_buffer);
-                push_section!(h1_buffer => document);
-                h1_buffer.title = title;
-                level = 1;
-            }
-            (2, Block::Heading2(title)) => {
-                push_section!(h2_buffer => h1_buffer);
-                h2_buffer.title = title;
-            }
-            (2, Block::Heading3(title)) => {
-                h3_buffer.title = title;
-                level = 3;
-            }
-            (2, block) => h2_buffer.content.push(block),
-            (3, Block::Heading1(title)) => {
-                push_section!(h3_buffer => h2_buffer);
-                push_section!(h2_buffer => h1_buffer);
-                push_section!(h1_buffer => document);
-                h1_buffer.title = title;
-                level = 1;
-            }
-            (3, Block::Heading2(title)) => {
-                push_section!(h3_buffer => h2_buffer);
-                push_section!(h2_buffer => h1_buffer);
-                h2_buffer.title = title;
-                level = 2;
-            }
-            (3, Block::Heading3(title)) => {
-                push_section!(h3_buffer => h2_buffer);
-                h3_buffer.title = title;
-            }
-            (3, block) => h3_buffer.content.push(block),
-            _ => unreachable!(),
-        }
-    }
-
-    // Push all in-progress sections
-    match level {
-        3 => {
-            push_section!(h3_buffer => h2_buffer);
-            push_section!(h2_buffer => h1_buffer);
-            push_section!(h1_buffer => document);
-        }
-        2 => {
-            push_section!(h2_buffer => h1_buffer);
-            push_section!(h1_buffer => document);
-        }
-        1 => {
-            push_section!(h1_buffer => document);
-        }
-        _ => (),
-    }
-    Ok(document)
-}
diff --git a/src/table.rs b/src/table.rs
deleted file mode 100644
index cc01ffc..0000000
--- a/src/table.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-use crate::Line;
-
-pub struct Table {
-    pub columns: Vec<Column>,
-    pub rows: Vec<Vec<Line>>,
-}
-
-pub struct Column {
-    pub name: Line,
-    pub alignment: Alignment,
-}
-
-pub enum Alignment {
-    Left,
-    Center,
-    Right,
-}
-impl Alignment {
-    pub fn from_str(s: &str) -> Result<Self, ()> {
-        let mut start = false;
-        let mut end = false;
-        for (i, c) in s.chars().enumerate() {
-            if c == ':' {
-                if i == 0 {
-                    start = true;
-                } else if i == s.len() - 1 {
-                    end = true;
-                } else {
-                    return Err(());
-                }
-            } else if c != '-' {
-                return Err(());
-            }
-        }
-        Ok(match (start, end) {
-            (false, false) => Self::Left,
-            (true, false) => Self::Left,
-            (false, true) => Self::Right,
-            (true, true) => Self::Center,
-        })
-    }
-}
-impl std::fmt::Display for Alignment {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
-        f.write_str(match self {
-            Self::Left => "left",
-            Self::Center => "center",
-            Self::Right => "right",
-        })
-    }
-}
-impl std::fmt::Debug for Alignment {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
-        f.write_str(match self {
-            Self::Left => "Left",
-            Self::Center => "Center",
-            Self::Right => "Right",
-        })
-    }
-}
diff --git a/src/text.rs b/src/text.rs
deleted file mode 100644
index e9dbdeb..0000000
--- a/src/text.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-pub enum Text {
-    Normal(String),
-    Bold(String),
-    Italic(String),
-    BoldItalic(String),
-    Code(String),
-    WikiLink(String),
-    Hyperlink(Hyperlink),
-}
-impl std::fmt::Debug for Text {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
-        let string = match self {
-            Text::Normal(text) => format!("Normal ('{}')", text),
-            Text::Bold(text) => format!("Bold ('{}')", text),
-            Text::Italic(text) => format!("Italic ('{}')", text),
-            Text::BoldItalic(text) => format!("BoldItalic ('{}')", text),
-            Text::Code(text) => format!("Code ('{}')", text),
-            Text::WikiLink(text) => format!("WikiLink ('{}')", text),
-            Text::Hyperlink(Hyperlink { label, target }) => {
-                format!("Hyperlink (label:'{}',  target:'{}')", label, target)
-            }
-        };
-        f.write_str(&string)
-    }
-}
-
-pub struct Hyperlink {
-    pub label: String,
-    pub target: String,
-}
author	Ben Bridle <bridle.benjamin@gmail.com>	2024-04-21 13:57:03 +1200
committer	Ben Bridle <bridle.benjamin@gmail.com>	2024-04-21 13:57:36 +1200
commit	13cb719b87bcef41c4dd398f5a651ddb2b561e0d (patch)
tree	e9e52ed33d5ed5a4d68a1161c3db5c2d8c38dd42
parent	54f5e9fd883e207931baa9c87b6181ca724d6bab (diff)
download	markdown-13cb719b87bcef41c4dd398f5a651ddb2b561e0d.zip