diff options
| author | Ben Bridle <ben@derelict.engineering> | 2025-01-06 12:21:06 +1300 | 
|---|---|---|
| committer | Ben Bridle <ben@derelict.engineering> | 2025-01-06 17:16:24 +1300 | 
| commit | a78feb46aefaf8e8950e9b029984e9ff98fe69b0 (patch) | |
| tree | d524c0656416e27484f8c0ae709f71558ea69bb6 /src | |
| parent | 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (diff) | |
| download | markdown-2.0.0.zip | |
Rewrite the library a second time (v2.0.0)
Diffstat (limited to 'src')
| -rw-r--r-- | src/block.rs | 19 | ||||
| -rw-r--r-- | src/document.rs | 172 | ||||
| -rw-r--r-- | src/elements.rs | 9 | ||||
| -rw-r--r-- | src/elements/block_element.rs | 53 | ||||
| -rw-r--r-- | src/elements/line.rs | 117 | ||||
| -rw-r--r-- | src/elements/line_element.rs | 61 | ||||
| -rw-r--r-- | src/elements/table.rs | 96 | ||||
| -rw-r--r-- | src/lib.rs | 162 | ||||
| -rw-r--r-- | src/line.rs | 123 | ||||
| -rw-r--r-- | src/table.rs | 85 | ||||
| -rw-r--r-- | src/token.rs | 24 | 
11 files changed, 403 insertions, 518 deletions
| diff --git a/src/block.rs b/src/block.rs new file mode 100644 index 0000000..aa56135 --- /dev/null +++ b/src/block.rs @@ -0,0 +1,19 @@ +use crate::*; + +/// Heading level. +pub enum Level { +    Heading1, +    Heading2, +    Heading3, +} + +pub enum Block { +    Heading { level: Level, line: Line }, +    Paragraph(Line), +    List(Vec<Line>), +    Note(Vec<Line>), +    Table(Table), +    Break, +    Embedded { label: String, path: String }, +    Fragment { language: String, content: String }, +} diff --git a/src/document.rs b/src/document.rs deleted file mode 100644 index fbfea00..0000000 --- a/src/document.rs +++ /dev/null @@ -1,172 +0,0 @@ -use crate::*; - -pub struct MarkdownDocument { -    pub block_elements: Vec<BlockElement>, -} - -impl MarkdownDocument { -    pub fn from_str(raw_markdown: &str) -> Self { -        let mut block_elements = Vec::new(); -        let mut current_multiline_block = None; -        // Chain a blank line to the end to ensure that the final multi-line block is flushed. -        let lines = raw_markdown.lines().chain(std::iter::once("")); - -        for incoming_line in lines { -            let incoming_line_untrimmed = incoming_line; -            let incoming_line = incoming_line.trim(); -            // Handle an in-progress subdocument block. -            if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block { -                if incoming_line == "```" { -                    let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") }; -                    block_elements.push(BlockElement::Subdocument(subdocument)); -                    current_multiline_block = None; -                } else { -                    lines.push(incoming_line_untrimmed); -                    current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines)); -                } -                continue; -            } - -            // Parse the incoming line. 
-            let incoming_line_block = { -                if let Some(("", tail)) = incoming_line.split_once("# ") { -                    BlockLine::DocumentHeading(tail.trim()) -                } else if let Some(("", tail)) = incoming_line.split_once("## ") { -                    BlockLine::SectionHeading(tail.trim()) -                } else if let Some(("", tail)) = incoming_line.split_once("### ") { -                    BlockLine::ArticleHeading(tail.trim()) -                } else if let Some(("", tail)) = incoming_line.split_once("- ") { -                    BlockLine::List(tail.trim()) -                } else if let Some(("", tail)) = incoming_line.split_once("> ") { -                    BlockLine::Aside(tail.trim()) -                } else if incoming_line == ">" { -                    BlockLine::Aside("") -                } else if let Some(("", tail)) = incoming_line.split_once("```") { -                    BlockLine::SubdocumentHeader(tail.trim()) -                } else if incoming_line.starts_with("|") { -                    BlockLine::Table(incoming_line) -                } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') { -                    BlockLine::Break -                } else if incoming_line.is_empty() { -                    BlockLine::BlankLine -                } else { -                    BlockLine::Paragraph(incoming_line) } -            }; - -            // If the incoming line is of the same type as the current multiline -            // block, append it to the end of that current block and continue. 
-            if let Some(ref mut current_block) = current_multiline_block { -                match (&incoming_line_block, current_block)  { -                    (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => { -                        lines.push(line); continue; } -                    (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => { -                        lines.push(line); continue; } -                    (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => { -                        lines.push(line); continue; } -                    _ => (), -                }; -            } - -            // Otherwise, commit the current block, then handle the incoming line. -            if let Some(current_block) = current_multiline_block { -                match current_block { -                    MultiLineBlock::List(raw_lines) => { -                        let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); -                        block_elements.push(BlockElement::List(lines)); } -                    MultiLineBlock::Aside(raw_lines) => { -                        let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); -                        block_elements.push(BlockElement::Aside(lines)); } -                    MultiLineBlock::Table(raw_lines) => { -                        if let Some(table) = Table::try_from_strs(&raw_lines) { -                            block_elements.push(BlockElement::Table(table)) } -                        else { for raw_line in raw_lines { -                            block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}} -                    MultiLineBlock::Subdocument(..) => unreachable!(), -                } -                current_multiline_block = None; -            } - -            // Handle the incoming line. 
-            match incoming_line_block { -                BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))), -                BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))), -                BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))), -                BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])), -                BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])), -                BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])), -                BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())), -                BlockLine::Paragraph(s) => { -                    if let Some(embedded_file) = parse_embedded_file(&s) { -                        block_elements.push(BlockElement::EmbeddedFile(embedded_file)) -                    } else if let Some(math) = parse_math_block(&s) { -                        block_elements.push(BlockElement::Math(math)) -                    } else { -                        block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) } -                }, -                BlockLine::Break => block_elements.push(BlockElement::Break), -                BlockLine::BlankLine => (), -            } -        } - -        Self { block_elements } -    } -} - -fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> { -    let chars: Vec<char> = text.trim().chars().collect(); -    let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); - -    if starts_with(0, " { break }; -            label_end += 1; } -        let label: String = chars[label_start..label_end].iter().collect(); -        if label.is_empty() || !is_contentful(&label, &['[', ']']) { -            return None } -      
  // Try to parse the target. -        let target_start = label_end + 2; -        let target_end = chars.len() - 1; -        if let Some(')') = chars.get(target_end) { -            let target: String = chars[target_start..target_end].iter().collect(); -            if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) { -                return None } -            return Some(EmbeddedFile { label, target }) -        } -    } -    return None; -} - -fn parse_math_block(text: &str) -> Option<String> { -    if let Some(("", trailing)) = text.split_once("$$") { -        if let Some((math, "")) = trailing.rsplit_once("$$") { -            return Some(math.trim().to_string()); -        } -    } -    return None; -} - -/// When parsing, is a single line for a one-line block element. -enum BlockLine<'a> { -    DocumentHeading(&'a str), -    SectionHeading(&'a str), -    ArticleHeading(&'a str), -    Paragraph(&'a str), -    List(&'a str), -    Aside(&'a str), -    Table(&'a str), -    SubdocumentHeader(&'a str), -    Break, -    BlankLine, -} - -/// When parsing, is the gathered string lines of a multiline block element. -enum MultiLineBlock<'a> { -    List(Vec<&'a str>), -    Aside(Vec<&'a str>), -    Table(Vec<&'a str>), -    Subdocument(&'a str, Vec<&'a str>), -} - diff --git a/src/elements.rs b/src/elements.rs deleted file mode 100644 index a4a9783..0000000 --- a/src/elements.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod block_element; -mod line; -mod line_element; -mod table; - -pub use block_element::*; -pub use line::*; -pub use line_element::*; -pub use table::*; diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs deleted file mode 100644 index cdb7a71..0000000 --- a/src/elements/block_element.rs +++ /dev/null @@ -1,53 +0,0 @@ -use crate::*; - -pub enum BlockElement { -    /// A first-level heading. -    DocumentHeading(Line), -    /// A second-level heading. -    SectionHeading(Line), -    /// A third-level heading. 
-    ArticleHeading(Line), -    Paragraph(Line), -    /// A bullet-list. -    List(Vec<Line>), -    /// A paragraph separate from the main text. -    Aside(Vec<Line>), -    Table(Table), -    EmbeddedFile(EmbeddedFile), -    /// A non-markdown sub-document within this document. -    Subdocument(Subdocument), -    /// A KaTeX block -    Math(String), -    Break, -} - -pub struct EmbeddedFile { -    pub label: String, -    pub target: String, -} - -pub struct Subdocument { -    pub language: String, -    pub content: String, -} - -impl std::fmt::Debug for BlockElement { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        let string = match self { -            BlockElement::DocumentHeading(line) => format!("DocumentHeading ('{line}')"), -            BlockElement::SectionHeading(line) => format!("SectionHeading ('{line}')"), -            BlockElement::ArticleHeading(line) => format!("ArticleHeading ('{line}')"), -            BlockElement::Paragraph(line) => format!("Paragraph ('{line}')"), -            BlockElement::List(lines) => format!("List (len: {})", lines.len()), -            BlockElement::Aside(_) => format!("Aside"), -            BlockElement::Table(_) => format!("Table"), -            BlockElement::EmbeddedFile(EmbeddedFile {label, target}) => -                format!("EmbeddedFile (label:'{label}', target:'{target}')"), -            BlockElement::Subdocument(Subdocument {language, ..}) => -                format!("Subdocument ('{language}')"), -            BlockElement::Math(string) => format!("Math ('{string}')"), -            BlockElement::Break => format!("Break"), -        }; -        f.write_str(&string) -    } -} diff --git a/src/elements/line.rs b/src/elements/line.rs deleted file mode 100644 index d5c078e..0000000 --- a/src/elements/line.rs +++ /dev/null @@ -1,117 +0,0 @@ -use crate::*; - -macro_rules! 
opt { -    ($v:expr) => {|s| Some($v(s)) }; -} - -pub struct Line { -    pub elements: Vec<LineElement>, -} - -impl Line { -    pub fn from_str(raw_string: &str) -> Self { -        fn unlabeled_extern_link(target: String) -> Option<LineElement> { -            target.contains("/").then( || -                LineElement::ExternalLink(ExternalLink { target, label:String::new() }) -            ) -        } -        fn labelled_extern_link(s: String) -> Option<LineElement> { -            let (label, target) = match s.split_once("](") { -                Some((l, t)) => (l.to_string(), t.to_string()), -                None => return None }; -            if label.contains("]") || target.contains("]") { return None } -            Some(LineElement::ExternalLink(ExternalLink { label, target })) } -        const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [ -            ( opt!(LineElement::Bold),          "**", "**", "*" ), -            ( opt!(LineElement::Italic),        "_",  "_",  "_" ), -            ( opt!(LineElement::Monospace),     "`",  "`",  "`" ), -            ( opt!(LineElement::Math),          "$",  "$",  "$" ), -            ( opt!(LineElement::InternalLink),  "[[", "]]", "[]" ), -            ( labelled_extern_link,             "[",  ")",  "[]()" ), -            ( unlabeled_extern_link,            "[",  "]",  "[]" ), -        ]; -        let chars: Vec<char> = raw_string.chars().collect(); -        let mut elements = Vec::new(); -        let mut cached_chars = String::new(); -        let mut i = 0; - -        let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); - -        'outer: while let Some(c) = chars.get(i) { -            // Only check for opening delimiters that directly follow a whitespace character. 
-            let follows_whitespace = match chars.get(i.wrapping_sub(1)) { -                Some(w) => is_whitespace(w), -                None => true, -            }; -            if follows_whitespace { -                // Try to parse an opening delimiter. -                for (variant, start_delim, end_delim, delim_chars) in DELIMITERS { -                    // Try to match an opening delimiter with a terminating delimiter. -                    if starts_with(i, start_delim) { -                        let s_end = i + start_delim.chars().count(); -                        let mut e_start = s_end; -                        let mut e_end = e_start + end_delim.chars().count(); -                        while e_end <= chars.len() { -                            e_start += 1; e_end += 1; -                            let end_is_whitespace = -                                if let Some(end_char) = chars.get(e_end) { -                                    is_whitespace(end_char) -                                } else { -                                    e_end == chars.len() -                                }; -                            // If the terminating delimiter is found, store the normal -                            // text and the styled text, and continue to the next character. -                            if end_is_whitespace && starts_with(e_start, end_delim) { -                                // Check that there is content within the styled string. 
-                                let styled_string: String = chars[s_end..e_start].iter().collect(); -                                let non_content_chars: Vec<_> = delim_chars.chars().collect(); -                                if !is_contentful(&styled_string, &non_content_chars) { continue } -                                if styled_string.len() != styled_string.trim().len() { continue } -                                let line_element = match variant(styled_string) { -                                    Some(e) => e, -                                    None => continue, -                                }; -                                // Commit the normal and styled strings. -                                if !cached_chars.is_empty() { -                                    let normal_string = std::mem::take(&mut cached_chars); -                                    elements.push(LineElement::Normal(normal_string)); } -                                elements.push(line_element); -                                i = e_end; -                                continue 'outer; -                            } -                        } -                    } -                } -            } -            cached_chars.push(*c); i += 1; -        } -        if !cached_chars.is_empty() { -            let normal_string = std::mem::take(&mut cached_chars); -            elements.push(LineElement::Normal(normal_string)); } -        Self { elements } -    } - -    /// Return only the character content, with none of the styling information. 
-    pub fn as_plain_text(&self) -> String { -        let mut string = String::new(); -        for line_element in &self.elements { -            string.push_str(line_element.as_plain_text()) } -        return string; -    } -} - -impl std::fmt::Display for Line { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        for line_element in &self.elements { -            write!(f, "{line_element}")?; } -        Ok(()) -    } -} - -impl std::fmt::Debug for Line { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        for line_element in &self.elements { -            write!(f, "{line_element:?}\n")?; } -        Ok(()) -    } -} diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs deleted file mode 100644 index cc47b4b..0000000 --- a/src/elements/line_element.rs +++ /dev/null @@ -1,61 +0,0 @@ -pub enum LineElement { -    Normal(String), -    Bold(String), -    Italic(String), -    Monospace(String), -    Math(String), -    InternalLink(String), -    ExternalLink(ExternalLink), -} - -impl LineElement { -    /// Return only the character content, with none of the styling information. 
-    pub fn as_plain_text(&self) -> &str { -        match self { -            LineElement::Normal(text) => text, -            LineElement::Bold(text) => text, -            LineElement::Italic(text) => text, -            LineElement::Monospace(text) => text, -            LineElement::Math(text) => text, -            LineElement::InternalLink(label) => label, -            LineElement::ExternalLink(ExternalLink { label, ..}) => label, -        } -    } -} - -pub struct ExternalLink { -    pub label: String, -    pub target: String, -} - -impl std::fmt::Display for LineElement { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        let string = match self { -            LineElement::Normal(text) => format!("{text}"), -            LineElement::Bold(text) => format!("**{text}**"), -            LineElement::Italic(text) => format!("_{text}_"), -            LineElement::Monospace(text) => format!("`{text}`"), -            LineElement::Math(text) => format!("${text}$"), -            LineElement::InternalLink(text) => format!("[[{text}]]"), -            LineElement::ExternalLink(ExternalLink { label, target }) => { -                format!("[{label}]({target})") } -        }; -        f.write_str(&string) -    } -} - -impl std::fmt::Debug for LineElement { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        let string = match self { -            LineElement::Normal(text) => format!("Normal ('{text}')"), -            LineElement::Bold(text) => format!("Bold ('{text}')"), -            LineElement::Italic(text) => format!("Italic ('{text}')"), -            LineElement::Monospace(text) => format!("Monospace ('{text}')"), -            LineElement::Math(text) => format!("Math ('{text}')"), -            LineElement::InternalLink(text) => format!("InternalLink ('{text}')"), -            LineElement::ExternalLink(ExternalLink { label, target }) => { -                format!("ExternalLink (label:'{label}',  
target:'{target}')") } -        }; -        f.write_str(&string) -    } -} diff --git a/src/elements/table.rs b/src/elements/table.rs deleted file mode 100644 index 5b354c1..0000000 --- a/src/elements/table.rs +++ /dev/null @@ -1,96 +0,0 @@ -use crate::*; - -pub struct Table { -    /// The column definitions for this table. -    pub column_definitions: Vec<ColumnDefinition>, -    /// The content contained in the rows of the table. An individual [Line] is -    /// the contents of a single table cell, a group of cells forms a table row, -    /// a group of rows forms a vertical section of the table, with a separator -    /// intending to be drawn between each section, and a group of sections forms -    /// the table itself. -    /// Each row in the table is guaranteed to have the same number of columns -    /// as the table header. -    pub sections: Vec<Vec<Vec<Line>>>, -} - -impl Table { -    pub fn try_from_strs(lines: &[&str]) -> Option<Self> { -        let mut lines = lines.into_iter(); -        let column_definitions: Vec<ColumnDefinition> = { -            let names = split_trimmed_columns(lines.next()?)? -                .into_iter().map(|l| Line::from_str(l)); -            let alignments = parse_alignments(lines.next()?)?; -            if names.len() != alignments.len() { return None } -            std::iter::zip(names, alignments).map( -                |(name, alignment)| ColumnDefinition { name, alignment } ).collect() -        }; - -        let mut sections = Vec::new(); -        let mut current_section = Vec::new(); - -        for line in lines { -            if let Some(alignments) = parse_alignments(line) { -                if alignments.len() != column_definitions.len() { return None } -                sections.push(std::mem::take(&mut current_section)) -            } else { -                let row: Vec<Line> = split_trimmed_columns(line)? 
-                    .into_iter().map(|c| Line::from_str(c)).collect(); -                if row.len() != column_definitions.len() { return None } -                current_section.push(row); -            } -        } - -        if !current_section.is_empty() { -            sections.push(std::mem::take(&mut current_section)); } -        Some( Self { column_definitions, sections }) -    } -} - -pub struct ColumnDefinition { -    /// The name of this column, shown in the header row of the table. -    pub name: Line, -    /// The alignment of the content in this column. -    pub alignment: ColumnAlignment, -} - -pub enum ColumnAlignment { -    Left, -    Center, -    Right, -} - -impl ColumnAlignment { -    pub fn from_str(cell: &str) -> Option<Self> { -        if !cell.chars().all(|c| c == ':' || c == '-') { -            return None } -        match (cell.starts_with(':'), cell.ends_with(':')) { -            (false, false) => Some(ColumnAlignment::Left), -            (false, true) => Some(ColumnAlignment::Right), -            (true, false) => Some(ColumnAlignment::Left), -            (true, true) => Some(ColumnAlignment::Center), -        } -    } -} - - -fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> { -    Some(split_columns(line)?.into_iter().map(|s| s.trim()).collect()) -} - -fn split_columns(line: &str) -> Option<Vec<&str>> { -    if let Some(("", tail)) = line.split_once('|') { -        if let Some((head, "")) = tail.rsplit_once('|') { -            return Some(head.split('|').collect()); -        } -    } -    return None; -} - -fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> { -    let mut alignments = Vec::new(); -    for cell in split_columns(line)? 
{ -        alignments.push(ColumnAlignment::from_str(cell)?); -    } -    Some(alignments) -} - @@ -1,13 +1,155 @@ -mod document; -mod elements; +#![feature(never_type)] -pub use document::*; -pub use elements::*; +mod block; pub use block::{Block, Level}; +mod line;  pub use line::Line; +mod token; pub use token::Token; +mod table; pub use table::{Table, Column, Alignment}; -pub(crate) fn is_whitespace(c: &char) -> bool { -    c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) } -pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool { -    s.chars().any(|c| !non_content_chars.contains(&c)) -    && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false) -    && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false) +pub struct MarkdownDocument { +    pub blocks: Vec<Block>, +} + +impl MarkdownDocument { +    pub fn from_str(raw_markdown: &str) -> Self { +        let mut blocks = Vec::new(); +        let mut current_block = None; + +        // Chain a blank line to the end to ensure the final block is flushed. +        for line in raw_markdown.lines().chain(std::iter::once("")) { +            let line_raw = line; +            let line = line.trim(); + +            // Handle a fragment block separately, because fragment lines are not prefixed. +            if let Some(BlockMultiline::Fragment { language, mut content }) = current_block { +                if line == "```" { +                    let language = language.to_string(); +                    let content = content.join("\n"); +                    blocks.push(Block::Fragment { language, content }); +                    current_block = None; +                } else { +                    content.push(line_raw); +                    current_block = Some(BlockMultiline::Fragment { language, content }); +                } +                continue; +            } + +            // Determine line type from prefix. 
+            let line = { +                if let Some(("", tail)) = line.split_once("# ") { +                    BlockLine::Heading { level: Level::Heading1, line: tail.trim() } +                } else if let Some(("", tail)) = line.split_once("## ") { +                    BlockLine::Heading { level: Level::Heading2, line: tail.trim() } +                } else if let Some(("", tail)) = line.split_once("### ") { +                    BlockLine::Heading { level: Level::Heading3, line: tail.trim() } +                } else if let Some(("", tail)) = line.split_once("- ") { +                    BlockLine::List(tail.trim()) +                } else if let Some(("", tail)) = line.split_once("> ") { +                    BlockLine::Note(tail.trim()) +                } else if line == ">" { +                    BlockLine::Note("") +                } else if let Some(("", tail)) = line.split_once("```") { +                    BlockLine::FragmentHeader(tail.trim()) +                } else if line.starts_with("|") { +                    BlockLine::Table(line) +                } else if line.len() >= 3 && line.chars().all(|c| c=='-') { +                    BlockLine::Break +                } else if line.is_empty() { +                    BlockLine::BlankLine +                } else { +                    BlockLine::Paragraph(line) +                } +            }; + +            // If line has the same type as the current block, append and continue. 
+            if let Some(ref mut block) = current_block { +                match (&line, block)  { +                    (BlockLine::List(line), BlockMultiline::List(ref mut lines)) => { +                        lines.push(line); continue; } +                    (BlockLine::Note(line), BlockMultiline::Note(ref mut lines)) => { +                        lines.push(line); continue; } +                    (BlockLine::Table(line), BlockMultiline::Table(ref mut lines)) => { +                        lines.push(line); continue; } +                    _ => (), +                }; +            } + +            // Otherwise commit the current block before handling the new line. +            if let Some(current_block) = std::mem::take(&mut current_block) { +                match current_block { +                    BlockMultiline::List(raw_lines) => { +                        let lines = raw_lines.into_iter().map(Line::from_str).collect(); +                        blocks.push(Block::List(lines)); } +                    BlockMultiline::Note(raw_lines) => { +                        let lines = raw_lines.into_iter().map(Line::from_str).collect(); +                        blocks.push(Block::Note(lines)); } +                    BlockMultiline::Table(raw_lines) => { +                        if let Some(table) = Table::from_strs(&raw_lines) { +                            blocks.push(Block::Table(table)) } +                        else { +                            for raw_line in raw_lines { +                                blocks.push(Block::Paragraph(Line::from_str(&raw_line))) +                            } +                        }} +                    BlockMultiline::Fragment {..} => unreachable!(), +                } +            } + +            // Handle the new line. 
+            match line { +                BlockLine::List(line)  => current_block = Some( +                    BlockMultiline::List(vec![line])), +                BlockLine::Note(line)  => current_block = Some( +                    BlockMultiline::Note(vec![line])), +                BlockLine::Table(line) => current_block = Some( +                    BlockMultiline::Table(vec![line])), +                BlockLine::FragmentHeader(language) => current_block = Some( +                    BlockMultiline::Fragment { language, content: Vec::new() }), +                BlockLine::Heading {level, line} => blocks.push( +                    Block::Heading { level, line: Line::from_str(&line) }), +                BlockLine::Break => blocks.push(Block::Break), +                BlockLine::BlankLine => (), +                BlockLine::Paragraph(line) => match parse_embedded(&line) { +                    Some(embedded) => blocks.push(embedded), +                    None => blocks.push(Block::Paragraph(Line::from_str(&line))), +                } +            } +        } + +        Self { blocks } +    } +} + + + +enum BlockLine<'a> { +    Heading { level: Level, line: &'a str }, +    Paragraph(&'a str), +    List(&'a str), +    Note(&'a str), +    Table(&'a str), +    FragmentHeader(&'a str), +    Break, +    BlankLine, +} + +enum BlockMultiline<'a> { +    List(Vec<&'a str>), +    Note(Vec<&'a str>), +    Table(Vec<&'a str>), +    Fragment { language: &'a str, content: Vec<&'a str> }, +} + +fn parse_embedded(line: &str) -> Option<Block> { +    let line = line.trim(); +    if let Some(("", line)) = line.split_once(".collect(); +            if parts.len() == 2 { +                let label = parts[0].to_string(); +                let path = parts[1].to_string(); +                return Some(Block::Embedded { label, path }) +            } +        } +    } +    return None;  } diff --git a/src/line.rs b/src/line.rs new file mode 100644 index 0000000..fce628c --- /dev/null +++ 
// ---------------------------------------------------------------------------
// src/line.rs
// ---------------------------------------------------------------------------

/// A single line of inline markdown, split into styled [`Token`]s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Line {
    pub tokens: Vec<Token>,
}

impl Line {
    /// Tokenise one line of raw markdown text.
    ///
    /// A styled token is recognised only when all of the following hold:
    /// * the opening delimiter sits at the start of the line or directly
    ///   after a boundary character (see [`is_whitespace`]),
    /// * the closing delimiter sits at the end of the line or directly
    ///   before a boundary character,
    /// * the captured text is non-empty, has no leading or trailing
    ///   whitespace, and neither starts nor ends with one of the
    ///   delimiter's own characters,
    /// * the delimiter's constructor accepts the captured text.
    ///
    /// Everything else is accumulated into [`Token::Normal`] runs.
    pub fn from_str(raw_line: &str) -> Self {
        let chars: Vec<char> = raw_line.chars().collect();
        let mut tokens = Vec::new();
        let mut normal_chars = String::new();
        let mut i = 0;

        'find_token: while let Some(&c) = chars.get(i) {
            // The first character of the line counts as following a boundary.
            let char_follows_whitespace = match i.checked_sub(1) {
                Some(prev) => is_whitespace(&chars[prev]),
                None => true,
            };
            if char_follows_whitespace {
                // Try each delimiter pair in priority order.
                for (variant, start_delim, end_delim, delim_chars) in DELIMITERS {
                    if !starts_with_at(&chars, i, start_delim) {
                        continue;
                    }
                    // Only pay for this allocation once the opener has matched.
                    let delim_chars: Vec<char> = delim_chars.chars().collect();
                    let s_end = i + start_delim.chars().count();
                    let mut e_start = s_end;
                    let mut e_end = e_start + end_delim.chars().count();
                    // Slide the candidate closing delimiter window along the line.
                    // The window is advanced before it is tested, so the captured
                    // text always holds at least one character.
                    while e_end <= chars.len() {
                        e_start += 1;
                        e_end += 1;
                        let followed_by_whitespace = match chars.get(e_end) {
                            Some(end_char) => is_whitespace(end_char),
                            None => e_end == chars.len(),
                        };
                        if !(followed_by_whitespace && starts_with_at(&chars, e_start, end_delim)) {
                            continue;
                        }
                        let captured: String = chars[s_end..e_start].iter().collect();
                        let no_content = !has_content(&captured, &delim_chars);
                        let air_bubbles = captured.len() != captured.trim().len();
                        if no_content || air_bubbles {
                            continue;
                        }
                        let token = match variant(captured) {
                            Some(token) => token,
                            None => continue,
                        };
                        // Commit the preceding normal token, if any.
                        if !normal_chars.is_empty() {
                            tokens.push(Token::Normal(std::mem::take(&mut normal_chars)));
                        }
                        tokens.push(token);
                        i = e_end;
                        continue 'find_token;
                    }
                }
            }
            normal_chars.push(c);
            i += 1;
        }

        if !normal_chars.is_empty() {
            tokens.push(Token::Normal(normal_chars));
        }
        Self { tokens }
    }
}

/// Renders the line as plain text by concatenating the text of every token
/// (for external links this is the label, not the path).
///
/// Implementing `Display` rather than `ToString` keeps `line.to_string()`
/// working while also allowing the line to be used with `format!`/`write!`.
impl std::fmt::Display for Line {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.tokens
            .iter()
            .try_for_each(|token| f.write_str(token.as_ref()))
    }
}

/// Build an external link with an empty label from the text inside `<...>`.
fn unlabeled_extern_link(path: String) -> Option<Token> {
    Some(Token::ExternalLink { path, label: String::new() })
}

/// Build an external link from the text captured between the leading `[` and
/// the trailing `)` of `[label](path)`.
///
/// Returns `None` if the `](` separator is missing, or if either half
/// contains a stray `]`.
fn labelled_extern_link(s: String) -> Option<Token> {
    let (label, path) = s.split_once("](")?;
    if label.contains(']') || path.contains(']') {
        return None;
    }
    Some(Token::ExternalLink { label: label.to_string(), path: path.to_string() })
}

/// Wrap an infallible single-`String` token constructor so that it has the
/// same signature as the fallible link constructors.
macro_rules! con {
    ($v:expr) => {
        |s: String| Some($v(s))
    };
}

/// Constructor that turns the text captured between two delimiters into a token.
type TokenConstructor = fn(String) -> Option<Token>;

/// Delimiter table: `(constructor, opening delimiter, closing delimiter,
/// characters that may not begin or end the captured text)`.
///
/// Order matters: earlier entries are tried first, so `[[` is tested before `[`.
const DELIMITERS: [(TokenConstructor, &str, &str, &str); 7] = [
    ( con!(Token::Bold),          "**", "**", "*" ),
    ( con!(Token::Italic),        "_",  "_",  "_" ),
    ( con!(Token::Monospace),     "`",  "`",  "`" ),
    ( con!(Token::Math),          "$",  "$",  "$" ),
    ( con!(Token::InternalLink),  "[[", "]]", "[]" ),
    ( labelled_extern_link,       "[",  ")",  "[]()" ),
    ( unlabeled_extern_link,      "<",  ">",  "<>" ),
];

/// Returns true if `c` may sit directly outside a delimiter: Unicode
/// whitespace or one of a fixed set of punctuation characters.
fn is_whitespace(c: &char) -> bool {
    c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c)
}

/// Returns true if the whole of `pattern` occurs in `chars` starting at index
/// `at`. Never panics: an out-of-range index or a pattern that runs past the
/// end of `chars` simply yields `false`.
fn starts_with_at(chars: &[char], at: usize, pattern: &str) -> bool {
    let mut rest = match chars.get(at..) {
        Some(rest) => rest.iter(),
        None => return false,
    };
    pattern.chars().all(|p| rest.next() == Some(&p))
}

/// Check that first and last characters of a string are not delimiters.
/// An empty string has no content.
fn has_content(s: &str, delimiter_chars: &[char]) -> bool {
    let not_delim = |c: Option<char>| match c {
        Some(c) => !delimiter_chars.contains(&c),
        None => false,
    };
    not_delim(s.chars().next()) && not_delim(s.chars().next_back())
}

// ---------------------------------------------------------------------------
// src/table.rs
// ---------------------------------------------------------------------------

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Table {
    /// A [Line] is the content of a cell, a group of cells forms a table row,
    /// a group of rows forms a separated section of the table, and a group of
    /// sections forms the table itself.
    /// Each row in the table has the same number of columns as the table header.
    pub sections: Vec<Vec<Vec<Line>>>,
    pub columns: Vec<Column>,
}

impl Table {
    /// Parse a table from its raw lines.
    ///
    /// The first line is the header row and the second is the alignment row;
    /// every following line is either a data row or another alignment-style
    /// row, which closes the current section. Each line must already be
    /// trimmed so that it begins and ends with `|`.
    ///
    /// Returns `None` if the header or alignment row is missing or malformed,
    /// or if any row has a different number of cells than the header.
    pub fn from_strs(lines: &[&str]) -> Option<Self> {
        let mut lines = lines.iter().copied();
        let names = split_cells(lines.next()?)?;
        let alignments = parse_alignments(lines.next()?)?;
        if names.len() != alignments.len() {
            return None;
        }
        let columns: Vec<Column> = names
            .into_iter()
            .zip(alignments)
            .map(|(name, alignment)| Column { name, alignment })
            .collect();

        let mut sections = Vec::new();
        let mut rows = Vec::new();
        for line in lines {
            if let Some(separator) = parse_alignments(line) {
                // A separator row closes the current section.
                if separator.len() != columns.len() {
                    return None;
                }
                sections.push(std::mem::take(&mut rows));
            } else {
                let row = split_cells(line)?;
                if row.len() != columns.len() {
                    return None;
                }
                rows.push(row);
            }
        }
        if !rows.is_empty() {
            sections.push(rows);
        }
        Some(Self { columns, sections })
    }
}

/// A table column: its header cell and the alignment of its cells.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Column {
    pub name: Line,
    pub alignment: Alignment,
}

/// Horizontal alignment of a table column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    Left,
    Center,
    Right,
}

impl Alignment {
    /// Parse one cell of an alignment row, such as `---`, `:--`, `--:` or `:-:`.
    ///
    /// The cell may contain only `-` and `:` and must contain at least one
    /// `-`. Without the hyphen requirement an empty cell would be accepted,
    /// and a data row consisting solely of blank cells would be mistaken for
    /// a section separator.
    pub fn from_str(cell: &str) -> Option<Self> {
        let only_marker_chars = cell.chars().all(|c| c == ':' || c == '-');
        if !only_marker_chars || !cell.contains('-') {
            return None;
        }
        match (cell.starts_with(':'), cell.ends_with(':')) {
            (false, false) => Some(Alignment::Left),
            (false, true ) => Some(Alignment::Right),
            (true,  false) => Some(Alignment::Left),
            (true,  true ) => Some(Alignment::Center),
        }
    }
}

/// Split a `| a | b |` row into trimmed cell strings.
///
/// Returns `None` unless the line both starts and ends with `|`.
fn split_columns(line: &str) -> Option<Vec<&str>> {
    let tail = match line.split_once('|') {
        Some(("", tail)) => tail,
        _ => return None,
    };
    match tail.rsplit_once('|') {
        Some((head, "")) => Some(head.split('|').map(str::trim).collect()),
        _ => None,
    }
}

/// Split a row into cells and tokenise each cell as a [`Line`].
fn split_cells(line: &str) -> Option<Vec<Line>> {
    Some(split_columns(line)?.into_iter().map(Line::from_str).collect())
}

/// Parse a row as an alignment/separator row; `None` if any cell is not a
/// valid alignment marker.
fn parse_alignments(line: &str) -> Option<Vec<Alignment>> {
    split_columns(line)?.into_iter().map(Alignment::from_str).collect()
}

// ---------------------------------------------------------------------------
// src/token.rs
// ---------------------------------------------------------------------------

/// One run of inline content within a [`Line`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    Normal(String),
    Bold(String),
    Italic(String),
    Monospace(String),
    Math(String),
    InternalLink(String),
    ExternalLink { label: String, path: String },
}

/// Borrows the token's display text. For an external link this is the label
/// (which is empty for `<path>`-style links), never the path.
impl AsRef<str> for Token {
    fn as_ref(&self) -> &str {
        match self {
            Token::Normal(text) => text,
            Token::Bold(text) => text,
            Token::Italic(text) => text,
            Token::Monospace(text) => text,
            Token::Math(text) => text,
            Token::InternalLink(label) => label,
            Token::ExternalLink { label, .. } => label,
        }
    }
}
