diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/block.rs | 26 | ||||
| -rw-r--r-- | src/document.rs | 172 | ||||
| -rw-r--r-- | src/elements.rs | 9 | ||||
| -rw-r--r-- | src/elements/block_element.rs | 53 | ||||
| -rw-r--r-- | src/elements/line.rs | 117 | ||||
| -rw-r--r-- | src/elements/line_element.rs | 61 | ||||
| -rw-r--r-- | src/elements/table.rs | 96 | ||||
| -rw-r--r-- | src/lib.rs | 40 | ||||
| -rw-r--r-- | src/main.rs | 37 | ||||
| -rw-r--r-- | src/parse.rs | 283 | ||||
| -rw-r--r-- | src/parse_heirarchical.rs | 137 | ||||
| -rw-r--r-- | src/table.rs | 60 | ||||
| -rw-r--r-- | src/text.rs | 30 | 
13 files changed, 518 insertions, 603 deletions
| diff --git a/src/block.rs b/src/block.rs deleted file mode 100644 index 2a34fcf..0000000 --- a/src/block.rs +++ /dev/null @@ -1,26 +0,0 @@ -use crate::{Line, Table}; - -pub enum Block { -    Heading1(Line), -    Heading2(Line), -    Heading3(Line), -    Paragraph(Line), -    List(Vec<Line>), -    Quote(Vec<Line>), -    Code(String, Vec<String>), -    Table(Table), -} -impl std::fmt::Debug for Block { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        f.write_str(match self { -            Self::Heading1(_) => "Heading1", -            Self::Heading2(_) => "Heading2", -            Self::Heading3(_) => "Heading3", -            Self::Paragraph(_) => "Paragraph", -            Self::List(_) => "List", -            Self::Quote(_) => "Quote", -            Self::Code(_, _) => "Code", -            Self::Table(_) => "Table", -        }) -    } -} diff --git a/src/document.rs b/src/document.rs new file mode 100644 index 0000000..fbfea00 --- /dev/null +++ b/src/document.rs @@ -0,0 +1,172 @@ +use crate::*; + +pub struct MarkdownDocument { +    pub block_elements: Vec<BlockElement>, +} + +impl MarkdownDocument { +    pub fn from_str(raw_markdown: &str) -> Self { +        let mut block_elements = Vec::new(); +        let mut current_multiline_block = None; +        // Chain a blank line to the end to ensure that the final multi-line block is flushed. +        let lines = raw_markdown.lines().chain(std::iter::once("")); + +        for incoming_line in lines { +            let incoming_line_untrimmed = incoming_line; +            let incoming_line = incoming_line.trim(); +            // Handle an in-progress subdocument block. +            if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block { +                if incoming_line == "```" { +                    let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") }; +                    block_elements.push(BlockElement::Subdocument(subdocument)); +                    current_multiline_block = None; +                } else { +                    lines.push(incoming_line_untrimmed); +                    current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines)); +                } +                continue; +            } + +            // Parse the incoming line. +            let incoming_line_block = { +                if let Some(("", tail)) = incoming_line.split_once("# ") { +                    BlockLine::DocumentHeading(tail.trim()) +                } else if let Some(("", tail)) = incoming_line.split_once("## ") { +                    BlockLine::SectionHeading(tail.trim()) +                } else if let Some(("", tail)) = incoming_line.split_once("### ") { +                    BlockLine::ArticleHeading(tail.trim()) +                } else if let Some(("", tail)) = incoming_line.split_once("- ") { +                    BlockLine::List(tail.trim()) +                } else if let Some(("", tail)) = incoming_line.split_once("> ") { +                    BlockLine::Aside(tail.trim()) +                } else if incoming_line == ">" { +                    BlockLine::Aside("") +                } else if let Some(("", tail)) = incoming_line.split_once("```") { +                    BlockLine::SubdocumentHeader(tail.trim()) +                } else if incoming_line.starts_with("|") { +                    BlockLine::Table(incoming_line) +                } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') { +                    BlockLine::Break +                } else if incoming_line.is_empty() { +                    BlockLine::BlankLine +                } else { +                    BlockLine::Paragraph(incoming_line) } +            }; + +            // If the incoming line is of the same type as the current multiline +            // block, append it to the end of that current block and continue. +            if let Some(ref mut current_block) = current_multiline_block { +                match (&incoming_line_block, current_block)  { +                    (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => { +                        lines.push(line); continue; } +                    (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => { +                        lines.push(line); continue; } +                    (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => { +                        lines.push(line); continue; } +                    _ => (), +                }; +            } + +            // Otherwise, commit the current block, then handle the incoming line. +            if let Some(current_block) = current_multiline_block { +                match current_block { +                    MultiLineBlock::List(raw_lines) => { +                        let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); +                        block_elements.push(BlockElement::List(lines)); } +                    MultiLineBlock::Aside(raw_lines) => { +                        let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect(); +                        block_elements.push(BlockElement::Aside(lines)); } +                    MultiLineBlock::Table(raw_lines) => { +                        if let Some(table) = Table::try_from_strs(&raw_lines) { +                            block_elements.push(BlockElement::Table(table)) } +                        else { for raw_line in raw_lines { +                            block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}} +                    MultiLineBlock::Subdocument(..) => unreachable!(), +                } +                current_multiline_block = None; +            } + +            // Handle the incoming line. +            match incoming_line_block { +                BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))), +                BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))), +                BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))), +                BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])), +                BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])), +                BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])), +                BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())), +                BlockLine::Paragraph(s) => { +                    if let Some(embedded_file) = parse_embedded_file(&s) { +                        block_elements.push(BlockElement::EmbeddedFile(embedded_file)) +                    } else if let Some(math) = parse_math_block(&s) { +                        block_elements.push(BlockElement::Math(math)) +                    } else { +                        block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) } +                }, +                BlockLine::Break => block_elements.push(BlockElement::Break), +                BlockLine::BlankLine => (), +            } +        } + +        Self { block_elements } +    } +} + +fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> { +    let chars: Vec<char> = text.trim().chars().collect(); +    let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); + +    if starts_with(0, " { break }; +            label_end += 1; } +        let label: String = chars[label_start..label_end].iter().collect(); +        if label.is_empty() || !is_contentful(&label, &['[', ']']) { +            return None } +        // Try to parse the target. +        let target_start = label_end + 2; +        let target_end = chars.len() - 1; +        if let Some(')') = chars.get(target_end) { +            let target: String = chars[target_start..target_end].iter().collect(); +            if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) { +                return None } +            return Some(EmbeddedFile { label, target }) +        } +    } +    return None; +} + +fn parse_math_block(text: &str) -> Option<String> { +    if let Some(("", trailing)) = text.split_once("$$") { +        if let Some((math, "")) = trailing.rsplit_once("$$") { +            return Some(math.trim().to_string()); +        } +    } +    return None; +} + +/// When parsing, is a single line for a one-line block element. +enum BlockLine<'a> { +    DocumentHeading(&'a str), +    SectionHeading(&'a str), +    ArticleHeading(&'a str), +    Paragraph(&'a str), +    List(&'a str), +    Aside(&'a str), +    Table(&'a str), +    SubdocumentHeader(&'a str), +    Break, +    BlankLine, +} + +/// When parsing, is the gathered string lines of a multiline block element. +enum MultiLineBlock<'a> { +    List(Vec<&'a str>), +    Aside(Vec<&'a str>), +    Table(Vec<&'a str>), +    Subdocument(&'a str, Vec<&'a str>), +} + diff --git a/src/elements.rs b/src/elements.rs new file mode 100644 index 0000000..a4a9783 --- /dev/null +++ b/src/elements.rs @@ -0,0 +1,9 @@ +mod block_element; +mod line; +mod line_element; +mod table; + +pub use block_element::*; +pub use line::*; +pub use line_element::*; +pub use table::*; diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs new file mode 100644 index 0000000..cdb7a71 --- /dev/null +++ b/src/elements/block_element.rs @@ -0,0 +1,53 @@ +use crate::*; + +pub enum BlockElement { +    /// A first-level heading. +    DocumentHeading(Line), +    /// A second-level heading. +    SectionHeading(Line), +    /// A third-level heading. +    ArticleHeading(Line), +    Paragraph(Line), +    /// A bullet-list. +    List(Vec<Line>), +    /// A paragraph separate from the main text. +    Aside(Vec<Line>), +    Table(Table), +    EmbeddedFile(EmbeddedFile), +    /// A non-markdown sub-document within this document. +    Subdocument(Subdocument), +    /// A KaTeX block +    Math(String), +    Break, +} + +pub struct EmbeddedFile { +    pub label: String, +    pub target: String, +} + +pub struct Subdocument { +    pub language: String, +    pub content: String, +} + +impl std::fmt::Debug for BlockElement { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let string = match self { +            BlockElement::DocumentHeading(line) => format!("DocumentHeading ('{line}')"), +            BlockElement::SectionHeading(line) => format!("SectionHeading ('{line}')"), +            BlockElement::ArticleHeading(line) => format!("ArticleHeading ('{line}')"), +            BlockElement::Paragraph(line) => format!("Paragraph ('{line}')"), +            BlockElement::List(lines) => format!("List (len: {})", lines.len()), +            BlockElement::Aside(_) => format!("Aside"), +            BlockElement::Table(_) => format!("Table"), +            BlockElement::EmbeddedFile(EmbeddedFile {label, target}) => +                format!("EmbeddedFile (label:'{label}', target:'{target}')"), +            BlockElement::Subdocument(Subdocument {language, ..}) => +                format!("Subdocument ('{language}')"), +            BlockElement::Math(string) => format!("Math ('{string}')"), +            BlockElement::Break => format!("Break"), +        }; +        f.write_str(&string) +    } +} diff --git a/src/elements/line.rs b/src/elements/line.rs new file mode 100644 index 0000000..d5c078e --- /dev/null +++ b/src/elements/line.rs @@ -0,0 +1,117 @@ +use crate::*; + +macro_rules! opt { +    ($v:expr) => {|s| Some($v(s)) }; +} + +pub struct Line { +    pub elements: Vec<LineElement>, +} + +impl Line { +    pub fn from_str(raw_string: &str) -> Self { +        fn unlabeled_extern_link(target: String) -> Option<LineElement> { +            target.contains("/").then( || +                LineElement::ExternalLink(ExternalLink { target, label:String::new() }) +            ) +        } +        fn labelled_extern_link(s: String) -> Option<LineElement> { +            let (label, target) = match s.split_once("](") { +                Some((l, t)) => (l.to_string(), t.to_string()), +                None => return None }; +            if label.contains("]") || target.contains("]") { return None } +            Some(LineElement::ExternalLink(ExternalLink { label, target })) } +        const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [ +            ( opt!(LineElement::Bold),          "**", "**", "*" ), +            ( opt!(LineElement::Italic),        "_",  "_",  "_" ), +            ( opt!(LineElement::Monospace),     "`",  "`",  "`" ), +            ( opt!(LineElement::Math),          "$",  "$",  "$" ), +            ( opt!(LineElement::InternalLink),  "[[", "]]", "[]" ), +            ( labelled_extern_link,             "[",  ")",  "[]()" ), +            ( unlabeled_extern_link,            "[",  "]",  "[]" ), +        ]; +        let chars: Vec<char> = raw_string.chars().collect(); +        let mut elements = Vec::new(); +        let mut cached_chars = String::new(); +        let mut i = 0; + +        let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b); + +        'outer: while let Some(c) = chars.get(i) { +            // Only check for opening delimiters that directly follow a whitespace character. +            let follows_whitespace = match chars.get(i.wrapping_sub(1)) { +                Some(w) => is_whitespace(w), +                None => true, +            }; +            if follows_whitespace { +                // Try to parse an opening delimiter. +                for (variant, start_delim, end_delim, delim_chars) in DELIMITERS { +                    // Try to match an opening delimiter with a terminating delimiter. +                    if starts_with(i, start_delim) { +                        let s_end = i + start_delim.chars().count(); +                        let mut e_start = s_end; +                        let mut e_end = e_start + end_delim.chars().count(); +                        while e_end <= chars.len() { +                            e_start += 1; e_end += 1; +                            let end_is_whitespace = +                                if let Some(end_char) = chars.get(e_end) { +                                    is_whitespace(end_char) +                                } else { +                                    e_end == chars.len() +                                }; +                            // If the terminating delimiter is found, store the normal +                            // text and the styled text, and continue to the next character. +                            if end_is_whitespace && starts_with(e_start, end_delim) { +                                // Check that there is content within the styled string. +                                let styled_string: String = chars[s_end..e_start].iter().collect(); +                                let non_content_chars: Vec<_> = delim_chars.chars().collect(); +                                if !is_contentful(&styled_string, &non_content_chars) { continue } +                                if styled_string.len() != styled_string.trim().len() { continue } +                                let line_element = match variant(styled_string) { +                                    Some(e) => e, +                                    None => continue, +                                }; +                                // Commit the normal and styled strings. +                                if !cached_chars.is_empty() { +                                    let normal_string = std::mem::take(&mut cached_chars); +                                    elements.push(LineElement::Normal(normal_string)); } +                                elements.push(line_element); +                                i = e_end; +                                continue 'outer; +                            } +                        } +                    } +                } +            } +            cached_chars.push(*c); i += 1; +        } +        if !cached_chars.is_empty() { +            let normal_string = std::mem::take(&mut cached_chars); +            elements.push(LineElement::Normal(normal_string)); } +        Self { elements } +    } + +    /// Return only the character content, with none of the styling information. +    pub fn as_plain_text(&self) -> String { +        let mut string = String::new(); +        for line_element in &self.elements { +            string.push_str(line_element.as_plain_text()) } +        return string; +    } +} + +impl std::fmt::Display for Line { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        for line_element in &self.elements { +            write!(f, "{line_element}")?; } +        Ok(()) +    } +} + +impl std::fmt::Debug for Line { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        for line_element in &self.elements { +            write!(f, "{line_element:?}\n")?; } +        Ok(()) +    } +} diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs new file mode 100644 index 0000000..cc47b4b --- /dev/null +++ b/src/elements/line_element.rs @@ -0,0 +1,61 @@ +pub enum LineElement { +    Normal(String), +    Bold(String), +    Italic(String), +    Monospace(String), +    Math(String), +    InternalLink(String), +    ExternalLink(ExternalLink), +} + +impl LineElement { +    /// Return only the character content, with none of the styling information. +    pub fn as_plain_text(&self) -> &str { +        match self { +            LineElement::Normal(text) => text, +            LineElement::Bold(text) => text, +            LineElement::Italic(text) => text, +            LineElement::Monospace(text) => text, +            LineElement::Math(text) => text, +            LineElement::InternalLink(label) => label, +            LineElement::ExternalLink(ExternalLink { label, ..}) => label, +        } +    } +} + +pub struct ExternalLink { +    pub label: String, +    pub target: String, +} + +impl std::fmt::Display for LineElement { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let string = match self { +            LineElement::Normal(text) => format!("{text}"), +            LineElement::Bold(text) => format!("**{text}**"), +            LineElement::Italic(text) => format!("_{text}_"), +            LineElement::Monospace(text) => format!("`{text}`"), +            LineElement::Math(text) => format!("${text}$"), +            LineElement::InternalLink(text) => format!("[[{text}]]"), +            LineElement::ExternalLink(ExternalLink { label, target }) => { +                format!("[{label}]({target})") } +        }; +        f.write_str(&string) +    } +} + +impl std::fmt::Debug for LineElement { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let string = match self { +            LineElement::Normal(text) => format!("Normal ('{text}')"), +            LineElement::Bold(text) => format!("Bold ('{text}')"), +            LineElement::Italic(text) => format!("Italic ('{text}')"), +            LineElement::Monospace(text) => format!("Monospace ('{text}')"), +            LineElement::Math(text) => format!("Math ('{text}')"), +            LineElement::InternalLink(text) => format!("InternalLink ('{text}')"), +            LineElement::ExternalLink(ExternalLink { label, target }) => { +                format!("ExternalLink (label:'{label}',  target:'{target}')") } +        }; +        f.write_str(&string) +    } +} diff --git a/src/elements/table.rs b/src/elements/table.rs new file mode 100644 index 0000000..5b354c1 --- /dev/null +++ b/src/elements/table.rs @@ -0,0 +1,96 @@ +use crate::*; + +pub struct Table { +    /// The column definitions for this table. +    pub column_definitions: Vec<ColumnDefinition>, +    /// The content contained in the rows of the table. An individual [Line] is +    /// the contents of a single table cell, a group of cells forms a table row, +    /// a group of rows forms a vertical section of the table, with a separator +    /// intending to be drawn between each section, and a group of sections forms +    /// the table itself. +    /// Each row in the table is guaranteed to have the same number of columns +    /// as the table header. +    pub sections: Vec<Vec<Vec<Line>>>, +} + +impl Table { +    pub fn try_from_strs(lines: &[&str]) -> Option<Self> { +        let mut lines = lines.into_iter(); +        let column_definitions: Vec<ColumnDefinition> = { +            let names = split_trimmed_columns(lines.next()?)? +                .into_iter().map(|l| Line::from_str(l)); +            let alignments = parse_alignments(lines.next()?)?; +            if names.len() != alignments.len() { return None } +            std::iter::zip(names, alignments).map( +                |(name, alignment)| ColumnDefinition { name, alignment } ).collect() +        }; + +        let mut sections = Vec::new(); +        let mut current_section = Vec::new(); + +        for line in lines { +            if let Some(alignments) = parse_alignments(line) { +                if alignments.len() != column_definitions.len() { return None } +                sections.push(std::mem::take(&mut current_section)) +            } else { +                let row: Vec<Line> = split_trimmed_columns(line)? +                    .into_iter().map(|c| Line::from_str(c)).collect(); +                if row.len() != column_definitions.len() { return None } +                current_section.push(row); +            } +        } + +        if !current_section.is_empty() { +            sections.push(std::mem::take(&mut current_section)); } +        Some( Self { column_definitions, sections }) +    } +} + +pub struct ColumnDefinition { +    /// The name of this column, shown in the header row of the table. +    pub name: Line, +    /// The alignment of the content in this column. +    pub alignment: ColumnAlignment, +} + +pub enum ColumnAlignment { +    Left, +    Center, +    Right, +} + +impl ColumnAlignment { +    pub fn from_str(cell: &str) -> Option<Self> { +        if !cell.chars().all(|c| c == ':' || c == '-') { +            return None } +        match (cell.starts_with(':'), cell.ends_with(':')) { +            (false, false) => Some(ColumnAlignment::Left), +            (false, true) => Some(ColumnAlignment::Right), +            (true, false) => Some(ColumnAlignment::Left), +            (true, true) => Some(ColumnAlignment::Center), +        } +    } +} + + +fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> { +    Some(split_columns(line)?.into_iter().map(|s| s.trim()).collect()) +} + +fn split_columns(line: &str) -> Option<Vec<&str>> { +    if let Some(("", tail)) = line.split_once('|') { +        if let Some((head, "")) = tail.rsplit_once('|') { +            return Some(head.split('|').collect()); +        } +    } +    return None; +} + +fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> { +    let mut alignments = Vec::new(); +    for cell in split_columns(line)? { +        alignments.push(ColumnAlignment::from_str(cell)?); +    } +    Some(alignments) +} + @@ -1,33 +1,13 @@ -#![feature(iter_zip)] +mod document; +mod elements; -mod block; -mod parse; -mod parse_heirarchical; -mod table; -mod text; +pub use document::*; +pub use elements::*; -pub use block::Block; -pub use parse::parse; -pub use parse_heirarchical::parse_heirarchical; -pub use table::{Alignment, Column, Table}; -pub use text::{Hyperlink, Text}; - -pub type Line = Vec<Text>; - -pub fn line_to_string(line: &[Text]) -> String { -    let mut output = String::new(); -    for text in line { -        match text { -            Text::Normal(content) => output.push_str(&content), -            Text::Bold(content) => output.push_str(&format!("**{}**", content)), -            Text::Italic(content) => output.push_str(&format!("_{}_", content)), -            Text::BoldItalic(content) => output.push_str(&format!("**_{}_**", content)), -            Text::Code(content) => output.push_str(&format!("`{}`", content)), -            Text::WikiLink(content) => output.push_str(&format!("[[{}]]", content)), -            Text::Hyperlink(Hyperlink { label, target }) => { -                output.push_str(&format!("[{}]({})", label, target)) -            } -        } -    } -    return output; +pub(crate) fn is_whitespace(c: &char) -> bool { +    c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) } +pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool { +    s.chars().any(|c| !non_content_chars.contains(&c)) +    && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false) +    && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false)  } diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 8cc2bfa..0000000 --- a/src/main.rs +++ /dev/null @@ -1,37 +0,0 @@ -use markdown_parser::parse; - -pub fn main() { -    // let markdown = std::fs::read_to_string("/home/ben/markdown_test.md").unwrap(); -    let markdown = " -This _is_ a **test** paragraph. -[This](http://www.google.com) is a regular full-length link. -|A|this is **middle** col|CC| -|-|:-:|---:| -|A||| -"; -    let document = parse(&markdown); -    for node in document { -        println!("{:?}", node); -        if let markdown_parser::Block::Paragraph(blocks) = node { -            for block in blocks { -                println!("  {:?}", block); -            } -        } else if let markdown_parser::Block::List(lines) = node { -            for line in lines { -                println!("-"); -                for block in line { -                    println!("  {:?}", block); -                } -            } -        } else if let markdown_parser::Block::Table(table) = node { -            for column in table.columns { -                print!("  {:?}: ", column.alignment); -                for block in column.name { -                    print!("{:?} ", block); -                } -                println!(); -            } -            println!(); -        } -    } -} diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index 6e4cdd9..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,283 +0,0 @@ -use crate::*; - -pub fn parse(markdown: &str) -> Vec<Block> { -    let mut document = Vec::new(); -    let lines: Vec<&str> = markdown.lines().map(|l| l.trim_start()).collect(); -    let mut i = 0; - -    // Gather all consecutive lines that begin with a given substring and run a -    // function over them. The function must be `fn(&[&str])->Result<Block,()>`. -    macro_rules! gather { -        ($prefix:expr, $func:ident) => {{ -            let start = i; -            for line in &lines[i..] { -                if line.starts_with($prefix) { -                    i += 1; -                    continue; -                } -                break; -            } -            let gathered_lines = &lines[start..i]; -            match gathered_lines.is_empty() { -                false => $func(gathered_lines), -                true => Err(()), -            } -        }}; -    } - -    loop { -        let line = match lines.get(i) { -            Some(line) => line, -            None => return document, -        }; -        if line.is_empty() { -            i += 1; -            continue; -        } else if let Ok(heading) = parse_heading(line) { -            document.push(heading); -            i += 1; -        } else if let Ok(quote) = gather!(">", parse_quote) { -            document.push(quote); -        } else if let Ok(list) = gather!("- ", parse_list) { -            document.push(list); -        } else if let Ok(table) = gather!("|", parse_table) { -            document.push(table); -        } else if line.starts_with("```") { -            let language = line[3..].to_string(); -            let mut code_lines = Vec::new(); -            i += 1; -            for line in &lines[i..] { -                match line.trim() == "```" { -                    true => break, -                    false => { -                        code_lines.push(line.to_string()); -                        i += 1 -                    } -                } -            } -            document.push(Block::Code(language, code_lines)); -            i += 1; -        } else { -            document.push(parse_paragraph(line)); -            i += 1; -        }; -    } -} - -/// Returns the substring from `chars` that is between the `start` and `end` -/// delimiters. Returns None if `chars` does not start with `start`, or if an -/// occurance of `start` and `end` cannot be found within `chars`. There must -/// not be a space after the occurance of `start` or before the occurance of -/// `end`. If `start` and `end` consist of just one or more of the same -/// character, the content must contain at least one other character than -/// that one. -fn capture(chars: &[char], start: &str, end: &str) -> Option<String> { -    // Determine if `pattern` contains only a single unique character -    let single_char_in_pattern = match start.chars().next() { -        Some(first_char) => { -            let start_and_end = start.chars().chain(end.chars()); -            start_and_end.fold(Some(first_char), |accum, elem| match accum { -                Some(c) if c == elem => accum, -                _ => None, -            }) -        } -        None => None, -    }; -    let is_space = |i: usize| chars.get(i) == Some(&' '); -    fn starts_with_pattern(chars: &[char], pattern: &str) -> bool { -        let mut i = 0; -        for ref c in pattern.chars() { -            match chars.get(i) { -                Some(v) if v == c => i += 1, -                _ => return false, -            } -        } -        true -    } -    if !starts_with_pattern(chars, start) { -        return None; -    } -    let text_start = start.len(); -    if is_space(text_start) { -        return None; -    }; -    let mut i = text_start; -    loop { -        i += 1; -        if chars.get(i).is_none() { -            return None; -        } -        if starts_with_pattern(&chars[i..], end) { -            if is_space(i - 1) { -                continue; -            } -            let text_content: String = chars[text_start..i].iter().collect(); -            match single_char_in_pattern { -                None => return Some(text_content), -                Some(c) => { -                    if text_content.chars().any(|e| e != c) { -                        return Some(text_content); -                    } -                } -            }; -        } -    } -} - -fn parse_text(line: &str) -> Line { -    let mut block_content: Line = Vec::new(); -    let chars: Vec<char> = line.chars().collect(); -    let mut normal = String::new(); -    let mut i = 0; - -    macro_rules! commit_normal { -        () => { -            if !normal.is_empty() { -                let normal_text = Text::Normal(std::mem::take(&mut normal)); -                block_content.push(normal_text); -            } -        }; -    } -    let patterns: [(&str, &str, fn(String) -> Text); 7] = [ -        ("***", "***", Text::BoldItalic), -        ("**", "**", Text::Bold), -        ("*", "*", Text::Italic), -        ("___", "___", Text::BoldItalic), -        ("__", "__", Text::Bold), -        ("_", "_", Text::Italic), -        ("`", "`", Text::Code), -    ]; - -    'outer: loop { -        // Check if a simple, non-Normal text type starts at this character -        for (start, end, text_type) in patterns.iter() { -            if let Some(string) = capture(&chars[i..], start, end) { -                i += string.len() + start.len() + end.len(); -                commit_normal!(); -                block_content.push(text_type(string)); -                continue 'outer; -            } -        } -        // Check if a wiki-style hyperlink starts at this character -        if let Some(content) = capture(&chars[i..], "[[", "]]") { -            i += content.len() + 4; -            commit_normal!(); -            block_content.push(Text::WikiLink(content)); -            continue 'outer; -        } - -        // Check if a long-form hyperlink starts at this character -        if let Some(label) = capture(&chars[i..], "[", "]") { -            let target_len = label.len() + 2; -            if let Some(target) = capture(&chars[i + target_len..], "(", ")") { -                i += target_len + target.len() + 2; -                commit_normal!(); -                block_content.push(Text::Hyperlink(Hyperlink { label, target })) -            } -        } - -        // No new text type started here, this must just be normal text -        match chars.get(i) { -            Some(c) => { -                normal.push(*c); -                i += 1; -            } -            None => { -                commit_normal!(); -                break; -            } -        } -    } -    return block_content; -} - -fn parse_heading(line: &str) -> Result<Block, ()> { -    let (heading_type, content): (fn(Line) -> Block, &str) = if line.starts_with("# ") { -        (Block::Heading1, &line[2..]) -    } else if line.starts_with("## ") { -        (Block::Heading2, &line[3..]) -    } else if line.starts_with("### ") { -        (Block::Heading3, &line[4..]) -    } else { -        return Err(()); -    }; -    if content.is_empty() { -        return Err(()); -    }; -    Ok(heading_type(parse_text(content))) -} - -/// Accepts a slice of lines that begin with '>' -fn parse_quote(lines: &[&str]) -> Result<Block, ()> { -    let mut content = Vec::new(); -    for line in lines { -        content.push(if *line == ">" { -            Vec::new() -        } else { -            parse_text(&line[2..]) -        }); -    } -    Ok(Block::Quote(content)) -} - -fn parse_list(lines: &[&str]) -> Result<Block, ()> { -    Ok(Block::List( -        lines.iter().map(|l| parse_text(&l[2..])).collect(), -    )) -} - -fn parse_paragraph(line: &str) -> Block { -    Block::Paragraph(parse_text(line)) -} - -fn parse_table(lines: &[&str]) -> Result<Block, ()> { -    if lines.len() < 3 { -        return Err(()); -    } -    let names = split_columns(lines[0])?; -    let dividers = split_columns(lines[1])?; -    if names.len() != dividers.len() { -        return Err(()); -    } -    let mut columns = Vec::new(); -    for (name, divider) in std::iter::zip(names.iter(), dividers.iter()) { -        let alignment = Alignment::from_str(divider)?; -        columns.push(Column { -            name: parse_text(name), -            alignment, -        }) -    } -    let mut rows = Vec::new(); -    for row in &lines[2..] { -        let split_row: Vec<Line> = split_columns(row)?.iter().map(|s| parse_text(s)).collect(); -        if split_row.len() != columns.len() { -            return Err(()); -        } -        rows.push(split_row); -    } -    Ok(Block::Table(Table { columns, rows })) -} - -fn split_columns(line: &str) -> Result<Vec<String>, ()> { -    // Find the index after the first |, and before the last | -    let mut start = None; -    let mut end = None; -    for (i, c) in line.chars().enumerate() { -        if c == '|' { -            if start.is_none() { -                start = Some(i + 1); -            } else { -                end = Some(i); -            } -        } -    } -    match (start, end) { -        (Some(s), Some(e)) => { -            let chars: Vec<char> = line.chars().collect(); -            let string: String = chars[s..e].iter().collect(); -            let split = string.split('|'); -            Ok(split.map(|s| s.trim().to_string()).collect()) -        } -        _ => Err(()), -    } -} diff --git a/src/parse_heirarchical.rs b/src/parse_heirarchical.rs deleted file mode 100644 index 75c2bec..0000000 --- a/src/parse_heirarchical.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::*; - -macro_rules! get_subsection { -    ($t:ident) => { -        pub fn get_subsection(&self, name: &str) -> Option<&$t> { -            for section in &self.sections { -                if line_to_string(§ion.title) == name { -                    return Some(section); -                } -            } -            return None; -        } -    }; -} - -#[derive(Default)] -pub struct Document { -    pub preamble: Vec<Block>, -    pub sections: Vec<TopLevelSection>, -} -impl Document { -    get_subsection! {TopLevelSection} -} - -#[derive(Default)] -pub struct TopLevelSection { -    pub title: Line, -    pub content: Vec<Block>, -    pub sections: Vec<MidLevelSection>, -} -impl TopLevelSection { -    get_subsection! {MidLevelSection} -} - -#[derive(Default)] -pub struct MidLevelSection { -    pub title: Line, -    pub content: Vec<Block>, -    pub sections: Vec<LowLevelSection>, -} -impl MidLevelSection { -    get_subsection! {LowLevelSection} -} - -#[derive(Default)] -pub struct LowLevelSection { -    pub title: Line, -    pub content: Vec<Block>, -} - -pub fn parse_heirarchical(markdown: &str) -> Result<Document, ()> { -    macro_rules! push_section { -        ($from:ident => $to:ident) => { -            $to.sections.push(std::mem::take(&mut $from)) -        }; -    } -    let mut document = Document::default(); -    let mut h1_buffer = TopLevelSection::default(); -    let mut h2_buffer = MidLevelSection::default(); -    let mut h3_buffer = LowLevelSection::default(); -    let mut level = 0; - -    let blocks = parse(markdown); -    for block in blocks { -        match (level, block) { -            (0, Block::Heading1(title)) => { -                h1_buffer.title = title; -                level = 1; -            } -            (0, Block::Heading2(_)) => return Err(()), -            (0, Block::Heading3(_)) => return Err(()), -            (0, block) => document.preamble.push(block), -            (1, Block::Heading1(title)) => { -                push_section!(h1_buffer => document); -                h1_buffer.title = title; -            } -            (1, Block::Heading2(title)) => { -                h2_buffer.title = title; -                level = 2; -            } -            (1, Block::Heading3(_)) => return Err(()), -            (1, block) => h1_buffer.content.push(block), -            (2, Block::Heading1(title)) => { -                push_section!(h2_buffer => h1_buffer); -                push_section!(h1_buffer => document); -                h1_buffer.title = title; -                level = 1; -            } -            (2, Block::Heading2(title)) => { -                push_section!(h2_buffer => h1_buffer); -                h2_buffer.title = title; -            } -            (2, Block::Heading3(title)) => { -                h3_buffer.title = title; -                level = 3; -            } -            (2, block) => h2_buffer.content.push(block), -            (3, Block::Heading1(title)) => { -                push_section!(h3_buffer => h2_buffer); -                push_section!(h2_buffer => h1_buffer); -                push_section!(h1_buffer => document); -                h1_buffer.title = title; -                level = 1; -            } -            (3, Block::Heading2(title)) => { -                push_section!(h3_buffer => h2_buffer); -                push_section!(h2_buffer => h1_buffer); -                h2_buffer.title = title; -                level = 2; -            } -            (3, Block::Heading3(title)) => { -                push_section!(h3_buffer => h2_buffer); -                h3_buffer.title = title; -            } -            (3, block) => h3_buffer.content.push(block), -            _ => unreachable!(), -        } -    } - -    // Push all in-progress sections -    match level { -        3 => { -            push_section!(h3_buffer => h2_buffer); -            push_section!(h2_buffer => h1_buffer); -            push_section!(h1_buffer => document); -        } -        2 => { -            push_section!(h2_buffer => h1_buffer); -            push_section!(h1_buffer => document); -        } -        1 => { -            push_section!(h1_buffer => document); -        } -        _ => (), -    } -    Ok(document) -} diff --git a/src/table.rs b/src/table.rs deleted file mode 100644 index cc01ffc..0000000 --- a/src/table.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::Line; - -pub struct Table { -    pub columns: Vec<Column>, -    pub rows: Vec<Vec<Line>>, -} - -pub struct Column { -    pub name: Line, -    pub alignment: Alignment, -} - -pub enum Alignment { -    Left, -    Center, -    Right, -} -impl Alignment { -    pub fn from_str(s: &str) -> Result<Self, ()> { -        let mut start = false; -        let mut end = false; -        for (i, c) in s.chars().enumerate() { -            if c == ':' { -                if i == 0 { -                    start = true; -                } else if i == s.len() - 1 { -                    end = true; -                } else { -                    return Err(()); -                } -            } else if c != '-' { -                return Err(()); -            } -        } -        Ok(match (start, end) { -            (false, false) => Self::Left, -            (true, false) => Self::Left, -            (false, true) => Self::Right, -            (true, true) => Self::Center, -        }) -    } -} -impl std::fmt::Display for Alignment { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        f.write_str(match self { -            Self::Left => "left", -            Self::Center => "center", -            Self::Right => "right", -        }) -    } -} -impl std::fmt::Debug for Alignment { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        f.write_str(match self { -            Self::Left => "Left", -            Self::Center => "Center", -            Self::Right => "Right", -        }) -    } -} diff --git a/src/text.rs b/src/text.rs deleted file mode 100644 index e9dbdeb..0000000 --- a/src/text.rs +++ /dev/null @@ -1,30 +0,0 @@ -pub enum Text { -    Normal(String), -    Bold(String), -    Italic(String), -    BoldItalic(String), -    Code(String), -    WikiLink(String), -    Hyperlink(Hyperlink), -} -impl std::fmt::Debug for Text { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        let string = match self { -            Text::Normal(text) => format!("Normal ('{}')", text), -            Text::Bold(text) => format!("Bold ('{}')", text), -            Text::Italic(text) => format!("Italic ('{}')", text), -            Text::BoldItalic(text) => format!("BoldItalic ('{}')", text), -            Text::Code(text) => format!("Code ('{}')", text), -            Text::WikiLink(text) => format!("WikiLink ('{}')", text), -            Text::Hyperlink(Hyperlink { label, target }) => { -                format!("Hyperlink (label:'{}',  target:'{}')", label, target) -            } -        }; -        f.write_str(&string) -    } -} - -pub struct Hyperlink { -    pub label: String, -    pub target: String, -} | 
