summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ben Bridle <bridle.benjamin@gmail.com>, 2024-04-21 13:57:03 +1200
committer: Ben Bridle <bridle.benjamin@gmail.com>, 2024-04-21 13:57:36 +1200
commit: 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (patch)
tree: e9e52ed33d5ed5a4d68a1161c3db5c2d8c38dd42
parent: 54f5e9fd883e207931baa9c87b6181ca724d6bab (diff)
download: markdown-13cb719b87bcef41c4dd398f5a651ddb2b561e0d.zip
Completely rewrite the library (v1.0.0)
-rw-r--r-- src/block.rs | 26
-rw-r--r-- src/document.rs | 172
-rw-r--r-- src/elements.rs | 9
-rw-r--r-- src/elements/block_element.rs | 53
-rw-r--r-- src/elements/line.rs | 117
-rw-r--r-- src/elements/line_element.rs | 61
-rw-r--r-- src/elements/table.rs | 96
-rw-r--r-- src/lib.rs | 44
-rw-r--r-- src/main.rs | 37
-rw-r--r-- src/parse.rs | 283
-rw-r--r-- src/parse_heirarchical.rs | 137
-rw-r--r-- src/table.rs | 60
-rw-r--r-- src/text.rs | 30
13 files changed, 520 insertions, 605 deletions
diff --git a/src/block.rs b/src/block.rs
deleted file mode 100644
index 2a34fcf..0000000
--- a/src/block.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-use crate::{Line, Table};
-
-pub enum Block {
- Heading1(Line),
- Heading2(Line),
- Heading3(Line),
- Paragraph(Line),
- List(Vec<Line>),
- Quote(Vec<Line>),
- Code(String, Vec<String>),
- Table(Table),
-}
-impl std::fmt::Debug for Block {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- f.write_str(match self {
- Self::Heading1(_) => "Heading1",
- Self::Heading2(_) => "Heading2",
- Self::Heading3(_) => "Heading3",
- Self::Paragraph(_) => "Paragraph",
- Self::List(_) => "List",
- Self::Quote(_) => "Quote",
- Self::Code(_, _) => "Code",
- Self::Table(_) => "Table",
- })
- }
-}
diff --git a/src/document.rs b/src/document.rs
new file mode 100644
index 0000000..fbfea00
--- /dev/null
+++ b/src/document.rs
@@ -0,0 +1,172 @@
+use crate::*;
+
+/// A parsed markdown document, held as a flat sequence of block elements.
+pub struct MarkdownDocument {
+ /// The block elements of the document, in the order in which
+ /// [MarkdownDocument::from_str] produced them.
+ pub block_elements: Vec<BlockElement>,
+}
+
+impl MarkdownDocument {
+    /// Parse a markdown string into a flat list of block elements.
+    ///
+    /// The input is processed one line at a time. Each line is trimmed and
+    /// classified by its prefix (heading, list item, aside, table row, fence,
+    /// break, blank, or paragraph). Consecutive list, aside, and table lines
+    /// are gathered into a single block; every other kind of line becomes its
+    /// own block, and blank lines produce nothing.
+    ///
+    /// Lines between an opening ``` fence and a closing ``` line are kept
+    /// untrimmed and joined with newlines into a [Subdocument]; the text after
+    /// the opening fence becomes its language.
+    ///
+    /// Parsing never fails: a run of `|` lines that does not form a valid
+    /// table falls back to one paragraph per line, and a fence that is never
+    /// closed extends to the end of the input.
+    pub fn from_str(raw_markdown: &str) -> Self {
+        let mut block_elements = Vec::new();
+        let mut current_multiline_block = None;
+        // Chain a blank line to the end so that a trailing list, aside, or table
+        // is committed by the ordinary "line of a different kind" path below.
+        let all_lines = raw_markdown.lines().chain(std::iter::once(""));
+
+        for incoming_line_untrimmed in all_lines {
+            let incoming_line = incoming_line_untrimmed.trim();
+
+            // Handle an in-progress subdocument block. Nothing inside a fence is
+            // classified; only a bare closing fence ends it.
+            if let Some(MultiLineBlock::Subdocument(language, fenced_lines)) =
+                &mut current_multiline_block
+            {
+                if incoming_line == "```" {
+                    let subdocument = Subdocument {
+                        language: language.to_string(),
+                        content: fenced_lines.join("\n"),
+                    };
+                    block_elements.push(BlockElement::Subdocument(subdocument));
+                    current_multiline_block = None;
+                } else {
+                    fenced_lines.push(incoming_line_untrimmed);
+                }
+                continue;
+            }
+
+            // Parse the incoming line. The order of the checks matters: list
+            // items (`- `) are tested before breaks (`---`), and the exact
+            // heading prefixes are tested before the paragraph fallback.
+            let incoming_line_block = if let Some(tail) = incoming_line.strip_prefix("# ") {
+                BlockLine::DocumentHeading(tail.trim())
+            } else if let Some(tail) = incoming_line.strip_prefix("## ") {
+                BlockLine::SectionHeading(tail.trim())
+            } else if let Some(tail) = incoming_line.strip_prefix("### ") {
+                BlockLine::ArticleHeading(tail.trim())
+            } else if let Some(tail) = incoming_line.strip_prefix("- ") {
+                BlockLine::List(tail.trim())
+            } else if let Some(tail) = incoming_line.strip_prefix("> ") {
+                BlockLine::Aside(tail.trim())
+            } else if incoming_line == ">" {
+                BlockLine::Aside("")
+            } else if let Some(tail) = incoming_line.strip_prefix("```") {
+                BlockLine::SubdocumentHeader(tail.trim())
+            } else if incoming_line.starts_with('|') {
+                BlockLine::Table(incoming_line)
+            } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c == '-') {
+                BlockLine::Break
+            } else if incoming_line.is_empty() {
+                BlockLine::BlankLine
+            } else {
+                BlockLine::Paragraph(incoming_line)
+            };
+
+            // If the incoming line is of the same type as the current multiline
+            // block, append it to the end of that current block and continue.
+            if let Some(current_block) = &mut current_multiline_block {
+                match (&incoming_line_block, current_block) {
+                    (BlockLine::List(line), MultiLineBlock::List(gathered)) => {
+                        gathered.push(*line);
+                        continue;
+                    }
+                    (BlockLine::Aside(line), MultiLineBlock::Aside(gathered)) => {
+                        gathered.push(*line);
+                        continue;
+                    }
+                    (BlockLine::Table(line), MultiLineBlock::Table(gathered)) => {
+                        gathered.push(*line);
+                        continue;
+                    }
+                    _ => (),
+                }
+            }
+
+            // Otherwise, commit the current block, then handle the incoming line.
+            if let Some(finished_block) = current_multiline_block.take() {
+                match finished_block {
+                    MultiLineBlock::List(raw_lines) => {
+                        let items = raw_lines.into_iter().map(Line::from_str).collect();
+                        block_elements.push(BlockElement::List(items));
+                    }
+                    MultiLineBlock::Aside(raw_lines) => {
+                        let items = raw_lines.into_iter().map(Line::from_str).collect();
+                        block_elements.push(BlockElement::Aside(items));
+                    }
+                    MultiLineBlock::Table(raw_lines) => match Table::try_from_strs(&raw_lines) {
+                        Some(table) => block_elements.push(BlockElement::Table(table)),
+                        // Not a well-formed table: keep the text rather than drop it.
+                        None => block_elements.extend(
+                            raw_lines
+                                .into_iter()
+                                .map(|raw_line| BlockElement::Paragraph(Line::from_str(raw_line))),
+                        ),
+                    },
+                    // Open subdocuments are fully handled at the top of the loop.
+                    MultiLineBlock::Subdocument(..) => unreachable!(),
+                }
+            }
+
+            // Handle the incoming line.
+            match incoming_line_block {
+                BlockLine::DocumentHeading(s) => {
+                    block_elements.push(BlockElement::DocumentHeading(Line::from_str(s)))
+                }
+                BlockLine::SectionHeading(s) => {
+                    block_elements.push(BlockElement::SectionHeading(Line::from_str(s)))
+                }
+                BlockLine::ArticleHeading(s) => {
+                    block_elements.push(BlockElement::ArticleHeading(Line::from_str(s)))
+                }
+                BlockLine::List(s) => {
+                    current_multiline_block = Some(MultiLineBlock::List(vec![s]))
+                }
+                BlockLine::Aside(s) => {
+                    current_multiline_block = Some(MultiLineBlock::Aside(vec![s]))
+                }
+                BlockLine::Table(s) => {
+                    current_multiline_block = Some(MultiLineBlock::Table(vec![s]))
+                }
+                BlockLine::SubdocumentHeader(s) => {
+                    current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new()))
+                }
+                BlockLine::Paragraph(s) => {
+                    if let Some(embedded_file) = parse_embedded_file(s) {
+                        block_elements.push(BlockElement::EmbeddedFile(embedded_file))
+                    } else if let Some(math) = parse_math_block(s) {
+                        block_elements.push(BlockElement::Math(math))
+                    } else {
+                        block_elements.push(BlockElement::Paragraph(Line::from_str(s)))
+                    }
+                }
+                BlockLine::Break => block_elements.push(BlockElement::Break),
+                BlockLine::BlankLine => (),
+            }
+        }
+
+        // The sentinel blank line commits lists, asides, and tables, but inside
+        // a fence it is just another content line. A fence that was never
+        // closed is therefore still open here; emit it instead of silently
+        // discarding everything after the opening fence.
+        if let Some(MultiLineBlock::Subdocument(language, mut fenced_lines)) =
+            current_multiline_block
+        {
+            // The last gathered line is the sentinel, not part of the input.
+            fenced_lines.pop();
+            let subdocument = Subdocument {
+                language: language.to_string(),
+                content: fenced_lines.join("\n"),
+            };
+            block_elements.push(BlockElement::Subdocument(subdocument));
+        }
+
+        Self { block_elements }
+    }
+}
+
+/// Parse a paragraph line of the form `![label](target)` as an embedded file.
+///
+/// The whole trimmed line must be the embed: it has to start with `![` and
+/// end with `)`. The label runs up to the first `](`, and the target is
+/// everything between that `](` and the final `)`.
+///
+/// Returns `None` if the line does not have this shape, if the label or the
+/// target is empty, if the label starts or ends with a square bracket, if the
+/// target starts or ends with a parenthesis, or if the target contains `)`.
+fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> {
+    // Slicing by whole-pattern prefix/suffix means a line that is shorter than
+    // the delimiters (such as a lone `!`) is rejected instead of being indexed
+    // out of range.
+    let after_open = text.trim().strip_prefix("![")?;
+    let (label, after_label) = after_open.split_once("](")?;
+    if label.is_empty() || !is_contentful(label, &['[', ']']) {
+        return None;
+    }
+    // Try to parse the target.
+    let target = after_label.strip_suffix(')')?;
+    if target.is_empty() || target.contains(')') || !is_contentful(target, &['(', ')']) {
+        return None;
+    }
+    Some(EmbeddedFile {
+        label: label.to_string(),
+        target: target.to_string(),
+    })
+}
+
+/// Extract the body of a one-line display-math block.
+///
+/// The text must begin with `$$` and, once that opening marker is removed,
+/// end with `$$`. Returns the text between the two markers with surrounding
+/// whitespace trimmed, or `None` if either marker is missing.
+fn parse_math_block(text: &str) -> Option<String> {
+    let after_open = text.strip_prefix("$$")?;
+    let body = after_open.strip_suffix("$$")?;
+    Some(body.trim().to_string())
+}
+
+/// The classification of a single trimmed input line during parsing.
+///
+/// Variants that carry a string borrow it from the input. For headings, list
+/// items, asides, and subdocument headers the string is the text after the
+/// line's prefix; for paragraphs and table rows it is the whole trimmed line.
+enum BlockLine<'a> {
+ DocumentHeading(&'a str),
+ SectionHeading(&'a str),
+ ArticleHeading(&'a str),
+ Paragraph(&'a str),
+ List(&'a str),
+ Aside(&'a str),
+ Table(&'a str),
+ /// The opening line of a fenced subdocument; holds the text after the fence.
+ SubdocumentHeader(&'a str),
+ Break,
+ BlankLine,
+}
+
+/// The raw lines gathered so far, during parsing, for a block element that
+/// spans more than one input line. All strings are borrowed from the input.
+enum MultiLineBlock<'a> {
+ List(Vec<&'a str>),
+ Aside(Vec<&'a str>),
+ Table(Vec<&'a str>),
+ /// The text that followed the opening fence, then the untrimmed lines
+ /// gathered inside the fence.
+ Subdocument(&'a str, Vec<&'a str>),
+}
+
diff --git a/src/elements.rs b/src/elements.rs
new file mode 100644
index 0000000..a4a9783
--- /dev/null
+++ b/src/elements.rs
@@ -0,0 +1,9 @@
+mod block_element;
+mod line;
+mod line_element;
+mod table;
+
+pub use block_element::*;
+pub use line::*;
+pub use line_element::*;
+pub use table::*;
diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs
new file mode 100644
index 0000000..cdb7a71
--- /dev/null
+++ b/src/elements/block_element.rs
@@ -0,0 +1,53 @@
+use crate::*;
+
+/// A block-level element of a markdown document.
+pub enum BlockElement {
+ /// A first-level heading.
+ DocumentHeading(Line),
+ /// A second-level heading.
+ SectionHeading(Line),
+ /// A third-level heading.
+ ArticleHeading(Line),
+ /// A single line of ordinary text.
+ Paragraph(Line),
+ /// A bullet-list.
+ List(Vec<Line>),
+ /// A paragraph separate from the main text.
+ Aside(Vec<Line>),
+ /// A table with a header row and sectioned body rows.
+ Table(Table),
+ /// A reference to a file embedded in the document.
+ EmbeddedFile(EmbeddedFile),
+ /// A non-markdown sub-document within this document.
+ Subdocument(Subdocument),
+ /// A KaTeX block
+ Math(String),
+ /// A break between parts of the document.
+ Break,
+}
+
+/// A file embedded in the document, described by a label and a target.
+pub struct EmbeddedFile {
+ /// The label text given for the file.
+ pub label: String,
+ /// The target given for the file. NOTE(review): presumably a path or URL;
+ /// this type does not interpret it.
+ pub target: String,
+}
+
+/// A non-markdown sub-document, stored as unparsed text.
+pub struct Subdocument {
+ /// The language name attached to the sub-document; may be empty.
+ pub language: String,
+ /// The raw content of the sub-document.
+ pub content: String,
+}
+
+impl std::fmt::Debug for BlockElement {
+    /// Write a one-line summary of the element: the variant name, followed
+    /// for some variants by a short description of the payload.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        match self {
+            Self::DocumentHeading(line) => write!(f, "DocumentHeading ('{line}')"),
+            Self::SectionHeading(line) => write!(f, "SectionHeading ('{line}')"),
+            Self::ArticleHeading(line) => write!(f, "ArticleHeading ('{line}')"),
+            Self::Paragraph(line) => write!(f, "Paragraph ('{line}')"),
+            Self::List(lines) => write!(f, "List (len: {})", lines.len()),
+            Self::Aside(_) => f.write_str("Aside"),
+            Self::Table(_) => f.write_str("Table"),
+            Self::EmbeddedFile(EmbeddedFile { label, target }) => {
+                write!(f, "EmbeddedFile (label:'{label}', target:'{target}')")
+            }
+            Self::Subdocument(Subdocument { language, .. }) => {
+                write!(f, "Subdocument ('{language}')")
+            }
+            Self::Math(string) => write!(f, "Math ('{string}')"),
+            Self::Break => f.write_str("Break"),
+        }
+    }
+}
diff --git a/src/elements/line.rs b/src/elements/line.rs
new file mode 100644
index 0000000..d5c078e
--- /dev/null
+++ b/src/elements/line.rs
@@ -0,0 +1,117 @@
+use crate::*;
+
+/// Wrap a constructor expression in a closure that calls it on its argument
+/// and returns the result inside `Some`, so that an infallible constructor
+/// can be used where a function returning an `Option` is expected.
+macro_rules! opt {
+ ($v:expr) => {|s| Some($v(s)) };
+}
+
+/// A single line of inline content, held as a sequence of line elements.
+pub struct Line {
+ /// The elements of the line, in the order they appear in the text.
+ pub elements: Vec<LineElement>,
+}
+
+impl Line {
+ /// Parse a string of inline markdown into a [Line].
+ ///
+ /// The string is scanned one character at a time. Wherever a character
+ /// follows a boundary character (see `is_whitespace`) or starts the string,
+ /// each entry of `DELIMITERS` is tried in order; the first one that yields
+ /// an element wins. Everything that is not part of a styled element is
+ /// collected into [LineElement::Normal] elements.
+ pub fn from_str(raw_string: &str) -> Self {
+ // Build a link with an empty label from the text between `[` and `]`,
+ // but only if that text contains a `/`.
+ fn unlabeled_extern_link(target: String) -> Option<LineElement> {
+ target.contains("/").then( ||
+ LineElement::ExternalLink(ExternalLink { target, label:String::new() })
+ )
+ }
+ // Build a link from the text between the `[` and `)` of `[label](target)`.
+ // The text is split at the first `](`; the link is rejected if there is
+ // no `](`, or if either half contains a `]`.
+ fn labelled_extern_link(s: String) -> Option<LineElement> {
+ let (label, target) = match s.split_once("](") {
+ Some((l, t)) => (l.to_string(), t.to_string()),
+ None => return None };
+ if label.contains("]") || target.contains("]") { return None }
+ Some(LineElement::ExternalLink(ExternalLink { label, target })) }
+ // Each entry is: the constructor for the element, the opening delimiter,
+ // the closing delimiter, and the characters that do not count as content
+ // at either end of the delimited text. Entries are tried in this order,
+ // so `[[` is tried before `[`.
+ const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [
+ ( opt!(LineElement::Bold), "**", "**", "*" ),
+ ( opt!(LineElement::Italic), "_", "_", "_" ),
+ ( opt!(LineElement::Monospace), "`", "`", "`" ),
+ ( opt!(LineElement::Math), "$", "$", "$" ),
+ ( opt!(LineElement::InternalLink), "[[", "]]", "[]" ),
+ ( labelled_extern_link, "[", ")", "[]()" ),
+ ( unlabeled_extern_link, "[", "]", "[]" ),
+ ];
+ let chars: Vec<char> = raw_string.chars().collect();
+ let mut elements = Vec::new();
+ // Unstyled characters seen since the last committed element.
+ let mut cached_chars = String::new();
+ let mut i = 0;
+
+ // NOTE(review): `zip` stops at the shorter input, so this also returns
+ // true when fewer characters than `p` remain and all of them match. The
+ // loop bounds below appear to keep such a truncated match from producing
+ // an element, but confirm before reusing this closure elsewhere.
+ let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b);
+
+ 'outer: while let Some(c) = chars.get(i) {
+ // Only check for opening delimiters that directly follow a whitespace character.
+ // At `i == 0` the wrapped index is out of range, so `get` returns `None`
+ // and the start of the string counts as following whitespace.
+ let follows_whitespace = match chars.get(i.wrapping_sub(1)) {
+ Some(w) => is_whitespace(w),
+ None => true,
+ };
+ if follows_whitespace {
+ // Try to parse an opening delimiter.
+ for (variant, start_delim, end_delim, delim_chars) in DELIMITERS {
+ // Try to match an opening delimiter with a terminating delimiter.
+ if starts_with(i, start_delim) {
+ // `s_end` is the index just past the opening delimiter; `e_start`
+ // and `e_end` bracket the candidate closing delimiter.
+ let s_end = i + start_delim.chars().count();
+ let mut e_start = s_end;
+ let mut e_end = e_start + end_delim.chars().count();
+ while e_end <= chars.len() {
+ // Advance before testing, so the delimited text is never empty.
+ e_start += 1; e_end += 1;
+ // The closing delimiter must be followed by a boundary character
+ // or by the end of the string.
+ let end_is_whitespace =
+ if let Some(end_char) = chars.get(e_end) {
+ is_whitespace(end_char)
+ } else {
+ e_end == chars.len()
+ };
+ // If the terminating delimiter is found, store the normal
+ // text and the styled text, and continue to the next character.
+ if end_is_whitespace && starts_with(e_start, end_delim) {
+ // Check that there is content within the styled string.
+ // Each `continue` below rejects this candidate and keeps
+ // searching for a later closing delimiter.
+ let styled_string: String = chars[s_end..e_start].iter().collect();
+ let non_content_chars: Vec<_> = delim_chars.chars().collect();
+ if !is_contentful(&styled_string, &non_content_chars) { continue }
+ // Reject text with leading or trailing whitespace.
+ if styled_string.len() != styled_string.trim().len() { continue }
+ let line_element = match variant(styled_string) {
+ Some(e) => e,
+ None => continue,
+ };
+ // Commit the normal and styled strings.
+ if !cached_chars.is_empty() {
+ let normal_string = std::mem::take(&mut cached_chars);
+ elements.push(LineElement::Normal(normal_string)); }
+ elements.push(line_element);
+ i = e_end;
+ continue 'outer;
+ }
+ }
+ }
+ }
+ }
+ // No styled element starts here; treat the character as normal text.
+ cached_chars.push(*c); i += 1;
+ }
+ // Commit any normal text left over at the end of the string.
+ if !cached_chars.is_empty() {
+ let normal_string = std::mem::take(&mut cached_chars);
+ elements.push(LineElement::Normal(normal_string)); }
+ Self { elements }
+ }
+
+ /// Return only the character content, with none of the styling information.
+ pub fn as_plain_text(&self) -> String {
+ let mut string = String::new();
+ for line_element in &self.elements {
+ string.push_str(line_element.as_plain_text()) }
+ return string;
+ }
+}
+
+impl std::fmt::Display for Line {
+    /// Write every element of the line in order, with nothing in between.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        self.elements
+            .iter()
+            .try_for_each(|element| write!(f, "{element}"))
+    }
+}
+
+impl std::fmt::Debug for Line {
+    /// Write the debug form of every element, one element per line.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        self.elements
+            .iter()
+            .try_for_each(|element| writeln!(f, "{element:?}"))
+    }
+}
diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs
new file mode 100644
index 0000000..cc47b4b
--- /dev/null
+++ b/src/elements/line_element.rs
@@ -0,0 +1,61 @@
+/// A run of inline content within a line, tagged with its styling.
+pub enum LineElement {
+ /// Unstyled text.
+ Normal(String),
+ /// Bold text, displayed as `**text**`.
+ Bold(String),
+ /// Italic text, displayed as `_text_`.
+ Italic(String),
+ /// Monospace text, displayed between backticks.
+ Monospace(String),
+ /// Inline math, displayed as `$text$`.
+ Math(String),
+ /// An internal link, displayed as `[[text]]`.
+ InternalLink(String),
+ /// An external link, displayed as `[label](target)`.
+ ExternalLink(ExternalLink),
+}
+
+impl LineElement {
+    /// Return the text of this element without any styling markers. For an
+    /// external link this is the label, not the target.
+    pub fn as_plain_text(&self) -> &str {
+        match self {
+            Self::Normal(text)
+            | Self::Bold(text)
+            | Self::Italic(text)
+            | Self::Monospace(text)
+            | Self::Math(text)
+            | Self::InternalLink(text) => text,
+            Self::ExternalLink(link) => &link.label,
+        }
+    }
+}
+
+/// An external link, displayed as `[label](target)`.
+pub struct ExternalLink {
+ /// The text of the link; this is what `as_plain_text` returns for the link.
+ pub label: String,
+ /// The target of the link. NOTE(review): presumably a URL or path; this
+ /// type does not interpret it.
+ pub target: String,
+}
+
+impl std::fmt::Display for LineElement {
+    /// Write the element back out in markdown form, wrapping the text in the
+    /// delimiters that belong to its variant.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        match self {
+            Self::Normal(text) => write!(f, "{text}"),
+            Self::Bold(text) => write!(f, "**{text}**"),
+            Self::Italic(text) => write!(f, "_{text}_"),
+            Self::Monospace(text) => write!(f, "`{text}`"),
+            Self::Math(text) => write!(f, "${text}$"),
+            Self::InternalLink(text) => write!(f, "[[{text}]]"),
+            Self::ExternalLink(ExternalLink { label, target }) => {
+                write!(f, "[{label}]({target})")
+            }
+        }
+    }
+}
+
+impl std::fmt::Debug for LineElement {
+    /// Write the variant name followed by the element's text in quotes.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        match self {
+            Self::Normal(text) => write!(f, "Normal ('{text}')"),
+            Self::Bold(text) => write!(f, "Bold ('{text}')"),
+            Self::Italic(text) => write!(f, "Italic ('{text}')"),
+            Self::Monospace(text) => write!(f, "Monospace ('{text}')"),
+            Self::Math(text) => write!(f, "Math ('{text}')"),
+            Self::InternalLink(text) => write!(f, "InternalLink ('{text}')"),
+            Self::ExternalLink(ExternalLink { label, target }) => {
+                write!(f, "ExternalLink (label:'{label}', target:'{target}')")
+            }
+        }
+    }
+}
diff --git a/src/elements/table.rs b/src/elements/table.rs
new file mode 100644
index 0000000..5b354c1
--- /dev/null
+++ b/src/elements/table.rs
@@ -0,0 +1,96 @@
+use crate::*;
+
+/// A table: a header of column definitions, followed by rows of cells that
+/// are grouped into sections.
+pub struct Table {
+ /// The column definitions for this table.
+ pub column_definitions: Vec<ColumnDefinition>,
+ /// The content contained in the rows of the table. An individual [Line] is
+ /// the contents of a single table cell, a group of cells forms a table row,
+ /// a group of rows forms a vertical section of the table, with a separator
+ /// intended to be drawn between each section, and a group of sections forms
+ /// the table itself.
+ /// In a table built by [Table::try_from_strs], each row has the same number
+ /// of columns as the table header.
+ pub sections: Vec<Vec<Vec<Line>>>,
+}
+
+impl Table {
+    /// Build a table from its raw lines.
+    ///
+    /// The first line holds the column names and the second line holds the
+    /// column alignments. Each later line is either another alignment row,
+    /// which closes the current section, or a row of cells, which is added
+    /// to the current section.
+    ///
+    /// Returns `None` if there are fewer than two lines, if any line cannot
+    /// be split into columns, or if any row has a different number of
+    /// columns than the header.
+    pub fn try_from_strs(lines: &[&str]) -> Option<Self> {
+        let mut remaining = lines.iter().copied();
+
+        let header_cells = split_trimmed_columns(remaining.next()?)?;
+        let header_alignments = parse_alignments(remaining.next()?)?;
+        if header_cells.len() != header_alignments.len() {
+            return None;
+        }
+        let column_definitions: Vec<ColumnDefinition> = header_cells
+            .into_iter()
+            .zip(header_alignments)
+            .map(|(cell, alignment)| ColumnDefinition {
+                name: Line::from_str(cell),
+                alignment,
+            })
+            .collect();
+        let column_count = column_definitions.len();
+
+        let mut sections = Vec::new();
+        let mut current_section = Vec::new();
+        for line in remaining {
+            match parse_alignments(line) {
+                // An alignment row in the body acts as a section separator.
+                Some(alignments) => {
+                    if alignments.len() != column_count {
+                        return None;
+                    }
+                    sections.push(std::mem::take(&mut current_section));
+                }
+                None => {
+                    let row: Vec<Line> = split_trimmed_columns(line)?
+                        .into_iter()
+                        .map(Line::from_str)
+                        .collect();
+                    if row.len() != column_count {
+                        return None;
+                    }
+                    current_section.push(row);
+                }
+            }
+        }
+
+        // The final section has no separator after it.
+        if !current_section.is_empty() {
+            sections.push(current_section);
+        }
+        Some(Self { column_definitions, sections })
+    }
+}
+
+/// The definition of one table column: its header name and its alignment.
+pub struct ColumnDefinition {
+ /// The name of this column, shown in the header row of the table.
+ pub name: Line,
+ /// The alignment of the content in this column.
+ pub alignment: ColumnAlignment,
+}
+
+/// The horizontal alignment of the content of a table column.
+pub enum ColumnAlignment {
+    Left,
+    Center,
+    Right,
+}
+
+impl ColumnAlignment {
+    /// Parse one cell of a table alignment row, such as `---`, `:-:` or `--:`.
+    ///
+    /// The cell may contain only `:` and `-` characters; anything else,
+    /// including surrounding spaces, gives `None`. A cell that ends with `:`
+    /// is centered if it also starts with `:`, and right-aligned otherwise.
+    /// Every other cell, including the empty string, is left-aligned.
+    pub fn from_str(cell: &str) -> Option<Self> {
+        if cell.chars().any(|c| !matches!(c, ':' | '-')) {
+            return None;
+        }
+        let alignment = if !cell.ends_with(':') {
+            Self::Left
+        } else if cell.starts_with(':') {
+            Self::Center
+        } else {
+            Self::Right
+        };
+        Some(alignment)
+    }
+}
+
+
+/// Split a table line into its cells, as `split_columns` does, and remove the
+/// surrounding whitespace from every cell. Returns `None` when the line cannot
+/// be split into columns.
+fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> {
+    let cells = split_columns(line)?;
+    Some(cells.into_iter().map(str::trim).collect())
+}
+
+/// Split a table line into its raw, untrimmed cells.
+///
+/// The line must start with `|` and, after that first `|` is removed, end
+/// with `|`; the text between those two outer bars is split at every `|`.
+/// Returns `None` if either outer bar is missing.
+fn split_columns(line: &str) -> Option<Vec<&str>> {
+    let inner = line.strip_prefix('|')?.strip_suffix('|')?;
+    Some(inner.split('|').collect())
+}
+
+/// Parse a table line as a row of column alignments.
+///
+/// Returns `None` if the line cannot be split into columns or if any cell is
+/// not a valid alignment cell.
+///
+/// NOTE(review): the cells are not trimmed before they are parsed, so a row
+/// written with padding, such as `| --- | --- |`, is not recognised as an
+/// alignment row. Confirm whether that is intended.
+fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> {
+    split_columns(line)?
+        .into_iter()
+        .map(ColumnAlignment::from_str)
+        .collect()
+}
+
diff --git a/src/lib.rs b/src/lib.rs
index c0b8c84..c70ce77 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,33 +1,13 @@
-#![feature(iter_zip)]
-
-mod block;
-mod parse;
-mod parse_heirarchical;
-mod table;
-mod text;
-
-pub use block::Block;
-pub use parse::parse;
-pub use parse_heirarchical::parse_heirarchical;
-pub use table::{Alignment, Column, Table};
-pub use text::{Hyperlink, Text};
-
-pub type Line = Vec<Text>;
-
-pub fn line_to_string(line: &[Text]) -> String {
- let mut output = String::new();
- for text in line {
- match text {
- Text::Normal(content) => output.push_str(&content),
- Text::Bold(content) => output.push_str(&format!("**{}**", content)),
- Text::Italic(content) => output.push_str(&format!("_{}_", content)),
- Text::BoldItalic(content) => output.push_str(&format!("**_{}_**", content)),
- Text::Code(content) => output.push_str(&format!("`{}`", content)),
- Text::WikiLink(content) => output.push_str(&format!("[[{}]]", content)),
- Text::Hyperlink(Hyperlink { label, target }) => {
- output.push_str(&format!("[{}]({})", label, target))
- }
- }
- }
- return output;
+mod document;
+mod elements;
+
+pub use document::*;
+pub use elements::*;
+
+/// Report whether `c` is a boundary character: either Unicode whitespace or
+/// one of a fixed set of punctuation characters. Despite the name, this is
+/// broader than whitespace alone.
+pub(crate) fn is_whitespace(c: &char) -> bool {
+    const BOUNDARY_PUNCTUATION: &str = r#".,'"“”_:;-/\()[]{}?"#;
+    c.is_whitespace() || BOUNDARY_PUNCTUATION.chars().any(|p| p == *c)
+}
+/// Report whether `s` is non-empty and both its first and its last character
+/// are content, meaning that neither appears in `non_content_chars`.
+pub(crate) fn is_contentful(s: &str, non_content_chars: &[char]) -> bool {
+    let is_content = |c: char| !non_content_chars.contains(&c);
+    match (s.chars().next(), s.chars().next_back()) {
+        (Some(first), Some(last)) => is_content(first) && is_content(last),
+        _ => false,
+    }
 }
diff --git a/src/main.rs b/src/main.rs
deleted file mode 100644
index 8cc2bfa..0000000
--- a/src/main.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-use markdown_parser::parse;
-
-pub fn main() {
- // let markdown = std::fs::read_to_string("/home/ben/markdown_test.md").unwrap();
- let markdown = "
-This _is_ a **test** paragraph.
-[This](http://www.google.com) is a regular full-length link.
-|A|this is **middle** col|CC|
-|-|:-:|---:|
-|A|||
-";
- let document = parse(&markdown);
- for node in document {
- println!("{:?}", node);
- if let markdown_parser::Block::Paragraph(blocks) = node {
- for block in blocks {
- println!(" {:?}", block);
- }
- } else if let markdown_parser::Block::List(lines) = node {
- for line in lines {
- println!("-");
- for block in line {
- println!(" {:?}", block);
- }
- }
- } else if let markdown_parser::Block::Table(table) = node {
- for column in table.columns {
- print!(" {:?}: ", column.alignment);
- for block in column.name {
- print!("{:?} ", block);
- }
- println!();
- }
- println!();
- }
- }
-}
diff --git a/src/parse.rs b/src/parse.rs
deleted file mode 100644
index 6e4cdd9..0000000
--- a/src/parse.rs
+++ /dev/null
@@ -1,283 +0,0 @@
-use crate::*;
-
-pub fn parse(markdown: &str) -> Vec<Block> {
- let mut document = Vec::new();
- let lines: Vec<&str> = markdown.lines().map(|l| l.trim_start()).collect();
- let mut i = 0;
-
- // Gather all consecutive lines that begin with a given substring and run a
- // function over them. The function must be `fn(&[&str])->Result<Block,()>`.
- macro_rules! gather {
- ($prefix:expr, $func:ident) => {{
- let start = i;
- for line in &lines[i..] {
- if line.starts_with($prefix) {
- i += 1;
- continue;
- }
- break;
- }
- let gathered_lines = &lines[start..i];
- match gathered_lines.is_empty() {
- false => $func(gathered_lines),
- true => Err(()),
- }
- }};
- }
-
- loop {
- let line = match lines.get(i) {
- Some(line) => line,
- None => return document,
- };
- if line.is_empty() {
- i += 1;
- continue;
- } else if let Ok(heading) = parse_heading(line) {
- document.push(heading);
- i += 1;
- } else if let Ok(quote) = gather!(">", parse_quote) {
- document.push(quote);
- } else if let Ok(list) = gather!("- ", parse_list) {
- document.push(list);
- } else if let Ok(table) = gather!("|", parse_table) {
- document.push(table);
- } else if line.starts_with("```") {
- let language = line[3..].to_string();
- let mut code_lines = Vec::new();
- i += 1;
- for line in &lines[i..] {
- match line.trim() == "```" {
- true => break,
- false => {
- code_lines.push(line.to_string());
- i += 1
- }
- }
- }
- document.push(Block::Code(language, code_lines));
- i += 1;
- } else {
- document.push(parse_paragraph(line));
- i += 1;
- };
- }
-}
-
-/// Returns the substring from `chars` that is between the `start` and `end`
-/// delimiters. Returns None if `chars` does not start with `start`, or if an
-/// occurance of `start` and `end` cannot be found within `chars`. There must
-/// not be a space after the occurance of `start` or before the occurance of
-/// `end`. If `start` and `end` consist of just one or more of the same
-/// character, the content must contain at least one other character than
-/// that one.
-fn capture(chars: &[char], start: &str, end: &str) -> Option<String> {
- // Determine if `pattern` contains only a single unique character
- let single_char_in_pattern = match start.chars().next() {
- Some(first_char) => {
- let start_and_end = start.chars().chain(end.chars());
- start_and_end.fold(Some(first_char), |accum, elem| match accum {
- Some(c) if c == elem => accum,
- _ => None,
- })
- }
- None => None,
- };
- let is_space = |i: usize| chars.get(i) == Some(&' ');
- fn starts_with_pattern(chars: &[char], pattern: &str) -> bool {
- let mut i = 0;
- for ref c in pattern.chars() {
- match chars.get(i) {
- Some(v) if v == c => i += 1,
- _ => return false,
- }
- }
- true
- }
- if !starts_with_pattern(chars, start) {
- return None;
- }
- let text_start = start.len();
- if is_space(text_start) {
- return None;
- };
- let mut i = text_start;
- loop {
- i += 1;
- if chars.get(i).is_none() {
- return None;
- }
- if starts_with_pattern(&chars[i..], end) {
- if is_space(i - 1) {
- continue;
- }
- let text_content: String = chars[text_start..i].iter().collect();
- match single_char_in_pattern {
- None => return Some(text_content),
- Some(c) => {
- if text_content.chars().any(|e| e != c) {
- return Some(text_content);
- }
- }
- };
- }
- }
-}
-
-fn parse_text(line: &str) -> Line {
- let mut block_content: Line = Vec::new();
- let chars: Vec<char> = line.chars().collect();
- let mut normal = String::new();
- let mut i = 0;
-
- macro_rules! commit_normal {
- () => {
- if !normal.is_empty() {
- let normal_text = Text::Normal(std::mem::take(&mut normal));
- block_content.push(normal_text);
- }
- };
- }
- let patterns: [(&str, &str, fn(String) -> Text); 7] = [
- ("***", "***", Text::BoldItalic),
- ("**", "**", Text::Bold),
- ("*", "*", Text::Italic),
- ("___", "___", Text::BoldItalic),
- ("__", "__", Text::Bold),
- ("_", "_", Text::Italic),
- ("`", "`", Text::Code),
- ];
-
- 'outer: loop {
- // Check if a simple, non-Normal text type starts at this character
- for (start, end, text_type) in patterns.iter() {
- if let Some(string) = capture(&chars[i..], start, end) {
- i += string.len() + start.len() + end.len();
- commit_normal!();
- block_content.push(text_type(string));
- continue 'outer;
- }
- }
- // Check if a wiki-style hyperlink starts at this character
- if let Some(content) = capture(&chars[i..], "[[", "]]") {
- i += content.len() + 4;
- commit_normal!();
- block_content.push(Text::WikiLink(content));
- continue 'outer;
- }
-
- // Check if a long-form hyperlink starts at this character
- if let Some(label) = capture(&chars[i..], "[", "]") {
- let target_len = label.len() + 2;
- if let Some(target) = capture(&chars[i + target_len..], "(", ")") {
- i += target_len + target.len() + 2;
- commit_normal!();
- block_content.push(Text::Hyperlink(Hyperlink { label, target }))
- }
- }
-
- // No new text type started here, this must just be normal text
- match chars.get(i) {
- Some(c) => {
- normal.push(*c);
- i += 1;
- }
- None => {
- commit_normal!();
- break;
- }
- }
- }
- return block_content;
-}
-
-fn parse_heading(line: &str) -> Result<Block, ()> {
- let (heading_type, content): (fn(Line) -> Block, &str) = if line.starts_with("# ") {
- (Block::Heading1, &line[2..])
- } else if line.starts_with("## ") {
- (Block::Heading2, &line[3..])
- } else if line.starts_with("### ") {
- (Block::Heading3, &line[4..])
- } else {
- return Err(());
- };
- if content.is_empty() {
- return Err(());
- };
- Ok(heading_type(parse_text(content)))
-}
-
-/// Accepts a slice of lines that begin with '>'
-fn parse_quote(lines: &[&str]) -> Result<Block, ()> {
- let mut content = Vec::new();
- for line in lines {
- content.push(if *line == ">" {
- Vec::new()
- } else {
- parse_text(&line[2..])
- });
- }
- Ok(Block::Quote(content))
-}
-
-fn parse_list(lines: &[&str]) -> Result<Block, ()> {
- Ok(Block::List(
- lines.iter().map(|l| parse_text(&l[2..])).collect(),
- ))
-}
-
-fn parse_paragraph(line: &str) -> Block {
- Block::Paragraph(parse_text(line))
-}
-
-fn parse_table(lines: &[&str]) -> Result<Block, ()> {
- if lines.len() < 3 {
- return Err(());
- }
- let names = split_columns(lines[0])?;
- let dividers = split_columns(lines[1])?;
- if names.len() != dividers.len() {
- return Err(());
- }
- let mut columns = Vec::new();
- for (name, divider) in std::iter::zip(names.iter(), dividers.iter()) {
- let alignment = Alignment::from_str(divider)?;
- columns.push(Column {
- name: parse_text(name),
- alignment,
- })
- }
- let mut rows = Vec::new();
- for row in &lines[2..] {
- let split_row: Vec<Line> = split_columns(row)?.iter().map(|s| parse_text(s)).collect();
- if split_row.len() != columns.len() {
- return Err(());
- }
- rows.push(split_row);
- }
- Ok(Block::Table(Table { columns, rows }))
-}
-
-fn split_columns(line: &str) -> Result<Vec<String>, ()> {
- // Find the index after the first |, and before the last |
- let mut start = None;
- let mut end = None;
- for (i, c) in line.chars().enumerate() {
- if c == '|' {
- if start.is_none() {
- start = Some(i + 1);
- } else {
- end = Some(i);
- }
- }
- }
- match (start, end) {
- (Some(s), Some(e)) => {
- let chars: Vec<char> = line.chars().collect();
- let string: String = chars[s..e].iter().collect();
- let split = string.split('|');
- Ok(split.map(|s| s.trim().to_string()).collect())
- }
- _ => Err(()),
- }
-}
diff --git a/src/parse_heirarchical.rs b/src/parse_heirarchical.rs
deleted file mode 100644
index 75c2bec..0000000
--- a/src/parse_heirarchical.rs
+++ /dev/null
@@ -1,137 +0,0 @@
-use crate::*;
-
-macro_rules! get_subsection {
- ($t:ident) => {
- pub fn get_subsection(&self, name: &str) -> Option<&$t> {
- for section in &self.sections {
- if line_to_string(&section.title) == name {
- return Some(section);
- }
- }
- return None;
- }
- };
-}
-
-#[derive(Default)]
-pub struct Document {
- pub preamble: Vec<Block>,
- pub sections: Vec<TopLevelSection>,
-}
-impl Document {
- get_subsection! {TopLevelSection}
-}
-
-#[derive(Default)]
-pub struct TopLevelSection {
- pub title: Line,
- pub content: Vec<Block>,
- pub sections: Vec<MidLevelSection>,
-}
-impl TopLevelSection {
- get_subsection! {MidLevelSection}
-}
-
-#[derive(Default)]
-pub struct MidLevelSection {
- pub title: Line,
- pub content: Vec<Block>,
- pub sections: Vec<LowLevelSection>,
-}
-impl MidLevelSection {
- get_subsection! {LowLevelSection}
-}
-
-#[derive(Default)]
-pub struct LowLevelSection {
- pub title: Line,
- pub content: Vec<Block>,
-}
-
-pub fn parse_heirarchical(markdown: &str) -> Result<Document, ()> {
- macro_rules! push_section {
- ($from:ident => $to:ident) => {
- $to.sections.push(std::mem::take(&mut $from))
- };
- }
- let mut document = Document::default();
- let mut h1_buffer = TopLevelSection::default();
- let mut h2_buffer = MidLevelSection::default();
- let mut h3_buffer = LowLevelSection::default();
- let mut level = 0;
-
- let blocks = parse(markdown);
- for block in blocks {
- match (level, block) {
- (0, Block::Heading1(title)) => {
- h1_buffer.title = title;
- level = 1;
- }
- (0, Block::Heading2(_)) => return Err(()),
- (0, Block::Heading3(_)) => return Err(()),
- (0, block) => document.preamble.push(block),
- (1, Block::Heading1(title)) => {
- push_section!(h1_buffer => document);
- h1_buffer.title = title;
- }
- (1, Block::Heading2(title)) => {
- h2_buffer.title = title;
- level = 2;
- }
- (1, Block::Heading3(_)) => return Err(()),
- (1, block) => h1_buffer.content.push(block),
- (2, Block::Heading1(title)) => {
- push_section!(h2_buffer => h1_buffer);
- push_section!(h1_buffer => document);
- h1_buffer.title = title;
- level = 1;
- }
- (2, Block::Heading2(title)) => {
- push_section!(h2_buffer => h1_buffer);
- h2_buffer.title = title;
- }
- (2, Block::Heading3(title)) => {
- h3_buffer.title = title;
- level = 3;
- }
- (2, block) => h2_buffer.content.push(block),
- (3, Block::Heading1(title)) => {
- push_section!(h3_buffer => h2_buffer);
- push_section!(h2_buffer => h1_buffer);
- push_section!(h1_buffer => document);
- h1_buffer.title = title;
- level = 1;
- }
- (3, Block::Heading2(title)) => {
- push_section!(h3_buffer => h2_buffer);
- push_section!(h2_buffer => h1_buffer);
- h2_buffer.title = title;
- level = 2;
- }
- (3, Block::Heading3(title)) => {
- push_section!(h3_buffer => h2_buffer);
- h3_buffer.title = title;
- }
- (3, block) => h3_buffer.content.push(block),
- _ => unreachable!(),
- }
- }
-
- // Push all in-progress sections
- match level {
- 3 => {
- push_section!(h3_buffer => h2_buffer);
- push_section!(h2_buffer => h1_buffer);
- push_section!(h1_buffer => document);
- }
- 2 => {
- push_section!(h2_buffer => h1_buffer);
- push_section!(h1_buffer => document);
- }
- 1 => {
- push_section!(h1_buffer => document);
- }
- _ => (),
- }
- Ok(document)
-}
diff --git a/src/table.rs b/src/table.rs
deleted file mode 100644
index cc01ffc..0000000
--- a/src/table.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-use crate::Line;
-
-pub struct Table {
- pub columns: Vec<Column>,
- pub rows: Vec<Vec<Line>>,
-}
-
-pub struct Column {
- pub name: Line,
- pub alignment: Alignment,
-}
-
-pub enum Alignment {
- Left,
- Center,
- Right,
-}
-impl Alignment {
- pub fn from_str(s: &str) -> Result<Self, ()> {
- let mut start = false;
- let mut end = false;
- for (i, c) in s.chars().enumerate() {
- if c == ':' {
- if i == 0 {
- start = true;
- } else if i == s.len() - 1 {
- end = true;
- } else {
- return Err(());
- }
- } else if c != '-' {
- return Err(());
- }
- }
- Ok(match (start, end) {
- (false, false) => Self::Left,
- (true, false) => Self::Left,
- (false, true) => Self::Right,
- (true, true) => Self::Center,
- })
- }
-}
-impl std::fmt::Display for Alignment {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- f.write_str(match self {
- Self::Left => "left",
- Self::Center => "center",
- Self::Right => "right",
- })
- }
-}
-impl std::fmt::Debug for Alignment {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- f.write_str(match self {
- Self::Left => "Left",
- Self::Center => "Center",
- Self::Right => "Right",
- })
- }
-}
diff --git a/src/text.rs b/src/text.rs
deleted file mode 100644
index e9dbdeb..0000000
--- a/src/text.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-pub enum Text {
- Normal(String),
- Bold(String),
- Italic(String),
- BoldItalic(String),
- Code(String),
- WikiLink(String),
- Hyperlink(Hyperlink),
-}
-impl std::fmt::Debug for Text {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- let string = match self {
- Text::Normal(text) => format!("Normal ('{}')", text),
- Text::Bold(text) => format!("Bold ('{}')", text),
- Text::Italic(text) => format!("Italic ('{}')", text),
- Text::BoldItalic(text) => format!("BoldItalic ('{}')", text),
- Text::Code(text) => format!("Code ('{}')", text),
- Text::WikiLink(text) => format!("WikiLink ('{}')", text),
- Text::Hyperlink(Hyperlink { label, target }) => {
- format!("Hyperlink (label:'{}', target:'{}')", label, target)
- }
- };
- f.write_str(&string)
- }
-}
-
-pub struct Hyperlink {
- pub label: String,
- pub target: String,
-}