summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBen Bridle <ben@derelict.engineering>2025-01-06 12:21:06 +1300
committerBen Bridle <ben@derelict.engineering>2025-01-06 17:16:24 +1300
commita78feb46aefaf8e8950e9b029984e9ff98fe69b0 (patch)
treed524c0656416e27484f8c0ae709f71558ea69bb6 /src
parent13cb719b87bcef41c4dd398f5a651ddb2b561e0d (diff)
downloadmarkdown-a78feb46aefaf8e8950e9b029984e9ff98fe69b0.zip
Rewrite the library a second timev2.0.0
Diffstat (limited to 'src')
-rw-r--r--src/block.rs19
-rw-r--r--src/document.rs172
-rw-r--r--src/elements.rs9
-rw-r--r--src/elements/block_element.rs53
-rw-r--r--src/elements/line.rs117
-rw-r--r--src/elements/line_element.rs61
-rw-r--r--src/elements/table.rs96
-rw-r--r--src/lib.rs166
-rw-r--r--src/line.rs123
-rw-r--r--src/table.rs85
-rw-r--r--src/token.rs24
11 files changed, 405 insertions, 520 deletions
diff --git a/src/block.rs b/src/block.rs
new file mode 100644
index 0000000..aa56135
--- /dev/null
+++ b/src/block.rs
@@ -0,0 +1,19 @@
+use crate::*;
+
+/// Heading level.
+///
+/// The document parser picks the level from the `#` prefix of a heading line.
+pub enum Level {
+ /// A first-level heading, written with a `# ` prefix.
+ Heading1,
+ /// A second-level heading, written with a `## ` prefix.
+ Heading2,
+ /// A third-level heading, written with a `### ` prefix.
+ Heading3,
+}
+
+/// A block-level element of a parsed markdown document.
+pub enum Block {
+ /// A heading line and its level.
+ Heading { level: Level, line: Line },
+ /// A single line of text that matched no other block type.
+ Paragraph(Line),
+ /// Consecutive lines that were each prefixed with `- `.
+ List(Vec<Line>),
+ /// Consecutive lines that were each prefixed with `> ` (or were a bare `>`).
+ Note(Vec<Line>),
+ /// Consecutive lines starting with `|` that parsed as a table.
+ Table(Table),
+ /// A line made only of three or more `-` characters.
+ Break,
+ /// A line of the form `![label](path)`.
+ Embedded { label: String, path: String },
+ /// The raw lines between a pair of ``` fences; `language` is the text
+ /// that followed the opening fence.
+ Fragment { language: String, content: String },
+}
diff --git a/src/document.rs b/src/document.rs
deleted file mode 100644
index fbfea00..0000000
--- a/src/document.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-use crate::*;
-
-pub struct MarkdownDocument {
- pub block_elements: Vec<BlockElement>,
-}
-
-impl MarkdownDocument {
- pub fn from_str(raw_markdown: &str) -> Self {
- let mut block_elements = Vec::new();
- let mut current_multiline_block = None;
- // Chain a blank line to the end to ensure that the final multi-line block is flushed.
- let lines = raw_markdown.lines().chain(std::iter::once(""));
-
- for incoming_line in lines {
- let incoming_line_untrimmed = incoming_line;
- let incoming_line = incoming_line.trim();
- // Handle an in-progress subdocument block.
- if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block {
- if incoming_line == "```" {
- let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") };
- block_elements.push(BlockElement::Subdocument(subdocument));
- current_multiline_block = None;
- } else {
- lines.push(incoming_line_untrimmed);
- current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines));
- }
- continue;
- }
-
- // Parse the incoming line.
- let incoming_line_block = {
- if let Some(("", tail)) = incoming_line.split_once("# ") {
- BlockLine::DocumentHeading(tail.trim())
- } else if let Some(("", tail)) = incoming_line.split_once("## ") {
- BlockLine::SectionHeading(tail.trim())
- } else if let Some(("", tail)) = incoming_line.split_once("### ") {
- BlockLine::ArticleHeading(tail.trim())
- } else if let Some(("", tail)) = incoming_line.split_once("- ") {
- BlockLine::List(tail.trim())
- } else if let Some(("", tail)) = incoming_line.split_once("> ") {
- BlockLine::Aside(tail.trim())
- } else if incoming_line == ">" {
- BlockLine::Aside("")
- } else if let Some(("", tail)) = incoming_line.split_once("```") {
- BlockLine::SubdocumentHeader(tail.trim())
- } else if incoming_line.starts_with("|") {
- BlockLine::Table(incoming_line)
- } else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') {
- BlockLine::Break
- } else if incoming_line.is_empty() {
- BlockLine::BlankLine
- } else {
- BlockLine::Paragraph(incoming_line) }
- };
-
- // If the incoming line is of the same type as the current multiline
- // block, append it to the end of that current block and continue.
- if let Some(ref mut current_block) = current_multiline_block {
- match (&incoming_line_block, current_block) {
- (BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => {
- lines.push(line); continue; }
- (BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => {
- lines.push(line); continue; }
- (BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => {
- lines.push(line); continue; }
- _ => (),
- };
- }
-
- // Otherwise, commit the current block, then handle the incoming line.
- if let Some(current_block) = current_multiline_block {
- match current_block {
- MultiLineBlock::List(raw_lines) => {
- let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
- block_elements.push(BlockElement::List(lines)); }
- MultiLineBlock::Aside(raw_lines) => {
- let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
- block_elements.push(BlockElement::Aside(lines)); }
- MultiLineBlock::Table(raw_lines) => {
- if let Some(table) = Table::try_from_strs(&raw_lines) {
- block_elements.push(BlockElement::Table(table)) }
- else { for raw_line in raw_lines {
- block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}}
- MultiLineBlock::Subdocument(..) => unreachable!(),
- }
- current_multiline_block = None;
- }
-
- // Handle the incoming line.
- match incoming_line_block {
- BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))),
- BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))),
- BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))),
- BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])),
- BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])),
- BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])),
- BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())),
- BlockLine::Paragraph(s) => {
- if let Some(embedded_file) = parse_embedded_file(&s) {
- block_elements.push(BlockElement::EmbeddedFile(embedded_file))
- } else if let Some(math) = parse_math_block(&s) {
- block_elements.push(BlockElement::Math(math))
- } else {
- block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) }
- },
- BlockLine::Break => block_elements.push(BlockElement::Break),
- BlockLine::BlankLine => (),
- }
- }
-
- Self { block_elements }
- }
-}
-
-fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> {
- let chars: Vec<char> = text.trim().chars().collect();
- let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b);
-
- if starts_with(0, "![") {
- let label_start = 2;
- let mut label_end = label_start;
- while label_end <= chars.len() {
- if label_end == chars.len() { return None }
- if starts_with(label_end, "](") { break };
- label_end += 1; }
- let label: String = chars[label_start..label_end].iter().collect();
- if label.is_empty() || !is_contentful(&label, &['[', ']']) {
- return None }
- // Try to parse the target.
- let target_start = label_end + 2;
- let target_end = chars.len() - 1;
- if let Some(')') = chars.get(target_end) {
- let target: String = chars[target_start..target_end].iter().collect();
- if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) {
- return None }
- return Some(EmbeddedFile { label, target })
- }
- }
- return None;
-}
-
-fn parse_math_block(text: &str) -> Option<String> {
- if let Some(("", trailing)) = text.split_once("$$") {
- if let Some((math, "")) = trailing.rsplit_once("$$") {
- return Some(math.trim().to_string());
- }
- }
- return None;
-}
-
-/// When parsing, is a single line for a one-line block element.
-enum BlockLine<'a> {
- DocumentHeading(&'a str),
- SectionHeading(&'a str),
- ArticleHeading(&'a str),
- Paragraph(&'a str),
- List(&'a str),
- Aside(&'a str),
- Table(&'a str),
- SubdocumentHeader(&'a str),
- Break,
- BlankLine,
-}
-
-/// When parsing, is the gathered string lines of a multiline block element.
-enum MultiLineBlock<'a> {
- List(Vec<&'a str>),
- Aside(Vec<&'a str>),
- Table(Vec<&'a str>),
- Subdocument(&'a str, Vec<&'a str>),
-}
-
diff --git a/src/elements.rs b/src/elements.rs
deleted file mode 100644
index a4a9783..0000000
--- a/src/elements.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-mod block_element;
-mod line;
-mod line_element;
-mod table;
-
-pub use block_element::*;
-pub use line::*;
-pub use line_element::*;
-pub use table::*;
diff --git a/src/elements/block_element.rs b/src/elements/block_element.rs
deleted file mode 100644
index cdb7a71..0000000
--- a/src/elements/block_element.rs
+++ /dev/null
@@ -1,53 +0,0 @@
-use crate::*;
-
-pub enum BlockElement {
- /// A first-level heading.
- DocumentHeading(Line),
- /// A second-level heading.
- SectionHeading(Line),
- /// A third-level heading.
- ArticleHeading(Line),
- Paragraph(Line),
- /// A bullet-list.
- List(Vec<Line>),
- /// A paragraph separate from the main text.
- Aside(Vec<Line>),
- Table(Table),
- EmbeddedFile(EmbeddedFile),
- /// A non-markdown sub-document within this document.
- Subdocument(Subdocument),
- /// A KaTeX block
- Math(String),
- Break,
-}
-
-pub struct EmbeddedFile {
- pub label: String,
- pub target: String,
-}
-
-pub struct Subdocument {
- pub language: String,
- pub content: String,
-}
-
-impl std::fmt::Debug for BlockElement {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- let string = match self {
- BlockElement::DocumentHeading(line) => format!("DocumentHeading ('{line}')"),
- BlockElement::SectionHeading(line) => format!("SectionHeading ('{line}')"),
- BlockElement::ArticleHeading(line) => format!("ArticleHeading ('{line}')"),
- BlockElement::Paragraph(line) => format!("Paragraph ('{line}')"),
- BlockElement::List(lines) => format!("List (len: {})", lines.len()),
- BlockElement::Aside(_) => format!("Aside"),
- BlockElement::Table(_) => format!("Table"),
- BlockElement::EmbeddedFile(EmbeddedFile {label, target}) =>
- format!("EmbeddedFile (label:'{label}', target:'{target}')"),
- BlockElement::Subdocument(Subdocument {language, ..}) =>
- format!("Subdocument ('{language}')"),
- BlockElement::Math(string) => format!("Math ('{string}')"),
- BlockElement::Break => format!("Break"),
- };
- f.write_str(&string)
- }
-}
diff --git a/src/elements/line.rs b/src/elements/line.rs
deleted file mode 100644
index d5c078e..0000000
--- a/src/elements/line.rs
+++ /dev/null
@@ -1,117 +0,0 @@
-use crate::*;
-
-macro_rules! opt {
- ($v:expr) => {|s| Some($v(s)) };
-}
-
-pub struct Line {
- pub elements: Vec<LineElement>,
-}
-
-impl Line {
- pub fn from_str(raw_string: &str) -> Self {
- fn unlabeled_extern_link(target: String) -> Option<LineElement> {
- target.contains("/").then( ||
- LineElement::ExternalLink(ExternalLink { target, label:String::new() })
- )
- }
- fn labelled_extern_link(s: String) -> Option<LineElement> {
- let (label, target) = match s.split_once("](") {
- Some((l, t)) => (l.to_string(), t.to_string()),
- None => return None };
- if label.contains("]") || target.contains("]") { return None }
- Some(LineElement::ExternalLink(ExternalLink { label, target })) }
- const DELIMITERS: [(fn(String)->Option<LineElement>, &str, &str, &str); 7] = [
- ( opt!(LineElement::Bold), "**", "**", "*" ),
- ( opt!(LineElement::Italic), "_", "_", "_" ),
- ( opt!(LineElement::Monospace), "`", "`", "`" ),
- ( opt!(LineElement::Math), "$", "$", "$" ),
- ( opt!(LineElement::InternalLink), "[[", "]]", "[]" ),
- ( labelled_extern_link, "[", ")", "[]()" ),
- ( unlabeled_extern_link, "[", "]", "[]" ),
- ];
- let chars: Vec<char> = raw_string.chars().collect();
- let mut elements = Vec::new();
- let mut cached_chars = String::new();
- let mut i = 0;
-
- let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b);
-
- 'outer: while let Some(c) = chars.get(i) {
- // Only check for opening delimiters that directly follow a whitespace character.
- let follows_whitespace = match chars.get(i.wrapping_sub(1)) {
- Some(w) => is_whitespace(w),
- None => true,
- };
- if follows_whitespace {
- // Try to parse an opening delimiter.
- for (variant, start_delim, end_delim, delim_chars) in DELIMITERS {
- // Try to match an opening delimiter with a terminating delimiter.
- if starts_with(i, start_delim) {
- let s_end = i + start_delim.chars().count();
- let mut e_start = s_end;
- let mut e_end = e_start + end_delim.chars().count();
- while e_end <= chars.len() {
- e_start += 1; e_end += 1;
- let end_is_whitespace =
- if let Some(end_char) = chars.get(e_end) {
- is_whitespace(end_char)
- } else {
- e_end == chars.len()
- };
- // If the terminating delimiter is found, store the normal
- // text and the styled text, and continue to the next character.
- if end_is_whitespace && starts_with(e_start, end_delim) {
- // Check that there is content within the styled string.
- let styled_string: String = chars[s_end..e_start].iter().collect();
- let non_content_chars: Vec<_> = delim_chars.chars().collect();
- if !is_contentful(&styled_string, &non_content_chars) { continue }
- if styled_string.len() != styled_string.trim().len() { continue }
- let line_element = match variant(styled_string) {
- Some(e) => e,
- None => continue,
- };
- // Commit the normal and styled strings.
- if !cached_chars.is_empty() {
- let normal_string = std::mem::take(&mut cached_chars);
- elements.push(LineElement::Normal(normal_string)); }
- elements.push(line_element);
- i = e_end;
- continue 'outer;
- }
- }
- }
- }
- }
- cached_chars.push(*c); i += 1;
- }
- if !cached_chars.is_empty() {
- let normal_string = std::mem::take(&mut cached_chars);
- elements.push(LineElement::Normal(normal_string)); }
- Self { elements }
- }
-
- /// Return only the character content, with none of the styling information.
- pub fn as_plain_text(&self) -> String {
- let mut string = String::new();
- for line_element in &self.elements {
- string.push_str(line_element.as_plain_text()) }
- return string;
- }
-}
-
-impl std::fmt::Display for Line {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- for line_element in &self.elements {
- write!(f, "{line_element}")?; }
- Ok(())
- }
-}
-
-impl std::fmt::Debug for Line {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- for line_element in &self.elements {
- write!(f, "{line_element:?}\n")?; }
- Ok(())
- }
-}
diff --git a/src/elements/line_element.rs b/src/elements/line_element.rs
deleted file mode 100644
index cc47b4b..0000000
--- a/src/elements/line_element.rs
+++ /dev/null
@@ -1,61 +0,0 @@
-pub enum LineElement {
- Normal(String),
- Bold(String),
- Italic(String),
- Monospace(String),
- Math(String),
- InternalLink(String),
- ExternalLink(ExternalLink),
-}
-
-impl LineElement {
- /// Return only the character content, with none of the styling information.
- pub fn as_plain_text(&self) -> &str {
- match self {
- LineElement::Normal(text) => text,
- LineElement::Bold(text) => text,
- LineElement::Italic(text) => text,
- LineElement::Monospace(text) => text,
- LineElement::Math(text) => text,
- LineElement::InternalLink(label) => label,
- LineElement::ExternalLink(ExternalLink { label, ..}) => label,
- }
- }
-}
-
-pub struct ExternalLink {
- pub label: String,
- pub target: String,
-}
-
-impl std::fmt::Display for LineElement {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- let string = match self {
- LineElement::Normal(text) => format!("{text}"),
- LineElement::Bold(text) => format!("**{text}**"),
- LineElement::Italic(text) => format!("_{text}_"),
- LineElement::Monospace(text) => format!("`{text}`"),
- LineElement::Math(text) => format!("${text}$"),
- LineElement::InternalLink(text) => format!("[[{text}]]"),
- LineElement::ExternalLink(ExternalLink { label, target }) => {
- format!("[{label}]({target})") }
- };
- f.write_str(&string)
- }
-}
-
-impl std::fmt::Debug for LineElement {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- let string = match self {
- LineElement::Normal(text) => format!("Normal ('{text}')"),
- LineElement::Bold(text) => format!("Bold ('{text}')"),
- LineElement::Italic(text) => format!("Italic ('{text}')"),
- LineElement::Monospace(text) => format!("Monospace ('{text}')"),
- LineElement::Math(text) => format!("Math ('{text}')"),
- LineElement::InternalLink(text) => format!("InternalLink ('{text}')"),
- LineElement::ExternalLink(ExternalLink { label, target }) => {
- format!("ExternalLink (label:'{label}', target:'{target}')") }
- };
- f.write_str(&string)
- }
-}
diff --git a/src/elements/table.rs b/src/elements/table.rs
deleted file mode 100644
index 5b354c1..0000000
--- a/src/elements/table.rs
+++ /dev/null
@@ -1,96 +0,0 @@
-use crate::*;
-
-pub struct Table {
- /// The column definitions for this table.
- pub column_definitions: Vec<ColumnDefinition>,
- /// The content contained in the rows of the table. An individual [Line] is
- /// the contents of a single table cell, a group of cells forms a table row,
- /// a group of rows forms a vertical section of the table, with a separator
- /// intending to be drawn between each section, and a group of sections forms
- /// the table itself.
- /// Each row in the table is guaranteed to have the same number of columns
- /// as the table header.
- pub sections: Vec<Vec<Vec<Line>>>,
-}
-
-impl Table {
- pub fn try_from_strs(lines: &[&str]) -> Option<Self> {
- let mut lines = lines.into_iter();
- let column_definitions: Vec<ColumnDefinition> = {
- let names = split_trimmed_columns(lines.next()?)?
- .into_iter().map(|l| Line::from_str(l));
- let alignments = parse_alignments(lines.next()?)?;
- if names.len() != alignments.len() { return None }
- std::iter::zip(names, alignments).map(
- |(name, alignment)| ColumnDefinition { name, alignment } ).collect()
- };
-
- let mut sections = Vec::new();
- let mut current_section = Vec::new();
-
- for line in lines {
- if let Some(alignments) = parse_alignments(line) {
- if alignments.len() != column_definitions.len() { return None }
- sections.push(std::mem::take(&mut current_section))
- } else {
- let row: Vec<Line> = split_trimmed_columns(line)?
- .into_iter().map(|c| Line::from_str(c)).collect();
- if row.len() != column_definitions.len() { return None }
- current_section.push(row);
- }
- }
-
- if !current_section.is_empty() {
- sections.push(std::mem::take(&mut current_section)); }
- Some( Self { column_definitions, sections })
- }
-}
-
-pub struct ColumnDefinition {
- /// The name of this column, shown in the header row of the table.
- pub name: Line,
- /// The alignment of the content in this column.
- pub alignment: ColumnAlignment,
-}
-
-pub enum ColumnAlignment {
- Left,
- Center,
- Right,
-}
-
-impl ColumnAlignment {
- pub fn from_str(cell: &str) -> Option<Self> {
- if !cell.chars().all(|c| c == ':' || c == '-') {
- return None }
- match (cell.starts_with(':'), cell.ends_with(':')) {
- (false, false) => Some(ColumnAlignment::Left),
- (false, true) => Some(ColumnAlignment::Right),
- (true, false) => Some(ColumnAlignment::Left),
- (true, true) => Some(ColumnAlignment::Center),
- }
- }
-}
-
-
-fn split_trimmed_columns(line: &str) -> Option<Vec<&str>> {
- Some(split_columns(line)?.into_iter().map(|s| s.trim()).collect())
-}
-
-fn split_columns(line: &str) -> Option<Vec<&str>> {
- if let Some(("", tail)) = line.split_once('|') {
- if let Some((head, "")) = tail.rsplit_once('|') {
- return Some(head.split('|').collect());
- }
- }
- return None;
-}
-
-fn parse_alignments(line: &str) -> Option<Vec<ColumnAlignment>> {
- let mut alignments = Vec::new();
- for cell in split_columns(line)? {
- alignments.push(ColumnAlignment::from_str(cell)?);
- }
- Some(alignments)
-}
-
diff --git a/src/lib.rs b/src/lib.rs
index c70ce77..68fc777 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,13 +1,155 @@
-mod document;
-mod elements;
-
-pub use document::*;
-pub use elements::*;
-
-pub(crate) fn is_whitespace(c: &char) -> bool {
- c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) }
-pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool {
- s.chars().any(|c| !non_content_chars.contains(&c))
- && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false)
- && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false)
+#![feature(never_type)]
+
+mod block; pub use block::{Block, Level};
+mod line; pub use line::Line;
+mod token; pub use token::Token;
+mod table; pub use table::{Table, Column, Alignment};
+
+/// A parsed markdown document, stored as a flat list of blocks.
+pub struct MarkdownDocument {
+ /// The blocks of the document, in the order they were parsed.
+ pub blocks: Vec<Block>,
+}
+
+impl MarkdownDocument {
+    /// Parse a markdown string into a list of [Block]s.
+    ///
+    /// Every line is trimmed and classified by its prefix. List, note and
+    /// table lines are gathered into multi-line blocks that are committed when
+    /// a line of a different type arrives. Lines between ``` fences are kept
+    /// verbatim (untrimmed) as a fragment. A fragment that is still open when
+    /// the input ends is committed with the lines gathered so far, so its
+    /// content is not lost.
+    pub fn from_str(raw_markdown: &str) -> Self {
+        let mut blocks = Vec::new();
+        let mut current_block = None;
+
+        // Chain a blank line to the end to ensure the final block is flushed.
+        for line in raw_markdown.lines().chain(std::iter::once("")) {
+            let line_raw = line;
+            let line = line.trim();
+
+            // Handle a fragment block separately, because fragment lines are not prefixed.
+            if let Some(BlockMultiline::Fragment { language, mut content }) = current_block {
+                if line == "```" {
+                    let language = language.to_string();
+                    let content = content.join("\n");
+                    blocks.push(Block::Fragment { language, content });
+                    current_block = None;
+                } else {
+                    content.push(line_raw);
+                    current_block = Some(BlockMultiline::Fragment { language, content });
+                }
+                continue;
+            }
+
+            // Determine line type from prefix.
+            let line = if let Some(tail) = line.strip_prefix("# ") {
+                BlockLine::Heading { level: Level::Heading1, line: tail.trim() }
+            } else if let Some(tail) = line.strip_prefix("## ") {
+                BlockLine::Heading { level: Level::Heading2, line: tail.trim() }
+            } else if let Some(tail) = line.strip_prefix("### ") {
+                BlockLine::Heading { level: Level::Heading3, line: tail.trim() }
+            } else if let Some(tail) = line.strip_prefix("- ") {
+                BlockLine::List(tail.trim())
+            } else if let Some(tail) = line.strip_prefix("> ") {
+                BlockLine::Note(tail.trim())
+            } else if line == ">" {
+                BlockLine::Note("")
+            } else if let Some(tail) = line.strip_prefix("```") {
+                BlockLine::FragmentHeader(tail.trim())
+            } else if line.starts_with('|') {
+                BlockLine::Table(line)
+            } else if line.len() >= 3 && line.chars().all(|c| c == '-') {
+                BlockLine::Break
+            } else if line.is_empty() {
+                BlockLine::BlankLine
+            } else {
+                BlockLine::Paragraph(line)
+            };
+
+            // If line has the same type as the current block, append and continue.
+            if let Some(ref mut block) = current_block {
+                match (&line, block) {
+                    (BlockLine::List(line), BlockMultiline::List(lines)) => {
+                        lines.push(*line);
+                        continue;
+                    }
+                    (BlockLine::Note(line), BlockMultiline::Note(lines)) => {
+                        lines.push(*line);
+                        continue;
+                    }
+                    (BlockLine::Table(line), BlockMultiline::Table(lines)) => {
+                        lines.push(*line);
+                        continue;
+                    }
+                    _ => (),
+                };
+            }
+
+            // Otherwise commit the current block before handling the new line.
+            if let Some(finished) = current_block.take() {
+                match finished {
+                    BlockMultiline::List(raw_lines) => {
+                        let lines = raw_lines.into_iter().map(Line::from_str).collect();
+                        blocks.push(Block::List(lines));
+                    }
+                    BlockMultiline::Note(raw_lines) => {
+                        let lines = raw_lines.into_iter().map(Line::from_str).collect();
+                        blocks.push(Block::Note(lines));
+                    }
+                    BlockMultiline::Table(raw_lines) => {
+                        if let Some(table) = Table::from_strs(&raw_lines) {
+                            blocks.push(Block::Table(table));
+                        } else {
+                            // Lines that do not form a valid table fall back to paragraphs.
+                            for raw_line in raw_lines {
+                                blocks.push(Block::Paragraph(Line::from_str(raw_line)));
+                            }
+                        }
+                    }
+                    // Fragments are fully handled at the top of the loop.
+                    BlockMultiline::Fragment { .. } => unreachable!(),
+                }
+            }
+
+            // Handle the new line.
+            match line {
+                BlockLine::List(line) => {
+                    current_block = Some(BlockMultiline::List(vec![line]));
+                }
+                BlockLine::Note(line) => {
+                    current_block = Some(BlockMultiline::Note(vec![line]));
+                }
+                BlockLine::Table(line) => {
+                    current_block = Some(BlockMultiline::Table(vec![line]));
+                }
+                BlockLine::FragmentHeader(language) => {
+                    current_block = Some(BlockMultiline::Fragment { language, content: Vec::new() });
+                }
+                BlockLine::Heading { level, line } => {
+                    blocks.push(Block::Heading { level, line: Line::from_str(line) });
+                }
+                BlockLine::Break => blocks.push(Block::Break),
+                BlockLine::BlankLine => (),
+                BlockLine::Paragraph(line) => match parse_embedded(line) {
+                    Some(embedded) => blocks.push(embedded),
+                    None => blocks.push(Block::Paragraph(Line::from_str(line))),
+                },
+            }
+        }
+
+        // The trailing blank line flushes every block type except a fragment,
+        // which swallows it as content. Commit a fragment that was never
+        // closed instead of silently discarding everything after its fence.
+        if let Some(BlockMultiline::Fragment { language, mut content }) = current_block {
+            // Drop the synthetic blank line that was chained onto the input.
+            content.pop();
+            let language = language.to_string();
+            let content = content.join("\n");
+            blocks.push(Block::Fragment { language, content });
+        }
+
+        Self { blocks }
+    }
+}
+
+
+
+/// The type of a single trimmed input line, as determined from its prefix.
+/// String payloads borrow from the input and have the prefix removed, except
+/// for `Table` and `Paragraph`, which hold the whole trimmed line.
+enum BlockLine<'a> {
+ Heading { level: Level, line: &'a str },
+ Paragraph(&'a str),
+ List(&'a str),
+ Note(&'a str),
+ Table(&'a str),
+ /// The opening ``` line of a fragment; holds the text after the fence.
+ FragmentHeader(&'a str),
+ Break,
+ BlankLine,
+}
+
+/// The lines gathered so far for a block that can span multiple input lines.
+enum BlockMultiline<'a> {
+ List(Vec<&'a str>),
+ Note(Vec<&'a str>),
+ Table(Vec<&'a str>),
+ /// `content` holds the untrimmed lines that followed the opening fence.
+ Fragment { language: &'a str, content: Vec<&'a str> },
+}
+
+/// Parse a line of the form `![label](path)` into a [Block::Embedded].
+///
+/// Returns `None` unless the trimmed line starts with `![`, ends with `)`,
+/// and contains exactly one `](` separator between the two.
+fn parse_embedded(line: &str) -> Option<Block> {
+    let inner = line.trim().strip_prefix("![")?.strip_suffix(')')?;
+    let (label, path) = inner.split_once("](")?;
+    // A second separator makes the label/path boundary ambiguous.
+    if path.contains("](") {
+        return None;
+    }
+    Some(Block::Embedded { label: label.to_string(), path: path.to_string() })
 }
diff --git a/src/line.rs b/src/line.rs
new file mode 100644
index 0000000..fce628c
--- /dev/null
+++ b/src/line.rs
@@ -0,0 +1,123 @@
+use crate::*;
+
+/// A single line of text, split into styled and unstyled tokens.
+#[derive(Clone)]
+pub struct Line {
+ /// The tokens of the line, in the order they appear in the text.
+ pub tokens: Vec<Token>,
+}
+
+impl Line {
+ /// Split a raw line of text into tokens.
+ ///
+ /// An opening delimiter from `DELIMITERS` is only recognised at the start
+ /// of the line or directly after a character accepted by `is_whitespace`.
+ /// Its closing delimiter must be followed by such a character or by the end
+ /// of the line. The captured text must be non-empty, must not begin or end
+ /// with one of the delimiter's own characters, and must have no leading or
+ /// trailing whitespace. All other characters are gathered into
+ /// `Token::Normal` tokens.
+ pub fn from_str(raw_line: &str) -> Self {
+ let chars: Vec<char> = raw_line.chars().collect();
+ let mut tokens = Vec::new();
+ let mut normal_chars = String::new();
+ let mut i = 0;
+
+ // Compare chars from i to a delimiter string.
+ // The zip stops at the shorter sequence, so this also returns true when
+ // fewer chars remain than the delimiter is long and all of them match.
+ let compare = |i, p:&str| std::iter::zip(&chars[i..], p.chars())
+ .all(|(a, b)| *a == b);
+
+ 'find_token: while let Some(c) = chars.get(i) {
+ // At i == 0 the wrapped index is out of range, so the start of the line
+ // counts as following whitespace.
+ let char_follows_whitespace = match chars.get(i.wrapping_sub(1)) {
+ Some(w) => is_whitespace(w),
+ None => true,
+ };
+ if char_follows_whitespace {
+ // Try to parse an opening delimiter.
+ for (variant, start_delim, end_delim, delim_chars) in DELIMITERS {
+ let delim_chars: Vec<char> = delim_chars.chars().collect();
+ // Try to match an opening delimiter with a terminating delimiter.
+ if compare(i, start_delim) {
+ let s_end = i + start_delim.chars().count();
+ let mut e_start = s_end;
+ let mut e_end = e_start + end_delim.chars().count();
+ // Scan along chars to find matching end delimiter.
+ while e_end <= chars.len() {
+ // Advance before testing, so the captured text is never empty.
+ e_start += 1; e_end += 1;
+ let followed_by_whitespace = match chars.get(e_end) {
+ Some(end_char) => is_whitespace(end_char),
+ None => e_end == chars.len(),
+ };
+ // If end delimiter is found, store the token and continue.
+ if followed_by_whitespace && compare(e_start, end_delim) {
+ // Check if captured string contains non-delimiter characters.
+ let captured: String = chars[s_end..e_start].iter().collect();
+ let no_content = !has_content(&captured, &delim_chars);
+ let air_bubbles = captured.len() != captured.trim().len();
+ let token = variant(captured);
+ // On rejection, keep scanning for a later end delimiter.
+ if no_content || air_bubbles || token.is_none() { continue }
+ // Commit the preceding normal token, if any.
+ if !normal_chars.is_empty() {
+ let normal = std::mem::take(&mut normal_chars);
+ tokens.push(Token::Normal(normal));
+ }
+ tokens.push(token.unwrap());
+ i = e_end;
+ continue 'find_token;
+ }
+ }
+ }
+ }
+ }
+ // No token starts here, so treat the character as normal text.
+ normal_chars.push(*c);
+ i += 1;
+ }
+
+ if !normal_chars.is_empty() {
+ let normal = std::mem::take(&mut normal_chars);
+ tokens.push(Token::Normal(normal));
+ }
+ Self { tokens }
+ }
+}
+
+
+/// Formats the line as plain text: the text of every token in order, with no
+/// styling markup. For an external link this is the label, not the path.
+///
+/// Implementing `Display` rather than `ToString` keeps `line.to_string()`
+/// available through the standard blanket impl, and also lets a [Line] be
+/// used directly in `format!` and `write!`.
+impl std::fmt::Display for Line {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for token in &self.tokens {
+            f.write_str(token.as_ref())?;
+        }
+        Ok(())
+    }
+}
+
+
+/// Build an external link token that has a path but an empty label.
+/// Always succeeds; the `Option` matches the constructor type in `DELIMITERS`.
+fn unlabeled_extern_link(path: String) -> Option<Token> {
+    let label = String::new();
+    Some(Token::ExternalLink { label, path })
+}
+
+/// Build an external link token from captured text of the form `label](path`.
+///
+/// Returns `None` if the `](` separator is missing, or if either half
+/// contains a `]` character.
+fn labelled_extern_link(s: String) -> Option<Token> {
+    let (label, path) = s.split_once("](")?;
+    let has_stray_bracket = [label, path].iter().any(|part| part.contains(']'));
+    if has_stray_bracket {
+        return None;
+    }
+    Some(Token::ExternalLink { label: label.to_string(), path: path.to_string() })
+}
+
+/// Wrap a one-argument constructor in a closure that returns `Some` of the
+/// constructed value, so that it can be stored alongside fallible
+/// constructors in `DELIMITERS`.
+macro_rules! con {
+ ($v:expr) => {|s| Some($v(s)) };
+}
+
+/// The delimited token types recognised by `Line::from_str`.
+///
+/// Each entry is (constructor, opening delimiter, closing delimiter,
+/// characters that the captured text may not begin or end with). Entries are
+/// tried in order, so `[[` is tested before `[`.
+const DELIMITERS: [(fn(String)->Option<Token>, &str, &str, &str); 7] = [
+ ( con!(Token::Bold), "**", "**", "*" ),
+ ( con!(Token::Italic), "_", "_", "_" ),
+ ( con!(Token::Monospace), "`", "`", "`" ),
+ ( con!(Token::Math), "$", "$", "$" ),
+ ( con!(Token::InternalLink), "[[", "]]", "[]" ),
+ ( labelled_extern_link, "[", ")", "[]()" ),
+ ( unlabeled_extern_link, "<", ">", "<>" ),
+];
+
+/// Check whether a character may sit directly outside a token delimiter.
+/// This is true for Unicode whitespace and for a fixed set of punctuation.
+fn is_whitespace(c: &char) -> bool {
+    const BOUNDARY_PUNCTUATION: &str = r#".,'"“”_:;-/\()[]{}?"#;
+    if c.is_whitespace() {
+        return true;
+    }
+    BOUNDARY_PUNCTUATION.chars().any(|p| p == *c)
+}
+
+/// Return true if the string is non-empty and neither its first nor its
+/// last character is one of the given delimiter characters.
+fn has_content(s: &str, delimiter_chars: &[char]) -> bool {
+    let mut chars = s.chars();
+    let Some(first) = chars.next() else {
+        return false;
+    };
+    // A one-character string has the same first and last character.
+    let last = chars.next_back().unwrap_or(first);
+    !delimiter_chars.contains(&first) && !delimiter_chars.contains(&last)
+}
diff --git a/src/table.rs b/src/table.rs
new file mode 100644
index 0000000..071bd1a
--- /dev/null
+++ b/src/table.rs
@@ -0,0 +1,85 @@
+use crate::*;
+
+/// A table with a header row and body rows grouped into sections.
+pub struct Table {
+ /// A [Line] is the content of a cell, a group of cells forms a table row,
+ /// a group of rows forms a separated section of the table, and a group of
+ /// sections forms the table itself.
+ /// Each row in the table has the same number of columns as the table header.
+ pub sections: Vec<Vec<Vec<Line>>>,
+ /// The name and alignment of each column, taken from the first two lines
+ /// of the table.
+ pub columns: Vec<Column>,
+}
+
+impl Table {
+    /// Parse the lines of a table.
+    ///
+    /// The first line holds the column names and the second holds the column
+    /// alignments. Every following line is either another alignment-style
+    /// line, which ends the current section, or a row of cells. Returns
+    /// `None` if a line is not wrapped in `|` characters or if any line has a
+    /// different number of cells than the header.
+    pub fn from_strs(lines: &[&str]) -> Option<Self> {
+        let (header, rest) = lines.split_first()?;
+        let names = split_cells(header)?;
+        let (alignment_row, body) = rest.split_first()?;
+        let alignments = parse_alignments(alignment_row)?;
+        if names.len() != alignments.len() {
+            return None;
+        }
+        let columns: Vec<Column> = names
+            .into_iter()
+            .zip(alignments)
+            .map(|(name, alignment)| Column { name, alignment })
+            .collect();
+
+        let mut sections = Vec::new();
+        let mut rows = Vec::new();
+        for row_text in body {
+            match parse_alignments(row_text) {
+                // A separator line closes the section gathered so far.
+                Some(separator) => {
+                    if separator.len() != columns.len() {
+                        return None;
+                    }
+                    sections.push(std::mem::take(&mut rows));
+                }
+                None => {
+                    let cells: Vec<Line> = split_cells(row_text)?;
+                    if cells.len() != columns.len() {
+                        return None;
+                    }
+                    rows.push(cells);
+                }
+            }
+        }
+        // Commit the rows that followed the final separator.
+        if !rows.is_empty() {
+            sections.push(rows);
+        }
+        Some(Self { columns, sections })
+    }
+}
+
+/// The definition of a single table column.
+pub struct Column {
+ /// The column name, parsed from the cell in the first line of the table.
+ pub name: Line,
+ /// The column alignment, parsed from the cell in the second line of the table.
+ pub alignment: Alignment,
+}
+
+/// The horizontal alignment of the content of a table column.
+pub enum Alignment {
+    Left,
+    Center,
+    Right,
+}
+
+impl Alignment {
+    /// Parse an alignment cell made only of `:` and `-` characters.
+    ///
+    /// A trailing `:` means right-aligned, a `:` at both ends means centred,
+    /// and anything else (including an empty cell) means left-aligned.
+    /// Returns `None` if the cell contains any other character.
+    pub fn from_str(cell: &str) -> Option<Self> {
+        if cell.chars().any(|c| !matches!(c, ':' | '-')) {
+            return None;
+        }
+        let alignment = if !cell.ends_with(':') {
+            Alignment::Left
+        } else if cell.starts_with(':') {
+            Alignment::Center
+        } else {
+            Alignment::Right
+        };
+        Some(alignment)
+    }
+}
+
+/// Split a table line into trimmed cell strings.
+/// Returns `None` unless the line both starts and ends with a `|` character,
+/// each being a separate character.
+fn split_columns(line: &str) -> Option<Vec<&str>> {
+    let inner = line.strip_prefix('|')?.strip_suffix('|')?;
+    Some(inner.split('|').map(|cell| cell.trim()).collect())
+}
+
+/// Split a table line into cells and parse each cell as a [Line].
+/// Returns `None` if the line is not wrapped in `|` characters.
+fn split_cells(line: &str) -> Option<Vec<Line>> {
+    split_columns(line).map(|cells| cells.into_iter().map(Line::from_str).collect())
+}
+
+/// Parse a table line in which every cell is an alignment marker.
+/// Returns `None` if the line is not wrapped in `|` characters, or if any
+/// cell is not a valid alignment.
+fn parse_alignments(line: &str) -> Option<Vec<Alignment>> {
+    split_columns(line)?
+        .into_iter()
+        .map(Alignment::from_str)
+        .collect()
+}
diff --git a/src/token.rs b/src/token.rs
new file mode 100644
index 0000000..c2b1179
--- /dev/null
+++ b/src/token.rs
@@ -0,0 +1,24 @@
+/// A run of text within a line, tagged with its styling.
+#[derive(Clone)]
+pub enum Token {
+    /// Text with no styling.
+    Normal(String),
+    /// Text that was wrapped in `**`.
+    Bold(String),
+    /// Text that was wrapped in `_`.
+    Italic(String),
+    /// Text that was wrapped in backticks.
+    Monospace(String),
+    /// Text that was wrapped in `$`.
+    Math(String),
+    /// Text that was wrapped in `[[` and `]]`.
+    InternalLink(String),
+    /// A link written as `[label](path)`, or as `<path>` with an empty label.
+    ExternalLink { label: String, path: String },
+}
+
+/// Borrow the text of the token with no styling markup.
+/// For an external link this is the label, not the path.
+impl AsRef<str> for Token {
+    fn as_ref(&self) -> &str {
+        match self {
+            Token::Normal(text)
+            | Token::Bold(text)
+            | Token::Italic(text)
+            | Token::Monospace(text)
+            | Token::Math(text)
+            | Token::InternalLink(text) => text,
+            Token::ExternalLink { label, .. } => label,
+        }
+    }
+}