summary | refs | log | tree | commit | diff
path: root/src/lib.rs
diff options
context:
space:
mode:
author: Ben Bridle <ben@derelict.engineering> 2025-01-06 12:21:06 +1300
committer: Ben Bridle <ben@derelict.engineering> 2025-01-06 17:16:24 +1300
commit: a78feb46aefaf8e8950e9b029984e9ff98fe69b0 (patch)
tree: d524c0656416e27484f8c0ae709f71558ea69bb6 /src/lib.rs
parent: 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (diff)
download: markdown-1378443f84cbc8f7fbc23c0ece22a9a1c2c8a73a.zip
Rewrite the library a second time (tag: v2.0.0)
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- src/lib.rs 166
1 files changed, 154 insertions, 12 deletions
diff --git a/src/lib.rs b/src/lib.rs
index c70ce77..68fc777 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,13 +1,155 @@
-mod document;
-mod elements;
-
-pub use document::*;
-pub use elements::*;
-
-pub(crate) fn is_whitespace(c: &char) -> bool {
- c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) }
-pub(crate) fn is_contentful(s:&str, non_content_chars: &[char]) -> bool {
- s.chars().any(|c| !non_content_chars.contains(&c))
- && s.chars().nth(0).map(|c| !non_content_chars.contains(&c)).unwrap_or(false)
- && s.chars().last().map(|c| !non_content_chars.contains(&c)).unwrap_or(false)
+#![feature(never_type)]
+
+mod block; pub use block::{Block, Level};
+mod line; pub use line::Line;
+mod token; pub use token::Token;
+mod table; pub use table::{Table, Column, Alignment};
+
+/// A Markdown document, held as the flat list of blocks produced by
+/// `MarkdownDocument::from_str`.
+pub struct MarkdownDocument {
+ /// The parsed blocks, in the order their source lines were encountered.
+ pub blocks: Vec<Block>,
+}
+
+impl MarkdownDocument {
+    /// Parse a Markdown string into a document, one line at a time.
+    ///
+    /// Each line is trimmed and classified by its prefix:
+    ///
+    /// - `# `, `## `, `### ` start a heading of level 1, 2 or 3;
+    /// - `- ` is a list item, `> ` (or a bare `>`) is a note line, and a
+    ///   leading `|` is a table row; consecutive lines of the same kind are
+    ///   gathered into a single block;
+    /// - a triple-backtick fence opens a fragment, with the rest of the line
+    ///   taken as the language; the following lines are stored untrimmed
+    ///   until a line that is exactly a closing fence;
+    /// - three or more `-` characters form a break;
+    /// - an empty line only terminates the current block;
+    /// - anything else is a paragraph, or an embedded block when the whole
+    ///   line has the form `![label](path)`.
+    ///
+    /// Table rows that `Table::from_strs` rejects are emitted as individual
+    /// paragraphs. A fragment whose closing fence is missing extends to the
+    /// end of the input instead of being discarded.
+    pub fn from_str(raw_markdown: &str) -> Self {
+        let mut blocks = Vec::new();
+        let mut current_block: Option<BlockMultiline<'_>> = None;
+
+        // Chain a blank line to the end to ensure the final block is flushed.
+        for line in raw_markdown.lines().chain(std::iter::once("")) {
+            let line_raw = line;
+            let line = line.trim();
+
+            // Handle a fragment block separately, because fragment lines are
+            // not prefixed and must be stored exactly as written.
+            if let Some(BlockMultiline::Fragment { language, content }) = &mut current_block {
+                if line == "```" {
+                    let language = language.to_string();
+                    let content = content.join("\n");
+                    blocks.push(Block::Fragment { language, content });
+                    current_block = None;
+                } else {
+                    content.push(line_raw);
+                }
+                continue;
+            }
+
+            // Determine line type from prefix. The order matters: list items
+            // (`- `) are tested before breaks (`---`), and the specific
+            // markers are tested before the paragraph fallback.
+            let line = if let Some(tail) = line.strip_prefix("# ") {
+                BlockLine::Heading { level: Level::Heading1, line: tail.trim() }
+            } else if let Some(tail) = line.strip_prefix("## ") {
+                BlockLine::Heading { level: Level::Heading2, line: tail.trim() }
+            } else if let Some(tail) = line.strip_prefix("### ") {
+                BlockLine::Heading { level: Level::Heading3, line: tail.trim() }
+            } else if let Some(tail) = line.strip_prefix("- ") {
+                BlockLine::List(tail.trim())
+            } else if let Some(tail) = line.strip_prefix("> ") {
+                BlockLine::Note(tail.trim())
+            } else if line == ">" {
+                // A bare marker is an empty note line (the trailing space
+                // has already been trimmed away).
+                BlockLine::Note("")
+            } else if let Some(tail) = line.strip_prefix("```") {
+                BlockLine::FragmentHeader(tail.trim())
+            } else if line.starts_with('|') {
+                BlockLine::Table(line)
+            } else if line.len() >= 3 && line.chars().all(|c| c == '-') {
+                BlockLine::Break
+            } else if line.is_empty() {
+                BlockLine::BlankLine
+            } else {
+                BlockLine::Paragraph(line)
+            };
+
+            // If line has the same type as the current block, append and continue.
+            match (&line, &mut current_block) {
+                (BlockLine::List(text), Some(BlockMultiline::List(lines))) => {
+                    lines.push(*text);
+                    continue;
+                }
+                (BlockLine::Note(text), Some(BlockMultiline::Note(lines))) => {
+                    lines.push(*text);
+                    continue;
+                }
+                (BlockLine::Table(text), Some(BlockMultiline::Table(lines))) => {
+                    lines.push(*text);
+                    continue;
+                }
+                _ => (),
+            }
+
+            // Otherwise commit the current block before handling the new line.
+            if let Some(finished) = current_block.take() {
+                match finished {
+                    BlockMultiline::List(raw_lines) => {
+                        let lines = raw_lines.into_iter().map(Line::from_str).collect();
+                        blocks.push(Block::List(lines));
+                    }
+                    BlockMultiline::Note(raw_lines) => {
+                        let lines = raw_lines.into_iter().map(Line::from_str).collect();
+                        blocks.push(Block::Note(lines));
+                    }
+                    BlockMultiline::Table(raw_lines) => {
+                        if let Some(table) = Table::from_strs(&raw_lines) {
+                            blocks.push(Block::Table(table));
+                        } else {
+                            // Not a valid table: keep the text as paragraphs.
+                            for raw_line in raw_lines {
+                                blocks.push(Block::Paragraph(Line::from_str(raw_line)));
+                            }
+                        }
+                    }
+                    // Open fragments are fully handled at the top of the loop.
+                    BlockMultiline::Fragment { .. } => unreachable!(),
+                }
+            }
+
+            // Handle the new line.
+            match line {
+                BlockLine::List(line) => {
+                    current_block = Some(BlockMultiline::List(vec![line]));
+                }
+                BlockLine::Note(line) => {
+                    current_block = Some(BlockMultiline::Note(vec![line]));
+                }
+                BlockLine::Table(line) => {
+                    current_block = Some(BlockMultiline::Table(vec![line]));
+                }
+                BlockLine::FragmentHeader(language) => {
+                    current_block = Some(BlockMultiline::Fragment { language, content: Vec::new() });
+                }
+                BlockLine::Heading { level, line } => {
+                    blocks.push(Block::Heading { level, line: Line::from_str(line) });
+                }
+                BlockLine::Break => blocks.push(Block::Break),
+                BlockLine::BlankLine => (),
+                BlockLine::Paragraph(line) => match parse_embedded(line) {
+                    Some(embedded) => blocks.push(embedded),
+                    None => blocks.push(Block::Paragraph(Line::from_str(line))),
+                },
+            }
+        }
+
+        // The trailing blank line flushes every block kind except an open
+        // fragment, which swallows it as content. Emit such an unterminated
+        // fragment here so that its lines are not silently lost.
+        if let Some(BlockMultiline::Fragment { language, mut content }) = current_block {
+            // Drop the sentinel blank line chained onto the input above; it
+            // is always the last element because it is the last line seen.
+            content.pop();
+            let language = language.to_string();
+            let content = content.join("\n");
+            blocks.push(Block::Fragment { language, content });
+        }
+
+        Self { blocks }
+    }
+}
+
+
+
+/// The kind of a single trimmed input line, as decided from its prefix by
+/// `MarkdownDocument::from_str`. String payloads borrow from the input text.
+enum BlockLine<'a> {
+ /// A line starting with `# `, `## ` or `### `; `line` is the trimmed text
+ /// after the marker.
+ Heading { level: Level, line: &'a str },
+ /// Any line that matches none of the other kinds; holds the trimmed line.
+ Paragraph(&'a str),
+ /// A line starting with `- `; holds the trimmed text after the marker.
+ List(&'a str),
+ /// A line starting with `> `, or a bare `>` (held as an empty string).
+ Note(&'a str),
+ /// A line starting with `|`; holds the whole trimmed line.
+ Table(&'a str),
+ /// A line starting with a triple-backtick fence; holds the trimmed text
+ /// that follows the fence.
+ FragmentHeader(&'a str),
+ /// A line made of three or more `-` characters.
+ Break,
+ /// A line that is empty once trimmed.
+ BlankLine,
+}
+
+/// A block that spans several consecutive lines and is still being
+/// accumulated by `MarkdownDocument::from_str`. All strings borrow from the
+/// input text.
+enum BlockMultiline<'a> {
+ /// The text of each consecutive list item, marker removed.
+ List(Vec<&'a str>),
+ /// The text of each consecutive note line, marker removed.
+ Note(Vec<&'a str>),
+ /// Consecutive table rows, each kept as the whole trimmed line.
+ Table(Vec<&'a str>),
+ /// A fenced fragment: `language` is the text from the opening fence line,
+ /// and `content` holds the untrimmed lines seen since that fence.
+ Fragment { language: &'a str, content: Vec<&'a str> },
+}
+
+/// Recognise a line that consists solely of an embed, `![label](path)`.
+///
+/// The line is trimmed first. It must begin with `![`, end with `)`, and
+/// contain exactly one `](` separator in between; the text before the
+/// separator becomes the label and the text after it becomes the path.
+/// Returns `None` for any other shape.
+fn parse_embedded(line: &str) -> Option<Block> {
+    // Peel off the `![` ... `)` wrapper, bailing out if either end is missing.
+    let inner = line.trim().strip_prefix("![")?.strip_suffix(")")?;
+    // Exactly two pieces means exactly one `](` separator.
+    let mut pieces = inner.split("](");
+    match (pieces.next(), pieces.next(), pieces.next()) {
+        (Some(label), Some(path), None) => Some(Block::Embedded {
+            label: label.to_string(),
+            path: path.to_string(),
+        }),
+        _ => None,
+    }
 }