1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
|
use crate::*;
/// A parsed markdown document, stored as a flat sequence of block elements.
pub struct MarkdownDocument {
/// The block elements of the document, in the order the parser emitted them.
pub block_elements: Vec<BlockElement>,
}
impl MarkdownDocument {
pub fn from_str(raw_markdown: &str) -> Self {
let mut block_elements = Vec::new();
let mut current_multiline_block = None;
// Chain a blank line to the end to ensure that the final multi-line block is flushed.
let lines = raw_markdown.lines().chain(std::iter::once(""));
for incoming_line in lines {
let incoming_line_untrimmed = incoming_line;
let incoming_line = incoming_line.trim();
// Handle an in-progress subdocument block.
if let Some(MultiLineBlock::Subdocument(language, mut lines)) = current_multiline_block {
if incoming_line == "```" {
let subdocument = Subdocument { language: language.to_string(), content: lines.join("\n") };
block_elements.push(BlockElement::Subdocument(subdocument));
current_multiline_block = None;
} else {
lines.push(incoming_line_untrimmed);
current_multiline_block = Some(MultiLineBlock::Subdocument(language, lines));
}
continue;
}
// Parse the incoming line.
let incoming_line_block = {
if let Some(("", tail)) = incoming_line.split_once("# ") {
BlockLine::DocumentHeading(tail.trim())
} else if let Some(("", tail)) = incoming_line.split_once("## ") {
BlockLine::SectionHeading(tail.trim())
} else if let Some(("", tail)) = incoming_line.split_once("### ") {
BlockLine::ArticleHeading(tail.trim())
} else if let Some(("", tail)) = incoming_line.split_once("- ") {
BlockLine::List(tail.trim())
} else if let Some(("", tail)) = incoming_line.split_once("> ") {
BlockLine::Aside(tail.trim())
} else if incoming_line == ">" {
BlockLine::Aside("")
} else if let Some(("", tail)) = incoming_line.split_once("```") {
BlockLine::SubdocumentHeader(tail.trim())
} else if incoming_line.starts_with("|") {
BlockLine::Table(incoming_line)
} else if incoming_line.len() >= 3 && incoming_line.chars().all(|c| c=='-') {
BlockLine::Break
} else if incoming_line.is_empty() {
BlockLine::BlankLine
} else {
BlockLine::Paragraph(incoming_line) }
};
// If the incoming line is of the same type as the current multiline
// block, append it to the end of that current block and continue.
if let Some(ref mut current_block) = current_multiline_block {
match (&incoming_line_block, current_block) {
(BlockLine::List(line), MultiLineBlock::List(ref mut lines)) => {
lines.push(line); continue; }
(BlockLine::Aside(line), MultiLineBlock::Aside(ref mut lines)) => {
lines.push(line); continue; }
(BlockLine::Table(line), MultiLineBlock::Table(ref mut lines)) => {
lines.push(line); continue; }
_ => (),
};
}
// Otherwise, commit the current block, then handle the incoming line.
if let Some(current_block) = current_multiline_block {
match current_block {
MultiLineBlock::List(raw_lines) => {
let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
block_elements.push(BlockElement::List(lines)); }
MultiLineBlock::Aside(raw_lines) => {
let lines = raw_lines.into_iter().map(|l| Line::from_str(l)).collect();
block_elements.push(BlockElement::Aside(lines)); }
MultiLineBlock::Table(raw_lines) => {
if let Some(table) = Table::try_from_strs(&raw_lines) {
block_elements.push(BlockElement::Table(table)) }
else { for raw_line in raw_lines {
block_elements.push(BlockElement::Paragraph(Line::from_str(&raw_line))) }}}
MultiLineBlock::Subdocument(..) => unreachable!(),
}
current_multiline_block = None;
}
// Handle the incoming line.
match incoming_line_block {
BlockLine::DocumentHeading(s) => block_elements.push(BlockElement::DocumentHeading(Line::from_str(&s))),
BlockLine::SectionHeading(s) => block_elements.push(BlockElement::SectionHeading(Line::from_str(&s))),
BlockLine::ArticleHeading(s) => block_elements.push(BlockElement::ArticleHeading(Line::from_str(&s))),
BlockLine::List(s) => current_multiline_block = Some(MultiLineBlock::List(vec![s])),
BlockLine::Aside(s) => current_multiline_block = Some(MultiLineBlock::Aside(vec![s])),
BlockLine::Table(s) => current_multiline_block = Some(MultiLineBlock::Table(vec![s])),
BlockLine::SubdocumentHeader(s) => current_multiline_block = Some(MultiLineBlock::Subdocument(s, Vec::new())),
BlockLine::Paragraph(s) => {
if let Some(embedded_file) = parse_embedded_file(&s) {
block_elements.push(BlockElement::EmbeddedFile(embedded_file))
} else if let Some(math) = parse_math_block(&s) {
block_elements.push(BlockElement::Math(math))
} else {
block_elements.push(BlockElement::Paragraph(Line::from_str(&s))) }
},
BlockLine::Break => block_elements.push(BlockElement::Break),
BlockLine::BlankLine => (),
}
}
Self { block_elements }
}
}
// NOTE(review): this copy of the function is damaged. The line beginning
// `if starts_with(0, "` has an unterminated string literal, and the code that
// declares `label_start` / `label_end` and opens the loop closed by
// `label_end += 1; }` is missing. Restore it from the upstream file; the
// notes below describe only what is still visible.
/// Attempts to parse a line of text as an embedded file, made of a label
/// followed by a target that ends with `)` as the final character.
///
/// Returns `None` when the label or the target is empty, when either one is
/// rejected by `is_contentful`, when the target itself contains `)`, or when
/// the text does not end with `)`. Otherwise returns an `EmbeddedFile` holding
/// the label and the target.
fn parse_embedded_file(text: &str) -> Option<EmbeddedFile> {
// Work on characters rather than bytes so that positions can be indexed directly.
let chars: Vec<char> = text.trim().chars().collect();
// True when the characters from index `i` onwards match `p`. The comparison
// stops at the shorter of the two sequences, so a pattern that runs past the
// end of the text still counts as a match. Slicing panics if `i` is greater
// than the number of characters.
let starts_with = |i, p:&str| std::iter::zip(&chars[i..], p.chars()).all(|(a, b)| *a == b);
if starts_with(0, " { break };
label_end += 1; }
let label: String = chars[label_start..label_end].iter().collect();
if label.is_empty() || !is_contentful(&label, &['[', ']']) {
return None }
// Try to parse the target.
let target_start = label_end + 2;
let target_end = chars.len() - 1;
if let Some(')') = chars.get(target_end) {
let target: String = chars[target_start..target_end].iter().collect();
if target.is_empty() || target.contains(")") || !is_contentful(&target, &['(',')']) {
return None }
return Some(EmbeddedFile { label, target })
}
}
return None;
}
/// Extracts the body of a math block written on a single line.
///
/// The text must both start and end with a `$$` delimiter, and the two
/// delimiters may not overlap. Returns the text between them with the
/// surrounding whitespace trimmed, or `None` if either delimiter is missing.
fn parse_math_block(text: &str) -> Option<String> {
    let after_opening = text.strip_prefix("$$")?;
    let body = after_opening.strip_suffix("$$")?;
    Some(body.trim().to_string())
}
/// The classification of a single trimmed source line, produced while parsing.
///
/// Every variant that carries text borrows it from the source string.
enum BlockLine<'a> {
/// The trimmed text following a leading `# ` marker.
DocumentHeading(&'a str),
/// The trimmed text following a leading `## ` marker.
SectionHeading(&'a str),
/// The trimmed text following a leading `### ` marker.
ArticleHeading(&'a str),
/// A trimmed line that matched none of the other kinds.
Paragraph(&'a str),
/// The trimmed text following a leading `- ` marker.
List(&'a str),
/// The trimmed text following a leading `> ` marker, or an empty string for a bare `>`.
Aside(&'a str),
/// A whole trimmed line that starts with `|`.
Table(&'a str),
/// The trimmed text following the three backticks that open a subdocument.
SubdocumentHeader(&'a str),
/// A line of three or more characters that are all `-`.
Break,
/// A line that is empty once trimmed.
BlankLine,
}
/// The source lines gathered so far for a block element that spans several
/// lines, held while parsing until the block is complete.
enum MultiLineBlock<'a> {
/// The item text of each consecutive list line.
List(Vec<&'a str>),
/// The text of each consecutive aside line.
Aside(Vec<&'a str>),
/// Each consecutive table line, kept whole.
Table(Vec<&'a str>),
/// The text from the opening fence line, followed by the untrimmed content lines.
Subdocument(&'a str, Vec<&'a str>),
}
|