summaryrefslogtreecommitdiff
path: root/src/line.rs
diff options
context:
space:
mode:
author Ben Bridle <ben@derelict.engineering> 2025-01-06 12:21:06 +1300
committer Ben Bridle <ben@derelict.engineering> 2025-01-06 17:16:24 +1300
commit a78feb46aefaf8e8950e9b029984e9ff98fe69b0 (patch)
tree d524c0656416e27484f8c0ae709f71558ea69bb6 /src/line.rs
parent 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (diff)
download markdown-a78feb46aefaf8e8950e9b029984e9ff98fe69b0.zip
Rewrite the library a second time (v2.0.0)
Diffstat (limited to 'src/line.rs')
-rw-r--r-- src/line.rs 123
1 files changed, 123 insertions, 0 deletions
diff --git a/src/line.rs b/src/line.rs
new file mode 100644
index 0000000..fce628c
--- /dev/null
+++ b/src/line.rs
@@ -0,0 +1,123 @@
+use crate::*;
+
+/// A single line of text, broken up into a sequence of inline tokens.
+#[derive(Clone)]
+pub struct Line {
+ /// The tokens of the line, in the order they occur in the text.
+ pub tokens: Vec<Token>,
+}
+
+impl Line {
+    /// Parse one line of raw text into a [`Line`] of inline tokens.
+    ///
+    /// The line is scanned one character at a time. At the start of the line,
+    /// or directly after a character accepted by `is_whitespace`, each entry
+    /// of `DELIMITERS` is tried in order. An entry produces a token when:
+    ///
+    /// - its opening delimiter is found at the current position,
+    /// - its closing delimiter is found later on the line, directly followed
+    ///   by the end of the line or by a character accepted by `is_whitespace`,
+    /// - the text captured between the delimiters is at least one character
+    ///   long, does not start or end with whitespace, and passes `has_content`,
+    /// - the entry's constructor returns `Some` for the captured text.
+    ///
+    /// Candidate closing delimiters are tried from left to right, so the
+    /// first acceptable one wins. Characters that are not part of a
+    /// delimited span are collected into `Token::Normal` tokens.
+    pub fn from_str(raw_line: &str) -> Self {
+        let chars: Vec<char> = raw_line.chars().collect();
+        let mut tokens = Vec::new();
+        let mut normal_chars = String::new();
+        let mut i = 0;
+
+        // True only if the whole of `delim` is present in `chars` at `at`.
+        // A tail shorter than the delimiter is a mismatch, and a position
+        // past the end of the line is a mismatch rather than a panic.
+        let matches_at = |at: usize, delim: &str| {
+            let mut rest = chars.get(at..).unwrap_or(&[]).iter();
+            delim.chars().all(|d| rest.next() == Some(&d))
+        };
+
+        'find_token: while let Some(c) = chars.get(i) {
+            // Opening delimiters are only recognised at the start of the
+            // line or directly after a boundary character.
+            let char_follows_whitespace = i == 0 || is_whitespace(&chars[i - 1]);
+            if char_follows_whitespace {
+                for (variant, start_delim, end_delim, delim_chars) in DELIMITERS {
+                    if !matches_at(i, start_delim) { continue }
+                    // Only pay for these once the opening delimiter matched.
+                    let delim_chars: Vec<char> = delim_chars.chars().collect();
+                    let end_len = end_delim.chars().count();
+                    let s_end = i + start_delim.chars().count();
+                    // `e_end` is the index just past a candidate closing
+                    // delimiter. Starting one past the earliest position
+                    // guarantees the captured text is never empty.
+                    for e_end in (s_end + end_len + 1)..=chars.len() {
+                        let e_start = e_end - end_len;
+                        // The closing delimiter must be followed by the end
+                        // of the line or by a boundary character.
+                        let followed_by_whitespace =
+                            chars.get(e_end).map_or(true, is_whitespace);
+                        if !followed_by_whitespace || !matches_at(e_start, end_delim) {
+                            continue;
+                        }
+                        let captured: String = chars[s_end..e_start].iter().collect();
+                        // Reject captures padded with whitespace, or that
+                        // begin or end with a delimiter character.
+                        let air_bubbles = captured.len() != captured.trim().len();
+                        if air_bubbles || !has_content(&captured, &delim_chars) {
+                            continue;
+                        }
+                        // The constructor has the final say on the capture.
+                        let token = match variant(captured) {
+                            Some(token) => token,
+                            None => continue,
+                        };
+                        // Commit the preceding normal token, if any.
+                        if !normal_chars.is_empty() {
+                            tokens.push(Token::Normal(std::mem::take(&mut normal_chars)));
+                        }
+                        tokens.push(token);
+                        i = e_end;
+                        continue 'find_token;
+                    }
+                }
+            }
+            // No delimited span starts here: treat the character as plain text.
+            normal_chars.push(*c);
+            i += 1;
+        }
+
+        if !normal_chars.is_empty() {
+            tokens.push(Token::Normal(normal_chars));
+        }
+        Self { tokens }
+    }
+}
+
+
+/// Formats a [`Line`] by writing the `&str` that each token exposes through
+/// `as_ref()`, back to back and in order.
+///
+/// `Display` is implemented rather than `ToString` so that `Line` also works
+/// with `format!`/`write!`; `Line::to_string()` stays available through the
+/// standard library's blanket `ToString` implementation.
+impl std::fmt::Display for Line {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for token in &self.tokens {
+            f.write_str(token.as_ref())?;
+        }
+        Ok(())
+    }
+}
+
+
+/// Build an external link token from a bare path, leaving the label empty.
+///
+/// Always returns `Some`; the `Option` return type matches the constructor
+/// signature used by the `DELIMITERS` table.
+fn unlabeled_extern_link(path: String) -> Option<Token> {
+    let label = String::new();
+    Some(Token::ExternalLink { path, label })
+}
+
+/// Build an external link token from text of the form `label](path`.
+///
+/// The text is split at the first `](`. Returns `None` if that separator is
+/// missing, or if either half contains a `]`.
+fn labelled_extern_link(s: String) -> Option<Token> {
+    let (label, path) = s.split_once("](")?;
+    let has_stray_bracket = [label, path].iter().any(|part| part.contains("]"));
+    if has_stray_bracket {
+        return None;
+    }
+    Some(Token::ExternalLink {
+        label: label.to_string(),
+        path: path.to_string(),
+    })
+}
+
+/// Adapts an infallible one-argument constructor into a closure that wraps
+/// the constructed value in `Some`.
+macro_rules! con {
+    ($constructor:expr) => {
+        |text| Some($constructor(text))
+    };
+}
+
+/// Table of the delimited inline spans recognised by `Line::from_str`.
+///
+/// Each entry is `(constructor, opening delimiter, closing delimiter,
+/// delimiter characters)`:
+///
+/// - the constructor receives the text captured between the two delimiters
+///   and may return `None` to reject it,
+/// - the delimiter characters are handed to `has_content`, which rejects
+///   captured text that starts or ends with one of them.
+///
+/// `Line::from_str` tries the entries in array order and takes the first one
+/// that yields a token, so the order of the entries is significant (for
+/// example, `[[` is listed before `[`).
+const DELIMITERS: [(fn(String)->Option<Token>, &str, &str, &str); 7] = [
+ ( con!(Token::Bold), "**", "**", "*" ),
+ ( con!(Token::Italic), "_", "_", "_" ),
+ ( con!(Token::Monospace), "`", "`", "`" ),
+ ( con!(Token::Math), "$", "$", "$" ),
+ ( con!(Token::InternalLink), "[[", "]]", "[]" ),
+ ( labelled_extern_link, "[", ")", "[]()" ),
+ ( unlabeled_extern_link, "<", ">", "<>" ),
+];
+
+/// Report whether `c` counts as a boundary character.
+///
+/// Despite the name, this accepts more than whitespace: any Unicode
+/// whitespace character, plus the punctuation characters listed in
+/// `BOUNDARY_PUNCTUATION`.
+fn is_whitespace(c: &char) -> bool {
+    const BOUNDARY_PUNCTUATION: &str = r#".,'"“”_:;-/\()[]{}?"#;
+    if c.is_whitespace() {
+        return true;
+    }
+    BOUNDARY_PUNCTUATION.chars().any(|punct| punct == *c)
+}
+
+/// Report whether `s` both starts and ends with a non-delimiter character.
+///
+/// Returns `false` for an empty string. For a one-character string, that
+/// character is both the first and the last.
+fn has_content(s: &str, delimiter_chars: &[char]) -> bool {
+    match (s.chars().next(), s.chars().next_back()) {
+        (Some(first), Some(last)) => {
+            !delimiter_chars.contains(&first) && !delimiter_chars.contains(&last)
+        }
+        _ => false,
+    }
+}