diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-01-06 12:21:06 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-01-06 17:16:24 +1300 |
commit | a78feb46aefaf8e8950e9b029984e9ff98fe69b0 (patch) | |
tree | d524c0656416e27484f8c0ae709f71558ea69bb6 /src/line.rs | |
parent | 13cb719b87bcef41c4dd398f5a651ddb2b561e0d (diff) | |
download | markdown-a78feb46aefaf8e8950e9b029984e9ff98fe69b0.zip |
Rewrite the library a second time (v2.0.0)
Diffstat (limited to 'src/line.rs')
-rw-r--r-- | src/line.rs | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/src/line.rs b/src/line.rs new file mode 100644 index 0000000..fce628c --- /dev/null +++ b/src/line.rs @@ -0,0 +1,123 @@ +use crate::*; + +#[derive(Clone)] +pub struct Line { + pub tokens: Vec<Token>, +} + +impl Line { + pub fn from_str(raw_line: &str) -> Self { + let chars: Vec<char> = raw_line.chars().collect(); + let mut tokens = Vec::new(); + let mut normal_chars = String::new(); + let mut i = 0; + + // Compare chars from i to a delimiter string. + let compare = |i, p:&str| std::iter::zip(&chars[i..], p.chars()) + .all(|(a, b)| *a == b); + + 'find_token: while let Some(c) = chars.get(i) { + let char_follows_whitespace = match chars.get(i.wrapping_sub(1)) { + Some(w) => is_whitespace(w), + None => true, + }; + if char_follows_whitespace { + // Try to parse an opening delimiter. + for (variant, start_delim, end_delim, delim_chars) in DELIMITERS { + let delim_chars: Vec<char> = delim_chars.chars().collect(); + // Try to match an opening delimiter with a terminating delimiter. + if compare(i, start_delim) { + let s_end = i + start_delim.chars().count(); + let mut e_start = s_end; + let mut e_end = e_start + end_delim.chars().count(); + // Scan along chars to find matching end delimiter. + while e_end <= chars.len() { + e_start += 1; e_end += 1; + let followed_by_whitespace = match chars.get(e_end) { + Some(end_char) => is_whitespace(end_char), + None => e_end == chars.len(), + }; + // If end delimiter is found, store the token and continue. + if followed_by_whitespace && compare(e_start, end_delim) { + // Check if captured string contains non-delimiter characters. + let captured: String = chars[s_end..e_start].iter().collect(); + let no_content = !has_content(&captured, &delim_chars); + let air_bubbles = captured.len() != captured.trim().len(); + let token = variant(captured); + if no_content || air_bubbles || token.is_none() { continue } + // Commit the preceding normal token, if any. 
+ if !normal_chars.is_empty() { + let normal = std::mem::take(&mut normal_chars); + tokens.push(Token::Normal(normal)); + } + tokens.push(token.unwrap()); + i = e_end; + continue 'find_token; + } + } + } + } + } + normal_chars.push(*c); + i += 1; + } + + if !normal_chars.is_empty() { + let normal = std::mem::take(&mut normal_chars); + tokens.push(Token::Normal(normal)); + } + Self { tokens } + } +} + + +impl ToString for Line { + fn to_string(&self) -> String { + let mut string = String::new(); + for token in &self.tokens { + string.push_str(token.as_ref()) + } + return string; + } +} + + +fn unlabeled_extern_link(path: String) -> Option<Token> { + Some( Token::ExternalLink { path, label:String::new() } ) +} + +fn labelled_extern_link(s: String) -> Option<Token> { + let (label, path) = match s.split_once("](") { + Some((l, t)) => (l.to_string(), t.to_string()), + None => return None, + }; + if label.contains("]") || path.contains("]") { return None } + Some( Token::ExternalLink { label, path } ) +} + +macro_rules! con { + ($v:expr) => {|s| Some($v(s)) }; +} + +const DELIMITERS: [(fn(String)->Option<Token>, &str, &str, &str); 7] = [ + ( con!(Token::Bold), "**", "**", "*" ), + ( con!(Token::Italic), "_", "_", "_" ), + ( con!(Token::Monospace), "`", "`", "`" ), + ( con!(Token::Math), "$", "$", "$" ), + ( con!(Token::InternalLink), "[[", "]]", "[]" ), + ( labelled_extern_link, "[", ")", "[]()" ), + ( unlabeled_extern_link, "<", ">", "<>" ), +]; + +fn is_whitespace(c: &char) -> bool { + c.is_whitespace() || r#".,'"“”_:;-/\()[]{}?"#.contains(*c) +} + +/// Check that first and last characters of a string are not delimiters. +fn has_content(s: &str, delimiter_chars: &[char]) -> bool { + let not_delim = |c| match c { + Some(c) => !delimiter_chars.contains(&c), + None => false, + }; + not_delim(s.chars().nth(0)) && not_delim(s.chars().last()) +} |