summaryrefslogtreecommitdiff
path: root/src/line.rs
blob: fce628cfe357c52497bf122f8b2adce64ed8cd75 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
use crate::*;

/// A single line of text, held as the sequence of tokens it was parsed into.
#[derive(Clone)]
pub struct Line {
    /// The tokens making up the line, in the order they occur in the text.
    pub tokens: Vec<Token>,
}

impl Line {
    /// Split a raw line of text into plain and delimited tokens.
    ///
    /// The line is scanned left to right. At the start of the line, or directly
    /// after a character for which `is_whitespace` holds, each entry of
    /// `DELIMITERS` is tried in order. An entry matches when its opening
    /// delimiter is found at the cursor and its closing delimiter is found later
    /// on the line, directly followed by the end of the line or by a character
    /// for which `is_whitespace` holds. The text between the delimiters must be
    /// non-empty, must not begin or end with whitespace or with one of the
    /// entry's delimiter characters, and must be accepted by the entry's
    /// constructor. The nearest closing delimiter satisfying all of this wins.
    ///
    /// Every character that does not end up inside a delimited token is
    /// collected into a `Token::Normal` run.
    pub fn from_str(raw_line: &str) -> Self {
        let chars: Vec<char> = raw_line.chars().collect();
        let mut tokens = Vec::new();
        let mut plain = String::new();
        let mut cursor = 0;

        // Pairwise comparison of the characters from `at` onwards with `pattern`;
        // only as many characters as both sides provide are compared.
        let matches_at = |at: usize, pattern: &str| {
            chars[at..]
                .iter()
                .zip(pattern.chars())
                .all(|(have, want)| *have == want)
        };

        'scan: while cursor < chars.len() {
            // A token may only open at the start of the line or after a boundary character.
            let at_boundary = cursor == 0 || is_whitespace(&chars[cursor - 1]);
            if at_boundary {
                for (build, opener, closer, edge_chars) in DELIMITERS {
                    if !matches_at(cursor, opener) {
                        continue;
                    }
                    let forbidden_edges: Vec<char> = edge_chars.chars().collect();
                    let content_start = cursor + opener.chars().count();
                    let closer_len = closer.chars().count();
                    // Last index at which a closing delimiter still fits on the line.
                    let last_closer_start = chars.len().saturating_sub(closer_len);

                    // Try each candidate closing position, nearest first. Starting one
                    // past `content_start` guarantees at least one captured character.
                    for closer_start in (content_start + 1)..=last_closer_start {
                        let closer_end = closer_start + closer_len;
                        let boundary_after = match chars.get(closer_end) {
                            Some(next) => is_whitespace(next),
                            // Nothing follows the delimiter: it ends the line.
                            None => true,
                        };
                        if !(boundary_after && matches_at(closer_start, closer)) {
                            continue;
                        }

                        let captured: String =
                            chars[content_start..closer_start].iter().collect();
                        let clean_edges = has_content(&captured, &forbidden_edges);
                        let unpadded = captured.len() == captured.trim().len();
                        match build(captured) {
                            Some(token) if clean_edges && unpadded => {
                                // Flush any plain text gathered before this token.
                                if !plain.is_empty() {
                                    tokens.push(Token::Normal(std::mem::take(&mut plain)));
                                }
                                tokens.push(token);
                                cursor = closer_end;
                                continue 'scan;
                            }
                            // Rejected candidate: keep looking for a later closing delimiter.
                            _ => {}
                        }
                    }
                }
            }
            // No delimited token starts here; the character is plain text.
            plain.push(chars[cursor]);
            cursor += 1;
        }

        if !plain.is_empty() {
            tokens.push(Token::Normal(plain));
        }
        Self { tokens }
    }
}


/// Renders a line by concatenating the string form of each of its tokens, in order.
///
/// `Display` is implemented instead of `ToString` directly: the standard
/// library's blanket `impl<T: Display> ToString for T` keeps `line.to_string()`
/// working exactly as before, while also allowing a `Line` to be used with
/// `format!`, `write!` and `println!`.
impl std::fmt::Display for Line {
    /// Writes every token's string representation to `f` without separators.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for token in &self.tokens {
            f.write_str(token.as_ref())?;
        }
        Ok(())
    }
}


/// Build an external link token that points at `path` and carries an empty label.
///
/// Always succeeds; the `Option` return type matches the constructor signature
/// used by the `DELIMITERS` table.
fn unlabeled_extern_link(path: String) -> Option<Token> {
    let label = String::new();
    Some(Token::ExternalLink { path, label })
}

/// Build a labelled external link token from text of the form `label](path`.
///
/// The text is split at the first `](`; the part before it becomes the label
/// and the part after it the path. Returns `None` when the separator is missing
/// or when either part still contains a `]`.
fn labelled_extern_link(s: String) -> Option<Token> {
    let (label, path) = s.split_once("](")?;
    let stray_bracket = [label, path].iter().any(|part| part.contains("]"));
    if stray_bracket {
        return None;
    }
    Some(Token::ExternalLink {
        label: label.to_string(),
        path: path.to_string(),
    })
}

/// Lift a one-argument constructor `$ctor` into a closure that wraps the
/// constructed value in `Some`, so that infallible constructors can be used
/// where a function returning `Option` is expected.
macro_rules! con {
    ($ctor:expr) => {
        |text| Some($ctor(text))
    };
}

/// Table of the inline markup recognised by `Line::from_str`, tried in array order.
///
/// Each entry is `(constructor, opening delimiter, closing delimiter, edge characters)`:
/// - the constructor receives the text captured between the delimiters and
///   returns the token, or `None` to reject the capture;
/// - the captured text may neither start nor end with any of the edge characters.
const DELIMITERS: [(fn(String)->Option<Token>, &str, &str, &str); 7] = [
    ( con!(Token::Bold),          "**", "**", "*" ),
    ( con!(Token::Italic),        "_",  "_",  "_" ),
    ( con!(Token::Monospace),     "`",  "`",  "`" ),
    ( con!(Token::Math),          "$",  "$",  "$" ),
    // "[[" is listed ahead of "[" and is therefore tried first.
    ( con!(Token::InternalLink),  "[[", "]]", "[]" ),
    ( labelled_extern_link,       "[",  ")",  "[]()" ),
    ( unlabeled_extern_link,      "<",  ">",  "<>" ),
];

/// Report whether `c` counts as a boundary character around delimited tokens.
///
/// Despite the name this is broader than whitespace: it is true for any
/// Unicode whitespace character and also for the punctuation characters
/// listed in `BOUNDARY_PUNCTUATION`.
fn is_whitespace(c: &char) -> bool {
    const BOUNDARY_PUNCTUATION: &str = r#".,'"“”_:;-/\()[]{}?"#;
    if c.is_whitespace() {
        return true;
    }
    BOUNDARY_PUNCTUATION.chars().any(|mark| mark == *c)
}

/// Check that a string is non-empty and that neither its first nor its last
/// character is one of `delimiter_chars`.
///
/// Returns `false` for the empty string. For a one-character string the same
/// character is checked as both first and last.
fn has_content(s: &str, delimiter_chars: &[char]) -> bool {
    let first = s.chars().next();
    let last = s.chars().next_back();
    match (first, last) {
        (Some(head), Some(tail)) => {
            !delimiter_chars.contains(&head) && !delimiter_chars.contains(&tail)
        }
        _ => false,
    }
}