summaryrefslogtreecommitdiff
path: root/src/translators/syntactic_parser.rs
blob: 8f0850bf00848d270da6779b85ccdc1493e26ea6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use crate::*;

use std::path::PathBuf;


/// Translate raw source code characters into syntactic tokens.
///
/// Tokens are produced one at a time through the [`Iterator`] implementation,
/// each call to `next` yielding a single [`SyntacticToken`].
pub struct SyntacticParser {
    /// Reader over the source code; every character and token is consumed
    /// through it.
    tokeniser: Tokeniser,
    /// The name of the most recently parsed label.
    ///
    /// Overwritten by each `@` label definition, and used as the `label/`
    /// prefix for `&` label definitions and `~` symbols.
    label: String,
}


impl SyntacticParser {
    /// Create a parser that reads tokens from `source_code`.
    ///
    /// `path` is passed through to the underlying [`Tokeniser`] unchanged.
    /// The tokeniser is registered with every sigil character that token
    /// parsing dispatches on as a delimiter, and with `:` as a terminator.
    /// The parser starts with an empty current label.
    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
        const DELIMITERS: [char; 13] = [
            '@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~',
        ];
        const TERMINATORS: [char; 1] = [':'];

        let mut tokeniser = Tokeniser::new(source_code, path);
        tokeniser.add_delimiters(&DELIMITERS);
        tokeniser.add_terminators(&TERMINATORS);

        Self {
            label: String::new(),
            tokeniser,
        }
    }
}


impl Iterator for SyntacticParser {
    type Item = SyntacticToken;

    /// Parse and return the next token from the source code.
    ///
    /// Leading whitespace is dropped, the start position is marked, and the
    /// first character read selects which kind of token is parsed. Returns
    /// `None` when the tokeniser has no character left to yield. Malformed
    /// input (unterminated comments or strings, invalid padding values) is
    /// returned in-band as an error variant instead of ending iteration.
    fn next(&mut self) -> Option<SyntacticToken> {
        use SyntacticParseError as SynErr;
        use SyntacticTokenVariant as SynVar;

        let tokeniser = &mut self.tokeniser;
        tokeniser.drop_whitespace();
        tokeniser.mark_start_position();

        let sigil = tokeniser.eat_char()?;
        let variant = match sigil {
            // A label definition also becomes the current label, which is
            // used below to qualify `&` definitions and `~` symbols.
            '@' => {
                let name = tokeniser.eat_token();
                self.label = name.clone();
                SynVar::LabelDefinition(name)
            }
            '&' => {
                let name = tokeniser.eat_token();
                SynVar::LabelDefinition(format!("{}/{name}", self.label))
            }
            '%' => SynVar::MacroDefinition(tokeniser.eat_token()),
            ';' => SynVar::MacroDefinitionTerminator,
            '[' => SynVar::MarkOpen,
            ']' => SynVar::MarkClose,
            '{' => SynVar::BlockOpen,
            '}' => SynVar::BlockClose,
            '(' => tokeniser
                .eat_to_delimiter(')')
                .map_or(SynVar::Error(SynErr::UnterminatedComment), SynVar::Comment),
            // Raw string: the bytes exactly as written.
            '\'' => tokeniser
                .eat_to_delimiter('\'')
                .map_or(SynVar::Error(SynErr::UnterminatedRawString), |text| {
                    SynVar::String(text.as_bytes().to_vec())
                }),
            // Null string: the bytes as written, followed by a zero byte.
            '"' => match tokeniser.eat_to_delimiter('"') {
                None => SynVar::Error(SynErr::UnterminatedNullString),
                Some(text) => {
                    let mut bytes = Vec::with_capacity(text.len() + 1);
                    bytes.extend_from_slice(text.as_bytes());
                    bytes.push(0x00);
                    SynVar::String(bytes)
                }
            },
            '#' => {
                let token = tokeniser.eat_token();
                if let Ok(value) = token.parse::<Value>() {
                    SynVar::Padding(value)
                } else {
                    SynVar::Error(SynErr::InvalidPaddingValue(token))
                }
            }
            '~' => {
                let name = tokeniser.eat_token();
                SynVar::Symbol(format!("{}/{name}", self.label))
            }
            ':' => SynVar::Symbol(':'.to_string()),
            // Anything else is a bare word: try it as a value first, then as
            // an instruction, and otherwise treat it as a symbol.
            first => {
                let mut token = String::from(first);
                token.push_str(&tokeniser.eat_token());
                if let Ok(value) = token.parse::<Value>() {
                    SynVar::Literal(value)
                } else if let Ok(instruction) = token.parse::<Instruction>() {
                    SynVar::Instruction(instruction)
                } else {
                    SynVar::Symbol(token)
                }
            }
        };

        // Source path comments: a comment that starts in column zero, runs to
        // the end of its line, and begins with ": " replaces the tokeniser's
        // source path and sets `embedded_first_line` to the line after it.
        if let SynVar::Comment(comment) = &variant {
            let fills_line =
                tokeniser.start_position.column == 0 && tokeniser.end_of_line();
            let embedded_path = if fills_line {
                comment.strip_prefix(": ")
            } else {
                None
            };
            if let Some(path) = embedded_path {
                tokeniser.source_path = Some(PathBuf::from(path.trim()));
                tokeniser.embedded_first_line = tokeniser.start_position.line + 1;
            }
        }

        Some(SyntacticToken {
            source: tokeniser.get_source_span(),
            variant,
        })
    }
}


/// Error kinds whose names describe failures to load a file.
///
/// NOTE(review): the parser code in this file neither constructs nor matches
/// on this enum, so the per-variant notes below are inferred from the variant
/// names only; confirm them against the code that produces these errors.
#[derive(Debug)]
pub enum ParseError {
    /// Presumably: the file's extension was not an accepted one.
    InvalidExtension,
    /// Presumably: no file exists at the requested path.
    NotFound,
    /// Presumably: the file exists but could not be read.
    NotReadable,
    /// Presumably: the path refers to a directory rather than a file.
    IsADirectory,
    /// Presumably: the file contents were not valid UTF-8.
    InvalidUtf8,
    /// Presumably: a failure that fits none of the other variants.
    Unknown,
}