1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
use crate::*;
use std::path::PathBuf;
/// Translate raw source code characters into syntactic tokens.
/// Translate raw source code characters into syntactic tokens.
pub struct SyntacticParser {
// Character/token stream over the raw source text; configured with the
// language's delimiter and terminator characters in `from_source_code`.
tokeniser: Tokeniser,
/// The name of the most recently parsed label.
label: String,
}
impl SyntacticParser {
    /// Construct a parser over `source_code`, optionally associated with a
    /// file path (used by the tokeniser for error reporting).
    ///
    /// The tokeniser is configured with the fixed delimiter and terminator
    /// character sets of the language before any parsing begins.
    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
        // Characters that begin or end a token in their own right.
        const DELIMITERS: [char; 13] =
            ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
        // Characters that terminate the token they appear in.
        const TERMINATORS: [char; 1] = [':'];

        let mut tokeniser = Tokeniser::new(source_code, path);
        tokeniser.add_delimiters(&DELIMITERS);
        tokeniser.add_terminators(&TERMINATORS);
        Self { label: String::new(), tokeniser }
    }
}
impl Iterator for SyntacticParser {
type Item = SyntacticToken;
/// Sequentially parse tokens from the source code.
///
/// Returns `None` when the tokeniser runs out of characters. Malformed
/// input (unterminated comments/strings, bad padding values) is reported
/// in-band as a `SyntacticTokenVariant::Error`, not by ending the stream.
fn next(&mut self) -> Option<SyntacticToken> {
use SyntacticTokenVariant as SynVar;
use SyntacticParseError as SynErr;
let t = &mut self.tokeniser;
// Skip whitespace, then record where this token starts so the
// source span attached to the token is accurate.
t.drop_whitespace();
t.mark_start_position();
// The first character selects the token variant.
let variant = match t.eat_char()? {
// '@name': a label definition. Remember the name so that later
// '&' and '~' tokens can be scoped beneath it.
'@' => {
self.label = t.eat_token();
SynVar::LabelDefinition(self.label.clone())
}
// '&name': a sub-label, namespaced as "<current-label>/name".
'&' => {
let token = t.eat_token();
SynVar::LabelDefinition(format!("{}/{token}", self.label))
}
// '%name': the start of a macro definition.
'%' => SynVar::MacroDefinition(t.eat_token()),
// ';' closes the current macro definition.
';' => SynVar::MacroDefinitionTerminator,
'[' => SynVar::MarkOpen,
']' => SynVar::MarkClose,
'{' => SynVar::BlockOpen,
'}' => SynVar::BlockClose,
// '( ... )': a comment, consumed through the closing parenthesis.
'(' => match t.eat_to_delimiter(')') {
Some(string) => SynVar::Comment(string),
None => SynVar::Error(SynErr::UnterminatedComment),
}
// '...': a raw byte string — bytes are taken verbatim.
'\'' => match t.eat_to_delimiter('\'') {
Some(string) => SynVar::String(string.as_bytes().to_vec()),
None => SynVar::Error(SynErr::UnterminatedRawString),
}
// "...": a null-terminated byte string — a 0x00 byte is appended.
'"' => match t.eat_to_delimiter('"') {
Some(string) => {
let mut bytes = string.as_bytes().to_vec();
bytes.push(0x00);
SynVar::String(bytes)
}
None => SynVar::Error(SynErr::UnterminatedNullString),
}
// '#value': padding; the token must parse as a Value.
'#' => {
let token = t.eat_token();
match token.parse::<Value>() {
Ok(value) => SynVar::Padding(value),
Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
}
},
// '~name': a symbol reference scoped under the current label,
// namespaced as "<current-label>/name".
'~' => {
let token = t.eat_token();
let symbol = format!("{}/{token}", self.label);
SynVar::Symbol(symbol)
}
// A lone ':' terminator becomes the symbol ":".
':' => SynVar::Symbol(String::from(':')),
// Anything else: reattach the eaten character and try, in order:
// numeric literal, instruction mnemonic, then plain symbol.
c => {
let token = format!("{c}{}", t.eat_token());
match token.parse::<Value>() {
Ok(value) => SynVar::Literal(value),
Err(_) => match token.parse::<Instruction>() {
Ok(instruction) => SynVar::Instruction(instruction),
Err(_) => SynVar::Symbol(token),
}
}
}
};
// Parse source path comments.
// A comment that occupies an entire line (starts at column 0 and runs
// to end-of-line) and begins with ": " records the path of embedded
// source: the tokeniser's source_path is set to the trimmed remainder
// and the following line is marked as the embedded file's first line.
if let SynVar::Comment(comment) = &variant {
// Check if the comment fills the entire line.
if t.start_position.column == 0 && t.end_of_line() {
if let Some(path) = comment.strip_prefix(": ") {
t.source_path = Some(PathBuf::from(path.trim()));
t.embedded_first_line = t.start_position.line + 1;
}
}
}
// Attach the span covering everything consumed since mark_start_position.
let source = t.get_source_span();
Some( SyntacticToken { source, variant } )
}
}
/// Errors that can occur while locating and reading a source file.
///
/// Fieldless, so the cheap `Clone`/`Copy` and structural `PartialEq`/`Eq`
/// derives are free and let callers compare and propagate errors by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseError {
    /// The file's extension is not one the parser accepts.
    InvalidExtension,
    /// The file could not be found.
    NotFound,
    /// The file exists but could not be read (e.g. permissions).
    NotReadable,
    /// The path points to a directory, not a file.
    IsADirectory,
    /// The file's contents are not valid UTF-8.
    InvalidUtf8,
    /// An unclassified failure.
    Unknown,
}
|