diff options
Diffstat (limited to 'src/parsers/syntactic.rs')
-rw-r--r-- | src/parsers/syntactic.rs | 197 |
1 files changed, 119 insertions, 78 deletions
diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs index 909dbaa..37f8e6c 100644 --- a/src/parsers/syntactic.rs +++ b/src/parsers/syntactic.rs @@ -1,106 +1,147 @@ use crate::*; -use syntactic::*; pub struct SyntacticParser { tokeniser: Tokeniser, - /// The name of the most recently parsed label. - label_name: String, + tokens: Vec<SyntacticToken>, /// The name of the macro being parsed. macro_name: Option<String>, + /// The name of the most recent label. + label_name: String, } impl SyntacticParser { - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { let mut tokeniser = Tokeniser::new(source_code, path); - tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']); + tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']); Self { tokeniser, - label_name: String::new(), + tokens: Vec::new(), macro_name: None, + label_name: String::new(), } } -} - -impl Iterator for SyntacticParser { - type Item = Token; - - /// Sequentially parse tokens from the source code. - fn next(&mut self) -> Option<Token> { + pub fn parse(mut self) -> Vec<SyntacticToken> { + use SyntacticTokenVariant as SynVar; + use SyntacticParseError as SynErr; let t = &mut self.tokeniser; - t.drop_whitespace(); - t.mark_start_position(); - let variant = match t.eat_char()? { - '@' => { - self.label_name = t.eat_token(); - TokenVariant::LabelDefinition(self.label_name.clone()) - } - '&' => { - let token = t.eat_token(); - TokenVariant::LabelDefinition(format!("{}/{token}", self.label_name)) - } - '%' => { - let macro_name = t.eat_token(); - self.macro_name = Some(macro_name.clone()); - TokenVariant::MacroDefinition(macro_name) - } - ';' => { - self.macro_name = None; - TokenVariant::MacroDefinitionTerminator - } - '[' => match t.eat_to_delimiter(']') { - Some(string) => { - let constant = ConstantExpression::from_str(&string, t); - TokenVariant::ConstantExpression(constant) + loop { + t.eat_whitespace(); + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let variant = match c { + ':' => SynVar::Separator, + '{' => SynVar::BlockOpen, + '}' => SynVar::BlockClose, + '@' => match &self.macro_name { + Some(_) => { + t.eat_token(); + SynVar::Error(SynErr::LabelInMacroDefinition) + } + None => { + self.label_name = t.eat_token(); + SynVar::LabelDefinition(self.label_name.clone()) + } } - None => TokenVariant::Error(ParseError::UnterminatedConstantExpression), - } - '{' => TokenVariant::BlockOpen, - '}' => TokenVariant::BlockClose, - '(' => match t.eat_to_delimiter(')') { - Some(string) => TokenVariant::Comment(string), - None => TokenVariant::Error(ParseError::UnterminatedComment), - } - '#' => { - let token = t.eat_token(); - let pbl = PackedBinaryLiteral::from_str(&token, t); - TokenVariant::PackedBinaryLiteral(pbl) - }, - '~' => { - let token = t.eat_token(); - TokenVariant::Symbol(format!("{}/{token}", self.label_name)) - } - ':' => TokenVariant::Separator, - c => { - let token = format!("{c}{}", t.eat_token()); - if let Some(hex_string) = token.strip_prefix("0x") { - match usize::from_str_radix(hex_string, 16) { - Ok(hex) => TokenVariant::HexadecimalLiteral(hex), - Err(_) => TokenVariant::Error(ParseError::InvalidHexadecimalLiteral(token)), + '&' => match &self.macro_name { + Some(macro_name) => { + let label_name = format!("{macro_name}:{}", t.eat_token()); + SynVar::LabelDefinition(label_name) } - } else { - match usize::from_str_radix(&token, 10) { - Ok(value) => TokenVariant::DecimalLiteral(value), - Err(_) => TokenVariant::Symbol(token), + None => { + let label_name = &self.label_name; + let sublabel_name = format!("{label_name}/{}", t.eat_token()); + SynVar::LabelDefinition(sublabel_name) } } - } - }; - - // Parse source path comments. - if let TokenVariant::Comment(comment) = &variant { - // Check if the comment fills the entire line. - if t.start_position.column == 0 && t.end_of_line() { - if let Some(path) = comment.strip_prefix(": ") { - t.embedded_path = Some(PathBuf::from(path.trim())); - t.embedded_first_line = t.start_position.line + 1; + '%' => { + let macro_name = t.eat_token(); + self.macro_name = Some(macro_name.clone()); + SynVar::MacroDefinition(macro_name) + } + ';' => { + self.macro_name = None; + SynVar::MacroDefinitionTerminator } - } + '[' => { + t.mark_child(); + match t.eat_to_delimiter(']') { + Some(_) => { + let child = t.subtokenise(); + t.mark_end(); + let expr = parse_constant_expression(child, t.get_source()); + SynVar::Expression(expr) + } + None => SynVar::Error(SynErr::UnterminatedExpression), + } + } + '(' => match t.eat_to_delimiter(')') { + Some(string) => { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + } + } + continue; + }, + None => SynVar::Error(SynErr::UnterminatedComment), + } + '|' => { + let token = t.eat_token(); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(addr) => SynVar::PinnedAddress(addr), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(addr) => SynVar::PinnedAddress(addr), + Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)), + } + } + } + '#' => { + t.mark_child(); + t.eat_token(); + let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source()); + SynVar::PackedBinaryLiteral(pbl) + }, + '~' => match &self.macro_name { + Some(macro_name) => { + let symbol_name = format!("{macro_name}:{}", t.eat_token()); + SynVar::Symbol(symbol_name) + } + None => { + let label_name = &self.label_name; + let symbol_name = format!("{label_name}/{}", t.eat_token()); + SynVar::Symbol(symbol_name) + } + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(value) => SynVar::IntegerLiteral(value as isize), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(value) => SynVar::IntegerLiteral(value as isize), + Err(_) => SynVar::Symbol(token), + } + } + } + }; + + t.mark_end(); + let source = t.get_source(); + self.tokens.push(SyntacticToken { source, variant }); } - let source = t.mark_end_position(); - Some( Token { source, variant } ) + return self.tokens; } } |