diff options
Diffstat (limited to 'src/parsers')
-rw-r--r-- | src/parsers/assembler.rs | 282 | ||||
-rw-r--r-- | src/parsers/bytecode.rs | 161 | ||||
-rw-r--r-- | src/parsers/expression.rs (renamed from src/parsers/constant_expression.rs) | 26 | ||||
-rw-r--r-- | src/parsers/mod.rs | 14 | ||||
-rw-r--r-- | src/parsers/packed_binary_literal.rs | 43 | ||||
-rw-r--r-- | src/parsers/semantic.rs | 339 | ||||
-rw-r--r-- | src/parsers/syntactic.rs | 197 |
7 files changed, 783 insertions, 279 deletions
diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs new file mode 100644 index 0000000..eb180e3 --- /dev/null +++ b/src/parsers/assembler.rs @@ -0,0 +1,282 @@ +use crate::*; +use AssemblerErrorVariant as ErrVar; + +use indexmap::IndexMap; + + +static mut ID: usize = 0; +macro_rules! new_id { + () => { unsafe { + let id = ID; + ID += 1; + id + }}; +} + + +impl SemanticProgram { + pub fn assemble(&self) -> Vec<AssembledToken> { + let environment = Environment { + macro_definitions: &self.macro_definitions, + label_definitions: &self.label_definitions, + arguments: &IndexMap::new(), + id: new_id!(), + }; + let mut assembled_tokens = Vec::new(); + for token in &self.body { + let tokens = environment.reify_semantic_token(token); + assembled_tokens.extend(tokens); + } + return assembled_tokens; + } +} + + +pub struct Environment<'a> { + pub macro_definitions: &'a IndexMap<String, MacroDefinition>, + pub label_definitions: &'a IndexMap<String, LabelDefinition>, + pub arguments: &'a IndexMap<String, Argument>, + pub id: usize, +} + +impl<'a> Environment<'a> { + // This is only ever called for the highest level body tokens, never for invocations. + fn reify_semantic_token(&self, token: &SemanticToken) -> Vec<AssembledToken> { + let mut assembled_tokens = Vec::new(); + match token { + SemanticToken::Word(pbl) => { + let word = self.reify_packed_binary_literal(pbl); + assembled_tokens.push(AssembledToken::Word(word)); + } + SemanticToken::Invocation(invocation) => { + match self.reify_invocation(invocation) { + Ok(argument) => match argument { + Argument::Block(block) => assembled_tokens.extend(block), + Argument::Integer(_) => { + let variant = AssemblerErrorVariant::NotABlock; + let source = invocation.source.clone(); + let error = AssemblerError { source, variant }; + assembled_tokens.push(AssembledToken::Error(error)) + } + } + Err(error) => assembled_tokens.push(AssembledToken::Error(error)), + } + } + SemanticToken::LabelDefinition(definition) => { + assembled_tokens.push(AssembledToken::LabelDefinition(definition.clone())); + } + SemanticToken::PinnedAddress(address) => { + assembled_tokens.push(AssembledToken::PinnedAddress(address.clone())); + } + SemanticToken::Error(_) => (), + } + return assembled_tokens; + } + + fn reify_packed_binary_literal(&self, pbl: &PackedBinaryLiteral) -> AssembledWord { + let mut assembled_fields = Vec::new(); + let mut errors = Vec::new(); + for field in &pbl.fields { + let name = field.name.to_string(); + match self.reify_integer_reference(&name, &field.source) { + Ok(value) => assembled_fields.push( + AssembledField { + source: field.source.clone(), + value, + bits: field.bits, + shift: field.shift, + } + ), + Err(error) => errors.push(error), + }; + } + let source = pbl.source.clone(); + let value = pbl.value; + let bits = pbl.bits; + AssembledWord { source, bits, fields: assembled_fields, value, errors } + } + + fn reify_integer_reference(&self, name: &str, source: &SourceSpan) -> Result<IntegerArgument, AssemblerError> { + match self.reify_reference(name, source)? { + Argument::Integer(integer) => Ok(integer), + Argument::Block(_) => Err( + AssemblerError { + source: source.clone(), + variant: ErrVar::NotAnInteger, + } + ), + } + } + + fn reify_reference(&self, name: &str, source: &SourceSpan) -> Result<Argument, AssemblerError> { + let source = source.clone(); + if let Some(argument) = self.arguments.get(name) { + Ok(argument.clone()) + } else if let Some(definition) = self.macro_definitions.get(name) { + self.reify_value(&definition.value) + } else if let Some(label) = self.label_definitions.get(name) { + let name = Tracked::from(self.tag_label_name(&label.name), &source); + Ok(Argument::Integer(IntegerArgument::LabelReference(name))) + } else { + let variant = ErrVar::DefinitionNotFound(name.to_string()); + Err(AssemblerError { source, variant }) + } + } + + fn tag_label_name(&self, name: &str) -> String { + match name.contains(':') { + true => format!("{name}:{}", self.id), + false => name.to_string(), + } + } + + fn reify_value(&self, value: &Value) -> Result<Argument, AssemblerError> { + match value { + Value::Integer(integer) => { + let value = match &integer { + Integer::Literal(integer) => { + IntegerArgument::Integer(integer.clone()) + } + Integer::Expression(expr) => { + let expr = self.reify_constant_expression(expr)?; + IntegerArgument::Expression(expr) + } + Integer::LabelReference(name) => { + let name = Tracked::from(self.tag_label_name(name), &name.source); + IntegerArgument::LabelReference(name) + } + }; + Ok(Argument::Integer(value)) + } + Value::Block(block) => { + let mut assembled_tokens = Vec::new(); + for token in block { + match &token { + SemanticToken::Word(pbl) => { + let word = self.reify_packed_binary_literal(pbl); + assembled_tokens.push(AssembledToken::Word(word)); + } + SemanticToken::Invocation(invocation) => { + match self.reify_invocation(invocation)? { + Argument::Block(block) => assembled_tokens.extend(block), + Argument::Integer(_) => { + let source = invocation.source.clone(); + let variant = AssemblerErrorVariant::IntegerInBlock; + return Err(AssemblerError { source, variant}); + } + } + } + SemanticToken::LabelDefinition(definition) => { + let mut definition = definition.clone(); + definition.name.push_str(&format!(":{}", self.id)); + let token = AssembledToken::LabelDefinition(definition); + assembled_tokens.push(token); + } + SemanticToken::PinnedAddress(address) => { + let token = AssembledToken::PinnedAddress(address.to_owned()); + assembled_tokens.push(token); + } + SemanticToken::Error(_) => (), + } + } + Ok(Argument::Block(assembled_tokens)) + } + Value::Invocation(invocation) => { + self.reify_invocation(invocation) + } + } + } + + fn reify_invocation(&self, invocation: &Invocation) -> Result<Argument, AssemblerError> { + macro_rules! err { + ($variant:expr) => { Err(AssemblerError { + source: invocation.source.clone(), variant: $variant + }) }; + } + if let Some(argument) = self.arguments.get(&invocation.name) { + let expected = 0; + let received = invocation.arguments.len(); + if received != expected { + return err!(ErrVar::IncorrectArgumentCount(expected, received)); + } + Ok(argument.clone()) + } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { + // Check that the correct number of arguments were provided. + let received = invocation.arguments.len(); + let expected = definition.arguments.len(); + if received != expected { + return err!(ErrVar::IncorrectArgumentCount(expected, received)); + } + let mut arguments = IndexMap::new(); + for (i, argument) in invocation.arguments.iter().enumerate() { + // Check that the correct types of arguments were provided. + let arg_invocation = self.reify_value(&argument.value)?; + let arg_invocation_type = match &arg_invocation { + Argument::Integer(_) => ArgumentVariant::Integer, + Argument::Block(_) => ArgumentVariant::Block, + }; + let arg_definition_type = definition.arguments[i].variant; + if arg_invocation_type != arg_definition_type { + let variant = ErrVar::IncorrectArgumentType( + arg_definition_type, arg_invocation_type + ); + return Err(AssemblerError { source: argument.source.clone(), variant }); + } + let name = definition.arguments[i].name.clone(); + arguments.insert(name, arg_invocation); + } + let environment = Environment { + macro_definitions: &self.macro_definitions, + label_definitions: &self.label_definitions, + arguments: &arguments, + id: new_id!(), + }; + environment.reify_value(&definition.value) + } else if let Some(label) = self.label_definitions.get(&invocation.name) { + let expected = 0; + let received = invocation.arguments.len(); + if received != expected { + return err!(ErrVar::IncorrectArgumentCount(expected, received)); + } + let name = Tracked::from(self.tag_label_name(&label.name), &label.source); + Ok(Argument::Integer(IntegerArgument::LabelReference(name))) + } else { + err!(ErrVar::DefinitionNotFound(invocation.name.to_string())) + } + } + + fn reify_constant_expression(&self, expr: &Expression) -> Result<AssembledExpression, AssemblerError> { + use ExpressionTokenVariant as ExprVar; + + let mut assembled_tokens = Vec::new(); + for token in &expr.tokens { + let assembled_token = match &token.variant { + ExprVar::Literal(value) => { + let source = token.source.clone(); + let integer = TrackedInteger { source, value: *value }; + AssembledExpressionToken::Integer(integer) + } + ExprVar::Operator(operator) => { + AssembledExpressionToken::Operator(*operator) + } + ExprVar::Invocation(name) => { + match self.reify_integer_reference(&name, &token.source)? { + IntegerArgument::LabelReference(name) => { + AssembledExpressionToken::LabelReference(name) + } + IntegerArgument::Integer(integer) => { + AssembledExpressionToken::Integer(integer) + } + IntegerArgument::Expression(expr) => { + AssembledExpressionToken::Expression(Box::new(expr)) + }, + } + } + ExprVar::Error(_) => continue, + }; + assembled_tokens.push(assembled_token); + } + Ok(AssembledExpression { source: expr.source.clone(), tokens: assembled_tokens }) + } +} + diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs new file mode 100644 index 0000000..ec19d9f --- /dev/null +++ b/src/parsers/bytecode.rs @@ -0,0 +1,161 @@ +use crate::*; + +use std::collections::HashMap; + + +pub struct BytecodeGenerator<'a> { + tokens: &'a [AssembledToken], + addresses: HashMap<String, Tracked<usize>>, + words: Vec<Word>, + errors: Vec<BytecodeError>, +} + +impl<'a> BytecodeGenerator<'a> { + pub fn new(tokens: &'a [AssembledToken]) -> Self { + Self { + tokens, + addresses: HashMap::new(), + words: Vec::new(), + errors: Vec::new(), + } + } + + pub fn generate(mut self) -> Bytecode { + self.calculate_addresses(); + for token in self.tokens { + match token { + AssembledToken::Word(assembled_word) => { + let mut value = assembled_word.value; + for field in &assembled_word.fields { + let (field_value, source) = match &field.value { + IntegerArgument::Expression(expr) => + (self.resolve_expression(expr), expr.source.clone()), + IntegerArgument::LabelReference(name) => + (self.resolve_label_reference(name), name.source.clone()), + IntegerArgument::Integer(integer) => + (integer.value, integer.source.clone()), + }; + let bitcount = match field_value { + 0 => 0, + _ => (field_value.ilog2() + 1) as usize, + }; + if field.bits < bitcount { + let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount); + self.errors.push(BytecodeError { source, variant }); + } else { + value |= (field_value << field.shift) as usize; + } + } + self.words.push(Word { bits: assembled_word.bits, value }); + } + AssembledToken::PinnedAddress(pinned) => { + if self.words.len() > pinned.address { + let variant = BytecodeErrorVariant::PinnedAddressBacktrack( + pinned.address, self.words.len()); + let source = pinned.source.clone(); + self.errors.push(BytecodeError { source, variant }); + } else { + self.words.resize(pinned.address, Word { bits: 0, value: 0}); + } + } + AssembledToken::LabelDefinition(_) => (), + AssembledToken::Error(_) => (), + } + } + + return Bytecode { + words: self.words, + errors: self.errors, + } + } + + fn calculate_addresses(&mut self) { + let mut i = 0; + for token in self.tokens { + match token { + AssembledToken::LabelDefinition(definition) => { + let address = Tracked::from(i, &definition.source); + if let Some(_) = self.addresses.insert(definition.name.clone(), address) { + let name = definition.name.clone(); + let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name); + let source = definition.source.clone(); + self.errors.push(BytecodeError { source, variant }); + } + } + AssembledToken::Word(_) => { + i += 1; + } + AssembledToken::PinnedAddress(pinned) => { + i = pinned.address; + } + AssembledToken::Error(_) => (), + } + } + } + + fn resolve_expression(&mut self, expr: &AssembledExpression) -> isize { + let mut stack = Vec::new(); + macro_rules! push { + ($value:expr) => { stack.push($value) }; + } + macro_rules! pop { + ($name:ident) => { let $name = match stack.pop() { + Some(value) => value, + None => { + let variant = BytecodeErrorVariant::StackUnderflow; + self.errors.push(BytecodeError { source: expr.source.clone(), variant }); + return 0; + }, + }; }; + } + macro_rules! truth { + ($bool:expr) => { match $bool { true => 1, false => 0 } }; + } + + for token in &expr.tokens { + match &token { + AssembledExpressionToken::Integer(value) => { + push!(value.value) + } + AssembledExpressionToken::LabelReference(name) => { + push!(self.resolve_label_reference(name)) + } + AssembledExpressionToken::Expression(expr) => { + push!(self.resolve_expression(expr)) + } + AssembledExpressionToken::Operator(operator) => match operator { + Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) }, + Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) }, + Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) }, + Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) }, + Operator::Add => { pop!(b); pop!(a); push!(a + b) }, + Operator::Subtract => { pop!(b); pop!(a); push!(a - b) }, + Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) }, + Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) }, + Operator::And => { pop!(b); pop!(a); push!(a & b) }, + Operator::Or => { pop!(b); pop!(a); push!(a | b) }, + Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) }, + Operator::Not => { pop!(a); push!(!a) }, + } + } + } + + let variant = match stack.len() { + 0 => BytecodeErrorVariant::NoReturnValue, + 1 => return stack[0], + _ => BytecodeErrorVariant::MultipleReturnValues, + }; + self.errors.push(BytecodeError { source: expr.source.clone(), variant}); + 0 + } + + fn resolve_label_reference(&mut self, name: &Tracked<String>) -> isize { + if let Some(address) = self.addresses.get(&name.value) { + address.value as isize + } else { + let variant = BytecodeErrorVariant::DefinitionNotFound(name.value.clone()); + self.errors.push(BytecodeError { source: name.source.clone(), variant }); + 0 + } + } +} diff --git a/src/parsers/constant_expression.rs b/src/parsers/expression.rs index 78dc697..f902858 100644 --- a/src/parsers/constant_expression.rs +++ b/src/parsers/expression.rs @@ -1,17 +1,15 @@ use crate::*; -pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantExpression { - use ConstantExpressionTokenVariant as TokenVar; - use ConstantExpressionParseError as ParseError; +pub fn parse_constant_expression(mut t: Tokeniser, source: SourceSpan) -> Expression { + use ExpressionTokenVariant as TokenVar; + use ExpressionParseError as ParseError; let mut tokens = Vec::new(); - let mut t = Tokeniser::new_child(string, parent); - t.position.to_next_char(); // skip opening delimiter loop { - t.drop_whitespace(); - t.mark_start_position(); + t.eat_whitespace(); + t.mark_start(); let token = t.eat_token(); if token.is_empty() { break; @@ -19,7 +17,7 @@ pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantEx let variant = match token.as_str() { "=" => TokenVar::Operator(Operator::Equal), - "!" => TokenVar::Operator(Operator::NotEqual), + "!=" => TokenVar::Operator(Operator::NotEqual), "<" => TokenVar::Operator(Operator::LessThan), ">" => TokenVar::Operator(Operator::GreaterThan), "+" => TokenVar::Operator(Operator::Add), @@ -32,21 +30,21 @@ pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantEx "~" => TokenVar::Operator(Operator::Not), _ => if let Some(stripped) = token.strip_prefix("0x") { match usize::from_str_radix(stripped, 16) { - Ok(value) => TokenVar::IntegerLiteral(value), + Ok(value) => TokenVar::Literal(value as isize), Err(_) => TokenVar::Error( ParseError::InvalidHexadecimalLiteral(stripped.to_string())), } } else { match usize::from_str_radix(&token, 10) { - Ok(value) => TokenVar::IntegerLiteral(value), - Err(_) => TokenVar::SymbolReference(token.to_string()), + Ok(value) => TokenVar::Literal(value as isize), + Err(_) => TokenVar::Invocation(token.to_string()), } } }; - let source = t.mark_end_position(); - tokens.push(ConstantExpressionToken { source, variant }); + let source = t.get_source(); + tokens.push(ExpressionToken { source, variant }); } - return ConstantExpression { tokens }; + return Expression { source, tokens }; } diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index 91765a9..da2c23a 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -1,11 +1,15 @@ -mod constant_expression; -pub use constant_expression::*; - +mod expression; mod packed_binary_literal; + +pub use expression::*; pub use packed_binary_literal::*; mod syntactic; -pub use syntactic::*; - mod semantic; +mod assembler; +mod bytecode; + +pub use syntactic::*; pub use semantic::*; +pub use assembler::*; +pub use bytecode::*; diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs index 9704fc4..18f8da7 100644 --- a/src/parsers/packed_binary_literal.rs +++ b/src/parsers/packed_binary_literal.rs @@ -1,53 +1,54 @@ use crate::*; -pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBinaryLiteral { +/// t is a Tokeniser over the characters of the PBL, excluding the leading hash. +pub fn parse_packed_binary_literal(mut t: Tokeniser, source: SourceSpan) -> PackedBinaryLiteral { use PackedBinaryLiteralParseError as ParseError; use PackedBinaryLiteralParseErrorVariant as ParseErrorVar; let mut value = 0; let mut bits = 0; + let mut field_bits = 0; let mut name = '\0'; let mut fields: Vec<BitField> = Vec::new(); let mut errors: Vec<ParseError> = Vec::new(); macro_rules! push_field { - ($source:expr) => { + () => { if fields.iter().any(|f| f.name == name) { let variant = ParseErrorVar::DuplicateFieldName(name); - errors.push(ParseError { source: $source, variant }); + errors.push(ParseError { source: t.get_source(), variant }); } else { - fields.push(BitField { name, source: $source, bits, shift: 0 }); + fields.push(BitField { name, source: t.get_source(), bits: field_bits, shift: 0 }); } }; } - let mut t = Tokeniser::new_child(string, parent); - t.position.to_next_char(); // skip opening hash character - while let Some(c) = t.eat_char() { // Ignore underscores. if c == '_' { - t.prev_position = t.prev_prev_position; + t.mark.undo(); continue; } // Add a bit to the value; value <<= 1; + bits += 1; for field in &mut fields { field.shift += 1; } // Extend the current field. if c == name { - bits += 1; + field_bits += 1; continue; } // Commit the current field. - if bits > 0 { - push_field!(t.mark_prev_end_position()); - bits = 0; + if field_bits > 0 { + t.mark_end_prev(); + push_field!(); + field_bits = 0; name = '\0'; } @@ -60,21 +61,25 @@ pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBi continue; } - t.mark_prev_start_position(); + t.mark_start_prev(); if c.is_alphabetic() { name = c; - bits = 1; + field_bits = 1; continue; } else { - let source = t.mark_end_position(); - errors.push(ParseError { source, variant: ParseErrorVar::InvalidCharacter(c) }); + let source = t.get_source(); + let variant = ParseErrorVar::InvalidCharacter(c); + errors.push(ParseError { source, variant }); } } // Commit the final field. - if bits > 0 { - push_field!(t.mark_end_position()); + for field in &mut fields { + field.shift += 1; + } + if field_bits > 0 { + push_field!(); } - PackedBinaryLiteral { value, fields, errors } + PackedBinaryLiteral { source, bits, value, fields, errors } } diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs index 7ef4a4a..a58fb5f 100644 --- a/src/parsers/semantic.rs +++ b/src/parsers/semantic.rs @@ -1,136 +1,106 @@ use crate::*; - -use syntactic as syn; -use syn::TokenVariant as SynVar; -use semantic::*; +use SyntacticTokenVariant as SynVar; use std::collections::VecDeque; +use indexmap::IndexMap; + macro_rules! fn_is_syn_variant { ($name:ident, $variant:ty) => { paste::paste! { - fn [< is_ $name >](token: &syn::Token) -> bool { + fn [< is_ $name >](token: &SyntacticToken) -> bool { match token.variant { $variant => true, _ => false, } } } }; } -fn_is_syn_variant!(block_open, syn::TokenVariant::BlockOpen); -fn_is_syn_variant!(block_close, syn::TokenVariant::BlockClose); -fn_is_syn_variant!(separator, syn::TokenVariant::Separator); -fn_is_syn_variant!(terminator, syn::TokenVariant::MacroDefinitionTerminator); - - -pub struct Tokens { - tokens: VecDeque<syn::Token>, -} - -impl Tokens { - pub fn new<T: Into<VecDeque<syn::Token>>>(tokens: T) -> Self { - Self { tokens: tokens.into() } - } - - pub fn pop(&mut self) -> Option<syn::Token> { - self.tokens.pop_front() - } +fn_is_syn_variant!(block_open, SyntacticTokenVariant::BlockOpen); +fn_is_syn_variant!(block_close, SyntacticTokenVariant::BlockClose); +fn_is_syn_variant!(separator, SyntacticTokenVariant::Separator); +fn_is_syn_variant!(terminator, SyntacticTokenVariant::MacroDefinitionTerminator); - pub fn pop_if(&mut self, predicate: fn(&syn::Token) -> bool) -> Option<syn::Token> { - match predicate(self.tokens.front()?) { - true => self.tokens.pop_front(), - false => None, - } - } - pub fn unpop(&mut self, token: syn::Token) { - self.tokens.push_front(token); - } - - /// Pull tokens until the predicate returns true, otherwise return Err. - pub fn pull_until(&mut self, mut predicate: impl FnMut(&syn::Token) -> bool) -> Result<Self, ()> { - let mut output = VecDeque::new(); - while let Some(token) = self.tokens.pop_front() { - match predicate(&token) { - true => return Ok(Self::new(output)), - false => output.push_back(token), - }; - } - return Err(()); - } - - pub fn take(&mut self) -> Self { - Self { tokens: std::mem::take(&mut self.tokens) } - } - - pub fn len(&self) -> usize { - self.tokens.len() - } -} - - -pub struct ProgramParser { +pub struct SemanticParser { tokens: Tokens, - definitions: Vec<Definition>, - invocations: Vec<Invocation>, - errors: Vec<ParseError>, + macro_definitions: IndexMap<String, MacroDefinition>, + label_definitions: IndexMap<String, LabelDefinition>, + body: Vec<SemanticToken>, } -impl ProgramParser { - pub fn new(syntactic_tokens: Vec<syn::Token>) -> Self { +impl SemanticParser { + pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self { + // Gather all labels ahead of time. + let mut label_definitions = IndexMap::new(); + for token in &syntactic_tokens { + if let SyntacticTokenVariant::LabelDefinition(name) = &token.variant { + let definition = LabelDefinition { + source: token.source.clone(), + name: name.clone(), + }; + let None = label_definitions.insert(name.to_string(), definition) else { + unreachable!("Duplicate definition for label {name:?}"); + }; + } + } Self { tokens: Tokens::new(syntactic_tokens), - definitions: Vec::new(), - invocations: Vec::new(), - errors: Vec::new(), + macro_definitions: IndexMap::new(), + label_definitions, + body: Vec::new(), } } - pub fn parse(mut self) -> Program { + pub fn parse(mut self) -> SemanticProgram { while let Some(syn) = self.tokens.pop() { match syn.variant { SynVar::MacroDefinition(name) => { - // Collect all tokens up to the next definition terminator. let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else { - let variant = ParseErrorVariant::UnterminatedMacroDefinition(name); - self.errors.push(ParseError { source: syn.source, variant}); + let variant = SemanticParseErrorVariant::UnterminatedMacroDefinition(name); + let error = SemanticParseError { source: syn.source, variant }; + self.body.push(SemanticToken::Error(error)); break; }; - // Parse macro definition arguments. - match DefinitionParser::new(name, syn.source, definition_tokens).parse() { - Ok(definition) => self.definitions.push(definition), - Err(errors) => self.errors.extend(errors), + let definition = MacroDefinitionParser::new(syn.source, definition_tokens).parse(); + let None = self.macro_definitions.insert(name.clone(), definition) else { + unreachable!("Duplicate definition for macro {name}"); }; } - SynVar::Comment(_) => (), + SynVar::LabelDefinition(name) => { + let label_definition = LabelDefinition { source: syn.source, name }; + self.body.push(SemanticToken::LabelDefinition(label_definition)); + } + SynVar::PinnedAddress(address) => { + let pinned_address = PinnedAddress { source: syn.source, address }; + self.body.push(SemanticToken::PinnedAddress(pinned_address)); + } SynVar::Symbol(name) => { - let parser = InvocationParser::new(name, &mut self.tokens); - self.invocations.push(parser.parse()); + let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse(); + self.body.push(SemanticToken::Invocation(invocation)); } _ => { - let variant = ParseErrorVariant::InvalidToken; - self.errors.push(ParseError { source: syn.source, variant}); - break; + let variant = SemanticParseErrorVariant::InvalidToken; + let error = SemanticParseError { source: syn.source, variant }; + self.body.push(SemanticToken::Error(error)); } } } - Program { - definitions: self.definitions, - invocations: self.invocations, - errors: self.errors, + SemanticProgram { + macro_definitions: self.macro_definitions, + label_definitions: self.label_definitions, + body: self.body, } } } -pub struct DefinitionParser { - name: String, +pub struct MacroDefinitionParser { source: SourceSpan, tokens: Tokens, arguments: Vec<ArgumentDefinition>, - errors: Vec<ParseError>, + errors: Vec<SemanticParseError>, } -impl DefinitionParser { - pub fn new(name: String, source: SourceSpan, tokens: Tokens) -> Self { +impl MacroDefinitionParser { + pub fn new(source: SourceSpan, tokens: Tokens) -> Self { Self { - name, tokens, source, arguments: Vec::new(), @@ -138,20 +108,15 @@ impl DefinitionParser { } } - pub fn parse(mut self) -> Result<Definition, Vec<ParseError>> { + pub fn parse(mut self) -> MacroDefinition { while let Some(definition) = self.parse_argument_definition() { self.arguments.push(definition) } - if self.errors.is_empty() { - let variant = self.parse_body(); - Ok(Definition { - name: self.name, - source: self.source, - arguments: self.arguments, - variant, - }) - } else { - Err(self.errors) + MacroDefinition { + value: self.parse_body(), + source: self.source, + arguments: self.arguments, + errors: self.errors, } } @@ -172,47 +137,45 @@ impl DefinitionParser { let token = token?; let source = token.source; if let SynVar::Symbol(name) = token.variant { - let variant = ArgumentDefinitionVariant::Integer; + let variant = match is_block { + true => ArgumentVariant::Block, + false => ArgumentVariant::Integer, + }; Some(ArgumentDefinition { name, source, variant }) } else { - let name = self.name.clone(); - let variant = ParseErrorVariant::InvalidArgumentDefinition(name); - self.errors.push(ParseError { source, variant}); + let variant = SemanticParseErrorVariant::InvalidToken; + self.errors.push(SemanticParseError { source, variant}); None } } - fn parse_body(&mut self) -> DefinitionVariant { - // Attempt to parse an IntegerDefinition. + fn parse_body(&mut self) -> Value { + // Attempt to parse an Integer. if self.tokens.len() == 1 { let token = self.tokens.pop().unwrap(); match token.variant { - SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => { - return DefinitionVariant::Integer(IntegerDefinition { - source: token.source, - variant: IntegerDefinitionVariant::Literal(value), - }); + SynVar::IntegerLiteral(value) => { + let integer = TrackedInteger { source: token.source, value }; + return Value::Integer(Integer::Literal(integer)); } - SynVar::ConstantExpression(expr) => { - return DefinitionVariant::Integer(IntegerDefinition { - source: token.source, - variant: IntegerDefinitionVariant::Constant(expr), - }); - } - SynVar::Symbol(name) => { - return DefinitionVariant::Reference(ReferenceDefinition { - source: token.source, - name, - }); + SynVar::Expression(expr) => { + return Value::Integer(Integer::Expression(expr)); } _ => (), } self.tokens.unpop(token); } - - // Parse the remaining tokens as a BlockDefinition. - let block = BlockParser::new(self.tokens.take()).parse(); - return DefinitionVariant::Block(block); + // Parse a Block. + let mut block = BlockParser::new(self.tokens.take()).parse(); + // If the block contains a single invocation, unwrap it. + if block.len() == 1 { + match block.pop() { + Some(SemanticToken::Invocation(invocation)) => return Value::Invocation(invocation), + Some(other) => block.push(other), + None => (), + }; + } + return Value::Block(block); } } @@ -220,52 +183,52 @@ impl DefinitionParser { /// Parse an entire block, excluding delimiters. pub struct BlockParser { tokens: Tokens, - block_tokens: Vec<BlockToken>, - errors: Vec<ParseError>, + semantic_tokens: Vec<SemanticToken>, } impl BlockParser { pub fn new(tokens: Tokens) -> Self { - Self { tokens, block_tokens: Vec::new(), errors: Vec::new() } + Self { tokens, semantic_tokens: Vec::new() } } - pub fn parse(mut self) -> BlockDefinition { + pub fn parse(mut self) -> Vec<SemanticToken> { while let Some(token) = self.tokens.pop() { let source = token.source; match token.variant { SynVar::Symbol(name) => { - let parser = InvocationParser::new(name, &mut self.tokens); - let invocation = parser.parse(); - let variant = BlockTokenVariant::Invocation(invocation); - let block_token = BlockToken { source, variant }; - self.block_tokens.push(block_token); + let invocation = InvocationParser::new(name, source, &mut self.tokens).parse(); + self.semantic_tokens.push(SemanticToken::Invocation(invocation)); } SynVar::PackedBinaryLiteral(pbl) => { - let variant = BlockTokenVariant::Word(pbl); - let block_token = BlockToken { source, variant }; - self.block_tokens.push(block_token); + self.semantic_tokens.push(SemanticToken::Word(pbl)); + } + SynVar::LabelDefinition(name) => { + let label_definition = LabelDefinition { source, name }; + self.semantic_tokens.push(SemanticToken::LabelDefinition(label_definition)); } _ => { - let variant = ParseErrorVariant::InvalidToken; - self.errors.push(ParseError { source, variant }) + let variant = SemanticParseErrorVariant::InvalidToken; + let error = SemanticParseError { source, variant }; + self.semantic_tokens.push(SemanticToken::Error(error)); } } } - BlockDefinition { tokens: self.block_tokens, errors: self.errors } + return self.semantic_tokens; } } struct InvocationParser<'a> { name: String, + source: SourceSpan, tokens: &'a mut Tokens, - arguments: Vec<DefinitionVariant>, - errors: Vec<ParseError>, + arguments: Vec<ArgumentInvocation>, + errors: Vec<SemanticParseError>, } impl<'a> InvocationParser<'a> { - pub fn new(name: String, tokens: &'a mut Tokens) -> Self { - Self { name, tokens, arguments: Vec::new(), errors: Vec::new() } + pub fn new(name: String, source: SourceSpan, tokens: &'a mut Tokens) -> Self { + Self { name, source, tokens, arguments: Vec::new(), errors: Vec::new() } } pub fn parse(mut self) -> Invocation { @@ -274,33 +237,34 @@ impl<'a> InvocationParser<'a> { } Invocation { name: self.name, + source: self.source, arguments: self.arguments, errors: self.errors, } } - fn parse_invocation_argument(&mut self) -> Option<DefinitionVariant> { + fn parse_invocation_argument(&mut self) -> Option<ArgumentInvocation> { // Only continue if the first token is a separator. self.tokens.pop_if(is_separator)?; if let Some(block_open) = self.tokens.pop_if(is_block_open) { let source = block_open.source; let mut depth = 1; - let is_matching_block_close = |token: &syntactic::Token| { + let is_matching_block_close = |token: &SyntacticToken| { match token.variant { - syntactic::TokenVariant::BlockOpen => { + SyntacticTokenVariant::BlockOpen => { depth += 1; false } - syntactic::TokenVariant::BlockClose => { + SyntacticTokenVariant::BlockClose => { depth -= 1; depth == 0 } _ => false, } }; if let Ok(block_tokens) = self.tokens.pull_until(is_matching_block_close) { let block = BlockParser::new(block_tokens).parse(); - Some(DefinitionVariant::Block(block)) + Some(ArgumentInvocation { source, value: Value::Block(block) }) } else { - let variant = ParseErrorVariant::UnterminatedBlockDefinition; - self.errors.push(ParseError { source, variant }); + let variant = SemanticParseErrorVariant::UnterminatedBlock; + self.errors.push(SemanticParseError { source, variant }); None } } else { @@ -308,25 +272,74 @@ impl<'a> InvocationParser<'a> { let source = token.source; match token.variant { SynVar::Symbol(name) => { - let reference = ReferenceDefinition { source, name }; - Some(DefinitionVariant::Reference(reference)) + let arguments = Vec::new(); + let errors = Vec::new(); + let invocation = Invocation { source: source.clone(), name, arguments, errors }; + let value = Value::Invocation(invocation); + Some(ArgumentInvocation { source, value }) } - SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => { - let variant = IntegerDefinitionVariant::Literal(value); - let integer = IntegerDefinition { source, variant }; - Some(DefinitionVariant::Integer(integer)) + SynVar::IntegerLiteral(value) => { + let integer = TrackedInteger { source: source.clone(), value }; + let value = Value::Integer(Integer::Literal(integer)); + Some(ArgumentInvocation { source, value }) } - SynVar::ConstantExpression(expr) => { - let variant = IntegerDefinitionVariant::Constant(expr); - let integer = IntegerDefinition { source, variant }; - Some(DefinitionVariant::Integer(integer)) + SynVar::Expression(expr) => { + let value = Value::Integer(Integer::Expression(expr)); + Some(ArgumentInvocation { source, value }) } _ => { - let variant = ParseErrorVariant::InvalidToken; - self.errors.push(ParseError { source, variant }); + let variant = SemanticParseErrorVariant::InvalidToken; + self.errors.push(SemanticParseError { source, variant }); None } } } } } + + +pub struct Tokens { + tokens: VecDeque<SyntacticToken>, +} + +impl Tokens { + pub fn new<T: Into<VecDeque<SyntacticToken>>>(tokens: T) -> Self { + Self { tokens: tokens.into() } + } + + pub fn pop(&mut self) -> Option<SyntacticToken> { + self.tokens.pop_front() + } + + pub fn pop_if(&mut self, predicate: fn(&SyntacticToken) -> bool) -> Option<SyntacticToken> { + match predicate(self.tokens.front()?) { + true => self.tokens.pop_front(), + false => None, + } + } + + pub fn unpop(&mut self, token: SyntacticToken) { + self.tokens.push_front(token); + } + + /// Pull tokens until the predicate returns true, otherwise return Err. + pub fn pull_until(&mut self, mut predicate: impl FnMut(&SyntacticToken) -> bool) -> Result<Self, ()> { + let mut output = VecDeque::new(); + while let Some(token) = self.tokens.pop_front() { + match predicate(&token) { + true => return Ok(Self::new(output)), + false => output.push_back(token), + }; + } + return Err(()); + } + + pub fn take(&mut self) -> Self { + Self { tokens: std::mem::take(&mut self.tokens) } + } + + pub fn len(&self) -> usize { + self.tokens.len() + } +} + diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs index 909dbaa..37f8e6c 100644 --- a/src/parsers/syntactic.rs +++ b/src/parsers/syntactic.rs @@ -1,106 +1,147 @@ use crate::*; -use syntactic::*; pub struct SyntacticParser { tokeniser: Tokeniser, - /// The name of the most recently parsed label. - label_name: String, + tokens: Vec<SyntacticToken>, /// The name of the macro being parsed. macro_name: Option<String>, + /// The name of the most recent label. + label_name: String, } impl SyntacticParser { - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { let mut tokeniser = Tokeniser::new(source_code, path); - tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']); + tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']); Self { tokeniser, - label_name: String::new(), + tokens: Vec::new(), macro_name: None, + label_name: String::new(), } } -} - -impl Iterator for SyntacticParser { - type Item = Token; - - /// Sequentially parse tokens from the source code. - fn next(&mut self) -> Option<Token> { + pub fn parse(mut self) -> Vec<SyntacticToken> { + use SyntacticTokenVariant as SynVar; + use SyntacticParseError as SynErr; let t = &mut self.tokeniser; - t.drop_whitespace(); - t.mark_start_position(); - let variant = match t.eat_char()? { - '@' => { - self.label_name = t.eat_token(); - TokenVariant::LabelDefinition(self.label_name.clone()) - } - '&' => { - let token = t.eat_token(); - TokenVariant::LabelDefinition(format!("{}/{token}", self.label_name)) - } - '%' => { - let macro_name = t.eat_token(); - self.macro_name = Some(macro_name.clone()); - TokenVariant::MacroDefinition(macro_name) - } - ';' => { - self.macro_name = None; - TokenVariant::MacroDefinitionTerminator - } - '[' => match t.eat_to_delimiter(']') { - Some(string) => { - let constant = ConstantExpression::from_str(&string, t); - TokenVariant::ConstantExpression(constant) + loop { + t.eat_whitespace(); + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let variant = match c { + ':' => SynVar::Separator, + '{' => SynVar::BlockOpen, + '}' => SynVar::BlockClose, + '@' => match &self.macro_name { + Some(_) => { + t.eat_token(); + SynVar::Error(SynErr::LabelInMacroDefinition) + } + None => { + self.label_name = t.eat_token(); + SynVar::LabelDefinition(self.label_name.clone()) + } } - None => TokenVariant::Error(ParseError::UnterminatedConstantExpression), - } - '{' => TokenVariant::BlockOpen, - '}' => TokenVariant::BlockClose, - '(' => match t.eat_to_delimiter(')') { - Some(string) => TokenVariant::Comment(string), - None => TokenVariant::Error(ParseError::UnterminatedComment), - } - '#' => { - let token = t.eat_token(); - let pbl = PackedBinaryLiteral::from_str(&token, t); - TokenVariant::PackedBinaryLiteral(pbl) - }, - '~' => { - let token = t.eat_token(); - TokenVariant::Symbol(format!("{}/{token}", self.label_name)) - } - ':' => TokenVariant::Separator, - c => { - let token = format!("{c}{}", t.eat_token()); - if let Some(hex_string) = token.strip_prefix("0x") { - match usize::from_str_radix(hex_string, 16) { - Ok(hex) => TokenVariant::HexadecimalLiteral(hex), - Err(_) => TokenVariant::Error(ParseError::InvalidHexadecimalLiteral(token)), + '&' => match &self.macro_name { + Some(macro_name) => { + let label_name = format!("{macro_name}:{}", t.eat_token()); + SynVar::LabelDefinition(label_name) } - } else { - match usize::from_str_radix(&token, 10) { - Ok(value) => TokenVariant::DecimalLiteral(value), - Err(_) => TokenVariant::Symbol(token), + None => { + let label_name = &self.label_name; + let sublabel_name = format!("{label_name}/{}", t.eat_token()); + SynVar::LabelDefinition(sublabel_name) } } - } - }; - - // Parse source path comments. - if let TokenVariant::Comment(comment) = &variant { - // Check if the comment fills the entire line. - if t.start_position.column == 0 && t.end_of_line() { - if let Some(path) = comment.strip_prefix(": ") { - t.embedded_path = Some(PathBuf::from(path.trim())); - t.embedded_first_line = t.start_position.line + 1; + '%' => { + let macro_name = t.eat_token(); + self.macro_name = Some(macro_name.clone()); + SynVar::MacroDefinition(macro_name) + } + ';' => { + self.macro_name = None; + SynVar::MacroDefinitionTerminator } - } + '[' => { + t.mark_child(); + match t.eat_to_delimiter(']') { + Some(_) => { + let child = t.subtokenise(); + t.mark_end(); + let expr = parse_constant_expression(child, t.get_source()); + SynVar::Expression(expr) + } + None => SynVar::Error(SynErr::UnterminatedExpression), + } + } + '(' => match t.eat_to_delimiter(')') { + Some(string) => { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + } + } + continue; + }, + None => SynVar::Error(SynErr::UnterminatedComment), + } + '|' => { + let token = t.eat_token(); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(addr) => SynVar::PinnedAddress(addr), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(addr) => SynVar::PinnedAddress(addr), + Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)), + } + } + } + '#' => { + t.mark_child(); + t.eat_token(); + let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source()); + SynVar::PackedBinaryLiteral(pbl) + }, + '~' => match &self.macro_name { + Some(macro_name) => { + let symbol_name = format!("{macro_name}:{}", t.eat_token()); + SynVar::Symbol(symbol_name) + } + None => { + let label_name = &self.label_name; + let symbol_name = format!("{label_name}/{}", t.eat_token()); + SynVar::Symbol(symbol_name) + } + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(value) => SynVar::IntegerLiteral(value as isize), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(value) => SynVar::IntegerLiteral(value as isize), + Err(_) => SynVar::Symbol(token), + } + } + } + }; + + t.mark_end(); + let source = t.get_source(); + self.tokens.push(SyntacticToken { source, variant }); } - let source = t.mark_end_position(); - Some( Token { source, variant } ) + return self.tokens; } } |