From 1ecee352f5844b0809d7ae66df52e34f42b44c8e Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Thu, 6 Mar 2025 20:33:27 +1300 Subject: Rewrite entire assembler The language is now more general, the code is better structured, error reporting is more detailed, and many new language features have been implemented: - conditional blocks - first-class strings - more expression operators - binary literals - negative values - invocations in constant expressions --- src/stages/semantic.rs | 478 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 478 insertions(+) create mode 100644 src/stages/semantic.rs (limited to 'src/stages/semantic.rs') diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..e225608 --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,478 @@ +use crate::*; + +use std::collections::VecDeque; + + +pub fn parse_semantic(syntactic: Vec>) -> Result>, Vec>> { + SemanticParser::from(syntactic, Namespace::None).parse() +} + +#[derive(Clone)] +enum Namespace { + Macro(String), + Label(String), + None, +} + + +struct SemanticParser { + namespace: Namespace, + syntactic: SyntacticTokenStream, + semantic: Vec>, + errors: Vec>, +} + +impl SemanticParser { + pub fn from(syntactic: Vec>, namespace: Namespace) -> Self { + Self { + namespace, + syntactic: SyntacticTokenStream::from(syntactic), + semantic: Vec::new(), + errors: Vec::new(), + } + } + + fn pull_from(&mut self, mut other: SemanticParser) { + self.errors.append(&mut other.errors); + if let Namespace::Macro(_) = other.namespace { + () + } else { + self.namespace = other.namespace; + } + } + + fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option { + match symbol { + ScopedSymbol::Global(name) => match &self.namespace { + Namespace::Macro(_) => { + let error = SemanticError::LabelInMacroDefinition; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + Namespace::Label(_) | Namespace::None => { + self.namespace = Namespace::Label(name.clone()); + Some(name) + } + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::SublabelWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option { + match symbol { + ScopedSymbol::Global(name) => { + Some(name) + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::LocalSymbolWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + /// Parse the remaining syntactic tokens as a full program. + pub fn parse(mut self) -> Result>, Vec>> { + while let Some(token) = self.syntactic.pop() { + if let SyntacticToken::MacroDefinition(definition) = token.value { + let namespace = Namespace::Macro(definition.name.to_string()); + let mut parser = SemanticParser::from(definition.tokens, namespace); + let mut arguments = Vec::new(); + while let Some(argument) = parser.pull_argument_definition() { + arguments.push(argument); + } + let body = parser.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody); + self.pull_from(parser); + let definition = MacroDefinition { name: definition.name, arguments, body }; + let semantic = SemanticToken::MacroDefinition(definition); + self.semantic.push(Tracked::from(semantic, token.source)); + } else { + self.syntactic.unpop(token); + if let Some(token) = self.pull_block_token(SemanticLocation::Program) { + let semantic = SemanticToken::BlockToken(token.value); + self.semantic.push(Tracked::from(semantic, token.source)); + } + } + } + match self.errors.is_empty() { + true => Ok(self.semantic), + false => Err(self.errors), + } + } + + /// Parse the remaining syntactic tokens as a macro definition body. + fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_macro_definition_body_token() { + tokens.push(token); + } + } + if tokens.is_empty() { + MacroDefinitionBody::Block(Vec::new()) + } else if tokens.len() == 1 { + tokens.pop().unwrap() + } else { + let mut block_tokens = Vec::new(); + for token in tokens { + match token { + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedInteger(location); + let tracked = Tracked::from(error, integer.source); + self.errors.push(tracked); + } + MacroDefinitionBody::Block(mut tokens) => { + block_tokens.append(&mut tokens); + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + block_tokens.push(Tracked::from(token, invocation.source)); + } + } + } + MacroDefinitionBody::Block(block_tokens) + } + } + + /// Attempt to pull a MacroDefinitionBody token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + /// Each BodyToken is wrapped in a separate MacroDefinitionBody. + fn pull_macro_definition_body_token(&mut self) -> Option { + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::LabelDefinition(symbol) => { + let name = self.resolve_label_name(symbol, &source)?; + let token = BlockToken::LabelDefinition(name); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::MacroDefinition(_) => { + let error = SemanticError::MisplacedMacroDefinition; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::IntegerLiteral(value) => { + let token = IntegerToken::IntegerLiteral(value); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::StringLiteral(_) => { + let error = SemanticError::MisplacedStringLiteral; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::WordTemplate(word_template) => { + let token = BlockToken::WordTemplate(word_template); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let token = BlockToken::Block(tokens); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let token = IntegerToken::Expression(expression); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let arguments = self.pull_all_invocation_arguments(); + // Extend invocation source span to cover all arguments. + let mut source = source; + if let Some(last) = arguments.last() { + source.in_merged.end = last.source.in_merged.end; + if let Some(last_in_source) = &last.source.in_source { + if let Some(in_source) = &mut source.in_source { + in_source.end = last_in_source.end.clone(); + } + } + } + let invocation = Invocation { name, arguments }; + let tracked = Tracked::from(invocation, source); + Some(MacroDefinitionBody::Invocation(tracked)) + } + SyntacticToken::Separator => { + let error = SemanticError::MisplacedSeparator; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::Condition => { + let conditional = self.pull_conditional_block()?; + let token = BlockToken::ConditionalBlock(Box::new(conditional)); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Pin => { + let integer = self.pull_integer_token(SemanticLocation::PinAddress)?; + let token = BlockToken::PinnedAddress(integer); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + } + } + + /// Attempt to pull an integer token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_integer_token(&mut self, location: SemanticLocation) -> Option> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Integer(integer) => { + Some(integer) + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to an integer invocation. + let token = IntegerToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + let token = tokens.pop().unwrap(); + let error = SemanticError::ExpectedInteger(location); + self.errors.push(Tracked::from(error, token.source)); + None + } + } + } + + /// Attempt to pull a BlockToken from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_block_token(&mut self, location: SemanticLocation) -> Option> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + tokens.pop() + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedBlock(location); + self.errors.push(Tracked::from(error, integer.source)); + None + } + } + } + + /// Parse the remaining syntactic tokens as the contents of a block. + fn parse_block(&mut self) -> Vec> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_block_token(SemanticLocation::BlockLiteral) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as a list of integer tokens. + fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_integer_token(location) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as the contents of an expression. + fn parse_expression(&mut self) -> Expression { + let mut tokens = Vec::new(); + for token in self.parse_integer_list(SemanticLocation::Expression) { + let source = token.source; + match token.value { + IntegerToken::IntegerLiteral(value) => { + let integer = Box::new(IntegerToken::IntegerLiteral(value)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Expression(expression) => { + let integer = Box::new(IntegerToken::Expression(expression)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Invocation(invocation) => { + // Parse the invocation as an operator instead. + if invocation.arguments.is_empty() { + if let Some(operator) = Operator::from_str(&invocation.name) { + let token = ExpressionToken::Operator(operator); + tokens.push(Tracked::from(token, source)); + continue; + } + } + // Parse the invocation as an invocation. + let integer = Box::new(IntegerToken::Invocation(invocation)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + } + } + Expression { tokens } + } + + /// Attempt to pull a conditional block from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_conditional_block(&mut self) -> Option { + let predicate = self.pull_integer_token(SemanticLocation::ConditionPredicate)?; + let body = self.pull_block_token(SemanticLocation::ConditionBody)?; + Some(ConditionalBlock { predicate, body }) + } + + /// Attempt to pull an invocation argument from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. + fn pull_invocation_argument(&mut self) -> Option> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::StringLiteral(string_literal) => { + let argument = InvocationArgument::String(string_literal); + Some(Tracked::from(argument, source)) + } + SyntacticToken::IntegerLiteral(value) => { + let integer = IntegerToken::IntegerLiteral(value); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let integer = IntegerToken::Expression(expression); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let block = BlockToken::Block(tokens); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let invocation = Invocation { name, arguments: Vec::new() }; + let argument = InvocationArgument::Invocation(invocation); + Some(Tracked::from(argument, source)) + } + SyntacticToken::WordTemplate(word_template) => { + let block = BlockToken::WordTemplate(word_template); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + _ => { + let error = SemanticError::InvalidInvocationArgument; + self.errors.push(Tracked::from(error, source)); + return None; + } + } + } + + fn pull_all_invocation_arguments(&mut self) -> Vec> { + let mut arguments = Vec::new(); + while let Some(argument) = self.pull_invocation_argument() { + arguments.push(argument); + } + return arguments; + } + + /// Attempt to pull an argument definition from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. + fn pull_argument_definition(&mut self) -> Option> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::Symbol(ScopedSymbol::Global(name)) => { + let variant = ArgumentType::Integer; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + SyntacticToken::BlockLiteral(mut tokens) => { + if tokens.len() == 1 { + let token = tokens.pop().unwrap(); + if let SyntacticToken::Symbol(ScopedSymbol::Global(name)) = token.value { + let variant = ArgumentType::Block; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + } + } + _ => (), + }; + let error = SemanticError::InvalidArgumentDefinition; + self.errors.push(Tracked::from(error, source)); + return None; + } +} + + + +struct SyntacticTokenStream { + tokens: VecDeque>, +} + +impl SyntacticTokenStream { + pub fn from>>>(tokens: T) -> Self { + Self { tokens: tokens.into() } + } + + pub fn pop(&mut self) -> Option> { + self.tokens.pop_front() + } + + pub fn pop_if(&mut self, predicate: fn(&Tracked) -> bool) -> Option> { + match predicate(self.tokens.front()?) { + true => self.tokens.pop_front(), + false => None, + } + } + + pub fn unpop(&mut self, token: Tracked) { + self.tokens.push_front(token); + } + + pub fn is_empty(&self) -> bool { + self.tokens.is_empty() + } +} + + +fn is_separator(token: &Tracked) -> bool { + match token.value { + SyntacticToken::Separator => true, + _ => false, + } +} -- cgit v1.2.3-70-g09d2