diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-03-06 20:33:27 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-03-11 16:59:26 +1300 |
commit | 1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch) | |
tree | 472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 /src/stages/semantic.rs | |
parent | f2ed89083f5326a7a6f0a1720033d3388aa431fb (diff) | |
download | torque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip |
Rewrite entire assembler
The language is now more general, the code is better structured, error
reporting is more detailed, and many new language features have
been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
Diffstat (limited to 'src/stages/semantic.rs')
-rw-r--r-- | src/stages/semantic.rs | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..e225608 --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,478 @@ +use crate::*; + +use std::collections::VecDeque; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { + SemanticParser::from(syntactic, Namespace::None).parse() +} + +#[derive(Clone)] +enum Namespace { + Macro(String), + Label(String), + None, +} + + +struct SemanticParser { + namespace: Namespace, + syntactic: SyntacticTokenStream, + semantic: Vec<Tracked<SemanticToken>>, + errors: Vec<Tracked<SemanticError>>, +} + +impl SemanticParser { + pub fn from(syntactic: Vec<Tracked<SyntacticToken>>, namespace: Namespace) -> Self { + Self { + namespace, + syntactic: SyntacticTokenStream::from(syntactic), + semantic: Vec::new(), + errors: Vec::new(), + } + } + + fn pull_from(&mut self, mut other: SemanticParser) { + self.errors.append(&mut other.errors); + if let Namespace::Macro(_) = other.namespace { + () + } else { + self.namespace = other.namespace; + } + } + + fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { + match symbol { + ScopedSymbol::Global(name) => match &self.namespace { + Namespace::Macro(_) => { + let error = SemanticError::LabelInMacroDefinition; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + Namespace::Label(_) | Namespace::None => { + self.namespace = Namespace::Label(name.clone()); + Some(name) + } + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::SublabelWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { + match symbol { + ScopedSymbol::Global(name) => { + Some(name) + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::LocalSymbolWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + /// Parse the remaining syntactic tokens as a full program. + pub fn parse(mut self) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { + while let Some(token) = self.syntactic.pop() { + if let SyntacticToken::MacroDefinition(definition) = token.value { + let namespace = Namespace::Macro(definition.name.to_string()); + let mut parser = SemanticParser::from(definition.tokens, namespace); + let mut arguments = Vec::new(); + while let Some(argument) = parser.pull_argument_definition() { + arguments.push(argument); + } + let body = parser.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody); + self.pull_from(parser); + let definition = MacroDefinition { name: definition.name, arguments, body }; + let semantic = SemanticToken::MacroDefinition(definition); + self.semantic.push(Tracked::from(semantic, token.source)); + } else { + self.syntactic.unpop(token); + if let Some(token) = self.pull_block_token(SemanticLocation::Program) { + let semantic = SemanticToken::BlockToken(token.value); + self.semantic.push(Tracked::from(semantic, token.source)); + } + } + } + match self.errors.is_empty() { + true => Ok(self.semantic), + false => Err(self.errors), + } + } + + /// Parse the remaining syntactic tokens as a macro definition body. + fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_macro_definition_body_token() { + tokens.push(token); + } + } + if tokens.is_empty() { + MacroDefinitionBody::Block(Vec::new()) + } else if tokens.len() == 1 { + tokens.pop().unwrap() + } else { + let mut block_tokens = Vec::new(); + for token in tokens { + match token { + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedInteger(location); + let tracked = Tracked::from(error, integer.source); + self.errors.push(tracked); + } + MacroDefinitionBody::Block(mut tokens) => { + block_tokens.append(&mut tokens); + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + block_tokens.push(Tracked::from(token, invocation.source)); + } + } + } + MacroDefinitionBody::Block(block_tokens) + } + } + + /// Attempt to pull a MacroDefinitionBody token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + /// Each BodyToken is wrapped in a separate MacroDefinitionBody. + fn pull_macro_definition_body_token(&mut self) -> Option<MacroDefinitionBody> { + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::LabelDefinition(symbol) => { + let name = self.resolve_label_name(symbol, &source)?; + let token = BlockToken::LabelDefinition(name); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::MacroDefinition(_) => { + let error = SemanticError::MisplacedMacroDefinition; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::IntegerLiteral(value) => { + let token = IntegerToken::IntegerLiteral(value); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::StringLiteral(_) => { + let error = SemanticError::MisplacedStringLiteral; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::WordTemplate(word_template) => { + let token = BlockToken::WordTemplate(word_template); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let token = BlockToken::Block(tokens); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let token = IntegerToken::Expression(expression); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let arguments = self.pull_all_invocation_arguments(); + // Extend invocation source span to cover all arguments. + let mut source = source; + if let Some(last) = arguments.last() { + source.in_merged.end = last.source.in_merged.end; + if let Some(last_in_source) = &last.source.in_source { + if let Some(in_source) = &mut source.in_source { + in_source.end = last_in_source.end.clone(); + } + } + } + let invocation = Invocation { name, arguments }; + let tracked = Tracked::from(invocation, source); + Some(MacroDefinitionBody::Invocation(tracked)) + } + SyntacticToken::Separator => { + let error = SemanticError::MisplacedSeparator; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::Condition => { + let conditional = self.pull_conditional_block()?; + let token = BlockToken::ConditionalBlock(Box::new(conditional)); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Pin => { + let integer = self.pull_integer_token(SemanticLocation::PinAddress)?; + let token = BlockToken::PinnedAddress(integer); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + } + } + + /// Attempt to pull an integer token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_integer_token(&mut self, location: SemanticLocation) -> Option<Tracked<IntegerToken>> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Integer(integer) => { + Some(integer) + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to an integer invocation. + let token = IntegerToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + let token = tokens.pop().unwrap(); + let error = SemanticError::ExpectedInteger(location); + self.errors.push(Tracked::from(error, token.source)); + None + } + } + } + + /// Attempt to pull a BlockToken from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_block_token(&mut self, location: SemanticLocation) -> Option<Tracked<BlockToken>> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + tokens.pop() + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedBlock(location); + self.errors.push(Tracked::from(error, integer.source)); + None + } + } + } + + /// Parse the remaining syntactic tokens as the contents of a block. + fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_block_token(SemanticLocation::BlockLiteral) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as a list of integer tokens. + fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_integer_token(location) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as the contents of an expression. + fn parse_expression(&mut self) -> Expression { + let mut tokens = Vec::new(); + for token in self.parse_integer_list(SemanticLocation::Expression) { + let source = token.source; + match token.value { + IntegerToken::IntegerLiteral(value) => { + let integer = Box::new(IntegerToken::IntegerLiteral(value)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Expression(expression) => { + let integer = Box::new(IntegerToken::Expression(expression)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Invocation(invocation) => { + // Parse the invocation as an operator instead. + if invocation.arguments.is_empty() { + if let Some(operator) = Operator::from_str(&invocation.name) { + let token = ExpressionToken::Operator(operator); + tokens.push(Tracked::from(token, source)); + continue; + } + } + // Parse the invocation as an invocation. + let integer = Box::new(IntegerToken::Invocation(invocation)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + } + } + Expression { tokens } + } + + /// Attempt to pull a conditional block from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_conditional_block(&mut self) -> Option<ConditionalBlock> { + let predicate = self.pull_integer_token(SemanticLocation::ConditionPredicate)?; + let body = self.pull_block_token(SemanticLocation::ConditionBody)?; + Some(ConditionalBlock { predicate, body }) + } + + /// Attempt to pull an invocation argument from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. + fn pull_invocation_argument(&mut self) -> Option<Tracked<InvocationArgument>> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::StringLiteral(string_literal) => { + let argument = InvocationArgument::String(string_literal); + Some(Tracked::from(argument, source)) + } + SyntacticToken::IntegerLiteral(value) => { + let integer = IntegerToken::IntegerLiteral(value); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let integer = IntegerToken::Expression(expression); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let block = BlockToken::Block(tokens); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let invocation = Invocation { name, arguments: Vec::new() }; + let argument = InvocationArgument::Invocation(invocation); + Some(Tracked::from(argument, source)) + } + SyntacticToken::WordTemplate(word_template) => { + let block = BlockToken::WordTemplate(word_template); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + _ => { + let error = SemanticError::InvalidInvocationArgument; + self.errors.push(Tracked::from(error, source)); + return None; + } + } + } + + fn pull_all_invocation_arguments(&mut self) -> Vec<Tracked<InvocationArgument>> { + let mut arguments = Vec::new(); + while let Some(argument) = self.pull_invocation_argument() { + arguments.push(argument); + } + return arguments; + } + + /// Attempt to pull an argument definition from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. + fn pull_argument_definition(&mut self) -> Option<Tracked<ArgumentDefinition>> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::Symbol(ScopedSymbol::Global(name)) => { + let variant = ArgumentType::Integer; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + SyntacticToken::BlockLiteral(mut tokens) => { + if tokens.len() == 1 { + let token = tokens.pop().unwrap(); + if let SyntacticToken::Symbol(ScopedSymbol::Global(name)) = token.value { + let variant = ArgumentType::Block; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + } + } + _ => (), + }; + let error = SemanticError::InvalidArgumentDefinition; + self.errors.push(Tracked::from(error, source)); + return None; + } +} + + + +struct SyntacticTokenStream { + tokens: VecDeque<Tracked<SyntacticToken>>, +} + +impl SyntacticTokenStream { + pub fn from<T: Into<VecDeque<Tracked<SyntacticToken>>>>(tokens: T) -> Self { + Self { tokens: tokens.into() } + } + + pub fn pop(&mut self) -> Option<Tracked<SyntacticToken>> { + self.tokens.pop_front() + } + + pub fn pop_if(&mut self, predicate: fn(&Tracked<SyntacticToken>) -> bool) -> Option<Tracked<SyntacticToken>> { + match predicate(self.tokens.front()?) { + true => self.tokens.pop_front(), + false => None, + } + } + + pub fn unpop(&mut self, token: Tracked<SyntacticToken>) { + self.tokens.push_front(token); + } + + pub fn is_empty(&self) -> bool { + self.tokens.is_empty() + } +} + + +fn is_separator(token: &Tracked<SyntacticToken>) -> bool { + match token.value { + SyntacticToken::Separator => true, + _ => false, + } +} |