summaryrefslogtreecommitdiff
path: root/src/stages/semantic.rs
diff options
context:
space:
mode:
author: Ben Bridle <ben@derelict.engineering> 2025-03-06 20:33:27 +1300
committer: Ben Bridle <ben@derelict.engineering> 2025-03-11 16:59:26 +1300
commit: 1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch)
tree: 472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 /src/stages/semantic.rs
parent: f2ed89083f5326a7a6f0a1720033d3388aa431fb (diff)
download: torque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip
Rewrite entire assembler
The language is now more general, the code is better structured, error reporting is more detailed, and many new language features have been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
Diffstat (limited to 'src/stages/semantic.rs')
-rw-r--r--src/stages/semantic.rs478
1 files changed, 478 insertions, 0 deletions
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs
new file mode 100644
index 0000000..e225608
--- /dev/null
+++ b/src/stages/semantic.rs
@@ -0,0 +1,478 @@
+use crate::*;
+
+use std::collections::VecDeque;
+
+
+/// Convert a list of syntactic tokens into semantic tokens.
+///
+/// Parsing starts with no active namespace. The semantic tokens are returned
+/// if no semantic error was recorded; otherwise every recorded error is
+/// returned instead.
+pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> {
+    let parser = SemanticParser::from(syntactic, Namespace::None);
+    parser.parse()
+}
+
+/// The scope used to qualify local symbols and sublabels.
+#[derive(Clone)]
+enum Namespace {
+ /// Inside the named macro definition; local names resolve to `macro:name`.
+ Macro(String),
+ /// After the named global label; local names resolve to `label/name`.
+ Label(String),
+ /// No enclosing macro or label; resolving a local name is an error.
+ None,
+}
+
+
+/// Turns a queue of syntactic tokens into semantic tokens, recording errors as it goes.
+struct SemanticParser {
+ /// Namespace currently used to resolve local symbols and sublabels.
+ namespace: Namespace,
+ /// Syntactic tokens that have not been consumed yet.
+ syntactic: SyntacticTokenStream,
+ /// Semantic tokens produced so far (filled by `parse`).
+ semantic: Vec<Tracked<SemanticToken>>,
+ /// Semantic errors recorded so far.
+ errors: Vec<Tracked<SemanticError>>,
+}
+
+impl SemanticParser {
+    /// Create a parser over `syntactic` that starts in `namespace`, with no
+    /// semantic tokens or errors recorded yet.
+    pub fn from(syntactic: Vec<Tracked<SyntacticToken>>, namespace: Namespace) -> Self {
+        let syntactic = SyntacticTokenStream::from(syntactic);
+        let semantic = Vec::new();
+        let errors = Vec::new();
+        Self { namespace, syntactic, semantic, errors }
+    }
+
+    /// Absorb the outcome of a finished sub-parser.
+    ///
+    /// Every error recorded by `other` is moved into this parser. The final
+    /// namespace of `other` is adopted too, unless it is a macro namespace,
+    /// in which case this parser keeps its own namespace. Any semantic tokens
+    /// held by `other` are discarded.
+    fn pull_from(&mut self, mut other: SemanticParser) {
+        self.errors.append(&mut other.errors);
+        if !matches!(other.namespace, Namespace::Macro(_)) {
+            self.namespace = other.namespace;
+        }
+    }
+
+    /// Resolve the full name of a label that is being defined.
+    ///
+    /// A global label becomes the current label namespace and is returned
+    /// unchanged, but is rejected inside a macro definition. A local label is
+    /// qualified with the current macro (`macro:name`) or label
+    /// (`label/name`) namespace, and is rejected when there is no namespace.
+    /// On rejection an error is recorded at `source` and `None` is returned.
+    fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> {
+        let resolved = match symbol {
+            ScopedSymbol::Global(name) => {
+                if matches!(self.namespace, Namespace::Macro(_)) {
+                    Err(SemanticError::LabelInMacroDefinition)
+                } else {
+                    // Following sublabels are scoped to this label.
+                    self.namespace = Namespace::Label(name.clone());
+                    Ok(name)
+                }
+            }
+            ScopedSymbol::Local(name) => match &self.namespace {
+                Namespace::Macro(macro_ns) => Ok(format!("{macro_ns}:{name}")),
+                Namespace::Label(label_ns) => Ok(format!("{label_ns}/{name}")),
+                Namespace::None => Err(SemanticError::SublabelWithoutNamespace),
+            },
+        };
+        match resolved {
+            Ok(name) => Some(name),
+            Err(error) => {
+                self.errors.push(Tracked::from(error, source.to_owned()));
+                None
+            }
+        }
+    }
+
+    /// Resolve the full name of a symbol that is being referenced.
+    ///
+    /// A global symbol is returned unchanged. A local symbol is qualified
+    /// with the current macro (`macro:name`) or label (`label/name`)
+    /// namespace; with no namespace an error is recorded at `source` and
+    /// `None` is returned. The current namespace is never changed.
+    fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> {
+        let name = match symbol {
+            ScopedSymbol::Global(name) => return Some(name),
+            ScopedSymbol::Local(name) => name,
+        };
+        match &self.namespace {
+            Namespace::Macro(macro_ns) => Some(format!("{macro_ns}:{name}")),
+            Namespace::Label(label_ns) => Some(format!("{label_ns}/{name}")),
+            Namespace::None => {
+                let error = SemanticError::LocalSymbolWithoutNamespace;
+                self.errors.push(Tracked::from(error, source.to_owned()));
+                None
+            }
+        }
+    }
+
+    /// Consume the parser, treating all remaining syntactic tokens as a
+    /// complete program.
+    ///
+    /// Each macro definition is handled by a sub-parser running in that
+    /// macro's namespace; every other token must form a block token. Returns
+    /// the semantic tokens if no error was recorded, otherwise the errors.
+    pub fn parse(mut self) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> {
+        while let Some(token) = self.syntactic.pop() {
+            match token.value {
+                SyntacticToken::MacroDefinition(syntactic_definition) => {
+                    let macro_namespace = Namespace::Macro(syntactic_definition.name.to_string());
+                    let mut body_parser = SemanticParser::from(syntactic_definition.tokens, macro_namespace);
+                    // Argument definitions come first, each introduced by a separator.
+                    let arguments: Vec<_> =
+                        std::iter::from_fn(|| body_parser.pull_argument_definition()).collect();
+                    let body = body_parser.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody);
+                    self.pull_from(body_parser);
+                    let definition = MacroDefinition { name: syntactic_definition.name, arguments, body };
+                    let semantic = SemanticToken::MacroDefinition(definition);
+                    self.semantic.push(Tracked::from(semantic, token.source));
+                }
+                _ => {
+                    // Not a macro definition: put the token back and read it as a block token.
+                    self.syntactic.unpop(token);
+                    if let Some(block) = self.pull_block_token(SemanticLocation::Program) {
+                        let semantic = SemanticToken::BlockToken(block.value);
+                        self.semantic.push(Tracked::from(semantic, block.source));
+                    }
+                }
+            }
+        }
+        if self.errors.is_empty() {
+            Ok(self.semantic)
+        } else {
+            Err(self.errors)
+        }
+    }
+
+    /// Parse the remaining syntactic tokens as a macro definition body.
+    ///
+    /// - No tokens: the body is an empty block.
+    /// - Exactly one token: the body is that token as pulled (an integer, an
+    ///   invocation, or a single-token block).
+    /// - Several tokens: the body is a block built by concatenating them,
+    ///   with invocations converted to block invocations. An integer is not
+    ///   a block token, so each one is dropped and reported with
+    ///   `SemanticError::ExpectedBlock(location)` at the integer's source,
+    ///   matching how `pull_block_token` reports the same situation.
+    fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody {
+        let mut tokens = Vec::new();
+        while !self.syntactic.is_empty() {
+            if let Some(token) = self.pull_macro_definition_body_token() {
+                tokens.push(token);
+            }
+        }
+        // Zero or one token: return it as-is, or an empty block.
+        if tokens.len() <= 1 {
+            return tokens.pop().unwrap_or_else(|| MacroDefinitionBody::Block(Vec::new()));
+        }
+        let mut block_tokens = Vec::new();
+        for token in tokens {
+            match token {
+                MacroDefinitionBody::Integer(integer) => {
+                    // A block was expected here, but an integer was found.
+                    let error = SemanticError::ExpectedBlock(location);
+                    self.errors.push(Tracked::from(error, integer.source));
+                }
+                MacroDefinitionBody::Block(inner) => {
+                    block_tokens.extend(inner);
+                }
+                MacroDefinitionBody::Invocation(invocation) => {
+                    // Convert invocation to a block invocation.
+                    let token = BlockToken::Invocation(invocation.value);
+                    block_tokens.push(Tracked::from(token, invocation.source));
+                }
+            }
+        }
+        MacroDefinitionBody::Block(block_tokens)
+    }
+
+ /// Attempt to pull a MacroDefinitionBody token from the token stream.
+ /// Returns None when the stream is empty, or when the pulled value is
+ /// invalid (an error is recorded and the value is dropped).
+ /// Each block token is wrapped in its own single-element
+ /// MacroDefinitionBody::Block; integers and invocations are returned as
+ /// MacroDefinitionBody::Integer and MacroDefinitionBody::Invocation.
+ fn pull_macro_definition_body_token(&mut self) -> Option<MacroDefinitionBody> {
+ let token = self.syntactic.pop()?;
+ let source = token.source;
+ match token.value {
+ SyntacticToken::LabelDefinition(symbol) => {
+ // Resolving a global label also makes it the current namespace.
+ let name = self.resolve_label_name(symbol, &source)?;
+ let token = BlockToken::LabelDefinition(name);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::MacroDefinition(_) => {
+ // Macro definitions are only consumed directly by `parse`.
+ let error = SemanticError::MisplacedMacroDefinition;
+ self.errors.push(Tracked::from(error, source));
+ None
+ }
+ SyntacticToken::IntegerLiteral(value) => {
+ let token = IntegerToken::IntegerLiteral(value);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Integer(tracked))
+ }
+ SyntacticToken::StringLiteral(_) => {
+ // Strings are only accepted as invocation arguments.
+ let error = SemanticError::MisplacedStringLiteral;
+ self.errors.push(Tracked::from(error, source));
+ None
+ }
+ SyntacticToken::WordTemplate(word_template) => {
+ let token = BlockToken::WordTemplate(word_template);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::BlockLiteral(tokens) => {
+ // Parse the nested tokens with a sub-parser that starts in a copy of
+ // the current namespace, then merge its errors and namespace back.
+ let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+ let tokens = parser.parse_block();
+ self.pull_from(parser);
+ let token = BlockToken::Block(tokens);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::Expression(tokens) => {
+ // Same sub-parser approach as for block literals.
+ let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+ let expression = parser.parse_expression();
+ self.pull_from(parser);
+ let token = IntegerToken::Expression(expression);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Integer(tracked))
+ }
+ SyntacticToken::Symbol(symbol) => {
+ // A symbol is an invocation, followed by zero or more
+ // separator-introduced arguments. If the name cannot be resolved,
+ // None is returned before any argument is consumed.
+ let name = self.resolve_symbol_name(symbol, &source)?;
+ let arguments = self.pull_all_invocation_arguments();
+ // Extend invocation source span to cover all arguments.
+ let mut source = source;
+ if let Some(last) = arguments.last() {
+ source.in_merged.end = last.source.in_merged.end;
+ // The in_source end is only updated when both spans have one.
+ if let Some(last_in_source) = &last.source.in_source {
+ if let Some(in_source) = &mut source.in_source {
+ in_source.end = last_in_source.end.clone();
+ }
+ }
+ }
+ let invocation = Invocation { name, arguments };
+ let tracked = Tracked::from(invocation, source);
+ Some(MacroDefinitionBody::Invocation(tracked))
+ }
+ SyntacticToken::Separator => {
+ // Separators are only consumed while pulling arguments.
+ let error = SemanticError::MisplacedSeparator;
+ self.errors.push(Tracked::from(error, source));
+ None
+ }
+ SyntacticToken::Condition => {
+ // The following tokens supply the predicate and the body; the whole
+ // conditional is dropped if either one cannot be pulled.
+ let conditional = self.pull_conditional_block()?;
+ let token = BlockToken::ConditionalBlock(Box::new(conditional));
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::Pin => {
+ // The following token supplies the address to pin to.
+ let integer = self.pull_integer_token(SemanticLocation::PinAddress)?;
+ let token = BlockToken::PinnedAddress(integer);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ }
+ }
+
+    /// Attempt to pull an integer token from the token stream.
+    ///
+    /// Integers are returned as-is and invocations are converted to integer
+    /// invocations. A block token is dropped and reported as
+    /// `ExpectedInteger(location)` at its source, and `None` is returned.
+    /// `None` is also returned when no body token could be pulled.
+    fn pull_integer_token(&mut self, location: SemanticLocation) -> Option<Tracked<IntegerToken>> {
+        let body_token = self.pull_macro_definition_body_token()?;
+        let rejected = match body_token {
+            MacroDefinitionBody::Integer(integer) => return Some(integer),
+            MacroDefinitionBody::Invocation(invocation) => {
+                // Convert invocation to an integer invocation.
+                let token = IntegerToken::Invocation(invocation.value);
+                return Some(Tracked::from(token, invocation.source));
+            }
+            MacroDefinitionBody::Block(mut tokens) => {
+                // A freshly pulled block always wraps exactly one token.
+                assert_eq!(tokens.len(), 1);
+                tokens.pop().unwrap()
+            }
+        };
+        let error = SemanticError::ExpectedInteger(location);
+        self.errors.push(Tracked::from(error, rejected.source));
+        None
+    }
+
+    /// Attempt to pull a BlockToken from the token stream.
+    ///
+    /// Block tokens are returned as-is and invocations are converted to
+    /// block invocations. An integer is dropped and reported as
+    /// `ExpectedBlock(location)` at its source, and `None` is returned.
+    /// `None` is also returned when no body token could be pulled.
+    fn pull_block_token(&mut self, location: SemanticLocation) -> Option<Tracked<BlockToken>> {
+        let body_token = self.pull_macro_definition_body_token()?;
+        match body_token {
+            MacroDefinitionBody::Integer(integer) => {
+                let error = SemanticError::ExpectedBlock(location);
+                self.errors.push(Tracked::from(error, integer.source));
+                None
+            }
+            MacroDefinitionBody::Invocation(invocation) => {
+                // Convert invocation to a block invocation.
+                let token = BlockToken::Invocation(invocation.value);
+                Some(Tracked::from(token, invocation.source))
+            }
+            MacroDefinitionBody::Block(tokens) => {
+                // A freshly pulled block always wraps exactly one token.
+                assert_eq!(tokens.len(), 1);
+                tokens.into_iter().next()
+            }
+        }
+    }
+
+    /// Parse every remaining syntactic token as the contents of a block.
+    /// Tokens that cannot be read as block tokens are reported and skipped.
+    fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> {
+        let mut block = Vec::new();
+        while !self.syntactic.is_empty() {
+            // Extending with an `Option` adds the token only when one was pulled.
+            block.extend(self.pull_block_token(SemanticLocation::BlockLiteral));
+        }
+        block
+    }
+
+    /// Parse every remaining syntactic token as a list of integer tokens.
+    /// Tokens that cannot be read as integers are reported against
+    /// `location` and skipped.
+    fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> {
+        let mut integers = Vec::new();
+        while !self.syntactic.is_empty() {
+            match self.pull_integer_token(location) {
+                Some(integer) => integers.push(integer),
+                None => continue,
+            }
+        }
+        integers
+    }
+
+    /// Parse every remaining syntactic token as the contents of an expression.
+    ///
+    /// The tokens are first read as a list of integer tokens. An invocation
+    /// with no arguments whose name is recognised by `Operator::from_str`
+    /// becomes an operator token; every other integer token is boxed and
+    /// kept as an integer token. Source spans are carried over unchanged.
+    fn parse_expression(&mut self) -> Expression {
+        let integers = self.parse_integer_list(SemanticLocation::Expression);
+        let tokens = integers
+            .into_iter()
+            .map(|integer| {
+                // Only a bare invocation can name an operator.
+                let operator = match &integer.value {
+                    IntegerToken::Invocation(invocation) if invocation.arguments.is_empty() => {
+                        Operator::from_str(&invocation.name)
+                    }
+                    _ => None,
+                };
+                let token = match operator {
+                    Some(operator) => ExpressionToken::Operator(operator),
+                    None => ExpressionToken::IntegerToken(Box::new(integer.value)),
+                };
+                Tracked::from(token, integer.source)
+            })
+            .collect();
+        Expression { tokens }
+    }
+
+    /// Attempt to pull a conditional block from the token stream: an integer
+    /// predicate followed by a block body.
+    ///
+    /// Returns `None` as soon as either part cannot be pulled; the body is
+    /// not attempted when the predicate is missing or invalid.
+    fn pull_conditional_block(&mut self) -> Option<ConditionalBlock> {
+        self.pull_integer_token(SemanticLocation::ConditionPredicate)
+            .and_then(|predicate| {
+                self.pull_block_token(SemanticLocation::ConditionBody)
+                    .map(|body| ConditionalBlock { predicate, body })
+            })
+    }
+
+    /// Attempt to pull one invocation argument from the token stream.
+    ///
+    /// An argument must be introduced by a separator. If the next token is
+    /// not a separator, nothing is consumed and `None` is returned, meaning
+    /// no arguments remain. `None` is also returned when the stream ends
+    /// after the separator, when a symbol cannot be resolved, or when the
+    /// token is not a valid argument; in the last two cases an error is
+    /// recorded and the token is consumed.
+    fn pull_invocation_argument(&mut self) -> Option<Tracked<InvocationArgument>> {
+        self.syntactic.pop_if(is_separator)?;
+        let token = self.syntactic.pop()?;
+        let source = token.source;
+        let argument = match token.value {
+            SyntacticToken::StringLiteral(string_literal) => {
+                InvocationArgument::String(string_literal)
+            }
+            SyntacticToken::IntegerLiteral(value) => {
+                InvocationArgument::IntegerToken(IntegerToken::IntegerLiteral(value))
+            }
+            SyntacticToken::Expression(tokens) => {
+                // Nested tokens are parsed by a sub-parser in the current namespace.
+                let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+                let expression = parser.parse_expression();
+                self.pull_from(parser);
+                InvocationArgument::IntegerToken(IntegerToken::Expression(expression))
+            }
+            SyntacticToken::BlockLiteral(tokens) => {
+                let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+                let block_tokens = parser.parse_block();
+                self.pull_from(parser);
+                InvocationArgument::BlockToken(BlockToken::Block(block_tokens))
+            }
+            SyntacticToken::Symbol(symbol) => {
+                // A symbol used as an argument is an invocation without arguments.
+                let name = self.resolve_symbol_name(symbol, &source)?;
+                InvocationArgument::Invocation(Invocation { name, arguments: Vec::new() })
+            }
+            SyntacticToken::WordTemplate(word_template) => {
+                InvocationArgument::BlockToken(BlockToken::WordTemplate(word_template))
+            }
+            _ => {
+                let error = SemanticError::InvalidInvocationArgument;
+                self.errors.push(Tracked::from(error, source));
+                return None;
+            }
+        };
+        Some(Tracked::from(argument, source))
+    }
+
+    /// Pull invocation arguments until `pull_invocation_argument` returns
+    /// `None`, collecting them in order.
+    fn pull_all_invocation_arguments(&mut self) -> Vec<Tracked<InvocationArgument>> {
+        std::iter::from_fn(|| self.pull_invocation_argument()).collect()
+    }
+
+    /// Attempt to pull one macro argument definition from the token stream.
+    ///
+    /// A definition must be introduced by a separator. If the next token is
+    /// not a separator, nothing is consumed and `None` is returned, meaning
+    /// no arguments remain. A global symbol defines an integer argument; a
+    /// block literal holding exactly one global symbol defines a block
+    /// argument. Any other token is consumed, reported as
+    /// `InvalidArgumentDefinition`, and `None` is returned.
+    fn pull_argument_definition(&mut self) -> Option<Tracked<ArgumentDefinition>> {
+        self.syntactic.pop_if(is_separator)?;
+        let token = self.syntactic.pop()?;
+        let source = token.source;
+        let parsed = match token.value {
+            SyntacticToken::Symbol(ScopedSymbol::Global(name)) => {
+                Some((name, ArgumentType::Integer))
+            }
+            SyntacticToken::BlockLiteral(mut tokens) if tokens.len() == 1 => {
+                match tokens.pop().map(|inner| inner.value) {
+                    Some(SyntacticToken::Symbol(ScopedSymbol::Global(name))) => {
+                        Some((name, ArgumentType::Block))
+                    }
+                    _ => None,
+                }
+            }
+            _ => None,
+        };
+        match parsed {
+            Some((name, variant)) => {
+                let definition = ArgumentDefinition { name, variant };
+                Some(Tracked::from(definition, source))
+            }
+            None => {
+                let error = SemanticError::InvalidArgumentDefinition;
+                self.errors.push(Tracked::from(error, source));
+                None
+            }
+        }
+    }
+}
+
+
+
+/// A queue of syntactic tokens that is consumed from the front and allows a
+/// token to be pushed back onto the front.
+struct SyntacticTokenStream {
+    tokens: VecDeque<Tracked<SyntacticToken>>,
+}
+
+impl SyntacticTokenStream {
+    /// Build a stream from anything that converts into a token queue.
+    pub fn from<T: Into<VecDeque<Tracked<SyntacticToken>>>>(tokens: T) -> Self {
+        let tokens = tokens.into();
+        Self { tokens }
+    }
+
+    /// Remove and return the next token, or `None` if the stream is empty.
+    pub fn pop(&mut self) -> Option<Tracked<SyntacticToken>> {
+        self.tokens.pop_front()
+    }
+
+    /// Remove and return the next token only if `predicate` accepts it.
+    /// Returns `None`, leaving the stream untouched, if the stream is empty
+    /// or the predicate rejects the token.
+    pub fn pop_if(&mut self, predicate: fn(&Tracked<SyntacticToken>) -> bool) -> Option<Tracked<SyntacticToken>> {
+        let next = self.tokens.front()?;
+        if predicate(next) {
+            self.tokens.pop_front()
+        } else {
+            None
+        }
+    }
+
+    /// Push a token back so that it is the next one returned.
+    pub fn unpop(&mut self, token: Tracked<SyntacticToken>) {
+        self.tokens.push_front(token);
+    }
+
+    /// Report whether no tokens remain.
+    pub fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
+    }
+}
+
+
+/// Report whether a syntactic token is a separator.
+fn is_separator(token: &Tracked<SyntacticToken>) -> bool {
+    matches!(token.value, SyntacticToken::Separator)
+}