From 7d4dd52b8cfc865ae1b975ca3b6a3e72a812ebb9 Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Fri, 14 Feb 2025 09:36:52 +1300 Subject: Rewrite library --- src/stages/semantic.rs | 141 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/stages/semantic.rs (limited to 'src/stages/semantic.rs') diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..da804ec --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,141 @@ +use crate::*; + +use std::collections::{HashMap, HashSet}; + + +pub fn parse_semantic(syntactic: Vec>) -> Result>> { + // Record all label definitions and macro names up front. + let mut definitions = HashMap::new(); + let mut macro_names = HashSet::new(); + for token in &syntactic { + match &token.value { + SyntacticToken::LabelDefinition(name) => { + let name = name.clone(); + let definition = Definition::new(0, DefinitionKind::LabelDefinition); + let tracked = Tracked::from(definition, token.source.clone()); + if let Some(_) = definitions.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + SyntacticToken::MacroDefinition(definition) => { + let name = definition.name.clone(); + if !macro_names.insert(name.clone()) { + unreachable!("Uncaught duplicate macro definition '{name}'") + } + } + _ => (), + } + } + + // Convert syntactic tokens to semantic tokens. + let mut tokens: Vec> = Vec::new(); + let mut errors = Vec::new(); + let mut stack = Vec::new(); + + for syn_token in syntactic { + let i = tokens.len(); + let sem_token = match syn_token.value { + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + + SyntacticToken::LabelDefinition(name) => { + let definition = definitions.get_mut(&name).unwrap(); + definition.value.definition = i; + SemanticToken::LabelDefinition(name) + } + SyntacticToken::MacroDefinition(definition) => { + let source = definition.name.source.clone(); + let name = definition.name.clone(); + + let mut body: Vec> = Vec::new(); + let mut body_stack = Vec::new(); + for syn_token in definition.body { + let j = body.len(); + let sem_token = match syn_token.value { + SyntacticToken::Comment(string) => + SemanticToken::Comment(string), + + SyntacticToken::LabelDefinition(label) => + unreachable!("Uncaught label definition '{label}' in macro '{name}'"), + SyntacticToken::MacroDefinition(definition) => + unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name), + + SyntacticToken::RawValue(value) => SemanticToken::RawValue(value), + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::Invocation(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.deep_references.push((i, j)); + } else if macro_names.contains(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + errors.push(Tracked::from(error, syn_token.source.clone())); + } else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + SemanticToken::Invocation(symbol) + } + + SyntacticToken::Padding(value) => SemanticToken::Padding(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + + SyntacticToken::BlockOpen => { + body_stack.push(j); + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = body_stack.pop() else { + unreachable!("Uncaught unmatched block terminator in macro {name}"); + }; + body[k].value = SemanticToken::BlockOpen(j); + SemanticToken::BlockClose(k) + } + }; + body.push(Tracked::from(sem_token, syn_token.source)); + } + + let kind = DefinitionKind::MacroDefinition(body); + let tracked = Tracked::from(Definition::new(i, kind), source); + if let Some(_) = definitions.insert(name.value.clone(), tracked) { + unreachable!("Uncaught duplicate definition '{name}'") + }; + + if !body_stack.is_empty() { + unreachable!("Uncaught unterminated block in macro {name}"); + } + SemanticToken::MacroDefinition(name) + } + + SyntacticToken::RawValue(value) => SemanticToken::RawValue(value), + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::Invocation(symbol) => { + let Some(definition) = definitions.get_mut(&symbol) else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + definition.value.references.push(i); + SemanticToken::Invocation(symbol) + } + + SyntacticToken::Padding(value) => SemanticToken::Padding(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + + SyntacticToken::BlockOpen => { + stack.push(i); + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + tokens[k].value = SemanticToken::BlockOpen(i); + SemanticToken::BlockClose(k) + } + }; + tokens.push(Tracked::from(sem_token, syn_token.source)); + } + + if !stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + match errors.is_empty() { + true => Ok(Program { definitions, tokens }), + false => Err(errors), + } +} -- cgit v1.2.3-70-g09d2