diff options
Diffstat (limited to 'src/stages/semantic.rs')
-rw-r--r-- | src/stages/semantic.rs | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..da804ec --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,141 @@ +use crate::*; + +use std::collections::{HashMap, HashSet}; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> { + // Record all label definitions and macro names up front. + let mut definitions = HashMap::new(); + let mut macro_names = HashSet::new(); + for token in &syntactic { + match &token.value { + SyntacticToken::LabelDefinition(name) => { + let name = name.clone(); + let definition = Definition::new(0, DefinitionKind::LabelDefinition); + let tracked = Tracked::from(definition, token.source.clone()); + if let Some(_) = definitions.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + SyntacticToken::MacroDefinition(definition) => { + let name = definition.name.clone(); + if !macro_names.insert(name.clone()) { + unreachable!("Uncaught duplicate macro definition '{name}'") + } + } + _ => (), + } + } + + // Convert syntactic tokens to semantic tokens. + let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut errors = Vec::new(); + let mut stack = Vec::new(); + + for syn_token in syntactic { + let i = tokens.len(); + let sem_token = match syn_token.value { + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + + SyntacticToken::LabelDefinition(name) => { + let definition = definitions.get_mut(&name).unwrap(); + definition.value.definition = i; + SemanticToken::LabelDefinition(name) + } + SyntacticToken::MacroDefinition(definition) => { + let source = definition.name.source.clone(); + let name = definition.name.clone(); + + let mut body: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut body_stack = Vec::new(); + for syn_token in definition.body { + let j = body.len(); + let sem_token = match syn_token.value { + SyntacticToken::Comment(string) => + SemanticToken::Comment(string), + + SyntacticToken::LabelDefinition(label) => + unreachable!("Uncaught label definition '{label}' in macro '{name}'"), + SyntacticToken::MacroDefinition(definition) => + unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name), + + SyntacticToken::RawValue(value) => SemanticToken::RawValue(value), + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::Invocation(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.deep_references.push((i, j)); + } else if macro_names.contains(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + errors.push(Tracked::from(error, syn_token.source.clone())); + } else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + SemanticToken::Invocation(symbol) + } + + SyntacticToken::Padding(value) => SemanticToken::Padding(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + + SyntacticToken::BlockOpen => { + body_stack.push(j); + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = body_stack.pop() else { + unreachable!("Uncaught unmatched block terminator in macro {name}"); + }; + body[k].value = SemanticToken::BlockOpen(j); + SemanticToken::BlockClose(k) + } + }; + body.push(Tracked::from(sem_token, syn_token.source)); + } + + let kind = DefinitionKind::MacroDefinition(body); + let tracked = Tracked::from(Definition::new(i, kind), source); + if let Some(_) = definitions.insert(name.value.clone(), tracked) { + unreachable!("Uncaught duplicate definition '{name}'") + }; + + if !body_stack.is_empty() { + unreachable!("Uncaught unterminated block in macro {name}"); + } + SemanticToken::MacroDefinition(name) + } + + SyntacticToken::RawValue(value) => SemanticToken::RawValue(value), + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::Invocation(symbol) => { + let Some(definition) = definitions.get_mut(&symbol) else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + definition.value.references.push(i); + SemanticToken::Invocation(symbol) + } + + SyntacticToken::Padding(value) => SemanticToken::Padding(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + + SyntacticToken::BlockOpen => { + stack.push(i); + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + tokens[k].value = SemanticToken::BlockOpen(i); + SemanticToken::BlockClose(k) + } + }; + tokens.push(Tracked::from(sem_token, syn_token.source)); + } + + if !stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + match errors.is_empty() { + true => Ok(Program { definitions, tokens }), + false => Err(errors), + } +} |