diff options
Diffstat (limited to 'src/translators/semantic_parser.rs')
-rw-r--r-- | src/translators/semantic_parser.rs | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs new file mode 100644 index 0000000..cb6a435 --- /dev/null +++ b/src/translators/semantic_parser.rs @@ -0,0 +1,245 @@ +use crate::*; + +use std::collections::HashMap; +use std::path::PathBuf; + +use SyntacticTokenVariant as SynVar; +use SemanticTokenVariant as SemVar; +use SemanticParseError as SemErr; + + +pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> { + let semantic_parser = SemanticParser::from_source_code(source_code, path); + semantic_parser.parse() +} + + +/// Translate syntactic tokens into semantic tokens. +struct SemanticParser { + labels: HashMap<String, Definition>, + macros: HashMap<String, Definition>, + syntactic_tokens: Vec<SyntacticToken>, + /// Index of the current outer token. + current_outer_index: usize, +} + +impl SemanticParser { + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + let mut labels = HashMap::new(); + let mut macros = HashMap::new(); + let mut syntactic_tokens = Vec::new(); + + let parser = SyntacticParser::from_source_code(source_code, path); + for syntactic_token in parser { + let definition = Definition::new(syntactic_token.source.clone()); + match &syntactic_token.variant { + SynVar::LabelDefinition(name) => { + let _ = labels.try_insert(name.to_owned(), definition); + }, + SynVar::MacroDefinition(name) => { + let _ = macros.try_insert(name.to_owned(), definition); + }, + _ => (), + } + syntactic_tokens.push(syntactic_token); + } + + Self { + labels, + macros, + syntactic_tokens, + current_outer_index: 0, + } + } + + /// Parse syntactic tokens as semantic tokens. + pub fn parse(mut self) -> Vec<SemanticToken> { + let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens); + let mut syntactic = syntactic_tokens.into_iter(); + let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false); + + // Insert real label definition pointers into label reference tokens. + for definition in self.labels.values_mut() { + if let Some(definition_pointer) = definition.pointer { + // Insert definition pointer into reference tokens. + for reference_pointer in &definition.references { + let reference_token = &mut semantic_tokens[*reference_pointer]; + reference_token.variant = SemVar::LabelReference(definition_pointer); + } + // Insert reference pointers into definition token. + let definition_token = &mut semantic_tokens[definition_pointer]; + if let SemVar::LabelDefinition(ref mut def) = definition_token.variant { + def.references = std::mem::take(&mut definition.references); + } else { unreachable!() } + // Insert definition pointer into reference tokens inside macros. + for (outer, inner) in &definition.deep_references { + let macro_token = &mut semantic_tokens[*outer]; + if let SemVar::MacroDefinition(ref mut def) = macro_token.variant { + let reference_token = &mut def.body_tokens[*inner]; + reference_token.variant = SemVar::LabelReference(definition_pointer); + } else { unreachable!() } + } + // TODO: Record deep references in macro and label definitions? + } + } + + return semantic_tokens; + } + + fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken> + where I: Iterator<Item = SyntacticToken> + { + let mut semantic_tokens: Vec<SemanticToken> = Vec::new(); + let mut block_stack: Vec<usize> = Vec::new(); + + while let Some(syntactic_token) = parser.next() { + let current_index = semantic_tokens.len(); + if !in_macro { + self.current_outer_index = current_index; + } + + let semantic_token_variant = match syntactic_token.variant { + SynVar::LabelDefinition(name) => { + if in_macro { + SemVar::Error(SemErr::LabelDefinitionInMacroDefinition) + } else if let Some(definition) = self.macros.get(&name) { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else if let Some(definition) = self.labels.get_mut(&name) { + if definition.pointer.is_some() { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else { + definition.pointer = Some(current_index); + let references = Vec::new(); + SemVar::LabelDefinition(LabelDefinition { name, references }) + } + } else { + unreachable!() + } + } + SynVar::MacroDefinition(name) => { + if in_macro { + SemVar::Error(SemErr::MacroDefinitionInMacroDefinition) + } else if let Some(definition) = self.labels.get(&name) { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else if let Some(definition) = self.macros.get_mut(&name) { + if definition.pointer.is_some() { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else { + definition.pointer = Some(current_index); + let references = Vec::new(); + let body_tokens = self.pull_semantic_tokens(parser, true); + SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens }) + } + } else { + unreachable!() + } + } + SynVar::MacroDefinitionTerminator => if in_macro { + break; + } else { + SemVar::Error(SemErr::StrayMacroTerminator) + } + SynVar::Literal(value) => { + SemVar::Literal(value) + } + SynVar::Padding(value) => { + SemVar::Padding(value) + } + SynVar::Instruction(instr) => { + SemVar::Instruction(instr) + } + SynVar::Comment(comment) => { + SemVar::Comment(comment) + } + SynVar::String(bytes) => { + SemVar::String(bytes) + } + SynVar::BlockOpen => { + block_stack.push(current_index); + SemVar::BlockOpen(0) + } + SynVar::BlockClose => { + if let Some(pointer) = block_stack.pop() { + let open = &mut semantic_tokens[pointer]; + open.variant = SemVar::BlockOpen(current_index); + SemVar::BlockClose(pointer) + } else { + SemVar::Error(SemErr::StrayBlockClose) + } + } + SynVar::MarkOpen => { + SemVar::MarkOpen + } + SynVar::MarkClose => { + SemVar::MarkClose + } + SynVar::Symbol(name) => { + if let Some(definition) = self.labels.get_mut(&name) { + if in_macro { + let pointer = (self.current_outer_index, current_index); + definition.deep_references.push(pointer); + } else { + definition.references.push(current_index); + } + SemVar::LabelReference(0) + } else if let Some(definition) = self.macros.get_mut(&name) { + if let Some(pointer) = definition.pointer { + if !in_macro { definition.references.push(current_index); } + SemVar::MacroInvocation(pointer) + } else { + let source = definition.source.clone(); + SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source))) + } + } else { + SemVar::Error(SemErr::UndefinedSymbol(name)) + } + } + SynVar::Error(syntax_err) => { + SemVar::Error(SemErr::SyntaxError(syntax_err)) + } + }; + + let semantic_token = SemanticToken { + source: syntactic_token.source, + bytecode: BytecodeSpan::default(), + variant: semantic_token_variant, + }; + semantic_tokens.push(semantic_token); + } + + if in_macro { + //TODO: UnterminatedMacroDefinition + } + + // Replace each unclosed BlockOpen token with an error. + for block_pointer in block_stack { + semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock); + } + + return semantic_tokens; + } +} + + +struct Definition { + pub source: SourceSpan, + pub pointer: Option<usize>, + pub references: Vec<usize>, + /// (macro index, label reference index) + pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { + pub fn new(source: SourceSpan) -> Self { + Self { + source, + pointer: None, + references: Vec::new(), + deep_references: Vec::new(), + } + } +} |