From f4027cae775e3c9c237675f9df35a744d54f3f2e Mon Sep 17 00:00:00 2001
From: Ben Bridle <bridle.benjamin@gmail.com>
Date: Mon, 28 Oct 2024 19:52:29 +1300
Subject: Rewrite assembler

This is an almost complete rewrite of the entire assembler from the
ground up, with a different parsing strategy and a whole new symbol
resolution mechanism for automatically including library files.

The assembly syntax has also been slightly modified, with padding
tokens now being prefixed with '#' instead of '$', and a block-style
anonymous-label syntax which uses the '{' and '}' characters.
---
 src/translators/semantic_parser.rs | 245 +++++++++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 src/translators/semantic_parser.rs

(limited to 'src/translators/semantic_parser.rs')

diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs
new file mode 100644
index 0000000..cb6a435
--- /dev/null
+++ b/src/translators/semantic_parser.rs
@@ -0,0 +1,245 @@
+use crate::*;
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use SyntacticTokenVariant as SynVar;
+use SemanticTokenVariant as SemVar;
+use SemanticParseError as SemErr;
+
+
+/// Parse assembly source code into a list of semantic tokens.
+///
+/// `source_code` and `path` are forwarded unchanged to
+/// `SemanticParser::from_source_code`, and the resulting parser is run to
+/// completion.
+pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
+    SemanticParser::from_source_code(source_code, path).parse()
+}
+
+
+/// Translate syntactic tokens into semantic tokens.
+///
+/// Label and macro definitions are indexed by `from_source_code` before any
+/// token is translated, which lets `parse` resolve label references that
+/// appear earlier in the source than the label they name.
+struct SemanticParser {
+    /// Label definitions found in the source, keyed by label name.
+    labels: HashMap<String, Definition>,
+    /// Macro definitions found in the source, keyed by macro name.
+    macros: HashMap<String, Definition>,
+    /// Syntactic tokens buffered by `from_source_code` and consumed by `parse`.
+    syntactic_tokens: Vec<SyntacticToken>,
+    /// Index of the current outer token.
+    current_outer_index: usize,
+}
+
+impl SemanticParser {
+    /// Tokenise `source_code` and index every label and macro definition.
+    ///
+    /// All syntactic tokens are buffered so that `parse` can translate them
+    /// after the full set of defined names is known. When a name is defined
+    /// more than once, only the source span of the first definition is
+    /// recorded; later definitions are left for `parse` to report.
+    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+        let mut labels: HashMap<String, Definition> = HashMap::new();
+        let mut macros: HashMap<String, Definition> = HashMap::new();
+        let mut syntactic_tokens = Vec::new();
+
+        for syntactic_token in SyntacticParser::from_source_code(source_code, path) {
+            // Only definition tokens need a `Definition` record, so the source
+            // span is cloned lazily, and only for the first definition of a
+            // name. The entry API keeps an existing record untouched.
+            match &syntactic_token.variant {
+                SynVar::LabelDefinition(name) => {
+                    labels.entry(name.to_owned())
+                        .or_insert_with(|| Definition::new(syntactic_token.source.clone()));
+                }
+                SynVar::MacroDefinition(name) => {
+                    macros.entry(name.to_owned())
+                        .or_insert_with(|| Definition::new(syntactic_token.source.clone()));
+                }
+                _ => (),
+            }
+            syntactic_tokens.push(syntactic_token);
+        }
+
+        Self {
+            labels,
+            macros,
+            syntactic_tokens,
+            current_outer_index: 0,
+        }
+    }
+
+    /// Parse syntactic tokens as semantic tokens.
+    ///
+    /// Works in two passes. `pull_semantic_tokens` first translates every
+    /// buffered syntactic token, emitting label references with a placeholder
+    /// index. The placeholders are then overwritten with the index of the
+    /// matching label definition token, and each label definition token
+    /// receives the list of top-level tokens that reference it.
+    pub fn parse(mut self) -> Vec<SemanticToken> {
+        let mut syntactic = std::mem::take(&mut self.syntactic_tokens).into_iter();
+        let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false);
+
+        // Insert real label definition pointers into label reference tokens.
+        for definition in self.labels.values_mut() {
+            // A label that never received a definition token is skipped, which
+            // leaves its reference tokens holding the placeholder index.
+            let Some(definition_pointer) = definition.pointer else { continue };
+
+            // Point every top-level reference token at the definition.
+            for &reference_pointer in &definition.references {
+                semantic_tokens[reference_pointer].variant =
+                    SemVar::LabelReference(definition_pointer);
+            }
+
+            // Move the list of top-level references into the definition token.
+            match &mut semantic_tokens[definition_pointer].variant {
+                SemVar::LabelDefinition(def) => {
+                    def.references = std::mem::take(&mut definition.references);
+                }
+                _ => unreachable!(),
+            }
+
+            // References inside macro bodies are addressed by the index of the
+            // macro definition token, then by the index within its body.
+            for &(outer, inner) in &definition.deep_references {
+                match &mut semantic_tokens[outer].variant {
+                    SemVar::MacroDefinition(def) => {
+                        def.body_tokens[inner].variant =
+                            SemVar::LabelReference(definition_pointer);
+                    }
+                    _ => unreachable!(),
+                }
+            }
+            // TODO: Record deep references in macro and label definitions?
+        }
+
+        semantic_tokens
+    }
+
+    /// Translate syntactic tokens pulled from `parser` into semantic tokens.
+    ///
+    /// With `in_macro` unset, tokens are consumed until the iterator is
+    /// exhausted. With `in_macro` set, translation also stops at the first
+    /// macro definition terminator, which is consumed but not emitted; the
+    /// returned tokens then form the body of the enclosing macro definition.
+    ///
+    /// Label references are emitted with a placeholder index of zero and
+    /// recorded in `self.labels`, to be patched later by `parse`. Indices
+    /// recorded here are positions within the vector returned by this call.
+    fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken>
+    where I: Iterator<Item = SyntacticToken>
+    {
+        let mut tokens: Vec<SemanticToken> = Vec::new();
+        // Indices of BlockOpen tokens that have not yet been closed.
+        let mut open_blocks: Vec<usize> = Vec::new();
+
+        // A `for` loop is not used here because the iterator has to be handed
+        // to the recursive call that parses a macro body.
+        while let Some(syntactic_token) = parser.next() {
+            let index = tokens.len();
+            // Inside a macro body this keeps pointing at the index that the
+            // enclosing macro definition token will occupy.
+            if !in_macro {
+                self.current_outer_index = index;
+            }
+
+            let variant = match syntactic_token.variant {
+                SynVar::LabelDefinition(name) => {
+                    if in_macro {
+                        SemVar::Error(SemErr::LabelDefinitionInMacroDefinition)
+                    } else if let Some(clash) = self.macros.get(&name) {
+                        // The name is already taken by a macro.
+                        let source = clash.source.clone();
+                        SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+                    } else {
+                        match self.labels.get_mut(&name) {
+                            // An earlier token has already claimed this label.
+                            Some(definition) if definition.pointer.is_some() => {
+                                let source = definition.source.clone();
+                                SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+                            }
+                            Some(definition) => {
+                                definition.pointer = Some(index);
+                                let references = Vec::new();
+                                SemVar::LabelDefinition(LabelDefinition { name, references })
+                            }
+                            // Every label definition was indexed up front.
+                            None => unreachable!(),
+                        }
+                    }
+                }
+                SynVar::MacroDefinition(name) => {
+                    if in_macro {
+                        SemVar::Error(SemErr::MacroDefinitionInMacroDefinition)
+                    } else if let Some(clash) = self.labels.get(&name) {
+                        // The name is already taken by a label.
+                        let source = clash.source.clone();
+                        SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+                    } else {
+                        match self.macros.get_mut(&name) {
+                            // An earlier token has already claimed this macro.
+                            Some(definition) if definition.pointer.is_some() => {
+                                let source = definition.source.clone();
+                                SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+                            }
+                            Some(definition) => {
+                                // The pointer is set before the body is parsed,
+                                // so the body already sees the macro as defined.
+                                definition.pointer = Some(index);
+                                let references = Vec::new();
+                                let body_tokens = self.pull_semantic_tokens(parser, true);
+                                SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens })
+                            }
+                            // Every macro definition was indexed up front.
+                            None => unreachable!(),
+                        }
+                    }
+                }
+                SynVar::MacroDefinitionTerminator => {
+                    // The terminator ends the macro body without being emitted.
+                    if in_macro {
+                        break;
+                    }
+                    SemVar::Error(SemErr::StrayMacroTerminator)
+                }
+                SynVar::Literal(value) => SemVar::Literal(value),
+                SynVar::Padding(value) => SemVar::Padding(value),
+                SynVar::Instruction(instr) => SemVar::Instruction(instr),
+                SynVar::Comment(comment) => SemVar::Comment(comment),
+                SynVar::String(bytes) => SemVar::String(bytes),
+                SynVar::BlockOpen => {
+                    // Placeholder value, replaced when the block is closed.
+                    open_blocks.push(index);
+                    SemVar::BlockOpen(0)
+                }
+                SynVar::BlockClose => match open_blocks.pop() {
+                    Some(opener) => {
+                        // Link the opening and closing tokens to each other.
+                        tokens[opener].variant = SemVar::BlockOpen(index);
+                        SemVar::BlockClose(opener)
+                    }
+                    None => SemVar::Error(SemErr::StrayBlockClose),
+                },
+                SynVar::MarkOpen => SemVar::MarkOpen,
+                SynVar::MarkClose => SemVar::MarkClose,
+                SynVar::Symbol(name) => {
+                    if let Some(label) = self.labels.get_mut(&name) {
+                        if in_macro {
+                            label.deep_references.push((self.current_outer_index, index));
+                        } else {
+                            label.references.push(index);
+                        }
+                        // Placeholder value, patched by `parse`.
+                        SemVar::LabelReference(0)
+                    } else if let Some(definition) = self.macros.get_mut(&name) {
+                        match definition.pointer {
+                            Some(pointer) => {
+                                // Only top-level invocations are recorded.
+                                if !in_macro {
+                                    definition.references.push(index);
+                                }
+                                SemVar::MacroInvocation(pointer)
+                            }
+                            None => {
+                                let source = definition.source.clone();
+                                SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source)))
+                            }
+                        }
+                    } else {
+                        SemVar::Error(SemErr::UndefinedSymbol(name))
+                    }
+                }
+                SynVar::Error(syntax_err) => SemVar::Error(SemErr::SyntaxError(syntax_err)),
+            };
+
+            tokens.push(SemanticToken {
+                source: syntactic_token.source,
+                bytecode: BytecodeSpan::default(),
+                variant,
+            });
+        }
+
+        //TODO: UnterminatedMacroDefinition (only relevant when `in_macro` is set)
+
+        // Replace each unclosed BlockOpen token with an error.
+        for opener in open_blocks {
+            tokens[opener].variant = SemVar::Error(SemErr::UnclosedBlock);
+        }
+
+        tokens
+    }
+}
+
+
+/// Bookkeeping record for one label or macro name while parsing.
+struct Definition {
+    /// Source span of the definition token this record was created from.
+    pub source: SourceSpan,
+    /// Index of the semantic token that defines the symbol, or `None` while
+    /// no definition token has been accepted for it.
+    pub pointer: Option<usize>,
+    /// Indices of the top-level semantic tokens that reference the symbol.
+    pub references: Vec<usize>,
+    /// (macro index, label reference index)
+    pub deep_references: Vec<(usize, usize)>,
+}
+
+impl Definition {
+    /// Create a record for a symbol whose definition appears at `source`,
+    /// with no definition pointer assigned and no references recorded.
+    pub fn new(source: SourceSpan) -> Self {
+        let pointer = None;
+        let references = Vec::new();
+        let deep_references = Vec::new();
+        Self { source, pointer, references, deep_references }
+    }
+}
-- 
cgit v1.2.3-70-g09d2