From 7d4dd52b8cfc865ae1b975ca3b6a3e72a812ebb9 Mon Sep 17 00:00:00 2001
From: Ben Bridle <bridle.benjamin@gmail.com>
Date: Fri, 14 Feb 2025 09:36:52 +1300
Subject: Rewrite library

---
 src/translators/bytecode_generator.rs | 131 ------------------
 src/translators/semantic_parser.rs    | 245 ----------------------------------
 src/translators/symbols_generator.rs  |  20 ---
 src/translators/syntactic_parser.rs   | 117 ----------------
 4 files changed, 513 deletions(-)
 delete mode 100644 src/translators/bytecode_generator.rs
 delete mode 100644 src/translators/semantic_parser.rs
 delete mode 100644 src/translators/symbols_generator.rs
 delete mode 100644 src/translators/syntactic_parser.rs

diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs
deleted file mode 100644
index 956aca5..0000000
--- a/src/translators/bytecode_generator.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-use crate::*;
-
-use SemanticTokenVariant as SemVar;
-
-
-pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> {
-    let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens);
-    generator.generate()
-}
-
-
-/// Translate semantic tokens into bytecode.
-struct BytecodeGenerator<'a> {
-    semantic_tokens: &'a mut [SemanticToken],
-    block_stack: Vec<usize>,
-    bytecode: Vec<u8>,
-    /// (address in bytecode, label definition token index)
-    label_references: Vec<(usize, usize)>,
-}
-
-impl<'a> BytecodeGenerator<'a> {
-    pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self {
-        Self {
-            semantic_tokens,
-            block_stack: Vec::new(),
-            bytecode: Vec::new(),
-            label_references: Vec::new(),
-        }
-    }
-
-    pub fn generate(mut self) -> Vec<u8> {
-        for i in 0..self.semantic_tokens.len() {
-            let address = self.bytecode.len();
-            self.generate_bytecode_for_token(i, None);
-            self.semantic_tokens[i].bytecode = BytecodeSpan {
-                bytes: self.bytecode[address..].to_vec(),
-                location: BytecodeLocation {
-                    address,
-                    length: self.bytecode.len().saturating_sub(address),
-                }
-            };
-        }
-
-        // Replace blank label references in bytecode with real label addresses.
-        // The layer of indirection is necessary because the iteration borrows
-        // self immutably.
-        let mut insertions: Vec<(usize, u16)> = Vec::new();
-        for (bytecode_address, token_pointer) in &self.label_references {
-            let label_token = &self.semantic_tokens[*token_pointer];
-            // TODO: If greater than u16, print a warning.
-            let address_value = label_token.bytecode.location.address as u16;
-            insertions.push((*bytecode_address, address_value));
-        }
-        for (bytecode_address, address_value) in insertions {
-            self.replace_address_in_bytecode(bytecode_address, address_value);
-        }
-
-        // Strip trailing null bytes from the bytecode.
-        let mut length = self.bytecode.len();
-        for (i, byte) in self.bytecode.iter().enumerate().rev() {
-            match *byte == 0 {
-                true => length = i,
-                false => break,
-            };
-        }
-        self.bytecode.truncate(length);
-
-        return self.bytecode;
-    }
-
-    fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
-        macro_rules! push_byte {
-            ($byte:expr) => { self.bytecode.push($byte) }; }
-        macro_rules! push_double {
-            ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; }
-        macro_rules! pad {
-            ($len:expr) => { for _ in 0..$len { push_byte!(0); } } }
-
-        let semantic_token = if let Some(macro_pointer) = macro_pointer {
-            let macro_definition = &self.semantic_tokens[macro_pointer];
-            if let SemVar::MacroDefinition(def) = &macro_definition.variant {
-                &def.body_tokens[pointer]
-            } else { unreachable!() }
-        } else {
-            &self.semantic_tokens[pointer]
-        };
-        match &semantic_token.variant {
-            SemVar::MacroInvocation(pointer) => {
-                let macro_definition = &self.semantic_tokens[*pointer];
-                if let SemVar::MacroDefinition(def) = &macro_definition.variant {
-                    let length = def.body_tokens.len();
-                    let macro_pointer = Some(*pointer);
-                    for body_pointer in 0..length {
-                        // Recurse, generate bytecode for each macro body token.
-                        self.generate_bytecode_for_token(body_pointer, macro_pointer);
-                    }
-                } else { unreachable!() }
-            }
-            SemVar::Literal(value) => match value {
-                Value::Byte(value) => push_byte!(*value),
-                Value::Double(value) => push_double!(value),
-            }
-            SemVar::Padding(value) => match value {
-                Value::Byte(value) => pad!(*value),
-                Value::Double(value) => pad!(*value),
-            }
-            SemVar::Instruction(instr) => push_byte!(instr.value),
-            SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes),
-            SemVar::LabelReference(pointer) => {
-                self.label_references.push((self.bytecode.len(), *pointer));
-                push_double!(0u16);
-            }
-            SemVar::BlockOpen(_) => {
-                self.block_stack.push(self.bytecode.len());
-                push_double!(0u16);
-            }
-            SemVar::BlockClose(_) => {
-                let bytecode_address = self.block_stack.pop().unwrap();
-                // TODO: If greater than u16, print a warning.
-                let address_value = self.bytecode.len() as u16;
-                self.replace_address_in_bytecode(bytecode_address, address_value);
-            }
-            _ => (),
-        };
-    }
-
-    fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) {
-        let range = bytecode_address..bytecode_address+2;
-        self.bytecode[range].clone_from_slice(&address_value.to_be_bytes());
-    }
-}
diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs
deleted file mode 100644
index cb6a435..0000000
--- a/src/translators/semantic_parser.rs
+++ /dev/null
@@ -1,245 +0,0 @@
-use crate::*;
-
-use std::collections::HashMap;
-use std::path::PathBuf;
-
-use SyntacticTokenVariant as SynVar;
-use SemanticTokenVariant as SemVar;
-use SemanticParseError as SemErr;
-
-
-pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
-    let semantic_parser = SemanticParser::from_source_code(source_code, path);
-    semantic_parser.parse()
-}
-
-
-/// Translate syntactic tokens into semantic tokens.
-struct SemanticParser {
-    labels: HashMap<String, Definition>,
-    macros: HashMap<String, Definition>,
-    syntactic_tokens: Vec<SyntacticToken>,
-    /// Index of the current outer token.
-    current_outer_index: usize,
-}
-
-impl SemanticParser {
-    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
-        let mut labels = HashMap::new();
-        let mut macros = HashMap::new();
-        let mut syntactic_tokens = Vec::new();
-
-        let parser = SyntacticParser::from_source_code(source_code, path);
-        for syntactic_token in parser {
-            let definition = Definition::new(syntactic_token.source.clone());
-            match &syntactic_token.variant {
-                SynVar::LabelDefinition(name) => {
-                    let _ = labels.try_insert(name.to_owned(), definition);
-                },
-                SynVar::MacroDefinition(name) => {
-                    let _ = macros.try_insert(name.to_owned(), definition);
-                },
-                _ => (),
-            }
-            syntactic_tokens.push(syntactic_token);
-        }
-
-        Self {
-            labels,
-            macros,
-            syntactic_tokens,
-            current_outer_index: 0,
-        }
-    }
-
-    /// Parse syntactic tokens as semantic tokens.
-    pub fn parse(mut self) -> Vec<SemanticToken> {
-        let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens);
-        let mut syntactic = syntactic_tokens.into_iter();
-        let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false);
-
-        // Insert real label definition pointers into label reference tokens.
-        for definition in self.labels.values_mut() {
-            if let Some(definition_pointer) = definition.pointer {
-                // Insert definition pointer into reference tokens.
-                for reference_pointer in &definition.references {
-                    let reference_token = &mut semantic_tokens[*reference_pointer];
-                    reference_token.variant = SemVar::LabelReference(definition_pointer);
-                }
-                // Insert reference pointers into definition token.
-                let definition_token = &mut semantic_tokens[definition_pointer];
-                if let SemVar::LabelDefinition(ref mut def) = definition_token.variant {
-                    def.references = std::mem::take(&mut definition.references);
-                } else { unreachable!() }
-                // Insert definition pointer into reference tokens inside macros.
-                for (outer, inner) in &definition.deep_references {
-                    let macro_token = &mut semantic_tokens[*outer];
-                    if let SemVar::MacroDefinition(ref mut def) = macro_token.variant {
-                        let reference_token = &mut def.body_tokens[*inner];
-                        reference_token.variant = SemVar::LabelReference(definition_pointer);
-                    } else { unreachable!() }
-                }
-                // TODO: Record deep references in macro and label definitions?
-            }
-        }
-
-        return semantic_tokens;
-    }
-
-    fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken>
-    where I: Iterator<Item = SyntacticToken>
-    {
-        let mut semantic_tokens: Vec<SemanticToken> = Vec::new();
-        let mut block_stack: Vec<usize> = Vec::new();
-
-        while let Some(syntactic_token) = parser.next() {
-            let current_index = semantic_tokens.len();
-            if !in_macro {
-                self.current_outer_index = current_index;
-            }
-
-            let semantic_token_variant = match syntactic_token.variant {
-                SynVar::LabelDefinition(name) => {
-                    if in_macro {
-                        SemVar::Error(SemErr::LabelDefinitionInMacroDefinition)
-                    } else if let Some(definition) = self.macros.get(&name) {
-                        let source = definition.source.clone();
-                        SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                    } else if let Some(definition) = self.labels.get_mut(&name) {
-                        if definition.pointer.is_some() {
-                            let source = definition.source.clone();
-                            SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                        } else {
-                            definition.pointer = Some(current_index);
-                            let references = Vec::new();
-                            SemVar::LabelDefinition(LabelDefinition { name, references })
-                        }
-                    } else {
-                        unreachable!()
-                    }
-                }
-                SynVar::MacroDefinition(name) => {
-                    if in_macro {
-                        SemVar::Error(SemErr::MacroDefinitionInMacroDefinition)
-                    } else if let Some(definition) = self.labels.get(&name) {
-                        let source = definition.source.clone();
-                        SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                    } else if let Some(definition) = self.macros.get_mut(&name) {
-                        if definition.pointer.is_some() {
-                            let source = definition.source.clone();
-                            SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                        } else {
-                            definition.pointer = Some(current_index);
-                            let references = Vec::new();
-                            let body_tokens = self.pull_semantic_tokens(parser, true);
-                            SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens })
-                        }
-                    } else {
-                        unreachable!()
-                    }
-                }
-                SynVar::MacroDefinitionTerminator => if in_macro {
-                    break;
-                } else {
-                    SemVar::Error(SemErr::StrayMacroTerminator)
-                }
-                SynVar::Literal(value) => {
-                    SemVar::Literal(value)
-                }
-                SynVar::Padding(value) => {
-                    SemVar::Padding(value)
-                }
-                SynVar::Instruction(instr) => {
-                    SemVar::Instruction(instr)
-                }
-                SynVar::Comment(comment) => {
-                    SemVar::Comment(comment)
-                }
-                SynVar::String(bytes) => {
-                    SemVar::String(bytes)
-                }
-                SynVar::BlockOpen => {
-                    block_stack.push(current_index);
-                    SemVar::BlockOpen(0)
-                }
-                SynVar::BlockClose => {
-                    if let Some(pointer) = block_stack.pop() {
-                        let open = &mut semantic_tokens[pointer];
-                        open.variant = SemVar::BlockOpen(current_index);
-                        SemVar::BlockClose(pointer)
-                    } else {
-                        SemVar::Error(SemErr::StrayBlockClose)
-                    }
-                }
-                SynVar::MarkOpen => {
-                    SemVar::MarkOpen
-                }
-                SynVar::MarkClose => {
-                    SemVar::MarkClose
-                }
-                SynVar::Symbol(name) => {
-                    if let Some(definition) = self.labels.get_mut(&name) {
-                        if in_macro {
-                            let pointer = (self.current_outer_index, current_index);
-                            definition.deep_references.push(pointer);
-                        } else {
-                            definition.references.push(current_index);
-                        }
-                        SemVar::LabelReference(0)
-                    } else if let Some(definition) = self.macros.get_mut(&name) {
-                        if let Some(pointer) = definition.pointer {
-                            if !in_macro { definition.references.push(current_index); }
-                            SemVar::MacroInvocation(pointer)
-                        } else {
-                            let source = definition.source.clone();
-                            SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source)))
-                        }
-                    } else {
-                        SemVar::Error(SemErr::UndefinedSymbol(name))
-                    }
-                }
-                SynVar::Error(syntax_err) => {
-                    SemVar::Error(SemErr::SyntaxError(syntax_err))
-                }
-            };
-
-            let semantic_token = SemanticToken {
-                source: syntactic_token.source,
-                bytecode: BytecodeSpan::default(),
-                variant: semantic_token_variant,
-            };
-            semantic_tokens.push(semantic_token);
-        }
-
-        if in_macro {
-            //TODO: UnterminatedMacroDefinition
-        }
-
-        // Replace each unclosed BlockOpen token with an error.
-        for block_pointer in block_stack {
-            semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock);
-        }
-
-        return semantic_tokens;
-    }
-}
-
-
-struct Definition {
-    pub source: SourceSpan,
-    pub pointer: Option<usize>,
-    pub references: Vec<usize>,
-    /// (macro index, label reference index)
-    pub deep_references: Vec<(usize, usize)>,
-}
-
-impl Definition {
-    pub fn new(source: SourceSpan) -> Self {
-        Self {
-            source,
-            pointer: None,
-            references: Vec::new(),
-            deep_references: Vec::new(),
-        }
-    }
-}
diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs
deleted file mode 100644
index d30facd..0000000
--- a/src/translators/symbols_generator.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-use crate::*;
-
-use SemanticTokenVariant as SemVar;
-
-
-pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String {
-    let mut symbols = String::new();
-
-    for token in semantic_tokens {
-        if let SemVar::LabelDefinition(definition) = &token.variant {
-            let address = token.bytecode.location.address;
-            if address > 0xffff { break; }
-            let name = &definition.name;
-            let location = token.source.location();
-            symbols.push_str(&format!("{address:04x} {name} {location}\n"));
-        }
-    }
-
-    return symbols;
-}
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
deleted file mode 100644
index 8f0850b..0000000
--- a/src/translators/syntactic_parser.rs
+++ /dev/null
@@ -1,117 +0,0 @@
-use crate::*;
-
-use std::path::PathBuf;
-
-
-/// Translate raw source code characters into syntactic tokens.
-pub struct SyntacticParser {
-    tokeniser: Tokeniser,
-    /// The name of the most recently parsed label.
-    label: String,
-}
-
-
-impl SyntacticParser {
-    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
-        let mut tokeniser = Tokeniser::new(source_code, path);
-        tokeniser.add_delimiters(&['@','&','%',';','[',']','{','}','(','"','\'','#','~']);
-        tokeniser.add_terminators(&[':']);
-        Self { tokeniser, label: String::new() }
-    }
-}
-
-
-impl Iterator for SyntacticParser {
-    type Item = SyntacticToken;
-
-    /// Sequentially parse tokens from the source code.
-    fn next(&mut self) -> Option<SyntacticToken> {
-        use SyntacticTokenVariant as SynVar;
-        use SyntacticParseError as SynErr;
-        let t = &mut self.tokeniser;
-
-        t.drop_whitespace();
-        t.mark_start_position();
-
-        let variant = match t.eat_char()? {
-            '@' => {
-                self.label = t.eat_token();
-                SynVar::LabelDefinition(self.label.clone())
-            }
-            '&' => {
-                let token = t.eat_token();
-                SynVar::LabelDefinition(format!("{}/{token}", self.label))
-            }
-            '%' => SynVar::MacroDefinition(t.eat_token()),
-            ';' => SynVar::MacroDefinitionTerminator,
-            '[' => SynVar::MarkOpen,
-            ']' => SynVar::MarkClose,
-            '{' => SynVar::BlockOpen,
-            '}' => SynVar::BlockClose,
-            '(' => match t.eat_to_delimiter(')') {
-                Some(string) => SynVar::Comment(string),
-                None => SynVar::Error(SynErr::UnterminatedComment),
-            }
-            '\'' => match t.eat_to_delimiter('\'') {
-                Some(string) => SynVar::String(string.as_bytes().to_vec()),
-                None => SynVar::Error(SynErr::UnterminatedRawString),
-            }
-            '"' => match t.eat_to_delimiter('"') {
-                Some(string) => {
-                    let mut bytes = string.as_bytes().to_vec();
-                    bytes.push(0x00);
-                    SynVar::String(bytes)
-                }
-                None => SynVar::Error(SynErr::UnterminatedNullString),
-            }
-            '#' => {
-                let token = t.eat_token();
-                match token.parse::<Value>() {
-                    Ok(value) => SynVar::Padding(value),
-                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
-                }
-            },
-            '~' => {
-                let token = t.eat_token();
-                let symbol = format!("{}/{token}", self.label);
-                SynVar::Symbol(symbol)
-            }
-            ':' => SynVar::Symbol(String::from(':')),
-            c => {
-                let token = format!("{c}{}", t.eat_token());
-                match token.parse::<Value>() {
-                    Ok(value) => SynVar::Literal(value),
-                    Err(_) => match token.parse::<Instruction>() {
-                        Ok(instruction) => SynVar::Instruction(instruction),
-                        Err(_) => SynVar::Symbol(token),
-                    }
-                }
-            }
-        };
-
-        // Parse source path comments.
-        if let SynVar::Comment(comment) = &variant {
-            // Check if the comment fills the entire line.
-            if t.start_position.column == 0 && t.end_of_line() {
-                if let Some(path) = comment.strip_prefix(": ") {
-                    t.source_path = Some(PathBuf::from(path.trim()));
-                    t.embedded_first_line = t.start_position.line + 1;
-                }
-            }
-        }
-
-        let source = t.get_source_span();
-        Some( SyntacticToken { source, variant } )
-    }
-}
-
-
-#[derive(Debug)]
-pub enum ParseError {
-    InvalidExtension,
-    NotFound,
-    NotReadable,
-    IsADirectory,
-    InvalidUtf8,
-    Unknown,
-}