From afa81e9ae6a56efe2eae2990e09c672b74328715 Mon Sep 17 00:00:00 2001
From: Ben Bridle <bridle.benjamin@gmail.com>
Date: Mon, 8 May 2023 12:05:57 +1200
Subject: Added detection of cyclic macro references, and made assembler binary
 usable

---
 src/assembler.rs       | 273 +++++++++++++++++++++++++++++++++++++++
 src/error.rs           |   9 +-
 src/lib.rs             | 339 ++-----------------------------------------------
 src/main.rs            |  64 ++++------
 src/semantic_token.rs  |  74 ++++++++++-
 src/syntactic_token.rs |  14 +-
 src/tokenizer.rs       |  12 +-
 7 files changed, 403 insertions(+), 382 deletions(-)
 create mode 100644 src/assembler.rs

(limited to 'src')

diff --git a/src/assembler.rs b/src/assembler.rs
new file mode 100644
index 0000000..cb6b6f1
--- /dev/null
+++ b/src/assembler.rs
@@ -0,0 +1,273 @@
+use std::mem::take;
+use std::collections::hash_map::Entry;
+
+use SyntacticTokenType as Syn;
+use SemanticTokenType as Sem;
+use crate::*;
+
+use std::collections::HashMap;
+
+/// The inner value is the index of the token that defines this symbol.
+pub enum SymbolDefinition {
+    Macro(usize),
+    Label(usize),
+}
+
+pub struct Assembler {
+    /// The contents of the program as a list of syntactic tokens.
+    syntactic_tokens: Vec<SyntacticToken>,
+    /// The contents of the program as a list of semantic tokens.
+    semantic_tokens: Vec<SemanticToken>,
+    /// Map the name of each defined symbol to the index of the defining token.
+    symbol_definitions: HashMap<String, SymbolDefinition>,
+    /// Map each macro definition token index to a list of syntactic body tokens.
+    syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>,
+    /// Map each macro definition token index to a list of semantic body tokens.
+    semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>,
+}
+
+impl Assembler {
+    pub fn new() -> Self {
+        Self {
+            syntactic_tokens: Vec::new(),
+            semantic_tokens: Vec::new(),
+            symbol_definitions: HashMap::new(),
+            syntactic_macro_bodies: HashMap::new(),
+            semantic_macro_bodies: HashMap::new(),
+        }
+    }
+
+    pub fn tokenise_source(&mut self, source_code: &str) {
+        // The index of the current macro definition token
+        let mut macro_definition: Option<usize> = None;
+        let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();
+
+        for mut token in TokenIterator::from_str(source_code) {
+            let next_index = self.syntactic_tokens.len();
+            if let Some(index) = macro_definition {
+                token.use_in_macro_body();
+                if token.is_macro_terminator() {
+                    // Commit the current macro definition
+                    macro_definition_body_tokens.push(token);
+                    self.syntactic_macro_bodies.insert(
+                        index, take(&mut macro_definition_body_tokens));
+                    macro_definition = None;
+                } else {
+                    macro_definition_body_tokens.push(token);
+                }
+            } else {
+                if let Syn::MacroDefinition(ref name) = token.r#type {
+                    macro_definition = Some(next_index);
+                    match self.symbol_definitions.entry(name.to_string()) {
+                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
+                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));}
+                    }
+                } else if let Syn::LabelDefinition(ref name) = token.r#type {
+                    match self.symbol_definitions.entry(name.to_string()) {
+                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
+                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));}
+                    }
+                } else if token.is_macro_terminator() {
+                    token.set_error(Error::OrphanedMacroDefinitionTerminator);
+                }
+                self.syntactic_tokens.push(token);
+            }
+        }
+    }
+
+    pub fn resolve_references(&mut self) {
+        let syntactic_tokens = take(&mut self.syntactic_tokens);
+        let syntactic_token_count = syntactic_tokens.len();
+
+        for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() {
+            let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index);
+            self.semantic_tokens.push(semantic_token);
+        }
+        assert_eq!(syntactic_token_count, self.semantic_tokens.len());
+
+        // Find all cyclic macros
+        let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter(
+            |i| !self.traverse_macro_definition(*i, 0)).collect();
+        // Replace each cyclic macro reference in a macro definition with an error
+        for body_tokens in &mut self.semantic_macro_bodies.values_mut() {
+            for body_token in body_tokens {
+                if let Sem::MacroReference(i) = body_token.r#type {
+                    if cyclic_macros.contains(&i) {
+                        let name = body_token.source_location.source.clone();
+                        body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference);
+                    }
+                }
+            }
+        }
+
+    }
+
+    /// Attempt to recursively traverse the body tokens of a macro definition, returning
+    /// false if the depth exceeds a preset maximum, and returning true otherwise.
+    fn traverse_macro_definition(&self, index: usize, level: usize) -> bool {
+        if level == 16 {
+            false
+        } else {
+            self.semantic_macro_bodies[&index].iter().all(
+                |token| if let Sem::MacroReference(i) = token.r#type {
+                    self.traverse_macro_definition(i, level+1)
+                } else {
+                    true
+                }
+            )
+        }
+    }
+
+    pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) {
+        let mut bytecode: Vec<u8> = Vec::new();
+        // Map each label definition token index to the bytecode addresses of the references
+        let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
+        // Map each label and macro definition token to a list of reference token indices
+        let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new();
+
+        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
+        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
+        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
+
+        let mut semantic_tokens = take(&mut self.semantic_tokens);
+
+        // Translate semantic tokens into bytecode
+        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
+            let start_addr = bytecode.len() as u16;
+            match &mut semantic_token.r#type {
+                Sem::LabelReference(i) => {
+                    reference_tokens.entry(*i).or_default().push(index);
+                    reference_addresses.entry(*i).or_default().push(start_addr);
+                    push_u16!(0);
+                }
+                Sem::MacroReference(i) => {
+                    reference_tokens.entry(*i).or_default().push(index);
+                    self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses);
+                }
+                Sem::LabelDefinition(def) => def.address=start_addr,
+                Sem::MacroDefinition(_) => (),
+
+                Sem::Padding(p) => pad!(*p),
+                Sem::ByteLiteral(b) => push_u8!(*b),
+                Sem::ShortLiteral(s) => push_u16!(*s),
+                Sem::Instruction(b) => push_u8!(*b),
+
+                Sem::MacroDefinitionTerminator => unreachable!(),
+                Sem::Comment => (),
+                Sem::Error(..) => (),
+            };
+            let end_addr = bytecode.len() as u16;
+            semantic_token.bytecode_location.start = start_addr;
+            semantic_token.bytecode_location.length = end_addr - start_addr;
+        }
+
+        // Fill each label reference with the address of the matching label definition
+        for (index, slots) in reference_addresses {
+            if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type {
+                let [h,l] = definition.address.to_be_bytes();
+                for slot in slots {
+                    bytecode[slot as usize] = h;
+                    bytecode[slot.wrapping_add(1) as usize] = l;
+                }
+            } else { unreachable!() }
+        }
+
+        // Move references and macro body tokens into label and macro definition tokens
+        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
+            if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type {
+                definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap();
+                if let Some(references) = reference_tokens.remove(&index) {
+                    definition.references = references;
+                }
+            } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type {
+                if let Some(references) = reference_tokens.remove(&index) {
+                    definition.references = references;
+                }
+            }
+        }
+        assert_eq!(reference_tokens.len(), 0);
+
+        // Remove trailing null bytes from the bytecode
+        if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
+            let truncated_length = final_nonnull_byte + 1;
+            let removed_byte_count = bytecode.len() - truncated_length;
+            if removed_byte_count > 0 {
+                bytecode.truncate(truncated_length);
+            }
+        }
+
+        (bytecode, semantic_tokens)
+    }
+
+    fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize) -> SemanticToken {
+        SemanticToken {
+            r#type: {
+                if let Some(err) = syn_token.error {
+                    Sem::Error(syn_token.r#type, err)
+                } else {
+                    match syn_token.r#type {
+                        Syn::Reference(ref name) => {
+                            match self.symbol_definitions.get(name) {
+                                Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i),
+                                Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i),
+                                None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
+                            }
+                        }
+                        Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))},
+                        Syn::MacroDefinition(name) => {
+                            let mut sem_body_tokens = Vec::new();
+                            for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() {
+                                // Make the source location of the macro definition token span the entire definition
+                                if syn_body_token.is_macro_terminator() {
+                                    syn_token.source_location.end = syn_body_token.source_location.start;
+                                }
+                                let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0);
+                                sem_body_tokens.push(sem_body_token);
+                            }
+                            self.semantic_macro_bodies.insert(index, sem_body_tokens);
+                            Sem::MacroDefinition(MacroDefinition::new(name))
+                        },
+                        Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator,
+                        Syn::Padding(v) => Sem::Padding(v),
+                        Syn::ByteLiteral(v) => Sem::ByteLiteral(v),
+                        Syn::ShortLiteral(v) => Sem::ShortLiteral(v),
+                        Syn::Instruction(v) => Sem::Instruction(v),
+                        Syn::Comment => Sem::Comment,
+                    }
+                }
+            },
+            source_location: syn_token.source_location,
+            bytecode_location: BytecodeLocation::zero(),
+        }
+    }
+
+    fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) {
+        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
+        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
+        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
+
+        for body_token in self.semantic_macro_bodies.get(&index).unwrap() {
+            let start_addr = bytecode.len() as u16;
+            match &body_token.r#type {
+                Sem::LabelReference(i) => {
+                    reference_addresses.entry(*i).or_default().push(start_addr);
+                    push_u16!(0u16);
+                },
+                Sem::MacroReference(i) => {
+                    self.expand_macro_reference(*i, bytecode, reference_addresses);
+                },
+                Sem::LabelDefinition(_) => unreachable!(),
+                Sem::MacroDefinition(_) => unreachable!(),
+
+                Sem::Padding(p) => pad!(*p),
+                Sem::ByteLiteral(b) => push_u8!(*b),
+                Sem::ShortLiteral(s) => push_u16!(*s),
+                Sem::Instruction(b) => push_u8!(*b),
+
+                Sem::MacroDefinitionTerminator => (),
+                Sem::Comment => (),
+                Sem::Error(..) => (),
+            };
+        }
+    }
+}
diff --git a/src/error.rs b/src/error.rs
index efbfc4f..8a6c0d6 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,9 +1,10 @@
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone)]
 pub enum Error {
     UnresolvedReference,
     DuplicateDefinition,
-    InvalidHexadecimalLiteral,
+    InvalidPaddingValue,
     InvalidTypeInMacroDefinition,
-    OrphanedMacroTerminator,
-    TokenPastEndOfProgram,
+    OrphanedMacroDefinitionTerminator,
+    CyclicMacroReference,
 }
+
diff --git a/src/lib.rs b/src/lib.rs
index 5d84600..a657354 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,330 +3,19 @@ mod syntactic_token;
 mod semantic_token;
 mod tokenizer;
 mod error;
-
-pub use addressing::*;
-pub use syntactic_token::*;
-pub use semantic_token::*;
-pub use error::*;
-pub use tokenizer::*;
-
-use std::collections::hash_map::{HashMap, Entry};
-use std::mem::take;
-
-// On Unicode support: Work with characters, not bytes. This will eventually be
-// used in Verdant and Doctrine, and it'd be nice to be able to support other languages.
-// The only reason to work with bytes over characters would be for a minor decrease in complexity.
-// Only support the assembly of files of up to 64kB. If assets need to be tacked on the end,
-// it can be done by another program. The VM will only be able to access the first 64kB of a file anyway.
-// Treat \t as a space, have it be a single character.
-
-// First, turn the program source code into a vector of SyntacticTokens. These
-// each contain a SourceLocation, and the type and value of the token. Every single
-// non-whitespace character of the program needs to be wrapped in a SyntacticToken.
-// The program source code can be accurately reconstructed from this list of
-// SyntacticTokens, and when I write GRID, if the mouse is hovering over any point
-// in the program listing, I'll be able to determine the exact token that is being hovered.
-// For macros, hovering over any character belonging to a macro definition will
-// highlight the entire macro definition, and also the currently-hovered body token
-// if there is one. Clicking the body token will bring up more information.
-
-// The SyntacticTokens will be collected into a vector, with label and macro definition
-// being constructed as we go. Label definitions are easy, I only need to note down the
-// names of the labels in order to validate label references in a later step. If a label
-// name has already been defined, tag the token with an error. If a macro name has already
-// been defined, tag the token with an error.
-// Collect children into macro definitions. This makes sense.
-
-// Step 2 is to generate bytecode, converting SyntacticTokens into SemanticTokens.
-// Label and macro definitions need to contain a list of usizes to references.
-// Macro definitions need to contain the body tokens as SemanticTokens.
-// Label and macro references need to point to their parents.
-// Can I stream-convert tokens from Syntactic to Semantic?
-// Each SynToken gets converted to a SemToken? Yeah.
-
-// I want to change the parser to be a multi-stage struct thing, holding its own state.
-
-enum SymbolDefinition { Macro(usize), Label(usize) }
-
-pub fn parse(source_code: &str) {
-    use SyntacticTokenType as Syn;
-    use SemanticTokenType as Sem;
-
-    // ============================ STEP 1 ============================
-    // Convert the source code into a sorted vector of syntactic tokens and a
-    // map of symbol definitions.
-    // ================================================================
-    println!("[DEBUG] STEP 1: Parse source code into syntactic tokens");
-    let mut syntactic_tokens: Vec<SyntacticToken> = Vec::new();
-    let mut symbol_definitions: HashMap<String,SymbolDefinition> = HashMap::new();
-    let mut macro_bodies: HashMap<usize, Vec<SyntacticToken>> = HashMap::new();
-    let mut macro_definition: Option<usize> = None;
-    let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();
-
-    for mut token in TokenIterator::from_str(source_code) {
-        if let Some(mdt) = macro_definition {
-            token.use_in_macro_body();
-            let terminate = token.is_macro_terminator();
-            macro_definition_body_tokens.push(token);
-            if terminate {
-                macro_bodies.insert(mdt, take(&mut macro_definition_body_tokens));
-                macro_definition = None;
-            }
-        } else {
-            if let Syn::MacroDefinition(ref name) = token.r#type {
-                macro_definition = Some(syntactic_tokens.len());
-                match symbol_definitions.entry(name.to_string()) {
-                    Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
-                    Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(syntactic_tokens.len()));}
-                }
-            } else if let Syn::LabelDefinition(ref name) = token.r#type {
-                match symbol_definitions.entry(name.to_string()) {
-                    Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
-                    Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(syntactic_tokens.len()));}
-                }
-            } else if token.is_macro_terminator() {
-                token.set_error(Error::OrphanedMacroTerminator);
-            }
-            syntactic_tokens.push(token);
-        }
-    }
-
-
-    // ============================ STEP 2 ============================
-    // Convert syntactic tokens into semantic tokens, resolving label and macro
-    // references in the process.
-    // ================================================================
-    println!("[DEBUG] STEP 2: Resolve label and macro references");
-    let syntactic_token_count = syntactic_tokens.len();
-    let mut semantic_tokens = Vec::new();
-    let mut semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>> = HashMap::new();
-
-    for (i, mut syn_token) in syntactic_tokens.into_iter().enumerate() {
-        let sem_token_type = if let Some(err) = syn_token.error {
-            // Translate over any existing syntax errors
-            Sem::Error(syn_token.r#type, err)
-        } else {
-            match syn_token.r#type {
-                Syn::Reference(ref name) => {
-                    match symbol_definitions.get(name) {
-                        Some(SymbolDefinition::Macro(addr)) => Sem::MacroReference(*addr),
-                        Some(SymbolDefinition::Label(addr)) => Sem::LabelReference(*addr),
-                        None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
-                    }
-                }
-                Syn::LabelDefinition(name) => {
-                    let label_definition = LabelDefinition {
-                        name, address: 0, references: Vec::new() };
-                    Sem::LabelDefinition(label_definition)
-                }
-                Syn::MacroDefinition(name) => {
-                    let mut sem_body_tokens = Vec::new();
-                    // Iterate over every token in the body of the macro definition,
-                    // converting each one to a semantic token.
-                    for syn_body_token in macro_bodies.remove(&i).unwrap() {
-                        let sem_body_token_type = if let Some(err) = syn_body_token.error {
-                            // Translate over any existing syntax errors
-                            Sem::Error(syn_body_token.r#type, err)
-                        } else {
-                            match syn_body_token.r#type {
-                                Syn::Reference(ref name) => match symbol_definitions.get(name) {
-                                    Some(SymbolDefinition::Macro(addr)) => Sem::MacroReference(*addr),
-                                    Some(SymbolDefinition::Label(addr)) => Sem::LabelReference(*addr),
-                                    None => Sem::Error(syn_body_token.r#type, Error::UnresolvedReference),
-                                },
-
-                                Syn::LabelDefinition(_) => unreachable!(),
-                                Syn::MacroDefinition(_) => unreachable!(),
-                                Syn::MacroTerminator => {
-                                    syn_token.source_location.end =
-                                        syn_body_token.source_location.end;
-                                    Sem::MacroTerminator
-                                },
-
-                                Syn::Pad(v) => Sem::Pad(v),
-                                Syn::Byte(v) => Sem::Byte(v),
-                                Syn::Short(v) => Sem::Short(v),
-                                Syn::Instruction(v) => Sem::Instruction(v),
-
-                                Syn::Comment => Sem::Comment,
-                            }
-                        };
-                        let sem_body_token = SemanticToken {
-                            r#type: sem_body_token_type,
-                            source_location: syn_body_token.source_location,
-                            bytecode_location: BytecodeLocation::zero(),
-                        };
-                        sem_body_tokens.push(sem_body_token);
-                    }
-                    semantic_macro_bodies.insert(i, sem_body_tokens);
-                    let macro_definition = MacroDefinition {
-                        name, body_tokens: Vec::new(), references: Vec::new() };
-                    Sem::MacroDefinition(macro_definition)
-                }
-                Syn::MacroTerminator => unreachable!(),
-
-                Syn::Pad(v) => Sem::Pad(v),
-                Syn::Byte(v) => Sem::Byte(v),
-                Syn::Short(v) => Sem::Short(v),
-                Syn::Instruction(v) => Sem::Instruction(v),
-
-                Syn::Comment => Sem::Comment,
-            }
-        };
-        let sem_token = SemanticToken {
-            r#type: sem_token_type,
-            source_location: syn_token.source_location,
-            bytecode_location: BytecodeLocation::zero(),
-        };
-        semantic_tokens.push(sem_token);
-    }
-    assert_eq!(syntactic_token_count, semantic_tokens.len());
-
-
-    // ============================ STEP 3 ============================
-    // Iterate over each semantic token, generating bytecode.
-    // ================================================================
-    println!("[DEBUG] STEP 3: Generate bytecode");
-    let mut bytecode: Vec<u8> = Vec::new();
-    // Map each label token to a list of bytecode addresses to populate
-    let mut label_reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
-    // Map each label or macro definition token to a list of reference token pointers
-    let mut references: HashMap<usize, Vec<usize>> = HashMap::new();
-
-    macro_rules! addr {() => {bytecode.len() as u16};}
-    macro_rules! push_u8 {($v:expr) => {bytecode.push($v); 1};}
-    macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&$v.to_be_bytes()); 2};}
-    macro_rules! pad {($p:expr) => {bytecode.resize(bytecode.len() + $p as usize, 0); $p as u16};}
-
-    for (i, sem_token) in semantic_tokens.iter_mut().enumerate() {
-        let start_addr = addr!();
-        let byte_length: u16 = match &mut sem_token.r#type {
-            Sem::LabelReference(addr) => {
-                references.entry(*addr).or_default().push(i);
-                label_reference_addresses.entry(*addr).or_default().push(addr!());
-                push_u16!(0u16); 2
-            },
-            Sem::MacroReference(addr) => {
-                references.entry(*addr).or_default().push(i);
-                let mut macro_byte_length: u16 = 0;
-                for body_token in semantic_macro_bodies.get(addr).unwrap() {
-                    macro_byte_length += match &body_token.r#type {
-                        Sem::LabelReference(addr) => {
-                            label_reference_addresses.entry(*addr).or_default().push(addr!());
-                            push_u16!(0u16); 2
-                        },
-                        Sem::MacroReference(_) => todo!(),
-
-                        Sem::LabelDefinition(_) => unreachable!(),
-                        Sem::MacroDefinition(_) => unreachable!(),
-
-                        Sem::Pad(p) => { pad!(*p); *p },
-                        Sem::Byte(b) => { push_u8!(*b); 1 },
-                        Sem::Short(s) => { push_u16!(*s); 2 },
-                        Sem::Instruction(b) => { push_u8!(*b); 1 },
-
-                        Sem::MacroTerminator => 0,
-                        Sem::Comment => 0,
-                        Sem::Error(..) => 0,
-                    };
-                }
-                macro_byte_length
-            },
-
-            Sem::LabelDefinition(definition) => {definition.address=addr!(); 1},
-            Sem::MacroDefinition(_) => 0,
-
-            Sem::Pad(p) => { pad!(*p); *p },
-            Sem::Byte(b) => { push_u8!(*b); 1 },
-            Sem::Short(s) => { push_u16!(*s); 2 },
-            Sem::Instruction(b) => { push_u8!(*b); 1 },
-
-            Sem::MacroTerminator => unreachable!(),
-            Sem::Comment => 0,
-            Sem::Error(..) => 0,
-        };
-        sem_token.bytecode_location.start = start_addr;
-        sem_token.bytecode_location.length = byte_length;
-    }
-
-
-    // ============================ STEP 4 ============================
-    // Fill in addresses for label references.
-    // ================================================================
-    println!("[DEBUG] STEP 4: Fill in values for label references");
-    for (label_i, slots) in label_reference_addresses.iter() {
-        if let Sem::LabelDefinition(LabelDefinition { address, .. }) = semantic_tokens[*label_i].r#type {
-            let [h,l] = address.to_be_bytes();
-            for slot in slots {
-                bytecode[*slot as usize] = h;
-                bytecode[slot.wrapping_add(1) as usize] = l;
-            }
-        } else {
-            unreachable!()
-        }
-    }
-
-    // ============================ STEP 5 ============================
-    // Move references and macro body tokens into label and macro definitions.
-    // ================================================================
-    println!("[DEBUG] STEP 5: Move information into label and macro definition tokens");
-    for (i, token) in semantic_tokens.iter_mut().enumerate() {
-        if let Sem::MacroDefinition(macro_definition) = &mut token.r#type {
-            macro_definition.body_tokens = semantic_macro_bodies.remove(&i).unwrap();
-            if let Some(macro_references) = references.remove(&i) {
-                macro_definition.references = macro_references;
-            }
-        } else if let Sem::LabelDefinition(label_definition) = &mut token.r#type {
-            if let Some(label_references) = references.remove(&i) {
-                label_definition.references = label_references;
-            }
-        }
-    }
-    assert_eq!(references.len(), 0);
-
-
-    // ============================ STEP 6 ============================
-    // Remove trailing null-bytes from the bytecode.
-    // ================================================================
-    println!("[DEBUG] STEP 6: Trim trailing null bytes");
-    if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
-        let truncated_length = final_nonnull_byte + 1;
-        let removed_byte_count = bytecode.len() - truncated_length;
-        if removed_byte_count > 0 {
-            println!("[INFO] Removed {removed_byte_count} trailing null bytes from assembled bytecode");
-            bytecode.truncate(truncated_length);
-        }
-    }
-
-
-    for token in &semantic_tokens {
-        if let Sem::MacroDefinition(macro_definition) = &token.r#type {
-            for body_token in &macro_definition.body_tokens {
-                if let Sem::Error(_, err) = body_token.r#type {
-                    println!("[ERROR] (in macro '{}') {err:?} at {}:{}..{}:{}",
-                        macro_definition.name,
-                        body_token.source_location.start.line,
-                        body_token.source_location.start.column,
-                        body_token.source_location.end.line,
-                        body_token.source_location.end.column,
-                    )
-                }
-            }
-        } else if let Sem::Error(_, err) = token.r#type {
-            println!("[ERROR {}:{}-{}:{}] {err:?}",
-                token.source_location.start.line,
-                token.source_location.start.column,
-                token.source_location.end.line,
-                token.source_location.end.column,
-            )
-        }
-    }
-
-    println!("");
-    print!("Generated bytecode: [ ");
-    for i in &bytecode {
-        print!("{i:02x} ");
-    }
-    println!("]");
+mod assembler;
+
+pub use addressing::{CharAddress, SourceLocation, BytecodeLocation};
+pub use syntactic_token::{SyntacticToken, SyntacticTokenType};
+pub use semantic_token::{SemanticToken, SemanticTokenType, LabelDefinition, MacroDefinition};
+pub use error::Error;
+pub use tokenizer::TokenIterator;
+pub use assembler::Assembler;
+
+pub fn assemble(source_code: &str) -> (Vec<u8>, Vec<SemanticToken>) {
+    let mut assembler = Assembler::new();
+    assembler.tokenise_source(source_code);
+    assembler.resolve_references();
+    assembler.generate_bytecode()
 }
 
diff --git a/src/main.rs b/src/main.rs
index 82bd92d..c7d3590 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,41 +1,33 @@
-const SOURCE:&'static str =
-// "
-// %SCREEN-SHUNT 00;
-// %SCREEN-DRAW 00;
-
-// @draw_horizontal_line  ( len* clr -- )
-// (1)  PSHr                       ( len*    | clr )
-// (4)  PSH:41 STD:SCREEN-SHUNT    ( len*    | clr )
-//       &loop
-// (2)   SKDr:SCREEN-DRAW          ( len*    | clr )
-// (4)   DEC* JKC*:~loop           ( len*    | clr )
-// (2)  STD:SCREEN-SHUNT
-// (3)  POP POPr JMPr*
-// ";
-
-"
-%RED 1234;
-%GREEN 5678 @test;
-%BLUE 9ABC;
-
-@start
-    RED
-    start
-    GREEN
-    BLUE
-    $4
-    @end
-";
-
+use std::io::{Read, Write};
 use bedrock_asm::*;
 
 fn main() {
-    println!("------- PROGRAM START -------");
-    for line in SOURCE.lines() {
-        println!("{line}");
-    }
-    println!("-------- PROGRAM END --------");
-    println!();
+    // Read source code from standard input
+    let mut source_code = String::new();
+    let mut stdin = std::io::stdin().lock();
+    if let Err(err) = stdin.read_to_string(&mut source_code) {
+        eprintln!("Could not read from standard input, quitting.");
+        eprintln!("({err:?})");
+        std::process::exit(1);
+    };
+
+    let (bytecode, tokens) = assemble(&source_code);
+    for token in &tokens {
+        token.print_error(&source_code); }
+    eprintln!("Assembled program in {} bytes.", bytecode.len());
+    let bytecode_len = bytecode.len();
 
-    parse(SOURCE);
+    // Write bytecode to standard output
+    let mut stdout = std::io::stdout().lock();
+    match stdout.write(&bytecode) {
+        Ok(len) => if len != bytecode_len {
+            eprintln!("Only wrote {len} of {bytecode_len} bytes")
+        }
+        Err(err) => {
+            eprintln!("Could not write to standard output, quitting.");
+            eprintln!("({err:?})");
+            std::process::exit(1);
+        }
+    }
 }
+
diff --git a/src/semantic_token.rs b/src/semantic_token.rs
index cac82a9..3d08b25 100644
--- a/src/semantic_token.rs
+++ b/src/semantic_token.rs
@@ -7,12 +7,12 @@ pub enum SemanticTokenType {
     LabelDefinition(LabelDefinition),
     MacroDefinition(MacroDefinition),
 
-    Pad(u16),
-    Byte(u8),
-    Short(u16),
+    Padding(u16),
+    ByteLiteral(u8),
+    ShortLiteral(u16),
     Instruction(u8),
 
-    MacroTerminator,
+    MacroDefinitionTerminator,
     Comment,
     Error(SyntacticTokenType, Error),
 }
@@ -23,16 +23,82 @@ pub struct SemanticToken {
     pub bytecode_location: BytecodeLocation,
 }
 
impl SemanticToken {
    /// Print a human-readable description of this token's error (if any) to
    /// standard error, quoting the offending source line and underlining the
    /// token's column span. For macro definitions, recurses into the body
    /// tokens so errors inside macro bodies are reported too.
    pub fn print_error(&self, source_code: &str) {
        // ANSI colour helpers: switch stderr text to red / back to normal.
        macro_rules! red {()=>{eprint!("\x1b[31m")};}
        macro_rules! normal {()=>{eprint!("\x1b[0m")};}

        if let SemanticTokenType::Error(token, error) = &self.r#type {
            red!(); eprint!("[ERROR] "); normal!();
            // The raw source text of the offending token.
            let source = &self.source_location.source;
            match error {
                Error::UnresolvedReference => {
                    eprintln!("Unresolved reference, no label or macro has been defined with the name '{source}'") }
                Error::DuplicateDefinition => {
                    eprintln!("Duplicate definition, a label or macro has already been defined with the name '{source}'") }
                Error::OrphanedMacroDefinitionTerminator => {
                    eprintln!("Unmatched macro definition terminator, no macro definition is in progress") }
                Error::InvalidPaddingValue => {
                    eprintln!("Invalid value for padding, the value must be at least one and at most four hexadecimal characters") }
                Error::CyclicMacroReference => {
                    eprintln!("Cyclic macro reference, this macro reference contains a reference to the macro being defined") }
                Error::InvalidTypeInMacroDefinition => {
                    // Name the offending syntactic token class for the message.
                    let name = match token {
                        SyntacticTokenType::Reference(_) => "references",
                        SyntacticTokenType::LabelDefinition(_) => "label definitions",
                        SyntacticTokenType::MacroDefinition(_) => "macro definitions",
                        SyntacticTokenType::MacroDefinitionTerminator => "macro definition terminators",
                        SyntacticTokenType::Padding(_) => "padding",
                        SyntacticTokenType::ByteLiteral(_) => "byte literals",
                        SyntacticTokenType::ShortLiteral(_) => "short literals",
                        SyntacticTokenType::Instruction(_) => "instructions",
                        SyntacticTokenType::Comment => "comments",
                    };
                    eprintln!("Invalid token in macro definition, macro definitions are not allowed to contain {name}") }
            }
            // Quote the source line containing the token.
            // NOTE(review): panics if start.line is past the last line of
            // source_code — presumably unreachable because the tokens were
            // produced from this same source string; confirm at call sites.
            let line = source_code.split('\n').nth(self.source_location.start.line).unwrap();
            // 1-based line number in a right-aligned gutter.
            eprint!("{:>5} ", self.source_location.start.line+1);
            red!(); eprint!("| "); normal!();
            // Re-print the line, switching to red across the token's columns.
            // NOTE(review): assumes the token starts and ends on the same
            // line; a multi-line span would render oddly — TODO confirm.
            for (i, c) in line.chars().enumerate() {
                if i == self.source_location.start.column { red!() }
                eprint!("{c}");
                if i == self.source_location.end.column { normal!() }
            }
            // Underline the token's columns with carets on the next line.
            eprintln!(); red!(); eprint!("      | ");
            for i in 0..=self.source_location.end.column {
                if i < self.source_location.start.column { eprint!(" ") } else { eprint!("^") };
            }
            normal!(); eprintln!();
        }
        // Macro definitions carry their body tokens; recurse so errors
        // attached to body tokens are also reported.
        if let SemanticTokenType::MacroDefinition(definition) = &self.r#type {
            for token in &definition.body_tokens {
                token.print_error(source_code);
            }
        }
    }
}
+
 pub struct LabelDefinition {
     pub name: String,
     pub address: u16,
     /// A list of pointers to label reference tokens
     pub references: Vec<usize>,
 }
+impl LabelDefinition {
+    pub fn new(name: String) -> Self {
+        Self { name, address:0, references:Vec::new() }
+    }
+}
+
 pub struct MacroDefinition {
     pub name: String,
     pub body_tokens: Vec<SemanticToken>,
     /// A list of pointers to macro reference tokens
     pub references: Vec<usize>,
 }
+impl MacroDefinition {
+    pub fn new(name: String) -> Self {
+        Self { name, body_tokens:Vec::new(), references:Vec::new() }
+    }
+}
 
diff --git a/src/syntactic_token.rs b/src/syntactic_token.rs
index ee473e2..4a50e8a 100644
--- a/src/syntactic_token.rs
+++ b/src/syntactic_token.rs
@@ -5,16 +5,18 @@ pub enum SyntacticTokenType {
 
     LabelDefinition(String),
     MacroDefinition(String),
-    MacroTerminator,
+    MacroDefinitionTerminator,
 
-    Pad(u16),
-    Byte(u8),
-    Short(u16),
+    Padding(u16),
+    ByteLiteral(u8),
+    ShortLiteral(u16),
     Instruction(u8),
 
     Comment,
 }
 
+
+
 pub struct SyntacticToken {
     pub r#type: SyntacticTokenType,
     pub source_location: SourceLocation,
@@ -32,12 +34,10 @@ impl SyntacticToken {
             _ => (),
         };
     }
-
    /// Attach an error to this token; it is carried through to semantic
    /// analysis, where it is reported as an `Error` semantic token.
    pub fn set_error(&mut self, error: Error) {
        self.error = Some(error);
    }
-
     pub fn is_macro_terminator(&self) -> bool {
-        if let SyntacticTokenType::MacroTerminator = self.r#type {true} else {false}
+        if let SyntacticTokenType::MacroDefinitionTerminator = self.r#type {true} else {false}
     }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b68cc14..508daee 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -106,9 +106,9 @@ impl Iterator for TokenIterator {
         let full = take(&mut self.source);
         let suffix = take(&mut self.suffix);
         let mut error = None;
-        let mut parse_hex_lit = |v| {
+        let mut parse_padding_value = |v| {
             parse_short(v).or_else(|| {
-                error = Some(Error::InvalidHexadecimalLiteral); Some(0)
+                error = Some(Error::InvalidPaddingValue); Some(0)
             }).unwrap()
         };
 
@@ -116,13 +116,13 @@ impl Iterator for TokenIterator {
             '(' => { SyntacticTokenType::Comment }
             '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) }
             '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) }
-            '$' => { SyntacticTokenType::Pad(parse_hex_lit(&suffix)) }
+            '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) }
             '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) }
             '%' => { SyntacticTokenType::MacroDefinition(suffix) }
             _ => {
-                if ";" == &full                                    { SyntacticTokenType::MacroTerminator }
-                else if let Some(value) = parse_byte_lit(&full)    { SyntacticTokenType::Byte(value) }
-                else if let Some(value) = parse_short_lit(&full)   { SyntacticTokenType::Short(value) }
+                if ";" == &full                                    { SyntacticTokenType::MacroDefinitionTerminator }
+                else if let Some(value) = parse_byte_lit(&full)    { SyntacticTokenType::ByteLiteral(value) }
+                else if let Some(value) = parse_short_lit(&full)   { SyntacticTokenType::ShortLiteral(value) }
                 else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) }
                 else                                               { SyntacticTokenType::Reference(full.clone()) }
             }
-- 
cgit v1.2.3-70-g09d2