From afa81e9ae6a56efe2eae2990e09c672b74328715 Mon Sep 17 00:00:00 2001
From: Ben Bridle
Date: Mon, 8 May 2023 12:05:57 +1200
Subject: Added detection of cyclic macro references, and made assembler
 binary usable

---
 src/assembler.rs       | 273 +++++++++++++++++++++++++++++++++++++++
 src/error.rs           |   9 +-
 src/lib.rs             | 339 ++-----------------------------------------------
 src/main.rs            |  64 ++++------
 src/semantic_token.rs  |  74 ++++++++++-
 src/syntactic_token.rs |  14 +-
 src/tokenizer.rs       |  12 +-
 7 files changed, 403 insertions(+), 382 deletions(-)
 create mode 100644 src/assembler.rs

diff --git a/src/assembler.rs b/src/assembler.rs
new file mode 100644
index 0000000..cb6b6f1
--- /dev/null
+++ b/src/assembler.rs
@@ -0,0 +1,273 @@
+use std::mem::take;
+use std::collections::hash_map::Entry;
+
+use SyntacticTokenType as Syn;
+use SemanticTokenType as Sem;
+use crate::*;
+
+use std::collections::HashMap;
+
+/// The inner value is the index of the token that defines this symbol.
+pub enum SymbolDefinition {
+    Macro(usize),
+    Label(usize),
+}
+
+pub struct Assembler {
+    /// The contents of the program as a list of syntactic tokens.
+    syntactic_tokens: Vec<SyntacticToken>,
+    /// The contents of the program as a list of semantic tokens.
+    semantic_tokens: Vec<SemanticToken>,
+    /// Map the name of each defined symbol to the index of the defining token.
+    symbol_definitions: HashMap<String, SymbolDefinition>,
+    /// Map each macro definition token index to a list of syntactic body tokens.
+    syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>,
+    /// Map each macro definition token index to a list of semantic body tokens.
+    semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>,
+}
+
+impl Assembler {
+    pub fn new() -> Self {
+        Self {
+            syntactic_tokens: Vec::new(),
+            semantic_tokens: Vec::new(),
+            symbol_definitions: HashMap::new(),
+            syntactic_macro_bodies: HashMap::new(),
+            semantic_macro_bodies: HashMap::new(),
+        }
+    }
+
+    pub fn tokenise_source(&mut self, source_code: &str) {
+        // The index of the current macro definition token
+        let mut macro_definition: Option<usize> = None;
+        let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();
+
+        for mut token in TokenIterator::from_str(source_code) {
+            let next_index = self.syntactic_tokens.len();
+            if let Some(index) = macro_definition {
+                token.use_in_macro_body();
+                if token.is_macro_terminator() {
+                    // Commit the current macro definition
+                    macro_definition_body_tokens.push(token);
+                    self.syntactic_macro_bodies.insert(
+                        index, take(&mut macro_definition_body_tokens));
+                    macro_definition = None;
+                } else {
+                    macro_definition_body_tokens.push(token);
+                }
+            } else {
+                if let Syn::MacroDefinition(ref name) = token.r#type {
+                    macro_definition = Some(next_index);
+                    match self.symbol_definitions.entry(name.to_string()) {
+                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
+                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));}
+                    }
+                } else if let Syn::LabelDefinition(ref name) = token.r#type {
+                    match self.symbol_definitions.entry(name.to_string()) {
+                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
+                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));}
+                    }
+                } else if token.is_macro_terminator() {
+                    token.set_error(Error::OrphanedMacroDefinitionTerminator);
+                }
+                self.syntactic_tokens.push(token);
+            }
+        }
+    }
+
+    pub fn resolve_references(&mut self) {
+        let syntactic_tokens = take(&mut self.syntactic_tokens);
+        let syntactic_token_count = syntactic_tokens.len();
+
+        for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() {
+            let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index);
+            self.semantic_tokens.push(semantic_token);
+        }
+        assert_eq!(syntactic_token_count, self.semantic_tokens.len());
+
+        // Find all cyclic macros
+        let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter(
+            |i| !self.traverse_macro_definition(*i, 0)).collect();
+        // Replace each cyclic macro reference in a macro definition with an error
+        for body_tokens in &mut self.semantic_macro_bodies.values_mut() {
+            for body_token in body_tokens {
+                if let Sem::MacroReference(i) = body_token.r#type {
+                    if cyclic_macros.contains(&i) {
+                        let name = body_token.source_location.source.clone();
+                        body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference);
+                    }
+                }
+            }
+        }
+
+    }
+
+    /// Attempt to recursively traverse the body tokens of a macro definition, returning
+    /// false if the depth exceeds a preset maximum, and returning true otherwise.
+    fn traverse_macro_definition(&self, index: usize, level: usize) -> bool {
+        if level == 16 {
+            false
+        } else {
+            self.semantic_macro_bodies[&index].iter().all(
+                |token| if let Sem::MacroReference(i) = token.r#type {
+                    self.traverse_macro_definition(i, level+1)
+                } else {
+                    true
+                }
+            )
+        }
+    }
+
+    pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) {
+        let mut bytecode: Vec<u8> = Vec::new();
+        // Map each label definition token index to the bytecode addresses of the references
+        let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
+        // Map each label and macro definition token to a list of reference token indices
+        let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new();
+
+        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
+        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
+        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
+
+        let mut semantic_tokens = take(&mut self.semantic_tokens);
+
+        // Translate semantic tokens into bytecode
+        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
+            let start_addr = bytecode.len() as u16;
+            match &mut semantic_token.r#type {
+                Sem::LabelReference(i) => {
+                    reference_tokens.entry(*i).or_default().push(index);
+                    reference_addresses.entry(*i).or_default().push(start_addr);
+                    push_u16!(0);
+                }
+                Sem::MacroReference(i) => {
+                    reference_tokens.entry(*i).or_default().push(index);
+                    self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses);
+                }
+                Sem::LabelDefinition(def) => def.address=start_addr,
+                Sem::MacroDefinition(_) => (),
+
+                Sem::Padding(p) => pad!(*p),
+                Sem::ByteLiteral(b) => push_u8!(*b),
+                Sem::ShortLiteral(s) => push_u16!(*s),
+                Sem::Instruction(b) => push_u8!(*b),
+
+                Sem::MacroDefinitionTerminator => unreachable!(),
+                Sem::Comment => (),
+                Sem::Error(..) => (),
+            };
+            let end_addr = bytecode.len() as u16;
+            semantic_token.bytecode_location.start = start_addr;
+            semantic_token.bytecode_location.length = end_addr - start_addr;
+        }
+
+        // Fill each label reference with the address of the matching label definition
+        for (index, slots) in reference_addresses {
+            if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type {
+                let [h,l] = definition.address.to_be_bytes();
+                for slot in slots {
+                    bytecode[slot as usize] = h;
+                    bytecode[slot.wrapping_add(1) as usize] = l;
+                }
+            } else { unreachable!() }
+        }
+
+        // Move references and macro body tokens into label and macro definition tokens
+        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
+            if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type {
+                definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap();
+                if let Some(references) = reference_tokens.remove(&index) {
+                    definition.references = references;
+                }
+            } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type {
+                if let Some(references) = reference_tokens.remove(&index) {
+                    definition.references = references;
+                }
+            }
+        }
+        assert_eq!(reference_tokens.len(), 0);
+
+        // Remove trailing null bytes from the bytecode
+        if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
+            let truncated_length = final_nonnull_byte + 1;
+            let removed_byte_count = bytecode.len() - truncated_length;
+            if removed_byte_count > 0 {
+                bytecode.truncate(truncated_length);
+            }
+        }
+
+        (bytecode, semantic_tokens)
+    }
+
+    fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize) -> SemanticToken {
+        SemanticToken {
+            r#type: {
+                if let Some(err) = syn_token.error {
+                    Sem::Error(syn_token.r#type, err)
+                } else {
+                    match syn_token.r#type {
+                        Syn::Reference(ref name) => {
+                            match self.symbol_definitions.get(name) {
+                                Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i),
+                                Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i),
+                                None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
+                            }
+                        }
+                        Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))},
+                        Syn::MacroDefinition(name) => {
+                            let mut sem_body_tokens = Vec::new();
+                            for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() {
+                                // Make the source location of the macro definition token span the entire definition
+                                if syn_body_token.is_macro_terminator() {
+                                    syn_token.source_location.end = syn_body_token.source_location.start;
+                                }
+                                let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0);
+                                sem_body_tokens.push(sem_body_token);
+                            }
+                            self.semantic_macro_bodies.insert(index, sem_body_tokens);
+                            Sem::MacroDefinition(MacroDefinition::new(name))
+                        },
+                        Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator,
+                        Syn::Padding(v) => Sem::Padding(v),
+                        Syn::ByteLiteral(v) => Sem::ByteLiteral(v),
+                        Syn::ShortLiteral(v) => Sem::ShortLiteral(v),
+                        Syn::Instruction(v) => Sem::Instruction(v),
+                        Syn::Comment => Sem::Comment,
+                    }
+                }
+            },
+            source_location: syn_token.source_location,
+            bytecode_location: BytecodeLocation::zero(),
+        }
+    }
+
+    fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) {
+        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
+        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
+        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
+
+        for body_token in self.semantic_macro_bodies.get(&index).unwrap() {
+            let start_addr = bytecode.len() as u16;
+            match &body_token.r#type {
+                Sem::LabelReference(i) => {
+                    reference_addresses.entry(*i).or_default().push(start_addr);
+                    push_u16!(0u16);
+                },
+                Sem::MacroReference(i) => {
+                    self.expand_macro_reference(*i, bytecode, reference_addresses);
+                },
+                Sem::LabelDefinition(_) => unreachable!(),
+                Sem::MacroDefinition(_) => unreachable!(),
+
+                Sem::Padding(p) => pad!(*p),
+                Sem::ByteLiteral(b) => push_u8!(*b),
+                Sem::ShortLiteral(s) => push_u16!(*s),
+                Sem::Instruction(b) => push_u8!(*b),
+
+                Sem::MacroDefinitionTerminator => (),
+                Sem::Comment => (),
+                Sem::Error(..) => (),
+            };
+        }
+    }
+}
diff --git a/src/error.rs b/src/error.rs
index efbfc4f..8a6c0d6 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,9 +1,10 @@
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone)]
 pub enum Error {
     UnresolvedReference,
     DuplicateDefinition,
-    InvalidHexadecimalLiteral,
+    InvalidPaddingValue,
     InvalidTypeInMacroDefinition,
-    OrphanedMacroTerminator,
-    TokenPastEndOfProgram,
+    OrphanedMacroDefinitionTerminator,
+    CyclicMacroReference,
 }
+
diff --git a/src/lib.rs b/src/lib.rs
index 5d84600..a657354 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,330 +3,19 @@ mod syntactic_token;
 mod semantic_token;
 mod tokenizer;
 mod error;
-
-pub use addressing::*;
-pub use syntactic_token::*;
-pub use semantic_token::*;
-pub use error::*;
-pub use tokenizer::*;
-
-use std::collections::hash_map::{HashMap, Entry};
-use std::mem::take;
-
-// On Unicode support: Work with characters, not bytes. This will eventually be
-// used in Verdant and Doctrine, and it'd be nice to be able to support other languages.
-// The only reason to work with bytes over characters would be for a minor decrease in complexity.
-// Only support the assembly of files of up to 64kB. If assets need to be tacked on the end,
-// it can be done by another program. The VM will only be able to access the first 64kB of a file anyway.
-// Treat \t as a space, have it be a single character.
-
-// First, turn the program source code into a vector of SyntacticTokens. These
-// each contain a SourceLocation, and the type and value of the token. Every single
-// non-whitespace character of the program needs to be wrapped in a SyntacticToken.
-// The program source code can be accurately reconstructed from this list of
-// SyntacticTokens, and when I write GRID, if the mouse is hovering over any point
-// in the program listing, I'll be able to determine the exact token that is being hovered.
-// For macros, hovering over any character belonging to a macro definition will
-// highlight the entire macro definition, and also the currently-hovered body token
-// if there is one. Clicking the body token will bring up more information.
-
-// The SyntacticTokens will be collected into a vector, with label and macro definition
-// being constructed as we go. Label definitions are easy, I only need to note down the
-// names of the labels in order to validate label references in a later step. If a label
-// name has already been defined, tag the token with an error. If a macro name has already
-// been defined, tag the token with an error.
-// Collect children into macro definitions. This makes sense.
-
-// Step 2 is to generate bytecode, converting SyntacticTokens into SemanticTokens.
-// Label and macro definitions need to contain a list of usizes to references.
-// Macro definitions need to contain the body tokens as SemanticTokens.
-// Label and macro references need to point to their parents.
-// Can I stream-convert tokens from Syntactic to Semantic?
-// Each SynToken gets converted to a SemToken? Yeah.
-
-// I want to change the parser to be a multi-stage struct thing, holding its own state.
-
-enum SymbolDefinition { Macro(usize), Label(usize) }
-
-pub fn parse(source_code: &str) {
-    use SyntacticTokenType as Syn;
-    use SemanticTokenType as Sem;
-
-    // ============================ STEP 1 ============================
-    // Convert the source code into a sorted vector of syntactic tokens and a
-    // map of symbol definitions.
-    // ================================================================
-    println!("[DEBUG] STEP 1: Parse source code into syntactic tokens");
-    let mut syntactic_tokens: Vec<SyntacticToken> = Vec::new();
-    let mut symbol_definitions: HashMap<String, SymbolDefinition> = HashMap::new();
-    let mut macro_bodies: HashMap<usize, Vec<SyntacticToken>> = HashMap::new();
-    let mut macro_definition: Option<usize> = None;
-    let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();
-
-    for mut token in TokenIterator::from_str(source_code) {
-        if let Some(mdt) = macro_definition {
-            token.use_in_macro_body();
-            let terminate = token.is_macro_terminator();
-            macro_definition_body_tokens.push(token);
-            if terminate {
-                macro_bodies.insert(mdt, take(&mut macro_definition_body_tokens));
-                macro_definition = None;
-            }
-        } else {
-            if let Syn::MacroDefinition(ref name) = token.r#type {
-                macro_definition = Some(syntactic_tokens.len());
-                match symbol_definitions.entry(name.to_string()) {
-                    Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
-                    Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(syntactic_tokens.len()));}
-                }
-            } else if let Syn::LabelDefinition(ref name) = token.r#type {
-                match symbol_definitions.entry(name.to_string()) {
-                    Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
-                    Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(syntactic_tokens.len()));}
-                }
-            } else if token.is_macro_terminator() {
-                token.set_error(Error::OrphanedMacroTerminator);
-            }
-            syntactic_tokens.push(token);
-        }
-    }
-
-
-    // ============================ STEP 2 ============================
-    // Convert syntactic tokens into semantic tokens, resolving label and macro
-    // references in the process.
-    // ================================================================
-    println!("[DEBUG] STEP 2: Resolve label and macro references");
-    let syntactic_token_count = syntactic_tokens.len();
-    let mut semantic_tokens = Vec::new();
-    let mut semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>> = HashMap::new();
-
-    for (i, mut syn_token) in syntactic_tokens.into_iter().enumerate() {
-        let sem_token_type = if let Some(err) = syn_token.error {
-            // Translate over any existing syntax errors
-            Sem::Error(syn_token.r#type, err)
-        } else {
-            match syn_token.r#type {
-                Syn::Reference(ref name) => {
-                    match symbol_definitions.get(name) {
-                        Some(SymbolDefinition::Macro(addr)) => Sem::MacroReference(*addr),
-                        Some(SymbolDefinition::Label(addr)) => Sem::LabelReference(*addr),
-                        None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
-                    }
-                }
-                Syn::LabelDefinition(name) => {
-                    let label_definition = LabelDefinition {
-                        name, address: 0, references: Vec::new() };
-                    Sem::LabelDefinition(label_definition)
-                }
-                Syn::MacroDefinition(name) => {
-                    let mut sem_body_tokens = Vec::new();
-                    // Iterate over every token in the body of the macro definition,
-                    // converting each one to a semantic token.
-                    for syn_body_token in macro_bodies.remove(&i).unwrap() {
-                        let sem_body_token_type = if let Some(err) = syn_body_token.error {
-                            // Translate over any existing syntax errors
-                            Sem::Error(syn_body_token.r#type, err)
-                        } else {
-                            match syn_body_token.r#type {
-                                Syn::Reference(ref name) => match symbol_definitions.get(name) {
-                                    Some(SymbolDefinition::Macro(addr)) => Sem::MacroReference(*addr),
-                                    Some(SymbolDefinition::Label(addr)) => Sem::LabelReference(*addr),
-                                    None => Sem::Error(syn_body_token.r#type, Error::UnresolvedReference),
-                                },
-
-                                Syn::LabelDefinition(_) => unreachable!(),
-                                Syn::MacroDefinition(_) => unreachable!(),
-                                Syn::MacroTerminator => {
-                                    syn_token.source_location.end =
-                                        syn_body_token.source_location.end;
-                                    Sem::MacroTerminator
-                                },
-
-                                Syn::Pad(v) => Sem::Pad(v),
-                                Syn::Byte(v) => Sem::Byte(v),
-                                Syn::Short(v) => Sem::Short(v),
-                                Syn::Instruction(v) => Sem::Instruction(v),
-
-                                Syn::Comment => Sem::Comment,
-                            }
-                        };
-                        let sem_body_token = SemanticToken {
-                            r#type: sem_body_token_type,
-                            source_location: syn_body_token.source_location,
-                            bytecode_location: BytecodeLocation::zero(),
-                        };
-                        sem_body_tokens.push(sem_body_token);
-                    }
-                    semantic_macro_bodies.insert(i, sem_body_tokens);
-                    let macro_definition = MacroDefinition {
-                        name, body_tokens: Vec::new(), references: Vec::new() };
-                    Sem::MacroDefinition(macro_definition)
-                }
-                Syn::MacroTerminator => unreachable!(),
-
-                Syn::Pad(v) => Sem::Pad(v),
-                Syn::Byte(v) => Sem::Byte(v),
-                Syn::Short(v) => Sem::Short(v),
-                Syn::Instruction(v) => Sem::Instruction(v),
-
-                Syn::Comment => Sem::Comment,
-            }
-        };
-        let sem_token = SemanticToken {
-            r#type: sem_token_type,
-            source_location: syn_token.source_location,
-            bytecode_location: BytecodeLocation::zero(),
-        };
-        semantic_tokens.push(sem_token);
-    }
-    assert_eq!(syntactic_token_count, semantic_tokens.len());
-
-
-    // ============================ STEP 3 ============================
-    // Iterate over each semantic token, generating bytecode.
-    // ================================================================
-    println!("[DEBUG] STEP 3: Generate bytecode");
-    let mut bytecode: Vec<u8> = Vec::new();
-    // Map each label token to a list of bytecode addresses to populate
-    let mut label_reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
-    // Map each label or macro definition token to a list of reference token pointers
-    let mut references: HashMap<usize, Vec<usize>> = HashMap::new();
-
-    macro_rules! addr {() => {bytecode.len() as u16};}
-    macro_rules! push_u8 {($v:expr) => {bytecode.push($v); 1};}
-    macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&$v.to_be_bytes()); 2};}
-    macro_rules! pad {($p:expr) => {bytecode.resize(bytecode.len() + $p as usize, 0); $p as u16};}
-
-    for (i, sem_token) in semantic_tokens.iter_mut().enumerate() {
-        let start_addr = addr!();
-        let byte_length: u16 = match &mut sem_token.r#type {
-            Sem::LabelReference(addr) => {
-                references.entry(*addr).or_default().push(i);
-                label_reference_addresses.entry(*addr).or_default().push(addr!());
-                push_u16!(0u16); 2
-            },
-            Sem::MacroReference(addr) => {
-                references.entry(*addr).or_default().push(i);
-                let mut macro_byte_length: u16 = 0;
-                for body_token in semantic_macro_bodies.get(addr).unwrap() {
-                    macro_byte_length += match &body_token.r#type {
-                        Sem::LabelReference(addr) => {
-                            label_reference_addresses.entry(*addr).or_default().push(addr!());
-                            push_u16!(0u16); 2
-                        },
-                        Sem::MacroReference(_) => todo!(),
-
-                        Sem::LabelDefinition(_) => unreachable!(),
-                        Sem::MacroDefinition(_) => unreachable!(),
-
-                        Sem::Pad(p) => { pad!(*p); *p },
-                        Sem::Byte(b) => { push_u8!(*b); 1 },
-                        Sem::Short(s) => { push_u16!(*s); 2 },
-                        Sem::Instruction(b) => { push_u8!(*b); 1 },
-
-                        Sem::MacroTerminator => 0,
-                        Sem::Comment => 0,
-                        Sem::Error(..) => 0,
-                    };
-                }
-                macro_byte_length
-            },
-
-            Sem::LabelDefinition(definition) => {definition.address=addr!(); 1},
-            Sem::MacroDefinition(_) => 0,
-
-            Sem::Pad(p) => { pad!(*p); *p },
-            Sem::Byte(b) => { push_u8!(*b); 1 },
-            Sem::Short(s) => { push_u16!(*s); 2 },
-            Sem::Instruction(b) => { push_u8!(*b); 1 },
-
-            Sem::MacroTerminator => unreachable!(),
-            Sem::Comment => 0,
-            Sem::Error(..) => 0,
-        };
-        sem_token.bytecode_location.start = start_addr;
-        sem_token.bytecode_location.length = byte_length;
-    }
-
-
-    // ============================ STEP 4 ============================
-    // Fill in addresses for label references.
-    // ================================================================
-    println!("[DEBUG] STEP 4: Fill in values for label references");
-    for (label_i, slots) in label_reference_addresses.iter() {
-        if let Sem::LabelDefinition(LabelDefinition { address, .. }) = semantic_tokens[*label_i].r#type {
-            let [h,l] = address.to_be_bytes();
-            for slot in slots {
-                bytecode[*slot as usize] = h;
-                bytecode[slot.wrapping_add(1) as usize] = l;
-            }
-        } else {
-            unreachable!()
-        }
-    }
-
-    // ============================ STEP 5 ============================
-    // Move references and macro body tokens into label and macro definitions.
-    // ================================================================
-    println!("[DEBUG] STEP 5: Move information into label and macro definition tokens");
-    for (i, token) in semantic_tokens.iter_mut().enumerate() {
-        if let Sem::MacroDefinition(macro_definition) = &mut token.r#type {
-            macro_definition.body_tokens = semantic_macro_bodies.remove(&i).unwrap();
-            if let Some(macro_references) = references.remove(&i) {
-                macro_definition.references = macro_references;
-            }
-        } else if let Sem::LabelDefinition(label_definition) = &mut token.r#type {
-            if let Some(label_references) = references.remove(&i) {
-                label_definition.references = label_references;
-            }
-        }
-    }
-    assert_eq!(references.len(), 0);
-
-
-    // ============================ STEP 6 ============================
-    // Remove trailing null-bytes from the bytecode.
-    // ================================================================
-    println!("[DEBUG] STEP 6: Trim trailing null bytes");
-    if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
-        let truncated_length = final_nonnull_byte + 1;
-        let removed_byte_count = bytecode.len() - truncated_length;
-        if removed_byte_count > 0 {
-            println!("[INFO] Removed {removed_byte_count} trailing null bytes from assembled bytecode");
-            bytecode.truncate(truncated_length);
-        }
-    }
-
-
-    for token in &semantic_tokens {
-        if let Sem::MacroDefinition(macro_definition) = &token.r#type {
-            for body_token in &macro_definition.body_tokens {
-                if let Sem::Error(_, err) = body_token.r#type {
-                    println!("[ERROR] (in macro '{}') {err:?} at {}:{}..{}:{}",
-                        macro_definition.name,
-                        body_token.source_location.start.line,
-                        body_token.source_location.start.column,
-                        body_token.source_location.end.line,
-                        body_token.source_location.end.column,
-                    )
-                }
-            }
-        } else if let Sem::Error(_, err) = token.r#type {
-            println!("[ERROR {}:{}-{}:{}] {err:?}",
-                token.source_location.start.line,
-                token.source_location.start.column,
-                token.source_location.end.line,
-                token.source_location.end.column,
-            )
-        }
-    }
-
-    println!("");
-    print!("Generated bytecode: [ ");
-    for i in &bytecode {
-        print!("{i:02x} ");
-    }
-    println!("]");
+mod assembler;
+
+pub use addressing::{CharAddress, SourceLocation, BytecodeLocation};
+pub use syntactic_token::{SyntacticToken, SyntacticTokenType};
+pub use semantic_token::{SemanticToken, SemanticTokenType, LabelDefinition, MacroDefinition};
+pub use error::Error;
+pub use tokenizer::TokenIterator;
+pub use assembler::Assembler;
+
+pub fn assemble(source_code: &str) -> (Vec<u8>, Vec<SemanticToken>) {
+    let mut assembler = Assembler::new();
+    assembler.tokenise_source(source_code);
+    assembler.resolve_references();
+    assembler.generate_bytecode()
+}
diff --git a/src/main.rs b/src/main.rs
index 82bd92d..c7d3590 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,41 +1,33 @@
-const SOURCE:&'static str =
-// "
-// %SCREEN-SHUNT 00;
-// %SCREEN-DRAW 00;
-
-// @draw_horizontal_line ( len* clr -- )
-//     (1) PSHr                    ( len* | clr )
-//     (4) PSH:41 STD:SCREEN-SHUNT ( len* | clr )
-//     &loop
-//     (2) SKDr:SCREEN-DRAW        ( len* | clr )
-//     (4) DEC* JKC*:~loop         ( len* | clr )
-//     (2) STD:SCREEN-SHUNT
-//     (3) POP POPr JMPr*
-// ";
-
-"
-%RED 1234;
-%GREEN 5678 @test;
-%BLUE 9ABC;
-
-@start
-    RED
-    start
-    GREEN
-    BLUE
-    $4
-    @end
-";
-
+use std::io::{Read, Write};
 use bedrock_asm::*;
 
 fn main() {
-    println!("------- PROGRAM START -------");
-    for line in SOURCE.lines() {
-        println!("{line}");
-    }
-    println!("-------- PROGRAM END --------");
-    println!();
+    // Read source code from standard input
+    let mut source_code = String::new();
+    let mut stdin = std::io::stdin().lock();
+    if let Err(err) = stdin.read_to_string(&mut source_code) {
+        eprintln!("Could not read from standard input, quitting.");
+        eprintln!("({err:?})");
+        std::process::exit(1);
+    };
+
+    let (bytecode, tokens) = assemble(&source_code);
+    for token in &tokens {
+        token.print_error(&source_code);
     }
+    eprintln!("Assembled program in {} bytes.", bytecode.len());
+    let bytecode_len = bytecode.len();
 
-    parse(SOURCE);
+    // Write bytecode to standard output
+    let mut stdout = std::io::stdout().lock();
+    match stdout.write(&bytecode) {
+        Ok(len) => if len != bytecode_len {
+            eprintln!("Only wrote {len} of {bytecode_len} bytes")
+        }
+        Err(err) => {
+            eprintln!("Could not write to standard output, quitting.");
+            eprintln!("({err:?})");
+            std::process::exit(1);
+        }
+    }
 }
+
diff --git a/src/semantic_token.rs b/src/semantic_token.rs
index cac82a9..3d08b25 100644
--- a/src/semantic_token.rs
+++ b/src/semantic_token.rs
@@ -7,12 +7,12 @@
     LabelDefinition(LabelDefinition),
     MacroDefinition(MacroDefinition),
 
-    Pad(u16),
-    Byte(u8),
-    Short(u16),
+    Padding(u16),
+    ByteLiteral(u8),
+    ShortLiteral(u16),
     Instruction(u8),
 
-    MacroTerminator,
+    MacroDefinitionTerminator,
     Comment,
     Error(SyntacticTokenType, Error),
 }
@@ -23,16 +23,82 @@ pub struct SemanticToken {
     pub bytecode_location: BytecodeLocation,
 }
 
+impl SemanticToken {
+    pub fn print_error(&self, source_code: &str) {
+        macro_rules! red {()=>{eprint!("\x1b[31m")};}
+        macro_rules! normal {()=>{eprint!("\x1b[0m")};}
+
+        if let SemanticTokenType::Error(token, error) = &self.r#type {
+            red!(); eprint!("[ERROR] "); normal!();
+            let source = &self.source_location.source;
+            match error {
+                Error::UnresolvedReference => {
+                    eprintln!("Unresolved reference, no label or macro has been defined with the name '{source}'") }
+                Error::DuplicateDefinition => {
+                    eprintln!("Duplicate definition, a label or macro has already been defined with the name '{source}'") }
+                Error::OrphanedMacroDefinitionTerminator => {
+                    eprintln!("Unmatched macro definition terminator, no macro definition is in progress") }
+                Error::InvalidPaddingValue => {
+                    eprintln!("Invalid value for padding, the value must be at least one and at most four hexadecimal characters") }
+                Error::CyclicMacroReference => {
+                    eprintln!("Cyclic macro reference, this macro reference contains a reference to the macro being defined") }
+                Error::InvalidTypeInMacroDefinition => {
+                    let name = match token {
+                        SyntacticTokenType::Reference(_) => "references",
+                        SyntacticTokenType::LabelDefinition(_) => "label definitions",
+                        SyntacticTokenType::MacroDefinition(_) => "macro definitions",
+                        SyntacticTokenType::MacroDefinitionTerminator => "macro definition terminators",
+                        SyntacticTokenType::Padding(_) => "padding",
+                        SyntacticTokenType::ByteLiteral(_) => "byte literals",
+                        SyntacticTokenType::ShortLiteral(_) => "short literals",
+                        SyntacticTokenType::Instruction(_) => "instructions",
+                        SyntacticTokenType::Comment => "comments",
+                    };
+                    eprintln!("Invalid token in macro definition, macro definitions are not allowed to contain {name}") }
+            }
+            let line = source_code.split('\n').nth(self.source_location.start.line).unwrap();
+            eprint!("{:>5} ", self.source_location.start.line+1);
+            red!(); eprint!("| "); normal!();
+            for (i, c) in line.chars().enumerate() {
+                if i == self.source_location.start.column { red!() }
+                eprint!("{c}");
+                if i == self.source_location.end.column { normal!() }
+            }
+            eprintln!(); red!(); eprint!("      | ");
+            for i in 0..=self.source_location.end.column {
+                if i < self.source_location.start.column { eprint!(" ") } else { eprint!("^") };
+            }
+            normal!(); eprintln!();
+        }
+        if let SemanticTokenType::MacroDefinition(definition) = &self.r#type {
+            for token in &definition.body_tokens {
+                token.print_error(source_code);
+            }
+        }
+    }
+}
+
 pub struct LabelDefinition {
     pub name: String,
     pub address: u16,
     /// A list of pointers to label reference tokens
     pub references: Vec<usize>,
 }
+impl LabelDefinition {
+    pub fn new(name: String) -> Self {
+        Self { name, address:0, references:Vec::new() }
+    }
+}
 
 pub struct MacroDefinition {
     pub name: String,
     pub body_tokens: Vec<SemanticToken>,
     /// A list of pointers to macro reference tokens
     pub references: Vec<usize>,
 }
+impl MacroDefinition {
+    pub fn new(name: String) -> Self {
+        Self { name, body_tokens:Vec::new(), references:Vec::new() }
+    }
+}
diff --git a/src/syntactic_token.rs b/src/syntactic_token.rs
index ee473e2..4a50e8a 100644
--- a/src/syntactic_token.rs
+++ b/src/syntactic_token.rs
@@ -5,16 +5,18 @@
     LabelDefinition(String),
     MacroDefinition(String),
 
-    MacroTerminator,
+    MacroDefinitionTerminator,
 
-    Pad(u16),
-    Byte(u8),
-    Short(u16),
+    Padding(u16),
+    ByteLiteral(u8),
+    ShortLiteral(u16),
     Instruction(u8),
 
     Comment,
 }
 
+
+
 pub struct SyntacticToken {
     pub r#type: SyntacticTokenType,
     pub source_location: SourceLocation,
@@ -32,12 +34,10 @@ impl SyntacticToken {
             _ => (),
         };
     }
-
     pub fn set_error(&mut self, error: Error) {
         self.error = Some(error);
    }
-
    pub fn is_macro_terminator(&self) -> bool {
-        if let SyntacticTokenType::MacroTerminator = self.r#type {true} else {false}
+        if let SyntacticTokenType::MacroDefinitionTerminator = self.r#type {true} else {false}
     }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b68cc14..508daee 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -106,9 +106,9 @@ impl Iterator for TokenIterator {
         let full = take(&mut self.source);
         let suffix = take(&mut self.suffix);
         let mut error = None;
-        let mut parse_hex_lit = |v| {
+        let mut parse_padding_value = |v| {
             parse_short(v).or_else(|| {
-                error = Some(Error::InvalidHexadecimalLiteral); Some(0)
+                error = Some(Error::InvalidPaddingValue); Some(0)
             }).unwrap()
         };
 
@@ -116,13 +116,13 @@
             '(' => { SyntacticTokenType::Comment }
             '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) }
             '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) }
-            '$' => { SyntacticTokenType::Pad(parse_hex_lit(&suffix)) }
+            '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) }
             '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) }
             '%' => { SyntacticTokenType::MacroDefinition(suffix) }
             _ => {
-                if ";" == &full { SyntacticTokenType::MacroTerminator }
-                else if let Some(value) = parse_byte_lit(&full) { SyntacticTokenType::Byte(value) }
-                else if let Some(value) = parse_short_lit(&full) { SyntacticTokenType::Short(value) }
+                if ";" == &full { SyntacticTokenType::MacroDefinitionTerminator }
+                else if let Some(value) = parse_byte_lit(&full) { SyntacticTokenType::ByteLiteral(value) }
+                else if let Some(value) = parse_short_lit(&full) { SyntacticTokenType::ShortLiteral(value) }
                 else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) }
                 else { SyntacticTokenType::Reference(full.clone()) }
             }
-- 
cgit v1.2.3-70-g09d2
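
The new `assemble` function is the whole pipeline in one call: tokenise the source, resolve label and macro references (including the new cyclic-macro check), then generate bytecode. A minimal sketch of driving it as a library, assuming the crate is pulled in as a dependency named `bedrock_asm`; the source string uses the macro, label, and padding syntax from the example program this commit removes from main.rs:

    use bedrock_asm::assemble;

    fn main() {
        // One macro definition, one label definition, one macro
        // reference, and two bytes of padding.
        let source = "%RED 1234;\n@start RED $2";
        let (bytecode, tokens) = assemble(source);
        // Any token that was tagged with an error reports itself to stderr.
        for token in &tokens {
            token.print_error(source);
        }
        eprintln!("Assembled program in {} bytes.", bytecode.len());
    }

The binary wires the same call to the standard streams, so an invocation along the lines of `bedrock-asm < program.asm > program.rom` (the installed binary name is an assumption) assembles a file while keeping all diagnostics on standard error.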