diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/addressing.rs | 44 | ||||
| -rw-r--r-- | src/assembler.rs | 278 | ||||
| -rw-r--r-- | src/bin/br-asm.rs | 8 | ||||
| -rw-r--r-- | src/error.rs | 10 | ||||
| -rw-r--r-- | src/formats/clang.rs | 10 | ||||
| -rw-r--r-- | src/formats/mod.rs | 23 | ||||
| -rw-r--r-- | src/lib.rs | 258 | ||||
| -rw-r--r-- | src/main.rs | 43 | ||||
| -rw-r--r-- | src/semantic_token.rs | 116 | ||||
| -rw-r--r-- | src/stages/bytecode.rs | 150 | ||||
| -rw-r--r-- | src/stages/bytecode_tokens.rs | 37 | ||||
| -rw-r--r-- | src/stages/compiler.rs | 84 | ||||
| -rw-r--r-- | src/stages/mod.rs | 26 | ||||
| -rw-r--r-- | src/stages/semantic.rs | 154 | ||||
| -rw-r--r-- | src/stages/semantic_tokens.rs | 97 | ||||
| -rw-r--r-- | src/stages/syntactic.rs | 211 | ||||
| -rw-r--r-- | src/stages/syntactic_tokens.rs | 94 | ||||
| -rw-r--r-- | src/syntactic_token.rs | 43 | ||||
| -rw-r--r-- | src/tokenizer.rs | 239 | ||||
| -rw-r--r-- | src/types/instruction.rs | 168 | ||||
| -rw-r--r-- | src/types/mod.rs | 4 | ||||
| -rw-r--r-- | src/types/value.rs | 48 |
22 files changed, 1355 insertions, 790 deletions
diff --git a/src/addressing.rs b/src/addressing.rs deleted file mode 100644 index dd7638e..0000000 --- a/src/addressing.rs +++ /dev/null @@ -1,44 +0,0 @@ -#[derive(Clone,Copy)] -pub struct CharAddress { - /// The number of lines that precede this line in the file. - pub line:usize, - /// The number of characters that precede this character in the line. - pub column:usize, -} -impl CharAddress { - pub fn new(line:usize, column:usize) -> Self { - Self { line, column } - } - pub fn zero() -> Self { - Self::new(0,0) - } -} - -pub struct SourceLocation { - /// The slice of the source file from which this token was parsed. - pub source: String, - /// The address of the first character of this token. - pub start: CharAddress, - /// The address of the final character of this token. - pub end: CharAddress -} -impl SourceLocation { - pub fn new(source:String, start:CharAddress, end:CharAddress) -> Self { - Self { source, start, end } - } - pub fn zero() -> Self { - Self { source:String::new(), start:CharAddress::zero(), end:CharAddress::zero() } - } -} - -pub struct BytecodeLocation { - /// The number of bytes that precede this byte sequence in the bytecode. - pub start: u16, - /// The length of this byte sequence, in bytes. - pub length: u16, -} -impl BytecodeLocation { - pub fn zero() -> Self { - Self { start:0, length:0 } - } -} diff --git a/src/assembler.rs b/src/assembler.rs deleted file mode 100644 index 692eb14..0000000 --- a/src/assembler.rs +++ /dev/null @@ -1,278 +0,0 @@ -use std::mem::take; -use std::collections::hash_map::Entry; - -use SyntacticTokenType as Syn; -use SemanticTokenType as Sem; -use crate::*; - -use std::collections::HashMap; - -/// The inner value is the index of the token that defines this symbol. -pub enum SymbolDefinition { - Macro(usize), - Label(usize), -} - -pub struct Assembler { - /// The contents of the program as a list of syntactic tokens. - syntactic_tokens: Vec<SyntacticToken>, - /// The contents of the program as a list of semantic tokens. - semantic_tokens: Vec<SemanticToken>, - /// Map the name of each defined symbol to the index of the defining token. - symbol_definitions: HashMap<String, SymbolDefinition>, - /// Map each macro definition token index to a list of syntactic body tokens. - syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>, - /// Map each macro definition token index to a list of semantic body tokens. - semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>, -} - -impl Assembler { - pub fn new() -> Self { - Self { - syntactic_tokens: Vec::new(), - semantic_tokens: Vec::new(), - symbol_definitions: HashMap::new(), - syntactic_macro_bodies: HashMap::new(), - semantic_macro_bodies: HashMap::new(), - } - } - - pub fn tokenise_source(&mut self, source_code: &str) { - // The index of the current macro definition token - let mut macro_definition: Option<usize> = None; - let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new(); - - for mut token in TokenIterator::from_str(source_code) { - let next_index = self.syntactic_tokens.len(); - if let Some(index) = macro_definition { - token.use_in_macro_body(); - if token.is_macro_terminator() { - // Commit the current macro definition - macro_definition_body_tokens.push(token); - self.syntactic_macro_bodies.insert( - index, take(&mut macro_definition_body_tokens)); - macro_definition = None; - } else { - macro_definition_body_tokens.push(token); - } - } else { - if let Syn::MacroDefinition(ref name) = token.r#type { - macro_definition = Some(next_index); - match self.symbol_definitions.entry(name.to_string()) { - Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);} - Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));} - } - } else if let Syn::LabelDefinition(ref name) = token.r#type { - match self.symbol_definitions.entry(name.to_string()) { - Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);} - Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));} - } - } else if token.is_macro_terminator() { - token.set_error(Error::OrphanedMacroDefinitionTerminator); - } - self.syntactic_tokens.push(token); - } - } - } - - pub fn resolve_references(&mut self) { - let syntactic_tokens = take(&mut self.syntactic_tokens); - let syntactic_token_count = syntactic_tokens.len(); - let mut parent_label = None; - - for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() { - if let SyntacticTokenType::LabelDefinition(name) = &syntactic_token.r#type { - parent_label = Some(name.to_owned()); - } - let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index, parent_label.clone()); - self.semantic_tokens.push(semantic_token); - } - assert_eq!(syntactic_token_count, self.semantic_tokens.len()); - - // Find all cyclic macros - let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter( - |i| !self.traverse_macro_definition(*i, 0)).collect(); - // Replace each cyclic macro reference in a macro definition with an error - for body_tokens in &mut self.semantic_macro_bodies.values_mut() { - for body_token in body_tokens { - if let Sem::MacroReference(i) = body_token.r#type { - if cyclic_macros.contains(&i) { - let name = body_token.source_location.source.clone(); - body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference); - } - } - } - } - - } - - /// Attempt to recursively traverse the body tokens of a macro definition, returning - /// false if the depth exceeds a preset maximum, and returning true otherwise. - fn traverse_macro_definition(&self, index: usize, level: usize) -> bool { - if level == 16 { - false - } else { - self.semantic_macro_bodies[&index].iter().all( - |token| if let Sem::MacroReference(i) = token.r#type { - self.traverse_macro_definition(i, level+1) - } else { - true - } - ) - } - } - - pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) { - let mut bytecode: Vec<u8> = Vec::new(); - // Map each label definition token index to the bytecode addresses of the references - let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new(); - // Map each label and macro definition token to a list of reference token indices - let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new(); - - macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};} - macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};} - macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};} - - let mut semantic_tokens = take(&mut self.semantic_tokens); - - // Translate semantic tokens into bytecode - for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() { - let start_addr = bytecode.len() as u16; - match &mut semantic_token.r#type { - Sem::LabelReference(i) => { - reference_tokens.entry(*i).or_default().push(index); - reference_addresses.entry(*i).or_default().push(start_addr); - push_u16!(0); - } - Sem::MacroReference(i) => { - reference_tokens.entry(*i).or_default().push(index); - self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses); - } - Sem::LabelDefinition(def) => def.address=start_addr, - Sem::MacroDefinition(_) => (), - - Sem::Padding(p) => pad!(*p), - Sem::ByteLiteral(b) => push_u8!(*b), - Sem::ShortLiteral(s) => push_u16!(*s), - Sem::Instruction(b) => push_u8!(*b), - - Sem::MacroDefinitionTerminator => unreachable!(), - Sem::Comment => (), - Sem::Error(..) => (), - }; - let end_addr = bytecode.len() as u16; - semantic_token.bytecode_location.start = start_addr; - semantic_token.bytecode_location.length = end_addr - start_addr; - } - - // Fill each label reference with the address of the matching label definition - for (index, slots) in reference_addresses { - if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type { - let [h,l] = definition.address.to_be_bytes(); - for slot in slots { - bytecode[slot as usize] = h; - bytecode[slot.wrapping_add(1) as usize] = l; - } - } else { unreachable!() } - } - - // Move references and macro body tokens into label and macro definition tokens - for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() { - if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type { - definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap(); - if let Some(references) = reference_tokens.remove(&index) { - definition.references = references; - } - } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type { - if let Some(references) = reference_tokens.remove(&index) { - definition.references = references; - } - } - } - assert_eq!(reference_tokens.len(), 0); - - // Remove trailing null bytes from the bytecode - if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) { - let truncated_length = final_nonnull_byte + 1; - let removed_byte_count = bytecode.len() - truncated_length; - if removed_byte_count > 0 { - bytecode.truncate(truncated_length); - } - } - - (bytecode, semantic_tokens) - } - - fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize, parent_label: Option<String>) -> SemanticToken { - SemanticToken { - r#type: { - if let Some(err) = syn_token.error { - Sem::Error(syn_token.r#type, err) - } else { - match syn_token.r#type { - Syn::Reference(ref name) => { - match self.symbol_definitions.get(name) { - Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i), - Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i), - None => Sem::Error(syn_token.r#type, Error::UnresolvedReference), - } - } - Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))}, - Syn::MacroDefinition(name) => { - let mut sem_body_tokens = Vec::new(); - for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() { - // Make the source location of the macro definition token span the entire definition - if syn_body_token.is_macro_terminator() { - syn_token.source_location.end = syn_body_token.source_location.start; - } - let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0, parent_label.clone()); - sem_body_tokens.push(sem_body_token); - } - self.semantic_macro_bodies.insert(index, sem_body_tokens); - Sem::MacroDefinition(MacroDefinition::new(name)) - }, - Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator, - Syn::Padding(v) => Sem::Padding(v), - Syn::ByteLiteral(v) => Sem::ByteLiteral(v), - Syn::ShortLiteral(v) => Sem::ShortLiteral(v), - Syn::Instruction(v) => Sem::Instruction(v), - Syn::Comment => Sem::Comment, - } - } - }, - source_location: syn_token.source_location, - bytecode_location: BytecodeLocation::zero(), - parent_label, - } - } - - fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) { - macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};} - macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};} - macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};} - - for body_token in self.semantic_macro_bodies.get(&index).unwrap() { - let start_addr = bytecode.len() as u16; - match &body_token.r#type { - Sem::LabelReference(i) => { - reference_addresses.entry(*i).or_default().push(start_addr); - push_u16!(0u16); - }, - Sem::MacroReference(i) => { - self.expand_macro_reference(*i, bytecode, reference_addresses); - }, - Sem::LabelDefinition(_) => unreachable!(), - Sem::MacroDefinition(_) => unreachable!(), - - Sem::Padding(p) => pad!(*p), - Sem::ByteLiteral(b) => push_u8!(*b), - Sem::ShortLiteral(s) => push_u16!(*s), - Sem::Instruction(b) => push_u8!(*b), - - Sem::MacroDefinitionTerminator => (), - Sem::Comment => (), - Sem::Error(..) => (), - }; - } - } -} diff --git a/src/bin/br-asm.rs b/src/bin/br-asm.rs new file mode 100644 index 0000000..e7a9230 --- /dev/null +++ b/src/bin/br-asm.rs @@ -0,0 +1,8 @@ +use bedrock_asm::*; +use switchboard::*; + + +fn main() { + let args = Switchboard::from_env(); + assemble(args, "br-asm"); +} diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index 8a6c0d6..0000000 --- a/src/error.rs +++ /dev/null @@ -1,10 +0,0 @@ -#[derive(Clone)] -pub enum Error { - UnresolvedReference, - DuplicateDefinition, - InvalidPaddingValue, - InvalidTypeInMacroDefinition, - OrphanedMacroDefinitionTerminator, - CyclicMacroReference, -} - diff --git a/src/formats/clang.rs b/src/formats/clang.rs new file mode 100644 index 0000000..524b501 --- /dev/null +++ b/src/formats/clang.rs @@ -0,0 +1,10 @@ +pub fn format_clang(bytecode: &[u8]) -> Vec<u8> { + let mut output = String::new(); + for chunk in bytecode.chunks(16) { + for byte in chunk { + output.push_str(&format!("0x{byte:02X}, ")); + } + output.push('\n'); + } + return output.into_bytes(); +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs new file mode 100644 index 0000000..79b1c51 --- /dev/null +++ b/src/formats/mod.rs @@ -0,0 +1,23 @@ +mod clang; +pub use clang::*; + +use crate::*; + + +#[derive(Clone, Copy, PartialEq)] +pub enum Format { + Raw, + Source, + Clang, +} + +impl Format { + pub fn from_str(string: &str) -> Self { + match string { + "raw" => Self::Raw, + "source" => Self::Source, + "c" => Self::Clang, + _ => fatal!("Unknown format '{string}', expected 'raw', 'c', or 'source'"), + } + } +} @@ -1,21 +1,245 @@ -mod addressing; -mod syntactic_token; -mod semantic_token; -mod tokenizer; -mod error; -mod assembler; +#![feature(path_add_extension)] -pub use addressing::{CharAddress, SourceLocation, BytecodeLocation}; -pub use syntactic_token::{SyntacticToken, SyntacticTokenType}; -pub use semantic_token::{SemanticToken, SemanticTokenType, LabelDefinition, MacroDefinition}; -pub use error::Error; -pub use tokenizer::TokenIterator; -pub use assembler::Assembler; +mod formats; +mod types; +mod stages; +pub use formats::*; +pub use types::*; +pub use stages::*; -pub fn assemble(source_code: &str) -> (Vec<u8>, Vec<SemanticToken>) { - let mut assembler = Assembler::new(); - assembler.tokenise_source(source_code); - assembler.resolve_references(); - assembler.generate_bytecode() +use assembler::*; +use log::*; +use switchboard::*; + +use std::io::Read; +use std::io::Write; + + +pub const RETURN_MODE: u8 = 0x80; +pub const WIDE_MODE: u8 = 0x40; +pub const IMMEDIATE_MODE: u8 = 0x20; + + +pub fn assemble(mut args: Switchboard, invocation: &str) -> ! { + args.named("help").short('h'); + args.named("version"); + args.named("verbose").short('v'); + + if args.get("help").as_bool() { + print_help(invocation); + std::process::exit(0); + } + if args.get("version").as_bool() { + let name = env!("CARGO_PKG_NAME"); + let version = env!("CARGO_PKG_VERSION"); + eprintln!("{name} v{version}"); + eprintln!("Written by Ben Bridle."); + std::process::exit(0); + } + if args.get("verbose").as_bool() { + log::set_log_level(log::LogLevel::Info); + } + + args.positional("source"); + args.positional("destination"); + args.named("extension").default("brc"); + + args.named("no-libs"); + args.named("no-project-libs"); + args.named("no-env-libs"); + args.named("no-truncate"); + + args.named("format").default("raw"); + args.named("dry-run").short('n'); + args.named("tree"); + args.named("with-symbols"); + args.raise_errors(); + + let source_path = args.get("source").as_path_opt().map( + |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); + let destination_path = args.get("destination").as_path_opt(); + let extension = args.get("extension").as_string(); + let opt_extension = Some(extension.as_str()); + + let no_libs = args.get("no-libs").as_bool(); + let no_project_libs = args.get("no-project-libs").as_bool(); + let no_env_libs = args.get("no-env-libs").as_bool(); + let no_truncate = args.get("no-truncate").as_bool(); + + let format = Format::from_str(args.get("format").as_str()); + let dry_run = args.get("dry-run").as_bool(); + let print_tree = args.get("tree").as_bool(); + let export_symbols = args.get("with-symbols").as_bool(); + + // ----------------------------------------------------------------------- + + let mut compiler = new_compiler(); + + if let Some(path) = &source_path { + info!("Reading program source from {path:?}"); + compiler.root_from_path(path).unwrap_or_else(|err| fatal!("{err:?}: {path:?}")); + } else { + let mut source_code = String::new(); + info!("Reading program source from standard input"); + if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { + fatal!("Could not read from standard input\n{err:?}"); + } + compiler.root_from_string(source_code, "<standard input>") + }; + if compiler.error().is_some() && !no_libs && !no_project_libs { + compiler.include_libs_from_parent(opt_extension); + } + if compiler.error().is_some() && !no_libs && !no_env_libs { + compiler.include_libs_from_path_variable("BEDROCK_LIBS", opt_extension); + } + + if print_tree { + compiler.hierarchy().report() + } + if let Some(error) = compiler.error() { + error.report(); + std::process::exit(1); + } + + let merged_source = compiler.get_compiled_source().unwrap_or_else(|error| { + error.report(); + std::process::exit(1); + }); + + if !dry_run && format == Format::Source { + write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); + } + + // ----------------------------------------------------------------------- + + let path = Some("<merged source>"); + let syntactic = match parse_syntactic(&merged_source, path) { + Ok(tokens) => tokens, + Err(errors) => { + report_syntactic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let semantic = match parse_semantic(syntactic) { + Ok(tokens) => tokens, + Err(errors) => { + report_semantic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let program = match generate_bytecode(&semantic) { + Ok(program) => program, + Err(errors) => { + report_bytecode_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let AssembledProgram { mut bytecode, symbols } = program; + + let length = bytecode.len(); + let percentage = (length as f32 / 65536.0 * 100.0).round() as u16; + info!("Assembled program in {length} bytes ({percentage}% of maximum)"); + + if !no_truncate { + // Remove null bytes from end of bytecode. + while let Some(0) = bytecode.last() { + bytecode.pop(); + } + let new_length = bytecode.len(); + let difference = length - new_length; + if difference > 0 { + info!("Truncated program to {new_length} bytes (saved {difference} bytes)"); + } + } + + if !dry_run { + if export_symbols { + if let Some(path) = &destination_path { + let mut symbols_path = path.to_path_buf(); + symbols_path.add_extension("sym"); + let mut symbols_string = String::new(); + for symbol in &symbols { + let address = &symbol.address; + let name = &symbol.name; + let location = &symbol.source.location(); + symbols_string.push_str(&format!( + "{address:04x} {name} {location}\n" + )); + } + match std::fs::write(&symbols_path, symbols_string) { + Ok(_) => info!("Saved symbols to {symbols_path:?}"), + Err(err) => info!("Could not write symbols to {symbols_path:?}\n{err:?}"), + } + } + } + + let bytes = match format { + Format::Raw => bytecode, + Format::Clang => format_clang(&bytecode), + Format::Source => unreachable!("Source output is handled before full assembly"), + }; + write_bytes_and_exit(&bytes, destination_path.as_ref()); + } + std::process::exit(0); } + +fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { + match path { + Some(path) => match std::fs::write(path, bytes) { + Ok(_) => info!("Wrote output to {:?}", path.as_ref()), + Err(err) => fatal!("Could not write to {:?}\n{err:?}", path.as_ref()), + } + None => match std::io::stdout().write_all(bytes) { + Ok(_) => info!("Wrote output to standard output"), + Err(err) => fatal!("Could not write to standard output\n{err:?}"), + } + } + std::process::exit(0); +} + + +fn print_help(invocation: &str) { + eprintln!("\ +Usage: {invocation} [source] [destination] + +Assembler for the Bedrock computer system. + +Usage: + To assemble a Bedrock program from a source file and write to an output + file, run `br-asm [source] [destination]`, where [source] is the path + of the source file and [destination] is the path to write to. + + If [destination] is omitted, the assembled program will be written to + standard output. If [source] is omitted, the program source code will + be read from standard input. + +Environment variables: + BEDROCK_LIBS + A list of colon-separated paths that will be searched to find Bedrock + source code files to use as libraries when assembling a Bedrock program. + If a library file resolves an unresolved symbol in the program being + assembled, the library file will be merged into the program. + +Arguments: + [source] Bedrock source code file to assemble. + [destination] Destination path for assembler output. + +Switches: + --dry-run (-n) Assemble and show errors only, don't write any output + --extension=<ext> File extension to identify source files (default is 'brc') + --format=<fmt> Output format to use for assembled program (default is 'raw') + --no-project-libs Don't search for libraries in the source parent folder + --no-env-libs Don't search for libraries in the BEDROCK_LIBS path variable + --no-libs Combination of --no-project-libs and --no-env-libs + --no-truncate Don't remove trailing zero-bytes from the assembled program + --tree Show a tree diagram of all included library files + --with-symbols Also generate debug symbols file with extension '.sym' + --help (-h) Print this help information + --verbose, (-v) Print additional information + --version Print the program version and exit +"); +} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 11ce42b..0000000 --- a/src/main.rs +++ /dev/null @@ -1,43 +0,0 @@ -use std::io::{Read, Write}; -use bedrock_asm::*; - -fn main() { - // Read source code from standard input - let mut source_code = String::new(); - if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { - eprintln!("Could not read from standard input, quitting."); - eprintln!("({err:?})"); - std::process::exit(1); - }; - - let (bytecode, tokens) = assemble(&source_code); - let mut is_error = false; - for token in &tokens { - if token.print_error(&source_code) { is_error = true }; - } - if !is_error { - for token in &tokens { - if let SemanticTokenType::LabelDefinition(def) = &token.r#type { - if def.references.is_empty() { - eprintln!("Unused label definition: {}", def.name); - } - } - } - eprintln!(); - } - - let byte_count = bytecode.len(); - let byte_percentage = (byte_count as f32 / 65536.0 * 100.0).round() as u16; - eprintln!("Assembled program in {byte_count} bytes ({byte_percentage}% of maximum)."); - - if is_error { - std::process::exit(1) - } - - // Write bytecode to standard output - if let Err(_) = std::io::stdout().write_all(&bytecode) { - eprintln!("Could not write to standard output, quitting."); - std::process::exit(1); - } -} - diff --git a/src/semantic_token.rs b/src/semantic_token.rs deleted file mode 100644 index 265db91..0000000 --- a/src/semantic_token.rs +++ /dev/null @@ -1,116 +0,0 @@ -use crate::*; - -pub enum SemanticTokenType { - LabelReference(usize), - MacroReference(usize), - - LabelDefinition(LabelDefinition), - MacroDefinition(MacroDefinition), - - Padding(u16), - ByteLiteral(u8), - ShortLiteral(u16), - Instruction(u8), - - MacroDefinitionTerminator, - Comment, - Error(SyntacticTokenType, Error), -} - -pub struct SemanticToken { - pub r#type: SemanticTokenType, - pub source_location: SourceLocation, - pub bytecode_location: BytecodeLocation, - pub parent_label: Option<String>, -} - -impl SemanticToken { - /// Returns true if an error was printed. - pub fn print_error(&self, source_code: &str) -> bool { - let mut is_error = false; - macro_rules! red {()=>{eprint!("\x1b[31m")};} - macro_rules! dim {()=>{eprint!("\x1b[0;2m")};} - macro_rules! normal {()=>{eprint!("\x1b[0m")};} - - if let SemanticTokenType::Error(token, error) = &self.r#type { - is_error = true; - - red!(); eprint!("[ERROR] "); normal!(); - let source = &self.source_location.source; - match error { - Error::UnresolvedReference => { - eprintln!("Unresolved reference, no label or macro has been defined with the name '{source}'") } - Error::DuplicateDefinition => { - eprintln!("Duplicate definition, a label or macro has already been defined with the name '{source}'") } - Error::OrphanedMacroDefinitionTerminator => { - eprintln!("Unmatched macro definition terminator, no macro definition is in progress") } - Error::InvalidPaddingValue => { - eprintln!("Invalid value for padding, the value must be at least one and at most four hexadecimal characters") } - Error::CyclicMacroReference => { - eprintln!("Cyclic macro reference, this macro reference contains a reference to the macro being defined") } - Error::InvalidTypeInMacroDefinition => { - let name = match token { - SyntacticTokenType::Reference(_) => "references", - SyntacticTokenType::LabelDefinition(_) => "label definitions", - SyntacticTokenType::MacroDefinition(_) => "macro definitions", - SyntacticTokenType::MacroDefinitionTerminator => "macro definition terminators", - SyntacticTokenType::Padding(_) => "padding", - SyntacticTokenType::ByteLiteral(_) => "byte literals", - SyntacticTokenType::ShortLiteral(_) => "short literals", - SyntacticTokenType::Instruction(_) => "instructions", - SyntacticTokenType::Comment => "comments", - }; - eprintln!("Invalid token in macro definition, macro definitions are not allowed to contain {name}") } - } - - if let Some(label) = &self.parent_label { - eprint!(" ... "); red!(); eprint!("| "); dim!(); eprintln!("@{label} "); normal!(); - } - - let line = source_code.split('\n').nth(self.source_location.start.line).unwrap(); - eprint!("{:>5} ", self.source_location.start.line+1); - red!(); eprint!("| "); normal!(); - for (i, c) in line.chars().enumerate() { - if i == self.source_location.start.column { red!() } - eprint!("{c}"); - if i == self.source_location.end.column { normal!() } - } - eprintln!(); red!(); eprint!(" | "); - for i in 0..=self.source_location.end.column { - if i < self.source_location.start.column { eprint!(" ") } else { eprint!("^") }; - } - normal!(); eprintln!(); - } - else if let SemanticTokenType::MacroDefinition(definition) = &self.r#type { - for token in &definition.body_tokens { - if token.print_error(source_code) { is_error = true } - } - } - is_error - } -} - -pub struct LabelDefinition { - pub name: String, - pub address: u16, - /// A list of pointers to label reference tokens - pub references: Vec<usize>, -} -impl LabelDefinition { - pub fn new(name: String) -> Self { - Self { name, address:0, references:Vec::new() } - } -} - -pub struct MacroDefinition { - pub name: String, - pub body_tokens: Vec<SemanticToken>, - /// A list of pointers to macro reference tokens - pub references: Vec<usize>, -} -impl MacroDefinition { - pub fn new(name: String) -> Self { - Self { name, body_tokens:Vec::new(), references:Vec::new() } - } -} - diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..02cc739 --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,150 @@ +use crate::*; + +use indexmap::IndexMap; + + +/// Doesn't truncate trailing null bytes. +pub fn generate_bytecode(semantic: &Program) -> Result<AssembledProgram, Vec<Tracked<BytecodeError>>> { + let mut generator = BytecodeGenerator::new(&semantic.definitions); + generator.parse(&semantic.tokens, false); + generator.fill_slots(); + let mut symbols = Vec::new(); + for (name, information) in generator.labels { + let source = semantic.definitions.get(&name).unwrap().source.clone(); + let address = information.address; + symbols.push(AssembledSymbol { name, address, source }); + } + match generator.errors.is_empty() { + true => Ok(AssembledProgram { bytecode: generator.bytecode, symbols }), + false => Err(generator.errors), + } +} + + +pub struct BytecodeGenerator<'a> { + definitions: &'a IndexMap<String, Tracked<Definition>>, + labels: IndexMap<String, LabelInformation>, + stack: Vec<usize>, + bytecode: Vec<u8>, + errors: Vec<Tracked<BytecodeError>>, +} + +struct LabelInformation { + address: usize, + slots: Vec<usize>, +} + +impl<'a> BytecodeGenerator<'a> { + pub fn new(definitions: &'a IndexMap<String, Tracked<Definition>>) -> Self { + let mut labels = IndexMap::new(); + for (name, definition) in definitions { + if let DefinitionVariant::LabelDefinition = definition.variant { + // Use fake address for now. + let information = LabelInformation { address: 0, slots: Vec::new() }; + labels.insert(name.to_string(), information); + } + } + Self { + definitions, + labels, + stack: Vec::new(), + bytecode: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) { + macro_rules! byte { + ($byte:expr) => { self.bytecode.push($byte) }; + } + macro_rules! double { + ($double:expr) => {{ + let [high, low] = u16::to_be_bytes($double); + self.bytecode.push(high); self.bytecode.push(low); + }}; + } + + for token in tokens { + let i = self.bytecode.len(); + match &token.value { + SemanticToken::Literal(value) => match value { + Value::Byte(byte) => byte!(*byte), + Value::Double(double) => double!(*double), + } + SemanticToken::Pad(value) => { + self.bytecode.resize(i + usize::from(value), 0); + }, + SemanticToken::String(bytes) => { + self.bytecode.extend_from_slice(bytes) + }, + SemanticToken::Comment(_) => (), + SemanticToken::BlockOpen(_) => { + self.stack.push(i); + // Use a fake index for now. + double!(0); + } + SemanticToken::BlockClose(_) => { + if i > 0xFFFF { + let error = BytecodeError::InvalidBlockAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let Some(addr) = self.stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + let [high, low] = (i as u16).to_be_bytes(); + self.bytecode[addr] = high; + self.bytecode[addr+1] = low; + } + SemanticToken::Symbol(name) => { + if let Some(definition) = self.definitions.get(name) { + match &definition.variant { + DefinitionVariant::MacroDefinition(body) => { + self.parse(body, true); + } + DefinitionVariant::LabelDefinition => { + let information = self.labels.get_mut(name).unwrap(); + information.slots.push(i); + // Use a fake index for now. + double!(0); + } + } + } else { + unreachable!("Uncaught undefined symbol '{name}'"); + } + } + SemanticToken::Instruction(instruction) => { + byte!(instruction.value) + } + SemanticToken::LabelDefinition(name) => if in_macro { + unreachable!("Uncaught label definition in macro"); + } else { + if i > 0xFFFF { + let error = BytecodeError::InvalidLabelAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let information = self.labels.get_mut(name).unwrap(); + // Replace fake index with real index. + information.address = i; + } + SemanticToken::MacroDefinition{ .. } => if in_macro { + unreachable!("Uncaught macro definition in macro"); + } + } + } + + if !in_macro && !self.stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + } + + /// Fill each label slot with a real label address. + pub fn fill_slots(&mut self) { + for information in self.labels.values() { + let [high, low] = (information.address as u16).to_be_bytes(); + for addr in &information.slots { + self.bytecode[*addr] = high; + self.bytecode[*addr + 1] = low; + } + } + } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..902fcd7 --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,37 @@ +use crate::*; + + +pub struct AssembledProgram { + pub bytecode: Vec<u8>, + pub symbols: Vec<AssembledSymbol>, +} + +pub struct AssembledSymbol { + pub name: String, + pub address: usize, + pub source: SourceSpan, +} + +pub enum BytecodeError { + InvalidLabelAddress(usize), + InvalidBlockAddress(usize), +} + + +pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { + for error in errors { + report_bytecode_error(error, source_code); + } +} + + +fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + BytecodeError::InvalidLabelAddress(address) => + &format!("The label address exceeds 0xFFFF: 0x{address:X}"), + BytecodeError::InvalidBlockAddress(address) => + &format!("The block address exceeds 0xFFFF: 0x{address:X}"), + }; + report_source_issue(LogLevel::Error, &context, message); +} diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs new file mode 100644 index 0000000..97bf20c --- /dev/null +++ b/src/stages/compiler.rs @@ -0,0 +1,84 @@ +use crate::*; + +use assembler::SymbolRole::*; +use assembler::DefinitionType::*; + + +pub fn new_compiler() -> Compiler { + Compiler::new(parse_symbols, push_code) +} + + +/// Parse all symbols from a source code string. +pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { + let syntactic = match parse_syntactic(source_code, path) { + Ok(syntactic) => syntactic, + Err(_) => return None, + }; + Some(SymbolParser::new().parse(&syntactic)) +} + +/// Push source code to a source compilation string. +pub fn push_code(compilation: &mut String, source_file: &SourceFile) { + // Skip blank files. + let source_code = &source_file.source_code; + if source_code.chars().all(|c| c.is_whitespace()) { return; } + // Ensure that the previous section is followed by two newline characters. + if !compilation.is_empty() { + if !compilation.ends_with('\n') { compilation.push('\n'); } + if !compilation.ends_with("\n\n") { compilation.push('\n'); } + } + // Push a path comment and the source code. + let path_str = source_file.path.as_os_str().to_string_lossy(); + let path_comment = format!("(: {path_str} )\n"); + compilation.push_str(&path_comment); + compilation.push_str(&source_code); +} + + +// Extract symbol definitions from a list of syntactic tokens. +pub struct SymbolParser { + pub symbols: Vec<Symbol>, +} + +impl SymbolParser { + pub fn new() -> Self { + Self { + symbols: Vec::new(), + } + } + + fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { + let name = name.to_string(); + let namespace = Vec::new(); + let source = source.to_owned(); + self.symbols.push(Symbol { name, namespace, source, role }); + } + + pub fn parse(mut self, syntactic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> { + for token in syntactic { + match &token.value { + SyntacticToken::MacroDefinition(definition) => { + self.record_symbol( + &definition.name, + &definition.name.source, + Definition(MustPrecedeReference), + ); + for token in &definition.body { + if let SyntacticToken::Symbol(name) = &token.value { + self.record_symbol(&name, &token.source, Reference); + } + } + } + SyntacticToken::LabelDefinition(name) => { + self.record_symbol(&name, &token.source, Definition(CanFollowReference)); + } + SyntacticToken::Symbol(name) => { + self.record_symbol(&name, &token.source, Reference); + } + _ => (), + } + } + return self.symbols; + } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..76bda0d --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,26 @@ +mod compiler; +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod bytecode; +mod bytecode_tokens; +pub use compiler::*; +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! indent { + (0, $($tokens:tt)*) => {{ + println!($($tokens)*); + }}; + ($indent:expr, $($tokens:tt)*) => {{ + for _ in 0..$indent { print!(" "); } + println!($($tokens)*); + }}; +} diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..dc9709e --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,154 @@ +use crate::*; + +use std::str::FromStr; + +use indexmap::{IndexMap, IndexSet}; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> { + let mut errors = Vec::new(); + + // Record all label definitions and macro names up front. + let mut definitions = IndexMap::new(); + let mut macro_names = IndexSet::new(); + for token in &syntactic { + match &token.value { + SyntacticToken::LabelDefinition(name) => { + // Check if identifier is reserved. + if Instruction::from_str(&name).is_ok() { + let error = SemanticError::ReservedIdentifier(name.to_string()); + errors.push(Tracked::from(error, token.source.clone())); + } + // Use a fake index for now. + let definition = Definition::new(0, DefinitionVariant::LabelDefinition); + let tracked = Tracked::from(definition, token.source.clone()); + if let Some(_) = definitions.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + SyntacticToken::MacroDefinition(definition) => { + let name = &definition.name; + // Check if identifier is reserved. + if Instruction::from_str(&name).is_ok() { + let error = SemanticError::ReservedIdentifier(name.to_string()); + errors.push(Tracked::from(error, name.source.clone())); + } + if !macro_names.insert(name.clone()) { + unreachable!("Uncaught duplicate macro definition '{name}'") + } + } + _ => (), + } + } + + // Convert syntactic tokens to semantic tokens. + let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut stack = Vec::new(); + + for syn_token in syntactic { + let i = tokens.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + stack.push(i); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + // Replace fake index with real index. + tokens[k].value = SemanticToken::BlockOpen(i); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.references.push(i); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(name) => { + let definition = definitions.get_mut(&name).unwrap(); + // Replace fake index with real index. + definition.value.definition = i; + SemanticToken::LabelDefinition(name) + } + SyntacticToken::MacroDefinition(definition) => { + let name = definition.name.clone(); + let mut body: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut body_stack = Vec::new(); + for syn_token in definition.body { + let j = body.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + body_stack.push(j); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = body_stack.pop() else { + unreachable!("Uncaught unmatched block terminator in macro '{name}'"); + }; + // Replace fake index with real index. + body[k].value = SemanticToken::BlockOpen(j); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.deep_references.push((i, j)); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}' in macro '{name}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(label) => + unreachable!("Uncaught label definition '{label}' in macro '{name}'"), + SyntacticToken::MacroDefinition(definition) => + unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name), + }; + body.push(Tracked::from(sem_token, syn_token.source)); + } + + let variant = DefinitionVariant::MacroDefinition(body); + let source = definition.name.source.clone(); + let tracked = Tracked::from(Definition::new(i, variant), source); + if let Some(_) = definitions.insert(name.value.clone(), tracked) { + unreachable!("Uncaught duplicate definition '{name}'") + }; + if !body_stack.is_empty() { + unreachable!("Uncaught unterminated block in macro '{name}'"); + } + SemanticToken::MacroDefinition(name) + } + }; + tokens.push(Tracked::from(sem_token, syn_token.source)); + } + + if !stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + match errors.is_empty() { + true => Ok(Program { definitions, tokens }), + false => Err(errors), + } +} diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs new file mode 100644 index 0000000..c735828 --- /dev/null +++ b/src/stages/semantic_tokens.rs @@ -0,0 +1,97 @@ +use crate::*; + +use indexmap::IndexMap; + + +pub struct Program { + pub definitions: IndexMap<String, Tracked<Definition>>, + pub tokens: Vec<Tracked<SemanticToken>>, +} + +pub struct Definition { + pub variant: DefinitionVariant, + /// Index of definition token. + pub definition: usize, + /// Indices of symbols referencing this definition. + pub references: Vec<usize>, + /// Indices of references inside other definitions. + pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { + pub fn new(i: usize, variant: DefinitionVariant) -> Self { + Self { + variant, + definition: i, + references: Vec::new(), + deep_references: Vec::new(), + } + } +} + +pub enum DefinitionVariant { + LabelDefinition, + MacroDefinition(Vec<Tracked<SemanticToken>>), +} + +pub enum SemanticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen(usize), // index to matching block-close + BlockClose(usize), // index to matching block-open + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(Tracked<String>), +} + +pub enum SemanticError { + InvocationBeforeDefinition, + ReservedIdentifier(String), +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { + for error in errors { + report_semantic_error(error, source_code); + } +} + + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SemanticError::InvocationBeforeDefinition => + "Macro cannot be invoked before it has been defined", + SemanticError::ReservedIdentifier(name) => + &format!("Identifier '{name}' is reserved for a built-in instruction"), + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &IndexMap<String, Tracked<Definition>>) { + match token { + SemanticToken::Literal(value) => indent!(i, "Literal({value})"), + SemanticToken::Pad(value) => indent!(i, "Pad({value})"), + SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SemanticToken::Comment(_) => indent!(i, "Comment"), + SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"), + SemanticToken::BlockClose(pointer) => indent!(i, "BlockClose(*{pointer})"), + SemanticToken::Symbol(name) => indent!(i, "Symbol({name})"), + SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SemanticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SemanticToken::MacroDefinition(name) => { + indent!(i, "MacroDefinition({name})"); + if let Some(definition) = definitions.get(name.as_str()) { + if let DefinitionVariant::MacroDefinition(body) = &definition.variant { + for token in body { + print_semantic_token(i+1, token, definitions); + } + } + } + } + } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..59b8b95 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,211 @@ +use crate::*; + +use std::path::PathBuf; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), "") +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + t.add_delimiters(&['(',')','[',']','{','}',';']); + t.add_terminators(&[':']); + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + let mut label_name = label_name.to_string(); + + macro_rules! err { + ($error:expr) => {{ + err!($error, t.get_source()); + }}; + ($error:expr, $source:expr) => {{ + errors.push(Tracked::from($error, $source)); + continue; + }}; + } + + macro_rules! check_name { + ($name:expr) => {{ + check_name!($name, t.get_source()); + }}; + ($name:expr, $source:expr) => { + if $name.chars().count() > 63 { + let error = SyntacticError::InvalidIdentifier($name.clone()); + errors.push(Tracked::from(error, $source.clone())); + } + }; + } + + // Eat characters until the end character is found. + macro_rules! is_end { + ($end:expr) => { + |t: &mut Tokeniser| { + t.eat_char() == Some($end) + } + }; + } + + loop { + // Eat leading whitespace. + while let Some(c) = t.peek_char() { + match [' ', '\n', '\r', '\t'].contains(&c) { + true => t.eat_char(), + false => break, + }; + } + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let token = match c { + '"' => { + let source = t.get_source(); + match t.track_until(is_end!('"')) { + Some(string) => { + let mut bytes = string.into_bytes(); + bytes.push(0x00); + SyntacticToken::String(bytes) + } + None => err!(SyntacticError::UnterminatedNullString, source), + } + } + '\'' => { + let source = t.get_source(); + match t.track_until(is_end!('\'')) { + Some(string) => SyntacticToken::String(string.into_bytes()), + None => err!(SyntacticError::UnterminatedRawString, source), + } + } + '(' => { + let source = t.get_source(); + if let Some(string) = t.track_until(is_end!(')')) { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + continue; + } + } + SyntacticToken::Comment(string) + } else { + err!(SyntacticError::UnterminatedComment, source) + } + } + ')' => err!(SyntacticError::UnmatchedCommentTerminator), + '%' => { + let name = t.eat_token(); + let source = t.get_source(); + check_name!(name, source); + t.mark_child(); + if let Some(_) = t.track_until(is_end!(';')) { + let child = t.tokenise_child_span(); + match parse_body_from_tokeniser(child, &label_name) { + Ok(body) => { + let name = Tracked::from(name, source); + let definition = SyntacticMacroDefinition { name, body }; + SyntacticToken::MacroDefinition(definition) + } + Err(mut err) => { + errors.append(&mut err); + continue; + } + } + } else { + err!(SyntacticError::UnterminatedMacroDefinition, source); + } + } + ';' => err!(SyntacticError::UnmatchedMacroTerminator), + '{' => SyntacticToken::BlockOpen, + '}' => SyntacticToken::BlockClose, + '['|']' => continue, + '@' => { + label_name = t.eat_token(); + check_name!(label_name); + SyntacticToken::LabelDefinition(label_name.clone()) + } + '&' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::LabelDefinition(name) + } + '~' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::Symbol(name) + } + '#' => { + let token = t.eat_token(); + match token.parse::<Value>() { + Ok(value) => SyntacticToken::Pad(value), + Err(_) => err!(SyntacticError::InvalidPadValue), + } + }, + ':' => { + SyntacticToken::Instruction(Instruction { value: 0x21 }) + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Ok(value) = token.parse::<Value>() { + SyntacticToken::Literal(value) + } else if let Ok(instruction) = token.parse::<Instruction>() { + SyntacticToken::Instruction(instruction) + } else { + check_name!(token); + SyntacticToken::Symbol(token) + } + } + }; + + t.mark_end(); + let source = t.get_source(); + tokens.push(Tracked::from(token, source)); + } + + // Check that every block open matches a block close. + let mut stack = Vec::new(); + for token in &tokens { + match &token.value { + SyntacticToken::BlockOpen => stack.push(token.source.clone()), + SyntacticToken::BlockClose => if let None = stack.pop() { + let error = SyntacticError::UnmatchedBlockTerminator; + errors.push(Tracked::from(error, token.source.clone())); + } + _ => (), + } + } + for source in stack { + let error = SyntacticError::UnterminatedBlock; + errors.push(Tracked::from(error, source)); + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} + + +fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + for token in parse_syntactic_from_tokeniser(t, label_name)? { + match token.value { + SyntacticToken::LabelDefinition(_) => { + let error = SyntacticError::LabelDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + SyntacticToken::MacroDefinition(_) => { + let error = SyntacticError::MacroDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + _ => tokens.push(token), + }; + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..35afa80 --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,94 @@ +use crate::*; + + +pub enum SyntacticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen, + BlockClose, + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(SyntacticMacroDefinition), +} + +pub struct SyntacticMacroDefinition { + pub name: Tracked<String>, + pub body: Vec<Tracked<SyntacticToken>>, +} + +pub enum SyntacticError { + UnterminatedBlock, + UnterminatedComment, + UnterminatedRawString, + UnterminatedNullString, + UnterminatedMacroDefinition, + UnmatchedBlockTerminator, + UnmatchedCommentTerminator, + UnmatchedMacroTerminator, + InvalidPadValue, + InvalidIdentifier(String), + MacroDefinitionInMacroDefinition, + LabelDefinitionInMacroDefinition, +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { + for error in errors { + report_syntactic_error(error, source_code); + } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SyntacticError::UnterminatedBlock => + "Block was not terminated, add a '}' character to terminate", + SyntacticError::UnterminatedComment => + "Comment was not terminated, add a ')' character to terminate", + SyntacticError::UnterminatedRawString => + "String was not terminated, add a ' character to terminate", + SyntacticError::UnterminatedNullString => + "String was not terminated, add a '\"' character to terminate", + SyntacticError::UnterminatedMacroDefinition => + "Macro definition was not terminated, add a ';' character to terminate", + SyntacticError::UnmatchedBlockTerminator => + "Attempted to terminate a block, but no block was in progress", + SyntacticError::UnmatchedCommentTerminator => + "Attempted to terminate a comment, but no comment was in progress", + SyntacticError::UnmatchedMacroTerminator => + "Attempted to terminate a macro definition, but no macro definition was in progress", + SyntacticError::InvalidPadValue => + "The pad value must be two or four hexadecimal digits", + SyntacticError::InvalidIdentifier(name) => + &format!("An identifier cannot exceed 63 characters in length: {name}"), + SyntacticError::MacroDefinitionInMacroDefinition => + "A macro cannot be defined inside another macro", + SyntacticError::LabelDefinitionInMacroDefinition => + "A label cannot be defined inside a macro", + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { + match token { + SyntacticToken::Literal(value) => indent!(i, "Literal({value})"), + SyntacticToken::Pad(value) => indent!(i, "Pad({value})"), + SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SyntacticToken::Comment(_) => indent!(i, "Comment"), + SyntacticToken::BlockOpen => indent!(i, "BlockOpen"), + SyntacticToken::BlockClose => indent!(i, "BlockClose"), + SyntacticToken::Symbol(name) => indent!(i, "Symbol({name})"), + SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SyntacticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SyntacticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", definition.name); + for token in &definition.body { + print_syntactic_token(i+1, token); + } + } + } +} diff --git a/src/syntactic_token.rs b/src/syntactic_token.rs deleted file mode 100644 index 4a50e8a..0000000 --- a/src/syntactic_token.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::*; - -pub enum SyntacticTokenType { - Reference(String), - - LabelDefinition(String), - MacroDefinition(String), - MacroDefinitionTerminator, - - Padding(u16), - ByteLiteral(u8), - ShortLiteral(u16), - Instruction(u8), - - Comment, -} - - - -pub struct SyntacticToken { - pub r#type: SyntacticTokenType, - pub source_location: SourceLocation, - pub error: Option<Error>, -} - -impl SyntacticToken { - // Call when this token is found inside a macro definition. - pub fn use_in_macro_body(&mut self) { - match self.r#type { - SyntacticTokenType::LabelDefinition(..) | - SyntacticTokenType::MacroDefinition(..) => { - self.set_error(Error::InvalidTypeInMacroDefinition) - } - _ => (), - }; - } - pub fn set_error(&mut self, error: Error) { - self.error = Some(error); - } - pub fn is_macro_terminator(&self) -> bool { - if let SyntacticTokenType::MacroDefinitionTerminator = self.r#type {true} else {false} - } -} diff --git a/src/tokenizer.rs b/src/tokenizer.rs deleted file mode 100644 index 7d887e4..0000000 --- a/src/tokenizer.rs +++ /dev/null @@ -1,239 +0,0 @@ -use std::mem::take; -use crate::*; - -#[derive(PartialEq)] -enum StringLiteral { - None, - Raw, - NullTerminated, -} - -pub struct TokenIterator { - /// The characters that comprise the program souce code. - chars: Vec<char>, - /// The index of the next character to read. - i: usize, - /// The address of the next character to read. - addr: CharAddress, - /// If true, skip over any whitespace characters. If false, stop reading - /// when a whitespace character is encountered. - skip_whitespace: bool, - /// The name of the most recently defined label. - label: String, - /// If not None, each individual character will be tokenised as a ByteLiteral. - parse_string_literal: StringLiteral, - - - /// The address of the first character of the current token. - start: CharAddress, - /// The address of the final character of the current token. - end: CharAddress, - /// The entire current token. - source: String, - /// The first character of the current token. - prefix: char, - /// The second and remaining characters of the current token. - suffix: String, -} - -impl TokenIterator { - /// Create an iterator from a string of program source code. - pub fn from_str(source_code: &str) -> Self { - Self { - chars: source_code.chars().collect(), - i: 0, - addr: CharAddress::zero(), - skip_whitespace: true, - parse_string_literal: StringLiteral::None, - label: String::new(), - start: CharAddress::zero(), - end: CharAddress::zero(), - source: String::new(), - prefix: ' ', - suffix: String::new(), - } - } - /// Append a character to the current token. - fn push(&mut self, c:char) { - self.end = self.addr; - self.source.push(c); - self.suffix.push(c); - self.next(c); - } - /// Move forward to the next source character. - fn next(&mut self, c: char) { - self.addr.column += 1; - self.i += 1; - if c == '\n' { - self.addr.column = 0; - self.addr.line += 1; - } - } - /// Mark the current character as being the first character of a new token. - fn mark_start(&mut self, c:char) { - if c == '"' { - self.parse_string_literal = StringLiteral::NullTerminated; - } else if c == '\'' { - self.parse_string_literal = StringLiteral::Raw; - } else { - self.start=self.addr; - self.end=self.addr; - self.prefix=c; - self.source.push(c); - self.skip_whitespace=false; - } - self.next(c); - } -} - -impl Iterator for TokenIterator { - type Item = SyntacticToken; - - fn next(&mut self) -> Option<SyntacticToken> { - // Initialise values before reading the next token - let mut is_comment = false; - self.skip_whitespace = true; - - // Iterate over source characters until a full token is read - while let Some(c) = self.chars.get(self.i) { - let c = *c; - // Parse individual characters from a string literal - if self.parse_string_literal != StringLiteral::None { - if c == '"' && self.parse_string_literal == StringLiteral::NullTerminated { - self.parse_string_literal = StringLiteral::None; - let token = SyntacticToken { - r#type: SyntacticTokenType::ByteLiteral(0), - source_location: SourceLocation { - source: c.to_string(), start:self.addr, end:self.addr }, - error: None, - }; - self.next(c); - return Some(token); - } else if c == '\'' && self.parse_string_literal == StringLiteral::Raw { - self.parse_string_literal = StringLiteral::None; - self.next(c); - continue - } else { - self.next(c); - return Some(SyntacticToken { - r#type: SyntacticTokenType::ByteLiteral(c as u8), - source_location: SourceLocation { - source: c.to_string(), start:self.addr, end:self.addr }, - error: None, - }); - } - } - // Intercept comments - if is_comment { - self.push(c); if c == ')' { break } else { continue }; } - else if self.skip_whitespace && c == '(' { - is_comment = true; self.mark_start(c); continue } - - // Allow a semicolon at the end of a token to be handled as a separate token - if self.source.len() > 0 && c == ';' { break } - // Handle the current character - match (is_whitespace(c), self.skip_whitespace) { - (true, true) => self.next(c), // c is the expected leading whitespace - (false, true) => self.mark_start(c), // c is the first character of the token - (false, false) => self.push(c), // c is a character of the token - (true, false) => break, // c is trailing whitespace - } - // Allow literal values to be chained to the end of the previous token - if self.source.len() > 0 && c == ':' { break } - } - - // If no source characters were grabbed then we have read through the entire source file - if self.source.len() == 0 { return None; } - // Allow handling macro terminators and symbols of length 1 in the match expression - if self.suffix.len() == 0 { self.prefix = '\0'; } - // Consume the collected characters to be used in the match expression - let full = take(&mut self.source); - let suffix = take(&mut self.suffix); - let mut error = None; - let mut parse_padding_value = |v| { - parse_short(v).or_else(|| { - error = Some(Error::InvalidPaddingValue); Some(0) - }).unwrap() - }; - - let r#type = match self.prefix { - '(' => { SyntacticTokenType::Comment } - '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) } - '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) } - '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) } - '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) } - '%' => if let Some(("", sublabel)) = suffix.split_once("~") { - SyntacticTokenType::MacroDefinition(format!("{}/{}", self.label, sublabel)) - } else { - SyntacticTokenType::MacroDefinition(suffix) - } - _ => { - if ";" == &full { SyntacticTokenType::MacroDefinitionTerminator } - else if let Some(value) = parse_byte_lit(&full) { SyntacticTokenType::ByteLiteral(value) } - else if let Some(value) = parse_short_lit(&full) { SyntacticTokenType::ShortLiteral(value) } - else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) } - else { SyntacticTokenType::Reference(full.clone()) } - } - }; - Some(SyntacticToken { - r#type, - source_location:SourceLocation::new(full,self.start,self.end), - error, - }) - } -} - - -fn parse_byte_lit(token: &str) -> Option<u8> { - match token.len() { 2 => u8::from_str_radix(token, 16).ok(), _ => None } } -fn parse_short_lit(token: &str) -> Option<u16> { - match token.len() { 4 => u16::from_str_radix(token, 16).ok(), _ => None } } -fn parse_short(token: &str) -> Option<u16> { - match token.len() { 1..=4 => u16::from_str_radix(token, 16).ok(), _ => None } } -fn is_whitespace(c: char) -> bool { - match c { ' '|'\t'|'\n'|'\r'|'['|']'|'(' =>true, _=>false } } -fn parse_instruction(token: &str) -> Option<u8> { - Some(match token { - // Flow control operators - "HLT"=>0x00,"DBG" =>0x20,"NOP" =>0x80, // The remaining op-codes in this line are unclaimed - "JMP"=>0x01,"JSR" =>0x21,"JMP:" =>0x41,"JSR:" =>0x61,"JMPr" =>0x81,"JSRr" =>0xA1,"JMPr:" =>0xC1,"JSRr:" =>0xE1, - "JCN"=>0x02,"JSN" =>0x22,"JCN:" =>0x42,"JSN:" =>0x62,"JCNr" =>0x82,"JSNr" =>0xA2,"JCNr:" =>0xC2,"JSNr:" =>0xE2, - "JKN"=>0x03,"JKN*"=>0x23,"JKN:" =>0x43,"JKN*:"=>0x63,"JKNr" =>0x83,"JKNr*"=>0xA3,"JKNr:" =>0xC3,"JKNr*:"=>0xE3, - // Relational operators - "EQU"=>0x04,"EQU*"=>0x24,"EQU:"=>0x44,"EQU*:"=>0x64,"EQUr"=>0x84,"EQUr*"=>0xA4,"EQUr:"=>0xC4,"EQUr*:"=>0xE4, - "NKQ"=>0x05,"NKQ*"=>0x25,"NKQ:"=>0x45,"NKQ*:"=>0x65,"NKQr"=>0x85,"NKQr*"=>0xA5,"NKQr:"=>0xC5,"NKQr*:"=>0xE5, - "LTH"=>0x06,"LTH*"=>0x26,"LTH:"=>0x46,"LTH*:"=>0x66,"LTHr"=>0x86,"LTHr*"=>0xA6,"LTHr:"=>0xC6,"LTHr*:"=>0xE6, - "GTH"=>0x07,"GTH*"=>0x27,"GTH:"=>0x47,"GTH*:"=>0x67,"GTHr"=>0x87,"GTHr*"=>0xA7,"GTHr:"=>0xC7,"GTHr*:"=>0xE7, - // Memory operators - "LDA"=>0x08,"LDA*"=>0x28,"LDA:"=>0x48,"LDA*:"=>0x68,"LDAr"=>0x88,"LDAr*"=>0xA8,"LDAr:"=>0xC8,"LDAr*:"=>0xE8, - "LKA"=>0x09,"LKA*"=>0x29,"LKA:"=>0x49,"LKA*:"=>0x69,"LKAr"=>0x89,"LKAr*"=>0xA9,"LKAr:"=>0xC9,"LKAr*:"=>0xE9, - "STA"=>0x0A,"STA*"=>0x2A,"STA:"=>0x4A,"STA*:"=>0x6A,"STAr"=>0x8A,"STAr*"=>0xAA,"STAr:"=>0xCA,"STAr*:"=>0xEA, - "SKA"=>0x0B,"SKA*"=>0x2B,"SKA:"=>0x4B,"SKA*:"=>0x6B,"SKAr"=>0x8B,"SKAr*"=>0xAB,"SKAr:"=>0xCB,"SKAr*:"=>0xEB, - // Device operators - "LDD"=>0x0C,"LDD*"=>0x2C,"LDD:"=>0x4C,"LDD*:"=>0x6C,"LDDr"=>0x8C,"LDDr*"=>0xAC,"LDDr:"=>0xCC,"LDDr*:"=>0xEC, - "LKD"=>0x0D,"LKD*"=>0x2D,"LKD:"=>0x4D,"LKD*:"=>0x6D,"LKDr"=>0x8D,"LKDr*"=>0xAD,"LKDr:"=>0xCD,"LKDr*:"=>0xED, - "STD"=>0x0E,"STD*"=>0x2E,"STD:"=>0x4E,"STD*:"=>0x6E,"STDr"=>0x8E,"STDr*"=>0xAE,"STDr:"=>0xCE,"STDr*:"=>0xEE, - "SKD"=>0x0F,"SKD*"=>0x2F,"SKD:"=>0x4F,"SKD*:"=>0x6F,"SKDr"=>0x8F,"SKDr*"=>0xAF,"SKDr:"=>0xCF,"SKDr*:"=>0xEF, - // - "PSH"=>0x10,"PSH*"=>0x30,"PSH:"=>0x50,"PSH*:"=>0x70,"PSHr"=>0x90,"PSHr*"=>0xB0,"PSHr:"=>0xD0,"PSHr*:"=>0xF0, - "POP"=>0x11,"POP*"=>0x31,"POP:"=>0x51,"POP*:"=>0x71,"POPr"=>0x91,"POPr*"=>0xB1,"POPr:"=>0xD1,"POPr*:"=>0xF1, - "SHF"=>0x12,"SHF*"=>0x32,"SHF:"=>0x52,"SHF*:"=>0x72,"SHFr"=>0x92,"SHFr*"=>0xB2,"SHFr:"=>0xD2,"SHFr*:"=>0xF2, - "SHC"=>0x13,"SHC*"=>0x33,"SHC:"=>0x53,"SHC*:"=>0x73,"SHCr"=>0x93,"SHCr*"=>0xB3,"SHCr:"=>0xD3,"SHCr*:"=>0xF3, - // Stack operators - "SWP"=>0x14,"SWP*"=>0x34,"SWP:"=>0x54,"SWP*:"=>0x74,"SWPr"=>0x94,"SWPr*"=>0xB4,"SWPr:"=>0xD4,"SWPr*:"=>0xF4, - "ROT"=>0x15,"ROT*"=>0x35,"ROT:"=>0x55,"ROT*:"=>0x75,"ROTr"=>0x95,"ROTr*"=>0xB5,"ROTr:"=>0xD5,"ROTr*:"=>0xF5, - "DUP"=>0x16,"DUP*"=>0x36,"DUP:"=>0x56,"DUP*:"=>0x76,"DUPr"=>0x96,"DUPr*"=>0xB6,"DUPr:"=>0xD6,"DUPr*:"=>0xF6, - "OVR"=>0x17,"OVR*"=>0x37,"OVR:"=>0x57,"OVR*:"=>0x77,"OVRr"=>0x97,"OVRr*"=>0xB7,"OVRr:"=>0xD7,"OVRr*:"=>0xF7, - // Arithmetic operators - "ADD"=>0x18,"ADD*"=>0x38,"ADD:"=>0x58,"ADD*:"=>0x78,"ADDr"=>0x98,"ADDr*"=>0xB8,"ADDr:"=>0xD8,"ADDr*:"=>0xF8, - "SUB"=>0x19,"SUB*"=>0x39,"SUB:"=>0x59,"SUB*:"=>0x79,"SUBr"=>0x99,"SUBr*"=>0xB9,"SUBr:"=>0xD9,"SUBr*:"=>0xF9, - "INC"=>0x1A,"INC*"=>0x3A,"INC:"=>0x5A,"INC*:"=>0x7A,"INCr"=>0x9A,"INCr*"=>0xBA,"INCr:"=>0xDA,"INCr*:"=>0xFA, - "DEC"=>0x1B,"DEC*"=>0x3B,"DEC:"=>0x5B,"DEC*:"=>0x7B,"DECr"=>0x9B,"DECr*"=>0xBB,"DECr:"=>0xDB,"DECr*:"=>0xFB, - // Logical operators - "NOT"=>0x1C,"NOT*"=>0x3C,"NOT:"=>0x5C,"NOT*:"=>0x7C,"NOTr"=>0x9C,"NOTr*"=>0xBC,"NOTr:"=>0xDC,"NOTr*:"=>0xFC, - "AND"=>0x1D,"AND*"=>0x3D,"AND:"=>0x5D,"AND*:"=>0x7D,"ANDr"=>0x9D,"ANDr*"=>0xBD,"ANDr:"=>0xDD,"ANDr*:"=>0xFD, - "IOR"=>0x1E,"IOR*"=>0x3E,"IOR:"=>0x5E,"IOR*:"=>0x7E,"IORr"=>0x9E,"IORr*"=>0xBE,"IORr:"=>0xDE,"IORr*:"=>0xFE, - "XOR"=>0x1F,"XOR*"=>0x3F,"XOR:"=>0x5F,"XOR*:"=>0x7F,"XORr"=>0x9F,"XORr*"=>0xBF,"XORr:"=>0xDF,"XORr*:"=>0xFF, - _ => return None, - }) -} diff --git a/src/types/instruction.rs b/src/types/instruction.rs new file mode 100644 index 0000000..252fc68 --- /dev/null +++ b/src/types/instruction.rs @@ -0,0 +1,168 @@ +use crate::*; + +use Operation as Op; + + +pub struct Instruction { + pub value: u8, +} + +impl Instruction { + pub fn operation(&self) -> Operation { + match self.value & 0x1f { + 0x00=>Op::HLT, 0x01=>Op::PSH, 0x02=>Op::POP, 0x03=>Op::CPY, + 0x04=>Op::DUP, 0x05=>Op::OVR, 0x06=>Op::SWP, 0x07=>Op::ROT, + 0x08=>Op::JMP, 0x09=>Op::JMS, 0x0a=>Op::JCN, 0x0b=>Op::JCS, + 0x0c=>Op::LDA, 0x0d=>Op::STA, 0x0e=>Op::LDD, 0x0f=>Op::STD, + 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, + 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, + 0x18=>Op::SHL, 0x19=>Op::SHR, 0x1a=>Op::ROL, 0x1b=>Op::ROR, + 0x1c=>Op::IOR, 0x1d=>Op::XOR, 0x1e=>Op::AND, 0x1f=>Op::NOT, + _ => unreachable!(), + } + } + + pub fn return_mode(&self) -> bool { + self.value & RETURN_MODE != 0 + } + + pub fn wide_mode(&self) -> bool { + self.value & WIDE_MODE != 0 + } + + pub fn immediate_mode(&self) -> bool { + self.value & IMMEDIATE_MODE != 0 + } +} + +impl std::fmt::Display for Instruction { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self.value { + // Stack operators + 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" , + 0x01=>"PSH",0x21=>"PSH:",0x41=>"PSH*",0x61=>"PSH*:",0x81=>"PSHr",0xA1=>"PSHr:",0xC1=>"PSHr*",0xE1=>"PSHr*:", + 0x02=>"POP",0x22=>"POP:",0x42=>"POP*",0x62=>"POP*:",0x82=>"POPr",0xA2=>"POPr:",0xC2=>"POPr*",0xE2=>"POPr*:", + 0x03=>"CPY",0x23=>"CPY:",0x43=>"CPY*",0x63=>"CPY*:",0x83=>"CPYr",0xA3=>"CPYr:",0xC3=>"CPYr*",0xE3=>"CPYr*:", + 0x04=>"DUP",0x24=>"DUP:",0x44=>"DUP*",0x64=>"DUP*:",0x84=>"DUPr",0xA4=>"DUPr:",0xC4=>"DUPr*",0xE4=>"DUPr*:", + 0x05=>"OVR",0x25=>"OVR:",0x45=>"OVR*",0x65=>"OVR*:",0x85=>"OVRr",0xA5=>"OVRr:",0xC5=>"OVRr*",0xE5=>"OVRr*:", + 0x06=>"SWP",0x26=>"SWP:",0x46=>"SWP*",0x66=>"SWP*:",0x86=>"SWPr",0xA6=>"SWPr:",0xC6=>"SWPr*",0xE6=>"SWPr*:", + 0x07=>"ROT",0x27=>"ROT:",0x47=>"ROT*",0x67=>"ROT*:",0x87=>"ROTr",0xA7=>"ROTr:",0xC7=>"ROTr*",0xE7=>"ROTr*:", + // Control operators + 0x08=>"JMP",0x28=>"JMP:",0x48=>"JMP*",0x68=>"JMP*:",0x88=>"JMPr",0xA8=>"JMPr:",0xC8=>"JMPr*",0xE8=>"JMPr*:", + 0x09=>"JMS",0x29=>"JMS:",0x49=>"JMS*",0x69=>"JMS*:",0x89=>"JMSr",0xA9=>"JMSr:",0xC9=>"JMSr*",0xE9=>"JMSr*:", + 0x0A=>"JCN",0x2A=>"JCN:",0x4A=>"JCN*",0x6A=>"JCN*:",0x8A=>"JCNr",0xAA=>"JCNr:",0xCA=>"JCNr*",0xEA=>"JCNr*:", + 0x0B=>"JCS",0x2B=>"JCS:",0x4B=>"JCS*",0x6B=>"JCS*:",0x8B=>"JCSr",0xAB=>"JCSr:",0xCB=>"JCSr*",0xEB=>"JCSr*:", + 0x0C=>"LDA",0x2C=>"LDA:",0x4C=>"LDA*",0x6C=>"LDA*:",0x8C=>"LDAr",0xAC=>"LDAr:",0xCC=>"LDAr*",0xEC=>"LDAr*:", + 0x0D=>"STA",0x2D=>"STA:",0x4D=>"STA*",0x6D=>"STA*:",0x8D=>"STAr",0xAD=>"STAr:",0xCD=>"STAr*",0xED=>"STAr*:", + 0x0E=>"LDD",0x2E=>"LDD:",0x4E=>"LDD*",0x6E=>"LDD*:",0x8E=>"LDDr",0xAE=>"LDDr:",0xCE=>"LDDr*",0xEE=>"LDDr*:", + 0x0F=>"STD",0x2F=>"STD:",0x4F=>"STD*",0x6F=>"STD*:",0x8F=>"STDr",0xAF=>"STDr:",0xCF=>"STDr*",0xEF=>"STDr*:", + // Numeric operators + 0x10=>"ADD",0x30=>"ADD:",0x50=>"ADD*",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr:",0xD0=>"ADDr*",0xF0=>"ADDr*:", + 0x11=>"SUB",0x31=>"SUB:",0x51=>"SUB*",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr:",0xD1=>"SUBr*",0xF1=>"SUBr*:", + 0x12=>"INC",0x32=>"INC:",0x52=>"INC*",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr:",0xD2=>"INCr*",0xF2=>"INCr*:", + 0x13=>"DEC",0x33=>"DEC:",0x53=>"DEC*",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr:",0xD3=>"DECr*",0xF3=>"DECr*:", + 0x14=>"LTH",0x34=>"LTH:",0x54=>"LTH*",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr:",0xD4=>"LTHr*",0xF4=>"LTHr*:", + 0x15=>"GTH",0x35=>"GTH:",0x55=>"GTH*",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr:",0xD5=>"GTHr*",0xF5=>"GTHr*:", + 0x16=>"EQU",0x36=>"EQU:",0x56=>"EQU*",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr:",0xD6=>"EQUr*",0xF6=>"EQUr*:", + 0x17=>"NQK",0x37=>"NQK:",0x57=>"NQK*",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr:",0xD7=>"NQKr*",0xF7=>"NQKr*:", + // Bitwise operators + 0x18=>"SHL",0x38=>"SHL:",0x58=>"SHL*",0x78=>"SHL*:",0x98=>"SHLr",0xB8=>"SHLr:",0xD8=>"SHLr*",0xF8=>"SHLr*:", + 0x19=>"SHR",0x39=>"SHR:",0x59=>"SHR*",0x79=>"SHR*:",0x99=>"SHRr",0xB9=>"SHRr:",0xD9=>"SHRr*",0xF9=>"SHRr*:", + 0x1A=>"ROL",0x3A=>"ROL:",0x5A=>"ROL*",0x7A=>"ROL*:",0x9A=>"ROLr",0xBA=>"ROLr:",0xDA=>"ROLr*",0xFA=>"ROLr*:", + 0x1B=>"ROR",0x3B=>"ROR:",0x5B=>"ROR*",0x7B=>"ROR*:",0x9B=>"RORr",0xBB=>"RORr:",0xDB=>"RORr*",0xFB=>"RORr*:", + 0x1C=>"IOR",0x3C=>"IOR:",0x5C=>"IOR*",0x7C=>"IOR*:",0x9C=>"IORr",0xBC=>"IORr:",0xDC=>"IORr*",0xFC=>"IORr*:", + 0x1D=>"XOR",0x3D=>"XOR:",0x5D=>"XOR*",0x7D=>"XOR*:",0x9D=>"XORr",0xBD=>"XORr:",0xDD=>"XORr*",0xFD=>"XORr*:", + 0x1E=>"AND",0x3E=>"AND:",0x5E=>"AND*",0x7E=>"AND*:",0x9E=>"ANDr",0xBE=>"ANDr:",0xDE=>"ANDr*",0xFE=>"ANDr*:", + 0x1F=>"NOT",0x3F=>"NOT:",0x5F=>"NOT*",0x7F=>"NOT*:",0x9F=>"NOTr",0xBF=>"NOTr:",0xDF=>"NOTr*",0xFF=>"NOTr*:", + }) + } +} + +impl std::str::FromStr for Instruction { + type Err = (); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + Ok( Instruction { value: match token { + // Stack operators + "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, + "PSH"=>0x01,"PSH:"=>0x21,"PSH*"=>0x41,"PSH*:"=>0x61,"PSHr"=>0x81,"PSHr:"=>0xA1,"PSHr*"=>0xC1,"PSHr*:"=>0xE1, + ":"=>0x21, "*:"=>0x61, "r:"=>0xA1, "r*:"=>0xE1, + "POP"=>0x02,"POP:"=>0x22,"POP*"=>0x42,"POP*:"=>0x62,"POPr"=>0x82,"POPr:"=>0xA2,"POPr*"=>0xC2,"POPr*:"=>0xE2, + "CPY"=>0x03,"CPY:"=>0x23,"CPY*"=>0x43,"CPY*:"=>0x63,"CPYr"=>0x83,"CPYr:"=>0xA3,"CPYr*"=>0xC3,"CPYr*:"=>0xE3, + "DUP"=>0x04,"DUP:"=>0x24,"DUP*"=>0x44,"DUP*:"=>0x64,"DUPr"=>0x84,"DUPr:"=>0xA4,"DUPr*"=>0xC4,"DUPr*:"=>0xE4, + "OVR"=>0x05,"OVR:"=>0x25,"OVR*"=>0x45,"OVR*:"=>0x65,"OVRr"=>0x85,"OVRr:"=>0xA5,"OVRr*"=>0xC5,"OVRr*:"=>0xE5, + "SWP"=>0x06,"SWP:"=>0x26,"SWP*"=>0x46,"SWP*:"=>0x66,"SWPr"=>0x86,"SWPr:"=>0xA6,"SWPr*"=>0xC6,"SWPr*:"=>0xE6, + "ROT"=>0x07,"ROT:"=>0x27,"ROT*"=>0x47,"ROT*:"=>0x67,"ROTr"=>0x87,"ROTr:"=>0xA7,"ROTr*"=>0xC7,"ROTr*:"=>0xE7, + // Control operators + "JMP"=>0x08,"JMP:"=>0x28,"JMP*"=>0x48,"JMP*:"=>0x68,"JMPr"=>0x88,"JMPr:"=>0xA8,"JMPr*"=>0xC8,"JMPr*:"=>0xE8, + "JMS"=>0x09,"JMS:"=>0x29,"JMS*"=>0x49,"JMS*:"=>0x69,"JMSr"=>0x89,"JMSr:"=>0xA9,"JMSr*"=>0xC9,"JMSr*:"=>0xE9, + "JCN"=>0x0A,"JCN:"=>0x2A,"JCN*"=>0x4A,"JCN*:"=>0x6A,"JCNr"=>0x8A,"JCNr:"=>0xAA,"JCNr*"=>0xCA,"JCNr*:"=>0xEA, + "JCS"=>0x0B,"JCS:"=>0x2B,"JCS*"=>0x4B,"JCS*:"=>0x6B,"JCSr"=>0x8B,"JCSr:"=>0xAB,"JCSr*"=>0xCB,"JCSr*:"=>0xEB, + "LDA"=>0x0C,"LDA:"=>0x2C,"LDA*"=>0x4C,"LDA*:"=>0x6C,"LDAr"=>0x8C,"LDAr:"=>0xAC,"LDAr*"=>0xCC,"LDAr*:"=>0xEC, + "STA"=>0x0D,"STA:"=>0x2D,"STA*"=>0x4D,"STA*:"=>0x6D,"STAr"=>0x8D,"STAr:"=>0xAD,"STAr*"=>0xCD,"STAr*:"=>0xED, + "LDD"=>0x0E,"LDD:"=>0x2E,"LDD*"=>0x4E,"LDD*:"=>0x6E,"LDDr"=>0x8E,"LDDr:"=>0xAE,"LDDr*"=>0xCE,"LDDr*:"=>0xEE, + "STD"=>0x0F,"STD:"=>0x2F,"STD*"=>0x4F,"STD*:"=>0x6F,"STDr"=>0x8F,"STDr:"=>0xAF,"STDr*"=>0xCF,"STDr*:"=>0xEF, + // Numeric operators + "ADD"=>0x10,"ADD:"=>0x30,"ADD*"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr:"=>0xB0,"ADDr*"=>0xD0,"ADDr*:"=>0xF0, + "SUB"=>0x11,"SUB:"=>0x31,"SUB*"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr:"=>0xB1,"SUBr*"=>0xD1,"SUBr*:"=>0xF1, + "INC"=>0x12,"INC:"=>0x32,"INC*"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr:"=>0xB2,"INCr*"=>0xD2,"INCr*:"=>0xF2, + "DEC"=>0x13,"DEC:"=>0x33,"DEC*"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr:"=>0xB3,"DECr*"=>0xD3,"DECr*:"=>0xF3, + "LTH"=>0x14,"LTH:"=>0x34,"LTH*"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr:"=>0xB4,"LTHr*"=>0xD4,"LTHr*:"=>0xF4, + "GTH"=>0x15,"GTH:"=>0x35,"GTH*"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr:"=>0xB5,"GTHr*"=>0xD5,"GTHr*:"=>0xF5, + "EQU"=>0x16,"EQU:"=>0x36,"EQU*"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr:"=>0xB6,"EQUr*"=>0xD6,"EQUr*:"=>0xF6, + "NQK"=>0x17,"NQK:"=>0x37,"NQK*"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr:"=>0xB7,"NQKr*"=>0xD7,"NQKr*:"=>0xF7, + // Bitwise operators + "SHL"=>0x18,"SHL:"=>0x38,"SHL*"=>0x58,"SHL*:"=>0x78,"SHLr"=>0x98,"SHLr:"=>0xB8,"SHLr*"=>0xD8,"SHLr*:"=>0xF8, + "SHR"=>0x19,"SHR:"=>0x39,"SHR*"=>0x59,"SHR*:"=>0x79,"SHRr"=>0x99,"SHRr:"=>0xB9,"SHRr*"=>0xD9,"SHRr*:"=>0xF9, + "ROL"=>0x1A,"ROL:"=>0x3A,"ROL*"=>0x5A,"ROL*:"=>0x7A,"ROLr"=>0x9A,"ROLr:"=>0xBA,"ROLr*"=>0xDA,"ROLr*:"=>0xFA, + "ROR"=>0x1B,"ROR:"=>0x3B,"ROR*"=>0x5B,"ROR*:"=>0x7B,"RORr"=>0x9B,"RORr:"=>0xBB,"RORr*"=>0xDB,"RORr*:"=>0xFB, + "IOR"=>0x1C,"IOR:"=>0x3C,"IOR*"=>0x5C,"IOR*:"=>0x7C,"IORr"=>0x9C,"IORr:"=>0xBC,"IORr*"=>0xDC,"IORr*:"=>0xFC, + "XOR"=>0x1D,"XOR:"=>0x3D,"XOR*"=>0x5D,"XOR*:"=>0x7D,"XORr"=>0x9D,"XORr:"=>0xBD,"XORr*"=>0xDD,"XORr*:"=>0xFD, + "AND"=>0x1E,"AND:"=>0x3E,"AND*"=>0x5E,"AND*:"=>0x7E,"ANDr"=>0x9E,"ANDr:"=>0xBE,"ANDr*"=>0xDE,"ANDr*:"=>0xFE, + "NOT"=>0x1F,"NOT:"=>0x3F,"NOT*"=>0x5F,"NOT*:"=>0x7F,"NOTr"=>0x9F,"NOTr:"=>0xBF,"NOTr*"=>0xDF,"NOTr*:"=>0xFF, + _ => return Err(()), + }}) + } +} + + +pub enum Operation { + HLT, PSH, POP, CPY, + DUP, OVR, SWP, ROT, + JMP, JMS, JCN, JCS, + LDA, STA, LDD, STD, + ADD, SUB, INC, DEC, + LTH, GTH, EQU, NQK, + SHL, SHR, ROL, ROR, + IOR, XOR, AND, NOT, +} + +impl From<Operation> for u8 { + fn from(operation: Operation) -> Self { + match operation { + Op::HLT=>0x00, Op::PSH=>0x01, Op::POP=>0x02, Op::CPY=>0x03, + Op::DUP=>0x04, Op::OVR=>0x05, Op::SWP=>0x06, Op::ROT=>0x07, + Op::JMP=>0x08, Op::JMS=>0x09, Op::JCN=>0x0A, Op::JCS=>0x0B, + Op::LDA=>0x0C, Op::STA=>0x0D, Op::LDD=>0x0E, Op::STD=>0x0F, + Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13, + Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17, + Op::SHL=>0x1C, Op::SHR=>0x1D, Op::ROL=>0x1E, Op::ROR=>0x1F, + Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1A, Op::NOT=>0x1B, + } + } +} + +impl std::fmt::Display for Operation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self { + Op::HLT=>"HLT", Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", + Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT", + Op::JMP=>"JMP", Op::JMS=>"JMS", Op::JCN=>"JCN", Op::JCS=>"JCS", + Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD", + Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC", + Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK", + Op::SHL=>"SHL", Op::SHR=>"SHR", Op::ROL=>"ROL", Op::ROR=>"ROR", + Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT", + }) + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..8094cb1 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,4 @@ +mod instruction; +mod value; +pub use instruction::*; +pub use value::*; diff --git a/src/types/value.rs b/src/types/value.rs new file mode 100644 index 0000000..fe82710 --- /dev/null +++ b/src/types/value.rs @@ -0,0 +1,48 @@ +#[derive(Clone, Copy)] +pub enum Value { + Byte(u8), + Double(u16), +} + +impl From<Value> for usize { + fn from(value: Value) -> Self { + match value { + Value::Byte(byte) => byte.into(), + Value::Double(double) => double.into(), + } + } +} + +impl From<&Value> for usize { + fn from(value: &Value) -> Self { + (*value).into() + } +} + +impl std::fmt::Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match self { + Self::Byte(value) => write!(f, "0x{value:02x}"), + Self::Double(value) => write!(f, "0x{value:04x}"), + } + } +} + + +impl std::str::FromStr for Value { + type Err = (); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + match token.len() { + 2 => match u8::from_str_radix(&token, 16) { + Ok(value) => Ok(Value::Byte(value)), + Err(_) => Err(()), + } + 4 => match u16::from_str_radix(&token, 16) { + Ok(value) => Ok(Value::Double(value)), + Err(_) => Err(()), + } + _ => Err(()), + } + } +} |
