Diffstat (limited to 'src')
-rw-r--r-- | src/addressing.rs | 44
-rw-r--r-- | src/assembler.rs | 278
-rw-r--r-- | src/bin/bedrock-asm.rs | 148
-rw-r--r-- | src/error.rs | 10
-rw-r--r-- | src/gather_libraries.rs | 185
-rw-r--r-- | src/lib.rs | 35
-rw-r--r-- | src/locators.rs | 5
-rw-r--r-- | src/locators/bytecode.rs | 39
-rw-r--r-- | src/locators/source.rs | 69
-rw-r--r-- | src/main.rs | 43
-rw-r--r-- | src/print.rs | 237
-rw-r--r-- | src/semantic_token.rs | 116
-rw-r--r-- | src/symbol_resolver.rs | 230
-rw-r--r-- | src/syntactic_token.rs | 43
-rw-r--r-- | src/tokenizer.rs | 235
-rw-r--r-- | src/tokens.rs | 9
-rw-r--r-- | src/tokens/instruction.rs | 170
-rw-r--r-- | src/tokens/semantic.rs | 90
-rw-r--r-- | src/tokens/syntactic.rs | 39
-rw-r--r-- | src/tokens/value.rs | 32
-rw-r--r-- | src/translators.rs | 9
-rw-r--r-- | src/translators/bytecode_generator.rs | 131
-rw-r--r-- | src/translators/semantic_parser.rs | 245
-rw-r--r-- | src/translators/syntactic_parser.rs | 247
-rw-r--r-- | src/translators/syntactic_parser.rs | 247 |
25 files changed, 1930 insertions, 787 deletions
diff --git a/src/addressing.rs b/src/addressing.rs deleted file mode 100644 index dd7638e..0000000 --- a/src/addressing.rs +++ /dev/null @@ -1,44 +0,0 @@ -#[derive(Clone,Copy)] -pub struct CharAddress { - /// The number of lines that precede this line in the file. - pub line:usize, - /// The number of characters that precede this character in the line. - pub column:usize, -} -impl CharAddress { - pub fn new(line:usize, column:usize) -> Self { - Self { line, column } - } - pub fn zero() -> Self { - Self::new(0,0) - } -} - -pub struct SourceLocation { - /// The slice of the source file from which this token was parsed. - pub source: String, - /// The address of the first character of this token. - pub start: CharAddress, - /// The address of the final character of this token. - pub end: CharAddress -} -impl SourceLocation { - pub fn new(source:String, start:CharAddress, end:CharAddress) -> Self { - Self { source, start, end } - } - pub fn zero() -> Self { - Self { source:String::new(), start:CharAddress::zero(), end:CharAddress::zero() } - } -} - -pub struct BytecodeLocation { - /// The number of bytes that precede this byte sequence in the bytecode. - pub start: u16, - /// The length of this byte sequence, in bytes. - pub length: u16, -} -impl BytecodeLocation { - pub fn zero() -> Self { - Self { start:0, length:0 } - } -} diff --git a/src/assembler.rs b/src/assembler.rs deleted file mode 100644 index 692eb14..0000000 --- a/src/assembler.rs +++ /dev/null @@ -1,278 +0,0 @@ -use std::mem::take; -use std::collections::hash_map::Entry; - -use SyntacticTokenType as Syn; -use SemanticTokenType as Sem; -use crate::*; - -use std::collections::HashMap; - -/// The inner value is the index of the token that defines this symbol. -pub enum SymbolDefinition { - Macro(usize), - Label(usize), -} - -pub struct Assembler { - /// The contents of the program as a list of syntactic tokens. - syntactic_tokens: Vec<SyntacticToken>, - /// The contents of the program as a list of semantic tokens. - semantic_tokens: Vec<SemanticToken>, - /// Map the name of each defined symbol to the index of the defining token. - symbol_definitions: HashMap<String, SymbolDefinition>, - /// Map each macro definition token index to a list of syntactic body tokens. - syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>, - /// Map each macro definition token index to a list of semantic body tokens. 
- semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>, -} - -impl Assembler { - pub fn new() -> Self { - Self { - syntactic_tokens: Vec::new(), - semantic_tokens: Vec::new(), - symbol_definitions: HashMap::new(), - syntactic_macro_bodies: HashMap::new(), - semantic_macro_bodies: HashMap::new(), - } - } - - pub fn tokenise_source(&mut self, source_code: &str) { - // The index of the current macro definition token - let mut macro_definition: Option<usize> = None; - let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new(); - - for mut token in TokenIterator::from_str(source_code) { - let next_index = self.syntactic_tokens.len(); - if let Some(index) = macro_definition { - token.use_in_macro_body(); - if token.is_macro_terminator() { - // Commit the current macro definition - macro_definition_body_tokens.push(token); - self.syntactic_macro_bodies.insert( - index, take(&mut macro_definition_body_tokens)); - macro_definition = None; - } else { - macro_definition_body_tokens.push(token); - } - } else { - if let Syn::MacroDefinition(ref name) = token.r#type { - macro_definition = Some(next_index); - match self.symbol_definitions.entry(name.to_string()) { - Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);} - Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));} - } - } else if let Syn::LabelDefinition(ref name) = token.r#type { - match self.symbol_definitions.entry(name.to_string()) { - Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);} - Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));} - } - } else if token.is_macro_terminator() { - token.set_error(Error::OrphanedMacroDefinitionTerminator); - } - self.syntactic_tokens.push(token); - } - } - } - - pub fn resolve_references(&mut self) { - let syntactic_tokens = take(&mut self.syntactic_tokens); - let syntactic_token_count = syntactic_tokens.len(); - let mut parent_label = None; - - for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() { - if let SyntacticTokenType::LabelDefinition(name) = &syntactic_token.r#type { - parent_label = Some(name.to_owned()); - } - let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index, parent_label.clone()); - self.semantic_tokens.push(semantic_token); - } - assert_eq!(syntactic_token_count, self.semantic_tokens.len()); - - // Find all cyclic macros - let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter( - |i| !self.traverse_macro_definition(*i, 0)).collect(); - // Replace each cyclic macro reference in a macro definition with an error - for body_tokens in &mut self.semantic_macro_bodies.values_mut() { - for body_token in body_tokens { - if let Sem::MacroReference(i) = body_token.r#type { - if cyclic_macros.contains(&i) { - let name = body_token.source_location.source.clone(); - body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference); - } - } - } - } - - } - - /// Attempt to recursively traverse the body tokens of a macro definition, returning - /// false if the depth exceeds a preset maximum, and returning true otherwise. 
- fn traverse_macro_definition(&self, index: usize, level: usize) -> bool { - if level == 16 { - false - } else { - self.semantic_macro_bodies[&index].iter().all( - |token| if let Sem::MacroReference(i) = token.r#type { - self.traverse_macro_definition(i, level+1) - } else { - true - } - ) - } - } - - pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) { - let mut bytecode: Vec<u8> = Vec::new(); - // Map each label definition token index to the bytecode addresses of the references - let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new(); - // Map each label and macro definition token to a list of reference token indices - let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new(); - - macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};} - macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};} - macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};} - - let mut semantic_tokens = take(&mut self.semantic_tokens); - - // Translate semantic tokens into bytecode - for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() { - let start_addr = bytecode.len() as u16; - match &mut semantic_token.r#type { - Sem::LabelReference(i) => { - reference_tokens.entry(*i).or_default().push(index); - reference_addresses.entry(*i).or_default().push(start_addr); - push_u16!(0); - } - Sem::MacroReference(i) => { - reference_tokens.entry(*i).or_default().push(index); - self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses); - } - Sem::LabelDefinition(def) => def.address=start_addr, - Sem::MacroDefinition(_) => (), - - Sem::Padding(p) => pad!(*p), - Sem::ByteLiteral(b) => push_u8!(*b), - Sem::ShortLiteral(s) => push_u16!(*s), - Sem::Instruction(b) => push_u8!(*b), - - Sem::MacroDefinitionTerminator => unreachable!(), - Sem::Comment => (), - Sem::Error(..) 
=> (), - }; - let end_addr = bytecode.len() as u16; - semantic_token.bytecode_location.start = start_addr; - semantic_token.bytecode_location.length = end_addr - start_addr; - } - - // Fill each label reference with the address of the matching label definition - for (index, slots) in reference_addresses { - if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type { - let [h,l] = definition.address.to_be_bytes(); - for slot in slots { - bytecode[slot as usize] = h; - bytecode[slot.wrapping_add(1) as usize] = l; - } - } else { unreachable!() } - } - - // Move references and macro body tokens into label and macro definition tokens - for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() { - if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type { - definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap(); - if let Some(references) = reference_tokens.remove(&index) { - definition.references = references; - } - } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type { - if let Some(references) = reference_tokens.remove(&index) { - definition.references = references; - } - } - } - assert_eq!(reference_tokens.len(), 0); - - // Remove trailing null bytes from the bytecode - if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) { - let truncated_length = final_nonnull_byte + 1; - let removed_byte_count = bytecode.len() - truncated_length; - if removed_byte_count > 0 { - bytecode.truncate(truncated_length); - } - } - - (bytecode, semantic_tokens) - } - - fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize, parent_label: Option<String>) -> SemanticToken { - SemanticToken { - r#type: { - if let Some(err) = syn_token.error { - Sem::Error(syn_token.r#type, err) - } else { - match syn_token.r#type { - Syn::Reference(ref name) => { - match self.symbol_definitions.get(name) { - Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i), - Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i), - None => Sem::Error(syn_token.r#type, Error::UnresolvedReference), - } - } - Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))}, - Syn::MacroDefinition(name) => { - let mut sem_body_tokens = Vec::new(); - for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() { - // Make the source location of the macro definition token span the entire definition - if syn_body_token.is_macro_terminator() { - syn_token.source_location.end = syn_body_token.source_location.start; - } - let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0, parent_label.clone()); - sem_body_tokens.push(sem_body_token); - } - self.semantic_macro_bodies.insert(index, sem_body_tokens); - Sem::MacroDefinition(MacroDefinition::new(name)) - }, - Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator, - Syn::Padding(v) => Sem::Padding(v), - Syn::ByteLiteral(v) => Sem::ByteLiteral(v), - Syn::ShortLiteral(v) => Sem::ShortLiteral(v), - Syn::Instruction(v) => Sem::Instruction(v), - Syn::Comment => Sem::Comment, - } - } - }, - source_location: syn_token.source_location, - bytecode_location: BytecodeLocation::zero(), - parent_label, - } - } - - fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) { - macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};} - macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};} - macro_rules! 
pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};} - - for body_token in self.semantic_macro_bodies.get(&index).unwrap() { - let start_addr = bytecode.len() as u16; - match &body_token.r#type { - Sem::LabelReference(i) => { - reference_addresses.entry(*i).or_default().push(start_addr); - push_u16!(0u16); - }, - Sem::MacroReference(i) => { - self.expand_macro_reference(*i, bytecode, reference_addresses); - }, - Sem::LabelDefinition(_) => unreachable!(), - Sem::MacroDefinition(_) => unreachable!(), - - Sem::Padding(p) => pad!(*p), - Sem::ByteLiteral(b) => push_u8!(*b), - Sem::ShortLiteral(s) => push_u16!(*s), - Sem::Instruction(b) => push_u8!(*b), - - Sem::MacroDefinitionTerminator => (), - Sem::Comment => (), - Sem::Error(..) => (), - }; - } - } -} diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs new file mode 100644 index 0000000..2a29ee3 --- /dev/null +++ b/src/bin/bedrock-asm.rs @@ -0,0 +1,148 @@ +use bedrock_asm::*; + +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + + +static mut VERBOSE: bool = false; + +macro_rules! verbose { + ($($tokens:tt)*) => { if unsafe { VERBOSE } { + eprint!("[INFO] "); eprintln!($($tokens)*); + } }; +} +macro_rules! error { + ($($tokens:tt)*) => {{ + eprint!("[ERROR] "); eprintln!($($tokens)*); std::process::exit(1); + }}; +} + + +fn main() { + let args = Arguments::from_env_or_exit(); + + // ----------------------------------------------------------------------- + // RESOLVE syntactic symbols + let ext = args.ext.unwrap_or(String::from("brc")); + let mut resolver = if let Some(path) = &args.source { + match SourceUnit::from_path(&path, &ext) { + Ok(source_unit) => SymbolResolver::from_source_unit(source_unit), + Err(err) => match err { + ParseError::InvalidExtension => error!( + "File {path:?} has invalid extension, must be '.{ext}'"), + ParseError::NotFound => error!( + "File {path:?} was not found"), + ParseError::InvalidUtf8 => error!( + "File {path:?} does not contain valid UTF-8 text"), + ParseError::NotReadable => error!( + "File {path:?} is not readable"), + ParseError::IsADirectory => error!( + "File {path:?} is a directory"), + ParseError::Unknown => error!( + "Unknown error while attempting to read from {path:?}") + } + } + } else { + let mut source_code = String::new(); + verbose!("Reading program source from standard input"); + if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { + eprintln!("Could not read from standard input, exiting."); + eprintln!("({err:?})"); + std::process::exit(1); + } + let path = "<standard input>"; + let source_unit = SourceUnit::from_source_code(source_code, path); + SymbolResolver::from_source_unit(source_unit) + }; + // Load project libraries. + if let Some(path) = &args.source { + if !args.no_libs && !args.no_project_libs { + let project_library = gather_project_libraries(path, &ext); + resolver.add_library_units(project_library); + } + } + // Load environment libraries. 
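+ // These come from the colon-separated BEDROCK_LIBS search path and are + // skipped when either --no-libs or --no-env-libs is passed.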
+ if !args.no_libs && !args.no_env_libs { + for env_library in gather_environment_libraries(&ext) { + resolver.add_library_units(env_library); + } + } + resolver.resolve(); + + // ----------------------------------------------------------------------- + // PRINT information, generate merged source code + if args.tree { + print_source_tree(&resolver); + } + if print_resolver_errors(&resolver) { + std::process::exit(1); + }; + let merged_source = resolver.get_merged_source_code(); + if args.resolve { + write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref()); + } + + // ----------------------------------------------------------------------- + // PARSE semantic tokens from merged source code + let path = Some("<merged source>"); + let mut semantic_tokens = generate_semantic_tokens(&merged_source, path); + if print_semantic_errors(&semantic_tokens, &merged_source) { + std::process::exit(1); + }; + + // ----------------------------------------------------------------------- + // GENERATE symbols file and bytecode + let bytecode = generate_bytecode(&mut semantic_tokens); + // let symbols = generate_symbols_file(&semantic_tokens); + write_bytes_and_exit(&bytecode, args.output.as_ref()); +} + + +fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { + if let Some(path) = path { + if let Err(err) = std::fs::write(path, bytes) { + eprintln!("Could not write to path {:?}, exiting.", path.as_ref()); + eprintln!("({err:?})"); + std::process::exit(1); + } + } else { + if let Err(err) = std::io::stdout().write_all(bytes) { + eprintln!("Could not write to standard output, exiting."); + eprintln!("({err:?})"); + std::process::exit(1); + } + } + std::process::exit(0); +} + + +xflags::xflags! { + cmd arguments { + /// Print additional debug information. + optional --verbose + /// Print the assembler version and exit. + optional --version + + + /// Bedrock source code file to assemble. + optional source: PathBuf + /// Destination path for assembler output. + optional output: PathBuf + /// File extension to identify source files. + optional ext: String + + /// Don't include libraries or resolve references. + optional --no-libs + /// Don't include project libraries. + optional --no-project-libs + /// Don't include environment libraries. + optional --no-env-libs + + /// Show the resolved source file hierarchy. + optional --tree + /// Assemble the program without saving any output. + optional --check + /// Only return resolved source code. + optional --resolve + } +} diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index 8a6c0d6..0000000 --- a/src/error.rs +++ /dev/null @@ -1,10 +0,0 @@ -#[derive(Clone)] -pub enum Error { - UnresolvedReference, - DuplicateDefinition, - InvalidPaddingValue, - InvalidTypeInMacroDefinition, - OrphanedMacroDefinitionTerminator, - CyclicMacroReference, -} - diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs new file mode 100644 index 0000000..0b5d2a6 --- /dev/null +++ b/src/gather_libraries.rs @@ -0,0 +1,185 @@ +use crate::*; + +use vagabond::*; + + +/// Gather all library units from the given path. +pub fn gather_project_libraries(path: &Path, extension: &str) -> Vec<SourceUnit> { + match path.parent() { + Some(parent_path) => gather_source_units(parent_path, extension), + None => Vec::new(), + } +} + + +/// Gather all library units from the paths specified in an environment variable.
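+/// The variable is BEDROCK_LIBS, read as a colon-separated list of directory +/// paths, e.g. BEDROCK_LIBS="./libs:/usr/share/bedrock" (illustrative value).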
+pub fn gather_environment_libraries(extension: &str) -> Vec<Vec<SourceUnit>> { + let mut environment_libraries = Vec::new(); + if let Ok(lib_var) = std::env::var("BEDROCK_LIBS") { + for path_str in lib_var.split(":") { + let lib_path = PathBuf::from(path_str); + let source_units = gather_source_units(&lib_path, extension); + if !source_units.is_empty() { + environment_libraries.push(source_units); + } + } + }; + return environment_libraries; +} + + +/// Gather all source units at or descended from the given entry. +fn gather_source_units(path: &Path, extension: &str) -> Vec<SourceUnit> { + let mut source_units = Vec::new(); + if let Ok(entry) = Entry::from_path(path) { + match entry.entry_type { + EntryType::File => { + if let Ok(source) = SourceUnit::from_path(entry.path, extension) { + source_units.push(source); + } + } + EntryType::Directory => { + if let Ok(entries) = traverse_directory(entry.path) { + for entry in entries { + if let Ok(source) = SourceUnit::from_path(entry.path, extension) { + source_units.push(source); + } + } + } + } + } + }; + return source_units; +} + + +pub struct SourceUnit { + pub main: SourceFile, + pub head: Option<SourceFile>, + pub tail: Option<SourceFile>, +} + + +impl SourceUnit { + /// Load from a source file and an associated head and tail file. + pub fn from_path<P: Into<PathBuf>>(path: P, extension: &str) -> Result<Self, ParseError> { + let main_path = canonicalize_path(path); + let main_path_str = main_path.as_os_str().to_string_lossy().to_string(); + let head_extension = format!("head.{extension}"); + let tail_extension = format!("tail.{extension}"); + let is_head = main_path_str.ends_with(&head_extension); + let is_tail = main_path_str.ends_with(&tail_extension); + let is_not_main = !main_path_str.ends_with(extension); + if is_not_main || is_head || is_tail { return Err(ParseError::InvalidExtension); } + + let symbols = parse_symbols_from_file(&main_path)?; + let head_path = main_path.with_extension(head_extension); + let tail_path = main_path.with_extension(tail_extension); + + let main = SourceFile { path: main_path, symbols }; + let head = match parse_symbols_from_file(&head_path) { + Ok(symbols) => Some(SourceFile { path: head_path, symbols }), + Err(_) => None, + }; + let tail = match parse_symbols_from_file(&tail_path) { + Ok(symbols) => Some(SourceFile { path: tail_path, symbols }), + Err(_) => None, + }; + Ok( SourceUnit { main, head, tail } ) + } + + /// Load from a string of source code. + pub fn from_source_code<P: Into<PathBuf>>(source_code: String, path: P) -> Self { + let path = canonicalize_path(path); + let symbols = parse_symbols_from_source(source_code, Some(&path)); + Self { + main: SourceFile { path, symbols }, + head: None, + tail: None, + } + } +} + + +/// Read and parse all symbols from a source file. +fn parse_symbols_from_file(path: &Path) -> Result<Symbols, ParseError> { + let source = read_source_from_file(path)?; + Ok(parse_symbols_from_source(source, Some(path))) +} + + +/// Parse all symbols from a source code string. 
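+/// Only definition and reference names are collected at this stage; full +/// semantic parsing happens later, over the merged source code.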
+fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbols { + use SyntacticTokenVariant as SynVar; + + let token_iter = SyntacticParser::from_source_code(&source_code, path); + let mut definitions = Vec::new(); + let mut references = Vec::new(); + + for token in token_iter { + match token.variant { + SynVar::LabelDefinition(name) => { + definitions.push(Symbol { name, source: token.source }); + }, + SynVar::MacroDefinition(name) => { + definitions.push(Symbol { name, source: token.source }); + } + SynVar::Symbol(name) => { + references.push(Symbol { name, source: token.source }); + }, + _ => (), + } + } + + Symbols { + definitions: Some(definitions), + references: Some(references), + source_code, + } +} + + +/// Attempt to read program source from a file. +pub fn read_source_from_file(path: &Path) -> Result<String, ParseError> { + match std::fs::read(&path) { + Ok(bytes) => match String::from_utf8(bytes) { + Ok(source) => Ok(source), + Err(_) => return Err(ParseError::InvalidUtf8), + } + Err(err) => return Err( match err.kind() { + std::io::ErrorKind::NotFound => ParseError::NotFound, + std::io::ErrorKind::PermissionDenied => ParseError::NotReadable, + std::io::ErrorKind::IsADirectory => ParseError::IsADirectory, + _ => ParseError::Unknown, + } ) + } +} + + +fn canonicalize_path<P: Into<PathBuf>>(path: P) -> PathBuf { + let pathbuf = path.into(); + match pathbuf.canonicalize() { + Ok(canonical) => canonical, + Err(_) => pathbuf, + } +} + + + +pub struct SourceFile { + pub path: PathBuf, + pub symbols: Symbols, +} + + +pub struct Symbols { + pub definitions: Option<Vec<Symbol>>, + pub references: Option<Vec<Symbol>>, + pub source_code: String, +} + + +pub struct Symbol { + pub name: String, + pub source: SourceSpan, +} @@ -1,21 +1,20 @@ -mod addressing; -mod syntactic_token; -mod semantic_token; -mod tokenizer; -mod error; -mod assembler; +#![feature(io_error_more)] +#![feature(map_try_insert)] -pub use addressing::{CharAddress, SourceLocation, BytecodeLocation}; -pub use syntactic_token::{SyntacticToken, SyntacticTokenType}; -pub use semantic_token::{SemanticToken, SemanticTokenType, LabelDefinition, MacroDefinition}; -pub use error::Error; -pub use tokenizer::TokenIterator; -pub use assembler::Assembler; -pub fn assemble(source_code: &str) -> (Vec<u8>, Vec<SemanticToken>) { - let mut assembler = Assembler::new(); - assembler.tokenise_source(source_code); - assembler.resolve_references(); - assembler.generate_bytecode() -} +mod gather_libraries; +mod symbol_resolver; +pub use gather_libraries::*; +pub use symbol_resolver::*; + +mod locators; +mod tokens; +mod translators; + +pub use locators::*; +pub use tokens::*; +pub use translators::*; + +mod print; +pub use print::*; diff --git a/src/locators.rs b/src/locators.rs new file mode 100644 index 0000000..b7db1ee --- /dev/null +++ b/src/locators.rs @@ -0,0 +1,5 @@ +mod bytecode; +mod source; + +pub use bytecode::*; +pub use source::*; diff --git a/src/locators/bytecode.rs b/src/locators/bytecode.rs new file mode 100644 index 0000000..500e9f0 --- /dev/null +++ b/src/locators/bytecode.rs @@ -0,0 +1,39 @@ +pub struct BytecodeSpan { + /// The location of this span in the assembled bytecode. + pub location: BytecodeLocation, + /// The bytes which this span represents. 
+ pub bytes: Vec<u8>, +} + + +impl Default for BytecodeSpan { + fn default() -> Self { + Self { + location: BytecodeLocation { + address: 0, + length: 0, + }, + bytes: Vec::new(), + } + } +} + + +#[derive(Clone, Copy)] +pub struct BytecodeLocation { + // Address of the first byte. + pub address: usize, + // Length as a number of bytes. + pub length: usize, +} + + +impl std::fmt::Display for BytecodeLocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "0x{:>04x}", self.address)?; + if self.length > 0 { + write!(f, "-0x{:>04x}", self.address + self.length)?; + } + Ok(()) + } +} diff --git a/src/locators/source.rs b/src/locators/source.rs new file mode 100644 index 0000000..2f10bd9 --- /dev/null +++ b/src/locators/source.rs @@ -0,0 +1,69 @@ +use std::path::PathBuf; + + +#[derive(Clone)] +pub struct SourceSpan { + /// The source characters which this span represents. + pub string: String, + /// The location of this span in the merged source file. + pub in_merged: SourceLocation, + /// The location of this span in the original source file. + pub in_source: Option<SourceLocation>, +} + + +#[derive(Clone)] +pub struct SourceLocation { + /// File path the source was loaded from. + pub path: Option<PathBuf>, + /// Position of the first character of the string. + pub start: Position, + /// Position of the final character of the string. + pub end: Position, +} + +impl std::fmt::Display for SourceLocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let y = self.start.line + 1; + let x = self.start.column + 1; + match &self.path { + Some(path) => write!(f, "{}:{y}:{x}", path.as_os_str().to_string_lossy()), + None => write!(f, "<unknown>:{y}:{x}"), + } + } +} + + +#[derive(Clone, Copy)] +pub struct Position { + /// The number of lines that precede this line in the file. + pub line: usize, + /// The number of characters that precede this character in the line. 
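+    /// Lines and columns are zero-based; the Display impls render them one-based.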
+ pub column: usize, +} + +impl Position { + pub fn to_next_char(&mut self) { + self.column += 1; + } + + pub fn to_next_line(&mut self) { + self.line += 1; + self.column = 0; + } + + pub fn advance(&mut self, c: char) { + match c { + '\n' => self.to_next_line(), + _ => self.to_next_char(), + } + } +} + +impl std::fmt::Display for Position { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let y = self.line + 1; + let x = self.column + 1; + write!(f, "{y}:{x}") + } +} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 11ce42b..0000000 --- a/src/main.rs +++ /dev/null @@ -1,43 +0,0 @@ -use std::io::{Read, Write}; -use bedrock_asm::*; - -fn main() { - // Read source code from standard input - let mut source_code = String::new(); - if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { - eprintln!("Could not read from standard input, quitting."); - eprintln!("({err:?})"); - std::process::exit(1); - }; - - let (bytecode, tokens) = assemble(&source_code); - let mut is_error = false; - for token in &tokens { - if token.print_error(&source_code) { is_error = true }; - } - if !is_error { - for token in &tokens { - if let SemanticTokenType::LabelDefinition(def) = &token.r#type { - if def.references.is_empty() { - eprintln!("Unused label definition: {}", def.name); - } - } - } - eprintln!(); - } - - let byte_count = bytecode.len(); - let byte_percentage = (byte_count as f32 / 65536.0 * 100.0).round() as u16; - eprintln!("Assembled program in {byte_count} bytes ({byte_percentage}% of maximum)."); - - if is_error { - std::process::exit(1) - } - - // Write bytecode to standard output - if let Err(_) = std::io::stdout().write_all(&bytecode) { - eprintln!("Could not write to standard output, quitting."); - std::process::exit(1); - } -} - diff --git a/src/print.rs b/src/print.rs new file mode 100644 index 0000000..7f49db2 --- /dev/null +++ b/src/print.rs @@ -0,0 +1,237 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; +use SemanticParseError as SemErr; +use SyntacticParseError as SynErr; + + +const NORMAL: &str = "\x1b[0m"; +const BOLD: &str = "\x1b[1m"; +const DIM: &str = "\x1b[2m"; +const WHITE: &str = "\x1b[37m"; +const RED: &str = "\x1b[31m"; +const YELLOW: &str = "\x1b[33m"; +const BLUE: &str = "\x1b[34m"; + + +pub struct Context<'a> { + pub source_code: &'a str, + pub source: &'a SourceSpan, +} + + +/// Print all errors found in the semantic tokens, including those inside macro +/// definitions. Returns true if at least one error was printed. 
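+/// Callers are expected to exit with a non-zero status when this returns true.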
+pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &str) -> bool { + let mut found_error = false; + for semantic_token in semantic_tokens { + match &semantic_token.variant { + SemVar::Error(err) => { + let context = Context { + source_code: source_code, + source: &semantic_token.source, + }; + found_error = true; + print_semantic_error(&err, context) + } + SemVar::MacroDefinition(definition) => { + for body_token in &definition.body_tokens { + if let SemVar::Error(err) = &body_token.variant { + let context = Context { + source_code: source_code, + source: &body_token.source, + }; + found_error = true; + print_semantic_error(err, context) + } + } + } + _ => (), + } + } + return found_error; +} + +fn print_semantic_error(error: &SemanticParseError, context: Context) { + let message = get_message_for_semantic_error(error); + print_error(&message, context); +} + +fn get_message_for_semantic_error(error: &SemanticParseError) -> String { + match error { + SemErr::LabelDefinitionInMacroDefinition => + format!("Labels cannot be defined inside a macro"), + SemErr::MacroDefinitionInMacroDefinition => + format!("Macros cannot be defined inside a macro"), + SemErr::StrayMacroTerminator => + format!("Macro definition terminator is missing a macro definition"), + SemErr::StrayBlockClose => + format!("Block was not opened, add a '{{' character to open"), + SemErr::UnclosedBlock => + format!("Block was not closed, add a '}}' character to close"), + SemErr::UndefinedSymbol(name) => + format!("Undefined symbol, no label or macro has been defined with the name '{name}'"), + SemErr::RedefinedSymbol((_, source)) => { + let location = source.in_source.as_ref().unwrap_or(&source.in_merged); + format!("Redefined symbol, first defined at {location}") + } + SemErr::MacroInvocationBeforeDefinition((_, source)) => { + let location = source.in_source.as_ref().unwrap_or(&source.in_merged); + format!("Macro used before definition, definition is at {location}") + } + SemErr::SyntaxError(syntax_error) => match syntax_error { + SynErr::UnterminatedComment => + format!("Unclosed comment, add a ')' character to close"), + SynErr::UnterminatedRawString => + format!("Unclosed string, add a ' character to close"), + SynErr::UnterminatedNullString => + format!("Unclosed string, add a \" character to close"), + SynErr::InvalidPaddingValue(_) => + format!("Padding value must be two or four hexadecimal digits"), + } + } +} + + +pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool { + let mut found_error = false; + for reference in &resolver.unresolved { + found_error = true; + let message = format!( + "Undefined symbol, no label or macro has been defined with the name '{}'", + &reference.symbol.source.string, + ); + let source_code = resolver.get_source_code_for_tracked_symbol(reference); + let source = &reference.symbol.source; + print_error(&message, Context { source_code, source } ) + } + for redefinition in &resolver.redefinitions { + found_error = true; + let definition = resolver.definitions.get(redefinition.1).unwrap(); + let message = format!( + "Redefined symbol, first defined at {}", + &definition.symbol.source.in_merged, + ); + let source_code = resolver.get_source_code_for_tracked_symbol(&redefinition.0); + let source = &redefinition.0.symbol.source; + print_error(&message, Context { source_code, source } ) + } + return found_error; +} + + + +pub fn print_error(message: &str, context: Context) { + print_source_issue(message, context, SourceIssueVariant::Error); +} + +pub fn
print_warning(message: &str, context: Context) { + print_source_issue(message, context, SourceIssueVariant::Warning); +} + +fn print_source_issue(message: &str, context: Context, variant: SourceIssueVariant) { + let (label, colour) = match variant { + SourceIssueVariant::Warning => ("WARNING", YELLOW), + SourceIssueVariant::Error => ("ERROR", RED), + }; + + // Prepare variables. + let location = &context.source.in_merged; + let digits = location.start.line.to_string().len(); + let y = location.start.line + 1; + let arrow = "-->"; + let space = " "; + + // Print message and file path. + eprintln!("{BOLD}{colour}[{label}]{WHITE}: {message}{NORMAL}"); + eprintln!("{BLUE}{arrow:>w$}{NORMAL} {location}{NORMAL}", w=digits+3); + if let Some(source) = &context.source.in_source { + eprintln!("{BLUE}{arrow:>w$}{NORMAL} {source}{NORMAL}", w=digits+3); + } + + let start = location.start.column; + let end = location.end.column + 1; + + // Print source code line. + eprint!("{BLUE} {y} | {NORMAL}"); + let line = get_line_from_source_code(context.source_code, location.start.line); + for (i, c) in line.chars().enumerate() { + if i == start { eprint!("{colour}") } + if i == end { eprint!("{NORMAL}") } + eprint!("{c}"); + } + eprintln!("{NORMAL}"); + + // Print source code underline. + eprint!("{BLUE} {space:>w$} | {NORMAL}", w=digits); + for _ in 0..start { eprint!(" "); } + eprint!("{colour}"); + for _ in start..end { eprint!("^"); } + eprintln!("{NORMAL}"); +} + + +fn get_line_from_source_code(source_code: &str, line: usize) -> &str { + source_code.split('\n').nth(line).unwrap_or("<error reading line from source>") +} + + +enum SourceIssueVariant { + Warning, + Error, +} + + +/// Print a tree containing the name and path of each source unit. +pub fn print_source_tree(resolver: &SymbolResolver) { + eprintln!("."); + let len = resolver.root_unit_ids.len(); + for (i, id) in resolver.root_unit_ids.iter().enumerate() { + let end = i + 1 == len; + print_source_tree_leaf(resolver, *id, Vec::new(), end); + } + eprintln!(); +} + +fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec<bool>, end: bool) { + // A level entry is true if all entries in that level have been printed. + for level in &levels { + match level { + false => eprint!("│ "), + true => eprint!(" "), + } + } + // The end value is true if all siblings of this entry have been printed. 
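+    // An entry with remaining siblings gets a '├──' connector, the last entry a '└──'.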
+ match end { + false => eprint!("├── "), + true => eprint!("└── "), + } + if let Some(unit) = resolver.source_units.get(id) { + let path = &unit.source_unit.main.path; + let path_str = path.as_os_str().to_string_lossy(); + if let Some(name) = path.file_name() { + let name_str = name.to_string_lossy(); + eprint!("{name_str}{BLUE}"); + if unit.source_unit.head.is_some() { eprint!(" +head") } + if unit.source_unit.tail.is_some() { eprint!(" +tail") } + let mut unresolved = 0; + for symbol in &resolver.unresolved { + if symbol.source_id == id { unresolved += 1; } + } + if unresolved > 0 { eprint!("{RED} ({unresolved})"); } + eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}"); + } else { + eprintln!("{path_str}"); + }; + levels.push(end); + let len = unit.child_ids.len(); + for (i, id) in unit.child_ids.iter().enumerate() { + let end = i + 1 == len; + print_source_tree_leaf(resolver, *id, levels.clone(), end); + } + } else { + eprintln!("<error loading source unit details>"); + } +} + + diff --git a/src/semantic_token.rs b/src/semantic_token.rs deleted file mode 100644 index 265db91..0000000 --- a/src/semantic_token.rs +++ /dev/null @@ -1,116 +0,0 @@ -use crate::*; - -pub enum SemanticTokenType { - LabelReference(usize), - MacroReference(usize), - - LabelDefinition(LabelDefinition), - MacroDefinition(MacroDefinition), - - Padding(u16), - ByteLiteral(u8), - ShortLiteral(u16), - Instruction(u8), - - MacroDefinitionTerminator, - Comment, - Error(SyntacticTokenType, Error), -} - -pub struct SemanticToken { - pub r#type: SemanticTokenType, - pub source_location: SourceLocation, - pub bytecode_location: BytecodeLocation, - pub parent_label: Option<String>, -} - -impl SemanticToken { - /// Returns true if an error was printed. - pub fn print_error(&self, source_code: &str) -> bool { - let mut is_error = false; - macro_rules! red {()=>{eprint!("\x1b[31m")};} - macro_rules! dim {()=>{eprint!("\x1b[0;2m")};} - macro_rules! 
normal {()=>{eprint!("\x1b[0m")};} - - if let SemanticTokenType::Error(token, error) = &self.r#type { - is_error = true; - - red!(); eprint!("[ERROR] "); normal!(); - let source = &self.source_location.source; - match error { - Error::UnresolvedReference => { - eprintln!("Unresolved reference, no label or macro has been defined with the name '{source}'") } - Error::DuplicateDefinition => { - eprintln!("Duplicate definition, a label or macro has already been defined with the name '{source}'") } - Error::OrphanedMacroDefinitionTerminator => { - eprintln!("Unmatched macro definition terminator, no macro definition is in progress") } - Error::InvalidPaddingValue => { - eprintln!("Invalid value for padding, the value must be at least one and at most four hexadecimal characters") } - Error::CyclicMacroReference => { - eprintln!("Cyclic macro reference, this macro reference contains a reference to the macro being defined") } - Error::InvalidTypeInMacroDefinition => { - let name = match token { - SyntacticTokenType::Reference(_) => "references", - SyntacticTokenType::LabelDefinition(_) => "label definitions", - SyntacticTokenType::MacroDefinition(_) => "macro definitions", - SyntacticTokenType::MacroDefinitionTerminator => "macro definition terminators", - SyntacticTokenType::Padding(_) => "padding", - SyntacticTokenType::ByteLiteral(_) => "byte literals", - SyntacticTokenType::ShortLiteral(_) => "short literals", - SyntacticTokenType::Instruction(_) => "instructions", - SyntacticTokenType::Comment => "comments", - }; - eprintln!("Invalid token in macro definition, macro definitions are not allowed to contain {name}") } - } - - if let Some(label) = &self.parent_label { - eprint!(" ... "); red!(); eprint!("| "); dim!(); eprintln!("@{label} "); normal!(); - } - - let line = source_code.split('\n').nth(self.source_location.start.line).unwrap(); - eprint!("{:>5} ", self.source_location.start.line+1); - red!(); eprint!("| "); normal!(); - for (i, c) in line.chars().enumerate() { - if i == self.source_location.start.column { red!() } - eprint!("{c}"); - if i == self.source_location.end.column { normal!() } - } - eprintln!(); red!(); eprint!(" | "); - for i in 0..=self.source_location.end.column { - if i < self.source_location.start.column { eprint!(" ") } else { eprint!("^") }; - } - normal!(); eprintln!(); - } - else if let SemanticTokenType::MacroDefinition(definition) = &self.r#type { - for token in &definition.body_tokens { - if token.print_error(source_code) { is_error = true } - } - } - is_error - } -} - -pub struct LabelDefinition { - pub name: String, - pub address: u16, - /// A list of pointers to label reference tokens - pub references: Vec<usize>, -} -impl LabelDefinition { - pub fn new(name: String) -> Self { - Self { name, address:0, references:Vec::new() } - } -} - -pub struct MacroDefinition { - pub name: String, - pub body_tokens: Vec<SemanticToken>, - /// A list of pointers to macro reference tokens - pub references: Vec<usize>, -} -impl MacroDefinition { - pub fn new(name: String) -> Self { - Self { name, body_tokens:Vec::new(), references:Vec::new() } - } -} - diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs new file mode 100644 index 0000000..cced994 --- /dev/null +++ b/src/symbol_resolver.rs @@ -0,0 +1,230 @@ +use crate::*; + +use std::mem::take; + + +/// Resolve symbol references across source units. +pub struct SymbolResolver { + pub definitions: Vec<TrackedSymbol>, + pub unresolved: Vec<TrackedSymbol>, + /// Contains the ID of the owner of the original definition. 
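+    /// Each entry pairs the redefining symbol with that original owner's unit ID.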
+ pub redefinitions: Vec<(TrackedSymbol, usize)>, + pub source_units: Vec<HeirarchicalSourceUnit>, + pub root_unit_ids: Vec<usize>, + pub unused_library_units: Vec<SourceUnit>, +} + + +impl SymbolResolver { + /// Construct a resolver from a root source unit. + pub fn from_source_unit(source_unit: SourceUnit) -> Self { + let mut new = Self { + definitions: Vec::new(), + unresolved: Vec::new(), + redefinitions: Vec::new(), + source_units: Vec::new(), + root_unit_ids: Vec::new(), + unused_library_units: Vec::new(), + }; + new.add_source_unit(source_unit, None); + return new; + } + + pub fn add_library_units(&mut self, mut source_units: Vec<SourceUnit>) { + self.unused_library_units.append(&mut source_units); + } + + pub fn resolve(&mut self) { + // Repeatedly test if any unused source unit resolves an unresolved symbol, + // breaking the loop when no new resolutions are found. + 'outer: loop { + for (i, source_unit) in self.unused_library_units.iter().enumerate() { + if let Some(id) = self.resolves_reference(&source_unit) { + let source_unit = self.unused_library_units.remove(i); + self.add_source_unit(source_unit, Some(id)); + continue 'outer; + } + } + break; + } + } + + /// Add a source unit to the resolver and link it to a parent unit. + pub fn add_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { + let source_id = self.source_units.len(); + + // Add all main symbols. + if let Some(definitions) = take(&mut source_unit.main.symbols.definitions) { + self.add_definitions(definitions, source_id, SourceRole::Main); } + if let Some(references) = take(&mut source_unit.main.symbols.references) { + self.add_references(references, source_id, SourceRole::Main); } + + // Add all head symbols. + if let Some(head) = &mut source_unit.head { + if let Some(references) = take(&mut head.symbols.references) { + self.add_references(references, source_id, SourceRole::Head); } + if let Some(definitions) = take(&mut head.symbols.definitions) { + self.add_definitions(definitions, source_id, SourceRole::Head); } + } + + // Add all tail symbols. 
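+        // As with the head, references are added before definitions.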
+ if let Some(tail) = &mut source_unit.tail { + if let Some(references) = take(&mut tail.symbols.references) { + self.add_references(references, source_id, SourceRole::Tail); } + if let Some(definitions) = take(&mut tail.symbols.definitions) { + self.add_definitions(definitions, source_id, SourceRole::Tail); } + } + + if let Some(parent_id) = parent_id { + if let Some(parent_unit) = self.source_units.get_mut(parent_id) { + parent_unit.child_ids.push(source_id); + } + } else { + self.root_unit_ids.push(source_id); + } + + let source_unit = HeirarchicalSourceUnit { source_unit, child_ids: Vec::new() }; + self.source_units.push(source_unit); + } + + fn add_references(&mut self, references: Vec<Symbol>, source_id: usize, source_role: SourceRole) { + for symbol in references { + let reference = TrackedSymbol { symbol, source_id, source_role }; + if !self.definitions.contains(&reference) { + self.unresolved.push(reference); + } + } + } + + fn add_definitions(&mut self, definitions: Vec<Symbol>, source_id: usize, source_role: SourceRole) { + for symbol in definitions { + let predicate = |d: &&TrackedSymbol| { &d.symbol.name == &symbol.name }; + if let Some(def) = self.definitions.iter().find(predicate) { + let definition = TrackedSymbol { symbol, source_id, source_role }; + let redefinition = (definition, def.source_id); + self.redefinitions.push(redefinition); + } else { + self.unresolved.retain(|s| s.symbol.name != symbol.name); + let definition = TrackedSymbol { symbol, source_id, source_role }; + self.definitions.push(definition); + } + } + } + + /// Returns the ID of the owner of a symbol resolved by this unit. + pub fn resolves_reference(&self, source_unit: &SourceUnit) -> Option<usize> { + if let Some(definitions) = &source_unit.main.symbols.definitions { + if let Some(id) = self.source_id_of_unresolved(&definitions) { + return Some(id); + } + } + if let Some(head) = &source_unit.head { + if let Some(definitions) = &head.symbols.definitions { + if let Some(id) = self.source_id_of_unresolved(&definitions) { + return Some(id); + } + } + } + if let Some(tail) = &source_unit.tail { + if let Some(definitions) = &tail.symbols.definitions { + if let Some(id) = self.source_id_of_unresolved(&definitions) { + return Some(id); + } + } + } + return None; + } + + /// Returns the ID of the owner of a reference to one of these symbols. + fn source_id_of_unresolved(&self, symbols: &[Symbol]) -> Option<usize> { + for symbol in symbols { + let opt = self.unresolved.iter().find(|s| s.symbol.name == symbol.name); + if let Some(unresolved) = opt { + return Some(unresolved.source_id); + } + } + return None; + } + + pub fn get_source_code_for_tracked_symbol(&self, symbol: &TrackedSymbol) -> &str { + let source_unit = &self.source_units[symbol.source_id].source_unit; + match symbol.source_role { + SourceRole::Main => source_unit.main.symbols.source_code.as_str(), + SourceRole::Head => match &source_unit.head { + Some(head) => head.symbols.source_code.as_str(), + None => unreachable!("Failed to find source for token"), + } + SourceRole::Tail => match &source_unit.tail { + Some(tail) => tail.symbols.source_code.as_str(), + None => unreachable!("Failed to find source for token"), + } + } + } + + /// Create a source file by concatenating all source units. + pub fn get_merged_source_code(&self) -> String { + // The first source unit is guaranteed to be the root unit, so we can + // just push source files in their current order. + let mut source_code = String::new(); + + // Push head source code. 
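+        // Iterating in reverse places library heads before the root unit's head.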
+ for source_unit in self.source_units.iter().rev() { + if let Some(head) = &source_unit.source_unit.head { + push_source_code_to_string(&mut source_code, head); + } + } + // Push main source code. + for source_unit in self.source_units.iter() { + push_source_code_to_string(&mut source_code, &source_unit.source_unit.main); + } + // Push tail source code. + for source_unit in self.source_units.iter().rev() { + if let Some(tail) = &source_unit.source_unit.tail { + push_source_code_to_string(&mut source_code, tail); + } + } + return source_code; + } +} + + +fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) { + // Ensure that sections are separated by two newlines. + if !string.is_empty() { + if !string.ends_with('\n') { string.push('\n'); } + if !string.ends_with("\n\n") { string.push('\n'); } + } + // Write a path comment to the string. + let path_str = source_file.path.as_os_str().to_string_lossy(); + let path_comment = format!("(: {path_str} )\n"); + string.push_str(&path_comment); + string.push_str(&source_file.symbols.source_code); +} + + +pub struct HeirarchicalSourceUnit { + pub source_unit: SourceUnit, + pub child_ids: Vec<usize>, +} + + +pub struct TrackedSymbol { + pub symbol: Symbol, + pub source_id: usize, + pub source_role: SourceRole, +} + + +#[derive(Clone, Copy)] +pub enum SourceRole { + Main, + Head, + Tail, +} + + +impl PartialEq for TrackedSymbol { + fn eq(&self, other: &TrackedSymbol) -> bool { + self.symbol.name.eq(&other.symbol.name) + } +} diff --git a/src/syntactic_token.rs b/src/syntactic_token.rs deleted file mode 100644 index 4a50e8a..0000000 --- a/src/syntactic_token.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::*; - -pub enum SyntacticTokenType { - Reference(String), - - LabelDefinition(String), - MacroDefinition(String), - MacroDefinitionTerminator, - - Padding(u16), - ByteLiteral(u8), - ShortLiteral(u16), - Instruction(u8), - - Comment, -} - - - -pub struct SyntacticToken { - pub r#type: SyntacticTokenType, - pub source_location: SourceLocation, - pub error: Option<Error>, -} - -impl SyntacticToken { - // Call when this token is found inside a macro definition. - pub fn use_in_macro_body(&mut self) { - match self.r#type { - SyntacticTokenType::LabelDefinition(..) | - SyntacticTokenType::MacroDefinition(..) => { - self.set_error(Error::InvalidTypeInMacroDefinition) - } - _ => (), - }; - } - pub fn set_error(&mut self, error: Error) { - self.error = Some(error); - } - pub fn is_macro_terminator(&self) -> bool { - if let SyntacticTokenType::MacroDefinitionTerminator = self.r#type {true} else {false} - } -} diff --git a/src/tokenizer.rs b/src/tokenizer.rs deleted file mode 100644 index 02bf490..0000000 --- a/src/tokenizer.rs +++ /dev/null @@ -1,235 +0,0 @@ -use std::mem::take; -use crate::*; - -#[derive(PartialEq)] -enum StringLiteral { - None, - Raw, - NullTerminated, -} - -pub struct TokenIterator { - /// The characters that comprise the program souce code. - chars: Vec<char>, - /// The index of the next character to read. - i: usize, - /// The address of the next character to read. - addr: CharAddress, - /// If true, skip over any whitespace characters. If false, stop reading - /// when a whitespace character is encountered. - skip_whitespace: bool, - /// The name of the most recently defined label. - label: String, - /// If not None, each individual character will be tokenised as a ByteLiteral. - parse_string_literal: StringLiteral, - - - /// The address of the first character of the current token. 
- start: CharAddress, - /// The address of the final character of the current token. - end: CharAddress, - /// The entire current token. - source: String, - /// The first character of the current token. - prefix: char, - /// The second and remaining characters of the current token. - suffix: String, -} - -impl TokenIterator { - /// Create an iterator from a string of program source code. - pub fn from_str(source_code: &str) -> Self { - Self { - chars: source_code.chars().collect(), - i: 0, - addr: CharAddress::zero(), - skip_whitespace: true, - parse_string_literal: StringLiteral::None, - label: String::new(), - start: CharAddress::zero(), - end: CharAddress::zero(), - source: String::new(), - prefix: ' ', - suffix: String::new(), - } - } - /// Append a character to the current token. - fn push(&mut self, c:char) { - self.end = self.addr; - self.source.push(c); - self.suffix.push(c); - self.next(c); - } - /// Move forward to the next source character. - fn next(&mut self, c: char) { - self.addr.column += 1; - self.i += 1; - if c == '\n' { - self.addr.column = 0; - self.addr.line += 1; - } - } - /// Mark the current character as being the first character of a new token. - fn mark_start(&mut self, c:char) { - if c == '"' { - self.parse_string_literal = StringLiteral::NullTerminated; - } else if c == '\'' { - self.parse_string_literal = StringLiteral::Raw; - } else { - self.start=self.addr; - self.end=self.addr; - self.prefix=c; - self.source.push(c); - self.skip_whitespace=false; - } - self.next(c); - } -} - -impl Iterator for TokenIterator { - type Item = SyntacticToken; - - fn next(&mut self) -> Option<SyntacticToken> { - // Initialise values before reading the next token - let mut is_comment = false; - self.skip_whitespace = true; - - // Iterate over source characters until a full token is read - while let Some(c) = self.chars.get(self.i) { - let c = *c; - // Parse individual characters from a string literal - if self.parse_string_literal != StringLiteral::None { - if c == '"' && self.parse_string_literal == StringLiteral::NullTerminated { - self.parse_string_literal = StringLiteral::None; - let token = SyntacticToken { - r#type: SyntacticTokenType::ByteLiteral(0), - source_location: SourceLocation { - source: c.to_string(), start:self.addr, end:self.addr }, - error: None, - }; - self.next(c); - return Some(token); - } else if c == '\'' && self.parse_string_literal == StringLiteral::Raw { - self.parse_string_literal = StringLiteral::None; - self.next(c); - continue - } else { - self.next(c); - return Some(SyntacticToken { - r#type: SyntacticTokenType::ByteLiteral(c as u8), - source_location: SourceLocation { - source: c.to_string(), start:self.addr, end:self.addr }, - error: None, - }); - } - } - // Intercept comments - if is_comment { - self.push(c); if c == ')' { break } else { continue }; } - else if self.skip_whitespace && c == '(' { - is_comment = true; self.mark_start(c); continue } - - // Allow a semicolon at the end of a token to be handled as a separate token - if self.source.len() > 0 && c == ';' { break } - // Handle the current character - match (is_whitespace(c), self.skip_whitespace) { - (true, true) => self.next(c), // c is the expected leading whitespace - (false, true) => self.mark_start(c), // c is the first character of the token - (false, false) => self.push(c), // c is a character of the token - (true, false) => break, // c is trailing whitespace - } - // Allow literal values to be chained to the end of the previous token - if self.source.len() > 0 && c == ':' { break 
} - } - - // If no source characters were grabbed then we have read through the entire source file - if self.source.len() == 0 { return None; } - // Allow handling macro terminators and symbols of length 1 in the match expression - if self.suffix.len() == 0 { self.prefix = '\0'; } - // Consume the collected characters to be used in the match expression - let full = take(&mut self.source); - let suffix = take(&mut self.suffix); - let mut error = None; - let mut parse_padding_value = |v| { - parse_short(v).or_else(|| { - error = Some(Error::InvalidPaddingValue); Some(0) - }).unwrap() - }; - - let r#type = match self.prefix { - '(' => { SyntacticTokenType::Comment } - '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) } - '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) } - '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) } - '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) } - '%' => if let Some(("", sublabel)) = suffix.split_once("~") { - SyntacticTokenType::MacroDefinition(format!("{}/{}", self.label, sublabel)) - } else { - SyntacticTokenType::MacroDefinition(suffix) - } - _ => { - if ";" == &full { SyntacticTokenType::MacroDefinitionTerminator } - else if let Some(value) = parse_byte_lit(&full) { SyntacticTokenType::ByteLiteral(value) } - else if let Some(value) = parse_short_lit(&full) { SyntacticTokenType::ShortLiteral(value) } - else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) } - else { SyntacticTokenType::Reference(full.clone()) } - } - }; - Some(SyntacticToken { - r#type, - source_location:SourceLocation::new(full,self.start,self.end), - error, - }) - } -} - - -fn parse_byte_lit(token: &str) -> Option<u8> { - match token.len() { 2 => u8::from_str_radix(token, 16).ok(), _ => None } } -fn parse_short_lit(token: &str) -> Option<u16> { - match token.len() { 4 => u16::from_str_radix(token, 16).ok(), _ => None } } -fn parse_short(token: &str) -> Option<u16> { - match token.len() { 1..=4 => u16::from_str_radix(token, 16).ok(), _ => None } } -fn is_whitespace(c: char) -> bool { - match c { ' '|'\t'|'\n'|'\r'|'['|']'|'(' =>true, _=>false } } -fn parse_instruction(token: &str) -> Option<u8> { - Some(match token { - // Control operators - "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, - "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1, - "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2, - "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3, - "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4, - "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5, - "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6, - "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7, - // Stack operators - "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8, - "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9, - 
"CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA, - "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB, - "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC, - "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED, - "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE, - "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF, - // Numeric operators - "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0, - "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1, - "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2, - "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3, - "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4, - "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5, - "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6, - "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7, - // Bitwise operators - "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8, - "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9, - "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA, - "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB, - "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC, - "SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD, - "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE, - "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF, - _ => return None, - }) -} diff --git a/src/tokens.rs b/src/tokens.rs new file mode 100644 index 0000000..81bf9d5 --- /dev/null +++ b/src/tokens.rs @@ -0,0 +1,9 @@ +mod syntactic; +mod semantic; +mod instruction; +mod value; + +pub use syntactic::*; +pub use semantic::*; +pub use instruction::*; +pub use value::*; diff --git a/src/tokens/instruction.rs b/src/tokens/instruction.rs new file mode 100644 index 0000000..d5fb3e5 --- /dev/null +++ b/src/tokens/instruction.rs @@ -0,0 +1,170 @@ +use Operation as Op; + + +pub struct Instruction { + pub value: u8, +} + + +impl Instruction { + pub fn operation(&self) -> Operation { + match self.value & 0x1f { + 0x00=>Op::HLT, 0x01=>Op::JMP, 0x02=>Op::JCN, 0x03=>Op::JCK, + 0x04=>Op::LDA, 0x05=>Op::STA, 0x06=>Op::LDD, 0x07=>Op::STD, + 0x08=>Op::PSH, 0x09=>Op::POP, 0x0a=>Op::CPY, 0x0b=>Op::SPL, + 0x0c=>Op::DUP, 0x0d=>Op::OVR, 0x0e=>Op::SWP, 0x0f=>Op::ROT, + 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, + 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, + 0x18=>Op::IOR, 0x19=>Op::XOR, 
0x1a=>Op::AND, 0x1b=>Op::NOT, + 0x1c=>Op::SHF, 0x1d=>Op::SHC, 0x1e=>Op::TAL, 0x1f=>Op::REV, + _ => unreachable!(), + } + } + + pub fn return_mode(&self) -> bool { + self.value & 0x80 != 0 + } + + pub fn literal_mode(&self) -> bool { + self.value & 0x40 != 0 + } + + pub fn double_mode(&self) -> bool { + self.value & 0x20 != 0 + } +} + + +impl std::fmt::Display for Instruction { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self.value { + // Control operators + 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" , + 0x01=>"JMP",0x21=>"JMS" ,0x41=>"JMP:",0x61=>"JMS:" ,0x81=>"JMPr",0xA1=>"JMSr" ,0xC1=>"JMPr:",0xE1=>"JMSr:" , + 0x02=>"JCN",0x22=>"JCS" ,0x42=>"JCN:",0x62=>"JCS:" ,0x82=>"JCNr",0xA2=>"JCSr" ,0xC2=>"JCNr:",0xE2=>"JCSr:" , + 0x03=>"JCK",0x23=>"JCK*",0x43=>"JCK:",0x63=>"JCK*:",0x83=>"JCKr",0xA3=>"JCKr*",0xC3=>"JCKr:",0xE3=>"JCKr*:", + 0x04=>"LDA",0x24=>"LDA*",0x44=>"LDA:",0x64=>"LDA*:",0x84=>"LDAr",0xA4=>"LDAr*",0xC4=>"LDAr:",0xE4=>"LDAr*:", + 0x05=>"STA",0x25=>"STA*",0x45=>"STA:",0x65=>"STA*:",0x85=>"STAr",0xA5=>"STAr*",0xC5=>"STAr:",0xE5=>"STAr*:", + 0x06=>"LDD",0x26=>"LDD*",0x46=>"LDD:",0x66=>"LDD*:",0x86=>"LDDr",0xA6=>"LDDr*",0xC6=>"LDDr:",0xE6=>"LDDr*:", + 0x07=>"STD",0x27=>"STD*",0x47=>"STD:",0x67=>"STD*:",0x87=>"STDr",0xA7=>"STDr*",0xC7=>"STDr:",0xE7=>"STDr*:", + // Stack operators + 0x08=>"PSH",0x28=>"PSH*",0x48=>"PSH:",0x68=>"PSH*:",0x88=>"PSHr",0xA8=>"PSHr*",0xC8=>"PSHr:",0xE8=>"PSHr*:", + 0x09=>"POP",0x29=>"POP*",0x49=>"POP:",0x69=>"POP*:",0x89=>"POPr",0xA9=>"POPr*",0xC9=>"POPr:",0xE9=>"POPr*:", + 0x0A=>"CPY",0x2A=>"CPY*",0x4A=>"CPY:",0x6A=>"CPY*:",0x8A=>"CPYr",0xAA=>"CPYr*",0xCA=>"CPYr:",0xEA=>"CPYr*:", + 0x0B=>"SPL",0x2B=>"SPL*",0x4B=>"SPL:",0x6B=>"SPL*:",0x8B=>"SPLr",0xAB=>"SPLr*",0xCB=>"SPLr:",0xEB=>"SPLr*:", + 0x0C=>"DUP",0x2C=>"DUP*",0x4C=>"DUP:",0x6C=>"DUP*:",0x8C=>"DUPr",0xAC=>"DUPr*",0xCC=>"DUPr:",0xEC=>"DUPr*:", + 0x0D=>"OVR",0x2D=>"OVR*",0x4D=>"OVR:",0x6D=>"OVR*:",0x8D=>"OVRr",0xAD=>"OVRr*",0xCD=>"OVRr:",0xED=>"OVRr*:", + 0x0E=>"SWP",0x2E=>"SWP*",0x4E=>"SWP:",0x6E=>"SWP*:",0x8E=>"SWPr",0xAE=>"SWPr*",0xCE=>"SWPr:",0xEE=>"SWPr*:", + 0x0F=>"ROT",0x2F=>"ROT*",0x4F=>"ROT:",0x6F=>"ROT*:",0x8F=>"ROTr",0xAF=>"ROTr*",0xCF=>"ROTr:",0xEF=>"ROTr*:", + // Numeric operators + 0x10=>"ADD",0x30=>"ADD*",0x50=>"ADD:",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr*",0xD0=>"ADDr:",0xF0=>"ADDr*:", + 0x11=>"SUB",0x31=>"SUB*",0x51=>"SUB:",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr*",0xD1=>"SUBr:",0xF1=>"SUBr*:", + 0x12=>"INC",0x32=>"INC*",0x52=>"INC:",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr*",0xD2=>"INCr:",0xF2=>"INCr*:", + 0x13=>"DEC",0x33=>"DEC*",0x53=>"DEC:",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr*",0xD3=>"DECr:",0xF3=>"DECr*:", + 0x14=>"LTH",0x34=>"LTH*",0x54=>"LTH:",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr*",0xD4=>"LTHr:",0xF4=>"LTHr*:", + 0x15=>"GTH",0x35=>"GTH*",0x55=>"GTH:",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr*",0xD5=>"GTHr:",0xF5=>"GTHr*:", + 0x16=>"EQU",0x36=>"EQU*",0x56=>"EQU:",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr*",0xD6=>"EQUr:",0xF6=>"EQUr*:", + 0x17=>"NQK",0x37=>"NQK*",0x57=>"NQK:",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr*",0xD7=>"NQKr:",0xF7=>"NQKr*:", + // Bitwise operators + 0x18=>"IOR",0x38=>"IOR*",0x58=>"IOR:",0x78=>"IOR*:",0x98=>"IORr",0xB8=>"IORr*",0xD8=>"IORr:",0xF8=>"IORr*:", + 0x19=>"XOR",0x39=>"XOR*",0x59=>"XOR:",0x79=>"XOR*:",0x99=>"XORr",0xB9=>"XORr*",0xD9=>"XORr:",0xF9=>"XORr*:", + 
0x1A=>"AND",0x3A=>"AND*",0x5A=>"AND:",0x7A=>"AND*:",0x9A=>"ANDr",0xBA=>"ANDr*",0xDA=>"ANDr:",0xFA=>"ANDr*:", + 0x1B=>"NOT",0x3B=>"NOT*",0x5B=>"NOT:",0x7B=>"NOT*:",0x9B=>"NOTr",0xBB=>"NOTr*",0xDB=>"NOTr:",0xFB=>"NOTr*:", + 0x1C=>"SHF",0x3C=>"SHF*",0x5C=>"SHF:",0x7C=>"SHF*:",0x9C=>"SHFr",0xBC=>"SHFr*",0xDC=>"SHFr:",0xFC=>"SHFr*:", + 0x1D=>"SHC",0x3D=>"SHC*",0x5D=>"SHC:",0x7D=>"SHC*:",0x9D=>"SHCr",0xBD=>"SHCr*",0xDD=>"SHCr:",0xFD=>"SHCr*:", + 0x1E=>"TAL",0x3E=>"TAL*",0x5E=>"TAL:",0x7E=>"TAL*:",0x9E=>"TALr",0xBE=>"TALr*",0xDE=>"TALr:",0xFE=>"TALr*:", + 0x1F=>"REV",0x3F=>"REV*",0x5F=>"REV:",0x7F=>"REV*:",0x9F=>"REVr",0xBF=>"REVr*",0xDF=>"REVr:",0xFF=>"REVr*:", + }) + } +} + + +impl std::str::FromStr for Instruction { + type Err = (); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + Ok( Instruction { value: match token { + // Control operators + "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, + "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1, + "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2, + "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3, + "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4, + "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5, + "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6, + "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7, + // Stack operators + "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8, + "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9, + "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA, + "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB, + "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC, + "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED, + "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE, + "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF, + // Numeric operators + "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0, + "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1, + "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2, + "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3, + "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4, + "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5, + "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6, + 
"NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7, + // Bitwise operators + "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8, + "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9, + "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA, + "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB, + "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC, + "SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD, + "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE, + "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF, + _ => return Err(()), + }}) + } +} + + +pub enum Operation { + HLT, JMP, JCN, JCK, + LDA, STA, LDD, STD, + PSH, POP, CPY, SPL, + DUP, OVR, SWP, ROT, + ADD, SUB, INC, DEC, + LTH, GTH, EQU, NQK, + IOR, XOR, AND, NOT, + SHF, SHC, TAL, REV, +} + + +impl From<Operation> for u8 { + fn from(operation: Operation) -> Self { + match operation { + Op::HLT=>0x00, Op::JMP=>0x01, Op::JCN=>0x02, Op::JCK=>0x03, + Op::LDA=>0x04, Op::STA=>0x05, Op::LDD=>0x06, Op::STD=>0x07, + Op::PSH=>0x08, Op::POP=>0x09, Op::CPY=>0x0a, Op::SPL=>0x0b, + Op::DUP=>0x0c, Op::OVR=>0x0d, Op::SWP=>0x0e, Op::ROT=>0x0f, + Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13, + Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17, + Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1a, Op::NOT=>0x1b, + Op::SHF=>0x1c, Op::SHC=>0x1d, Op::TAL=>0x1e, Op::REV=>0x1f, + } + } +} + + +impl std::fmt::Display for Operation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self { + Op::HLT=>"HLT", Op::JMP=>"JMP", Op::JCN=>"JCN", Op::JCK=>"JCK", + Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD", + Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", Op::SPL=>"SPL", + Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT", + Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC", + Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK", + Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT", + Op::SHF=>"SHF", Op::SHC=>"SHC", Op::TAL=>"TAL", Op::REV=>"REV", + }) + } +} diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs new file mode 100644 index 0000000..ac5179c --- /dev/null +++ b/src/tokens/semantic.rs @@ -0,0 +1,90 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; + + +pub struct SemanticToken { + pub source: SourceSpan, + pub bytecode: BytecodeSpan, + pub variant: SemanticTokenVariant, +} + + +pub enum SemanticTokenVariant { + LabelDefinition(LabelDefinition), + MacroDefinition(MacroDefinition), + + /// Pointer to the matching label definition. + LabelReference(usize), + /// Pointer to the matching macro definition. + MacroInvocation(usize), + + Literal(Value), + Padding(Value), + Instruction(Instruction), + + Comment(String), + String(Vec<u8>), + + /// Pointer to the matching block close. + BlockOpen(usize), + /// Pointer to the matching block open. 
+ BlockClose(usize), + MarkOpen, + MarkClose, + + Error(SemanticParseError), +} + +impl std::fmt::Debug for SemanticToken { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match &self.variant { + SemVar::LabelDefinition(def) => write!(f, "LabelDefinition({})", def.name), + SemVar::MacroDefinition(def) => write!(f, "MacroDefinition({})", def.name), + SemVar::LabelReference(pointer) => write!(f, "LabelReference(*{pointer})"), + SemVar::MacroInvocation(pointer) => write!(f, "MacroInvocation(*{pointer})"), + SemVar::Literal(value) => write!(f, "Literal({value})"), + SemVar::Padding(value) => write!(f, "Padding({value})"), + SemVar::Instruction(instr) => write!(f, "Instruction(0x{:02x})", instr.value), + SemVar::Comment(comment) => write!(f, "Comment({comment})"), + SemVar::String(string) => write!(f, "String({})", String::from_utf8_lossy(&string)), + SemVar::BlockOpen(_) => write!(f, "BlockOpen"), + SemVar::BlockClose(_) => write!(f, "BlockClose"), + SemVar::MarkOpen => write!(f, "MarkOpen"), + SemVar::MarkClose => write!(f, "MarkClose"), + SemVar::Error(_) => write!(f, "Error"), + } + } +} + + +pub struct LabelDefinition { + /// The absolute name of the label or sublabel. + pub name: String, + /// List of pointers to label reference tokens. + pub references: Vec<usize>, +} + + +pub struct MacroDefinition { + pub name: String, + pub references: Vec<usize>, + pub body_tokens: Vec<SemanticToken>, +} + + +pub enum SemanticParseError { + LabelDefinitionInMacroDefinition, + MacroDefinitionInMacroDefinition, + + StrayMacroTerminator, + StrayBlockClose, + UnclosedBlock, + + UndefinedSymbol(String), + RedefinedSymbol((String, SourceSpan)), + + MacroInvocationBeforeDefinition((String, SourceSpan)), + + SyntaxError(SyntacticParseError) +} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs new file mode 100644 index 0000000..8684ed9 --- /dev/null +++ b/src/tokens/syntactic.rs @@ -0,0 +1,39 @@ +use crate::*; + + +pub struct SyntacticToken { + /// Location of token in source files. 
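+    /// A span records the token's position in the merged file and, when
+    /// a source path comment names the original file, in that file too.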
+ pub source: SourceSpan, + pub variant: SyntacticTokenVariant, +} + + +pub enum SyntacticTokenVariant { + LabelDefinition(String), + MacroDefinition(String), + MacroDefinitionTerminator, + + Literal(Value), + Padding(Value), + Instruction(Instruction), + + Comment(String), + String(Vec<u8>), + + BlockOpen, + BlockClose, + MarkOpen, + MarkClose, + + Symbol(String), + + Error(SyntacticParseError), +} + + +pub enum SyntacticParseError { + UnterminatedComment, + UnterminatedRawString, + UnterminatedNullString, + InvalidPaddingValue(String), +} diff --git a/src/tokens/value.rs b/src/tokens/value.rs new file mode 100644 index 0000000..e421bd5 --- /dev/null +++ b/src/tokens/value.rs @@ -0,0 +1,32 @@ +pub enum Value { + Byte(u8), + Double(u16), +} + +impl std::fmt::Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match self { + Self::Byte(value) => write!(f, "0x{value:02x}"), + Self::Double(value) => write!(f, "0x{value:04x}"), + } + } +} + + +impl std::str::FromStr for Value { + type Err = (); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + match token.len() { + 2 => match u8::from_str_radix(&token, 16) { + Ok(value) => Ok(Value::Byte(value)), + Err(_) => Err(()), + } + 4 => match u16::from_str_radix(&token, 16) { + Ok(value) => Ok(Value::Double(value)), + Err(_) => Err(()), + } + _ => Err(()), + } + } +} diff --git a/src/translators.rs b/src/translators.rs new file mode 100644 index 0000000..cce5633 --- /dev/null +++ b/src/translators.rs @@ -0,0 +1,9 @@ +mod syntactic_parser; +mod semantic_parser; +mod bytecode_generator; +mod symbols_generator; + +pub use syntactic_parser::*; +pub use semantic_parser::*; +pub use bytecode_generator::*; +pub use symbols_generator::*; diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs new file mode 100644 index 0000000..956aca5 --- /dev/null +++ b/src/translators/bytecode_generator.rs @@ -0,0 +1,131 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; + + +pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> { + let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens); + generator.generate() +} + + +/// Translate semantic tokens into bytecode. +struct BytecodeGenerator<'a> { + semantic_tokens: &'a mut [SemanticToken], + block_stack: Vec<usize>, + bytecode: Vec<u8>, + /// (address in bytecode, label definition token index) + label_references: Vec<(usize, usize)>, +} + +impl<'a> BytecodeGenerator<'a> { + pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self { + Self { + semantic_tokens, + block_stack: Vec::new(), + bytecode: Vec::new(), + label_references: Vec::new(), + } + } + + pub fn generate(mut self) -> Vec<u8> { + for i in 0..self.semantic_tokens.len() { + let address = self.bytecode.len(); + self.generate_bytecode_for_token(i, None); + self.semantic_tokens[i].bytecode = BytecodeSpan { + bytes: self.bytecode[address..].to_vec(), + location: BytecodeLocation { + address, + length: self.bytecode.len().saturating_sub(address), + } + }; + } + + // Replace blank label references in bytecode with real label addresses. + // The layer of indirection is necessary because the iteration borrows + // self immutably. + let mut insertions: Vec<(usize, u16)> = Vec::new(); + for (bytecode_address, token_pointer) in &self.label_references { + let label_token = &self.semantic_tokens[*token_pointer]; + // TODO: If greater than u16, print a warning. 
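+            // (The cast below silently truncates any address above
+            // 0xffff, so the warning would fire whenever the bytecode
+            // outgrows the 16-bit address space.)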
+            let address_value = label_token.bytecode.location.address as u16;
+            insertions.push((*bytecode_address, address_value));
+        }
+        for (bytecode_address, address_value) in insertions {
+            self.replace_address_in_bytecode(bytecode_address, address_value);
+        }
+
+        // Strip trailing null bytes from the bytecode.
+        let mut length = self.bytecode.len();
+        for (i, byte) in self.bytecode.iter().enumerate().rev() {
+            match *byte == 0 {
+                true => length = i,
+                false => break,
+            };
+        }
+        self.bytecode.truncate(length);
+
+        return self.bytecode;
+    }
+
+    fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
+        macro_rules! push_byte {
+            ($byte:expr) => { self.bytecode.push($byte) }; }
+        macro_rules! push_double {
+            ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; }
+        macro_rules! pad {
+            ($len:expr) => { for _ in 0..$len { push_byte!(0); } } }
+
+        let semantic_token = if let Some(macro_pointer) = macro_pointer {
+            let macro_definition = &self.semantic_tokens[macro_pointer];
+            if let SemVar::MacroDefinition(def) = &macro_definition.variant {
+                &def.body_tokens[pointer]
+            } else { unreachable!() }
+        } else {
+            &self.semantic_tokens[pointer]
+        };
+        match &semantic_token.variant {
+            SemVar::MacroInvocation(pointer) => {
+                let macro_definition = &self.semantic_tokens[*pointer];
+                if let SemVar::MacroDefinition(def) = &macro_definition.variant {
+                    let length = def.body_tokens.len();
+                    let macro_pointer = Some(*pointer);
+                    for body_pointer in 0..length {
+                        // Recurse, generating bytecode for each macro body token.
+                        self.generate_bytecode_for_token(body_pointer, macro_pointer);
+                    }
+                } else { unreachable!() }
+            }
+            SemVar::Literal(value) => match value {
+                Value::Byte(value) => push_byte!(*value),
+                Value::Double(value) => push_double!(value),
+            }
+            SemVar::Padding(value) => match value {
+                Value::Byte(value) => pad!(*value),
+                Value::Double(value) => pad!(*value),
+            }
+            SemVar::Instruction(instr) => push_byte!(instr.value),
+            SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes),
+            SemVar::LabelReference(pointer) => {
+                self.label_references.push((self.bytecode.len(), *pointer));
+                push_double!(0u16);
+            }
+            SemVar::BlockOpen(_) => {
+                self.block_stack.push(self.bytecode.len());
+                push_double!(0u16);
+            }
+            SemVar::BlockClose(_) => {
+                let bytecode_address = self.block_stack.pop().unwrap();
+                // TODO: If greater than u16, print a warning.
+                let address_value = self.bytecode.len() as u16;
+                self.replace_address_in_bytecode(bytecode_address, address_value);
+            }
+            _ => (),
+        };
+    }
+
+    fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) {
+        let range = bytecode_address..bytecode_address+2;
+        self.bytecode[range].clone_from_slice(&address_value.to_be_bytes());
+    }
+}
diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs
new file mode 100644
index 0000000..cb6a435
--- /dev/null
+++ b/src/translators/semantic_parser.rs
@@ -0,0 +1,245 @@
+use crate::*;
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use SyntacticTokenVariant as SynVar;
+use SemanticTokenVariant as SemVar;
+use SemanticParseError as SemErr;
+
+
+pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
+    let semantic_parser = SemanticParser::from_source_code(source_code, path);
+    semantic_parser.parse()
+}
+
+
+/// Translate syntactic tokens into semantic tokens.
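+/// Parsing is two-pass: every label and macro name is collected before
+/// any token is parsed, then references are backpatched to point at
+/// their definitions once the definitions have been assigned indices.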
+struct SemanticParser { + labels: HashMap<String, Definition>, + macros: HashMap<String, Definition>, + syntactic_tokens: Vec<SyntacticToken>, + /// Index of the current outer token. + current_outer_index: usize, +} + +impl SemanticParser { + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + let mut labels = HashMap::new(); + let mut macros = HashMap::new(); + let mut syntactic_tokens = Vec::new(); + + let parser = SyntacticParser::from_source_code(source_code, path); + for syntactic_token in parser { + let definition = Definition::new(syntactic_token.source.clone()); + match &syntactic_token.variant { + SynVar::LabelDefinition(name) => { + let _ = labels.try_insert(name.to_owned(), definition); + }, + SynVar::MacroDefinition(name) => { + let _ = macros.try_insert(name.to_owned(), definition); + }, + _ => (), + } + syntactic_tokens.push(syntactic_token); + } + + Self { + labels, + macros, + syntactic_tokens, + current_outer_index: 0, + } + } + + /// Parse syntactic tokens as semantic tokens. + pub fn parse(mut self) -> Vec<SemanticToken> { + let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens); + let mut syntactic = syntactic_tokens.into_iter(); + let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false); + + // Insert real label definition pointers into label reference tokens. + for definition in self.labels.values_mut() { + if let Some(definition_pointer) = definition.pointer { + // Insert definition pointer into reference tokens. + for reference_pointer in &definition.references { + let reference_token = &mut semantic_tokens[*reference_pointer]; + reference_token.variant = SemVar::LabelReference(definition_pointer); + } + // Insert reference pointers into definition token. + let definition_token = &mut semantic_tokens[definition_pointer]; + if let SemVar::LabelDefinition(ref mut def) = definition_token.variant { + def.references = std::mem::take(&mut definition.references); + } else { unreachable!() } + // Insert definition pointer into reference tokens inside macros. + for (outer, inner) in &definition.deep_references { + let macro_token = &mut semantic_tokens[*outer]; + if let SemVar::MacroDefinition(ref mut def) = macro_token.variant { + let reference_token = &mut def.body_tokens[*inner]; + reference_token.variant = SemVar::LabelReference(definition_pointer); + } else { unreachable!() } + } + // TODO: Record deep references in macro and label definitions? 
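+                // (Deep references are label references that occur inside
+                // macro bodies; they are patched through the enclosing
+                // MacroDefinition token rather than the flat token list.)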
+ } + } + + return semantic_tokens; + } + + fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken> + where I: Iterator<Item = SyntacticToken> + { + let mut semantic_tokens: Vec<SemanticToken> = Vec::new(); + let mut block_stack: Vec<usize> = Vec::new(); + + while let Some(syntactic_token) = parser.next() { + let current_index = semantic_tokens.len(); + if !in_macro { + self.current_outer_index = current_index; + } + + let semantic_token_variant = match syntactic_token.variant { + SynVar::LabelDefinition(name) => { + if in_macro { + SemVar::Error(SemErr::LabelDefinitionInMacroDefinition) + } else if let Some(definition) = self.macros.get(&name) { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else if let Some(definition) = self.labels.get_mut(&name) { + if definition.pointer.is_some() { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else { + definition.pointer = Some(current_index); + let references = Vec::new(); + SemVar::LabelDefinition(LabelDefinition { name, references }) + } + } else { + unreachable!() + } + } + SynVar::MacroDefinition(name) => { + if in_macro { + SemVar::Error(SemErr::MacroDefinitionInMacroDefinition) + } else if let Some(definition) = self.labels.get(&name) { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else if let Some(definition) = self.macros.get_mut(&name) { + if definition.pointer.is_some() { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else { + definition.pointer = Some(current_index); + let references = Vec::new(); + let body_tokens = self.pull_semantic_tokens(parser, true); + SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens }) + } + } else { + unreachable!() + } + } + SynVar::MacroDefinitionTerminator => if in_macro { + break; + } else { + SemVar::Error(SemErr::StrayMacroTerminator) + } + SynVar::Literal(value) => { + SemVar::Literal(value) + } + SynVar::Padding(value) => { + SemVar::Padding(value) + } + SynVar::Instruction(instr) => { + SemVar::Instruction(instr) + } + SynVar::Comment(comment) => { + SemVar::Comment(comment) + } + SynVar::String(bytes) => { + SemVar::String(bytes) + } + SynVar::BlockOpen => { + block_stack.push(current_index); + SemVar::BlockOpen(0) + } + SynVar::BlockClose => { + if let Some(pointer) = block_stack.pop() { + let open = &mut semantic_tokens[pointer]; + open.variant = SemVar::BlockOpen(current_index); + SemVar::BlockClose(pointer) + } else { + SemVar::Error(SemErr::StrayBlockClose) + } + } + SynVar::MarkOpen => { + SemVar::MarkOpen + } + SynVar::MarkClose => { + SemVar::MarkClose + } + SynVar::Symbol(name) => { + if let Some(definition) = self.labels.get_mut(&name) { + if in_macro { + let pointer = (self.current_outer_index, current_index); + definition.deep_references.push(pointer); + } else { + definition.references.push(current_index); + } + SemVar::LabelReference(0) + } else if let Some(definition) = self.macros.get_mut(&name) { + if let Some(pointer) = definition.pointer { + if !in_macro { definition.references.push(current_index); } + SemVar::MacroInvocation(pointer) + } else { + let source = definition.source.clone(); + SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source))) + } + } else { + SemVar::Error(SemErr::UndefinedSymbol(name)) + } + } + SynVar::Error(syntax_err) => { + 
SemVar::Error(SemErr::SyntaxError(syntax_err)) + } + }; + + let semantic_token = SemanticToken { + source: syntactic_token.source, + bytecode: BytecodeSpan::default(), + variant: semantic_token_variant, + }; + semantic_tokens.push(semantic_token); + } + + if in_macro { + //TODO: UnterminatedMacroDefinition + } + + // Replace each unclosed BlockOpen token with an error. + for block_pointer in block_stack { + semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock); + } + + return semantic_tokens; + } +} + + +struct Definition { + pub source: SourceSpan, + pub pointer: Option<usize>, + pub references: Vec<usize>, + /// (macro index, label reference index) + pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { + pub fn new(source: SourceSpan) -> Self { + Self { + source, + pointer: None, + references: Vec::new(), + deep_references: Vec::new(), + } + } +} diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs new file mode 100644 index 0000000..06bbaa8 --- /dev/null +++ b/src/translators/symbols_generator.rs @@ -0,0 +1,28 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; + + +pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String { + let mut symbols = String::new(); + + for token in semantic_tokens { + if let SemVar::LabelDefinition(definition) = &token.variant { + let address = token.bytecode.location.address; + if address > 0xffff { break; } + let name = &definition.name; + let path = match &token.source.in_source { + Some(source) => &source.path, + None => &token.source.in_merged.path, + }; + if let Some(path) = path { + let path = path.as_os_str().to_string_lossy(); + symbols.push_str(&format!("{address:04x} {name} {path}\n")); + } else { + symbols.push_str(&format!("{address:04x} {name}\n")); + } + } + } + + return symbols; +} diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs new file mode 100644 index 0000000..7279daf --- /dev/null +++ b/src/translators/syntactic_parser.rs @@ -0,0 +1,247 @@ +use crate::*; + +use std::path::PathBuf; + + +/// Translate raw source code characters into syntactic tokens. +pub struct SyntacticParser { + /// Path of file from which the source was read. + path: Option<PathBuf>, + /// Path of the original source file. + source_path: Option<PathBuf>, + /// Position of the next character to be read. + position: Position, + /// Previous value of the position field. + prev_position: Position, + /// Line where the embedded source file begins. + source_line_start: usize, + /// Characters waiting to be parsed, in reverse order. + chars: Vec<char>, + /// The token currently being parsed. + token_source_string: String, + /// The name of the most recently parsed label. + label: String, +} + + +impl SyntacticParser { + /// Parse source code. + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + Self { + path: path.map(|p| p.into()), + source_path: None, + position: Position { line: 0, column: 0 }, + prev_position: Position { line: 0, column: 0 }, + source_line_start: 0, + chars: source_code.chars().rev().collect(), + token_source_string: String::new(), + label: String::new(), + } + } + + /// Return the next character, keeping it on the queue. + fn peek_char(&self) -> Option<char> { + self.chars.last().copied() + } + + /// Return the next character, removing it from the queue. 
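+    /// The eaten character is also appended to the source string of the
+    /// token currently being parsed.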
+    fn eat_char(&mut self) -> Option<char> {
+        let option = self.chars.pop();
+        if let Some(c) = option {
+            self.prev_position = self.position;
+            self.position.advance(c);
+            self.token_source_string.push(c);
+        }
+        return option;
+    }
+
+    /// Remove the next character from the queue.
+    fn drop_char(&mut self) {
+        if let Some(c) = self.chars.pop() {
+            self.prev_position = self.position;
+            self.position.advance(c);
+        }
+    }
+
+    /// Remove leading whitespace.
+    fn drop_whitespace(&mut self) {
+        while let Some(c) = self.peek_char() {
+            match c.is_whitespace() {
+                true => self.drop_char(),
+                false => break,
+            }
+        }
+    }
+
+    /// Remove a full token from the queue.
+    fn eat_token(&mut self) -> String {
+        const DELIMITERS: [char; 13] =
+            ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
+        let mut token = String::new();
+        while let Some(peek) = self.peek_char() {
+            if peek.is_whitespace() || DELIMITERS.contains(&peek) {
+                break;
+            }
+            let c = self.eat_char().unwrap();
+            token.push(c);
+            if c == ':' {
+                break;
+            }
+        }
+        token
+    }
+
+    /// Return all characters up to the delimiter, removing them and the
+    /// delimiter from the queue. The returned string excludes the
+    /// delimiter. Returns None if the end of the source is reached
+    /// before the delimiter is found.
+    fn eat_to_delim(&mut self, delim: char) -> Option<String> {
+        let mut token = String::new();
+        while let Some(c) = self.eat_char() {
+            // eat_char has already recorded c in token_source_string.
+            match c == delim {
+                true => return Some(token),
+                false => token.push(c),
+            }
+        }
+        return None;
+    }
+
+    /// Check whether the remainder of the current line is blank.
+    fn is_line_empty(&self) -> bool {
+        for c in self.chars.iter().rev() {
+            if *c == '\n' {
+                return true;
+            }
+            if !c.is_whitespace() {
+                return false;
+            }
+        }
+        return false;
+    }
+}
+
+
+impl Iterator for SyntacticParser {
+    type Item = SyntacticToken;
+
+    /// Sequentially parse tokens from the source code.
+    fn next(&mut self) -> Option<SyntacticToken> {
+        use SyntacticTokenVariant as SynVar;
+        use SyntacticParseError as SynErr;
+
+        self.drop_whitespace();
+        let start = self.position;
+
+        let variant = match self.eat_char()? {
+            '@' => {
+                self.label = self.eat_token();
+                SynVar::LabelDefinition(self.label.clone())
+            }
+            '&' => {
+                let token = self.eat_token();
+                let sublabel = format!("{}/{token}", self.label);
+                SynVar::LabelDefinition(sublabel)
+            }
+            '%' => SynVar::MacroDefinition(self.eat_token()),
+            ';' => SynVar::MacroDefinitionTerminator,
+            '[' => SynVar::MarkOpen,
+            ']' => SynVar::MarkClose,
+            '{' => SynVar::BlockOpen,
+            '}' => SynVar::BlockClose,
+            '(' => match self.eat_to_delim(')') {
+                Some(string) => SynVar::Comment(string),
+                None => SynVar::Error(SynErr::UnterminatedComment),
+            }
+            '\'' => match self.eat_to_delim('\'') {
+                Some(string) => SynVar::String(string.as_bytes().to_vec()),
+                None => SynVar::Error(SynErr::UnterminatedRawString),
+            }
+            '"' => match self.eat_to_delim('"') {
+                Some(string) => {
+                    let mut bytes = string.as_bytes().to_vec();
+                    bytes.push(0x00);
+                    SynVar::String(bytes)
+                }
+                None => SynVar::Error(SynErr::UnterminatedNullString),
+            }
+            '#' => {
+                let token = self.eat_token();
+                match token.parse::<Value>() {
+                    Ok(value) => SynVar::Padding(value),
+                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
+                }
+            },
+            '~' => {
+                let token = self.eat_token();
+                let symbol = format!("{}/{token}", self.label);
+                SynVar::Symbol(symbol)
+            }
+            ':' => SynVar::Symbol(String::from(':')),
+            c => {
+                let token = format!("{c}{}", self.eat_token());
+                match token.parse::<Value>() {
+                    Ok(value) => SynVar::Literal(value),
+                    Err(_) => match token.parse::<Instruction>() {
+                        Ok(instruction) => SynVar::Instruction(instruction),
+                        Err(_) => SynVar::Symbol(token),
+                    }
+                }
+            }
+        };
+
+        // Parse source path comments.
+        if let SynVar::Comment(comment) = &variant {
+            // Check that the comment fills the entire line.
+            if start.column == 0 && self.is_line_empty() {
+                if let Some(path) = comment.strip_prefix(": ") {
+                    self.source_path = Some(PathBuf::from(path.trim()));
+                    self.source_line_start = start.line + 1;
+                }
+            }
+        }
+
+        // Find location in current merged file.
+        let in_merged = SourceLocation {
+            path: self.path.to_owned(),
+            start,
+            end: self.prev_position,
+        };
+
+        // Find location in original source file.
+        let in_source = if start.line >= self.source_line_start {
+            match &self.source_path {
+                Some(path) => {
+                    let offset = self.source_line_start;
+                    Some( SourceLocation {
+                        path: Some(path.to_owned()),
+                        start: Position {
+                            line: in_merged.start.line.saturating_sub(offset),
+                            column: in_merged.start.column,
+                        },
+                        end: Position {
+                            line: in_merged.end.line.saturating_sub(offset),
+                            column: in_merged.end.column,
+                        }
+                    })
+                }
+                None => None,
+            }
+        } else {
+            None
+        };
+
+        let string = std::mem::take(&mut self.token_source_string);
+        let source = SourceSpan { string, in_merged, in_source };
+        Some( SyntacticToken { source, variant } )
+    }
+}
+
+
+#[derive(Debug)]
+pub enum ParseError {
+    InvalidExtension,
+    NotFound,
+    NotReadable,
+    IsADirectory,
+    InvalidUtf8,
+    Unknown,
+}
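
Taken together, the new translator modules form a pipeline from source
text to output artifacts: syntactic parsing, semantic parsing, bytecode
generation, and symbols generation. As a sketch only, a driver could
chain the four public entry points introduced above as shown below; the
crate name bedrock_asm, the file names, and the unwrap-based error
handling are illustrative assumptions, not part of this commit.

    use bedrock_asm::*;

    fn main() {
        // Read the merged source code (path is hypothetical).
        let source = std::fs::read_to_string("program.asm").unwrap();

        // Stages 1 and 2: source text -> syntactic tokens -> semantic
        // tokens, with symbol references resolved against definitions.
        let mut semantic_tokens = generate_semantic_tokens(&source, Some("program.asm"));

        // Stage 3: semantic tokens -> bytecode, backpatching label and
        // block addresses and stripping trailing null bytes.
        let bytecode = generate_bytecode(&mut semantic_tokens);

        // Stage 4: one "address name path" line per label definition.
        let symbols = generate_symbols_file(&semantic_tokens);

        std::fs::write("program.rom", &bytecode).unwrap();
        std::fs::write("program.rom.sym", symbols).unwrap();
    }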