diff options
35 files changed, 1497 insertions, 2024 deletions
@@ -3,29 +3,175 @@ version = 4 [[package]] -name = "bedrock-asm" -version = "4.0.5" +name = "assembler" +version = "2.3.0" +source = "git+git://benbridle.com/assembler?tag=v2.3.0#a9640fce1aaa5e80170ce4d2ac700f66cfffbb4b" dependencies = [ + "inked", + "log 2.0.0", "vagabond", - "xflags", +] + +[[package]] +name = "bedrock-asm" +version = "1.0.2" +dependencies = [ + "assembler", + "indexmap", + "log 2.0.0", + "switchboard", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "inked" +version = "1.0.0" +source = "git+git://benbridle.com/inked?tag=v1.0.0#2954d37b638fa2c1dd3d51ff53f08f475aea6ea3" +dependencies = [ + "termcolor", +] + +[[package]] +name = "log" +version = "1.1.1" +source = "git+git://benbridle.com/log?tag=v1.1.1#930f3d0e2b82df1243f423c092a38546ea7533c3" + +[[package]] +name = "log" +version = "2.0.0" +source = "git+git://benbridle.com/log?tag=v2.0.0#a38d3dd487594f41151db57625410d1b786bebe4" +dependencies = [ + "inked", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "switchboard" +version = "2.1.0" +source = "git+git://benbridle.com/switchboard?tag=v2.1.0#e6435712ba5b3ca36e99fc8cbe7755940f8b1f3f" +dependencies = [ + "log 1.1.1", + "paste", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", ] [[package]] name = "vagabond" -version = "1.0.1" -source = "git+git://benbridle.com/vagabond?tag=v1.0.1#08f3153fea62ea81a42438347eeee058f5bec199" +version = "1.1.1" +source = "git+git://benbridle.com/vagabond?tag=v1.1.1#b190582517e6008ad1deff1859f15988e4efaa26" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] [[package]] -name = "xflags" -version = "0.4.0-pre.1" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4697c0db52cfb7277cf997ed334c92c739fafc7c5d44a948a906a5bf4b41a63f" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "xflags-macros", + "windows-targets", ] [[package]] -name = "xflags-macros" -version = "0.4.0-pre.1" +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94d18ac1a136311770ed587356f8a828c9b86261f68761f34e6cdc6d5b4c435c" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" @@ -1,15 +1,16 @@ [package] name = "bedrock-asm" -version = "4.0.5" +version = "1.0.2" authors = ["Ben Bridle"] -edition = "2021" -description = "Assembler program for the Bedrock assembly language" - +edition = "2024" +description = "Assembler for the Bedrock assembly language" [dependencies] -vagabond = { git = "git://benbridle.com/vagabond", tag = "v1.0.1" } -xflags = "0.4.0-pre" +assembler = { git = "git://benbridle.com/assembler", tag = "v2.3.0" } +log = { git = "git://benbridle.com/log", tag = "v2.0.0" } +switchboard = { git = "git://benbridle.com/switchboard", tag = "v2.1.0" } +indexmap = "2.7.1" [profile.release] lto=true diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..21ed643 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) Ben Bridle + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so. + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..5d56faf --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs deleted file mode 100644 index 5cb962f..0000000 --- a/src/bin/bedrock-asm.rs +++ /dev/null @@ -1,154 +0,0 @@ -use bedrock_asm::*; - -use std::io::{Read, Write}; -use std::path::{Path, PathBuf}; - - -static mut VERBOSE: bool = false; - -macro_rules! verbose { - ($($tokens:tt)*) => { if unsafe { VERBOSE } { - eprint!("[INFO] "); eprintln!($($tokens)*); - } }; -} -macro_rules! error { - ($($tokens:tt)*) => {{ - eprint!("[ERROR] "); eprintln!($($tokens)*); std::process::exit(1); - }}; -} - - -fn main() { - let args = Arguments::from_env_or_exit(); - - // ----------------------------------------------------------------------- - // RESOLVE syntactic symbols - let ext = args.ext.unwrap_or(String::from("brc")); - let mut resolver = if let Some(path) = &args.source { - match SourceUnit::from_path(&path, &ext) { - Ok(source_unit) => SymbolResolver::from_source_unit(source_unit), - Err(err) => match err { - ParseError::InvalidExtension => error!( - "File {path:?} has invalid extension, must be '.{ext}'"), - ParseError::NotFound => error!( - "File {path:?} was not found"), - ParseError::InvalidUtf8 => error!( - "File {path:?} does not contain valid UTF-8 text"), - ParseError::NotReadable => error!( - "File {path:?} is not readable"), - ParseError::IsADirectory => error!( - "File {path:?} is a directory"), - ParseError::Unknown => error!( - "Unknown error while attempting to read from {path:?}") - } - } - } else { - let mut source_code = String::new(); - verbose!("Reading program source from standard input"); - if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { - eprintln!("Could not read from standard input, exiting."); - eprintln!("({err:?})"); - std::process::exit(1); - } - let path = "<standard input>"; - let source_unit = SourceUnit::from_source_code(source_code, path); - SymbolResolver::from_source_unit(source_unit) - }; - // Load project libraries. - if let Some(path) = &args.source { - if !args.no_libs && !args.no_project_libs { - let project_library = gather_project_libraries(path, &ext); - resolver.add_library_units(project_library); - } - } - // Load environment libraries. - if !args.no_libs && !args.no_env_libs { - for env_library in gather_environment_libraries(&ext) { - resolver.add_library_units(env_library); - } - } - resolver.resolve(); - - // ----------------------------------------------------------------------- - // PRINT information, generate merged source code - if args.tree { - print_source_tree(&resolver); - } - if print_resolver_errors(&resolver) { - std::process::exit(1); - }; - let merged_source = match resolver.get_merged_source_code() { - Ok(merged_source) => merged_source, - Err(ids) => { - print_cyclic_source_units(&ids, &resolver); - std::process::exit(1); - }, - }; - if args.resolve { - write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref()); - } - - // ----------------------------------------------------------------------- - // PARSE semantic tokens from merged source code - let path = Some("<merged source>"); - let mut semantic_tokens = generate_semantic_tokens(&merged_source, path); - if print_semantic_errors(&semantic_tokens, &merged_source) { - std::process::exit(1); - }; - - // ----------------------------------------------------------------------- - // GENERATE symbols file and bytecode - let bytecode = generate_bytecode(&mut semantic_tokens); - // let symbols = generate_symbols_file(&semantic_tokens); - write_bytes_and_exit(&bytecode, args.output.as_ref()); -} - - -fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { - if let Some(path) = path { - if let Err(err) = std::fs::write(path, bytes) { - eprintln!("Could not write to path {:?}, exiting.", path.as_ref()); - eprintln!("({err:?})"); - std::process::exit(1); - } - } else { - if let Err(err) = std::io::stdout().write_all(bytes) { - eprintln!("Could not write to standard output, exiting."); - eprintln!("({err:?})"); - std::process::exit(1); - } - } - std::process::exit(0); -} - - -xflags::xflags! { - cmd arguments { - /// Print additional debug information - optional --verbose - /// Print the assembler version and exit - optional --version - - - /// Bedrock source code file to assemble. - optional source: PathBuf - /// Destination path for assembler output. - optional output: PathBuf - /// File extension to identify source files. - optional ext: String - - /// Don't include libraries or resolve references. - optional --no-libs - /// Don't include project libraries - optional --no-project-libs - /// Don't include environment libraries. - optional --no-env-libs - - /// Show the resolved source file heirarchy - optional --tree - /// Assemble the program without saving any output - optional --check - /// Only return resolved source code. - optional --resolve - } -} diff --git a/src/bin/br-asm.rs b/src/bin/br-asm.rs new file mode 100644 index 0000000..e7a9230 --- /dev/null +++ b/src/bin/br-asm.rs @@ -0,0 +1,8 @@ +use bedrock_asm::*; +use switchboard::*; + + +fn main() { + let args = Switchboard::from_env(); + assemble(args, "br-asm"); +} diff --git a/src/formats/clang.rs b/src/formats/clang.rs new file mode 100644 index 0000000..524b501 --- /dev/null +++ b/src/formats/clang.rs @@ -0,0 +1,10 @@ +pub fn format_clang(bytecode: &[u8]) -> Vec<u8> { + let mut output = String::new(); + for chunk in bytecode.chunks(16) { + for byte in chunk { + output.push_str(&format!("0x{byte:02X}, ")); + } + output.push('\n'); + } + return output.into_bytes(); +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs new file mode 100644 index 0000000..79b1c51 --- /dev/null +++ b/src/formats/mod.rs @@ -0,0 +1,23 @@ +mod clang; +pub use clang::*; + +use crate::*; + + +#[derive(Clone, Copy, PartialEq)] +pub enum Format { + Raw, + Source, + Clang, +} + +impl Format { + pub fn from_str(string: &str) -> Self { + match string { + "raw" => Self::Raw, + "source" => Self::Source, + "c" => Self::Clang, + _ => fatal!("Unknown format '{string}', expected 'raw', 'c', or 'source'"), + } + } +} diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs deleted file mode 100644 index 0fd1131..0000000 --- a/src/gather_libraries.rs +++ /dev/null @@ -1,198 +0,0 @@ -use crate::*; - -use vagabond::*; - - -/// Gather all library units from the given path. -pub fn gather_project_libraries(path: &Path, extension: &str) -> Vec<SourceUnit> { - match path.parent() { - Some(parent_path) => gather_source_units(parent_path, extension), - None => Vec::new(), - } -} - - -/// Gather all library units from the paths specified in an environment variable. -pub fn gather_environment_libraries(extension: &str) -> Vec<Vec<SourceUnit>> { - let mut environment_libraries = Vec::new(); - if let Ok(lib_var) = std::env::var("BEDROCK_LIBS") { - for path_str in lib_var.split(":") { - let lib_path = PathBuf::from(path_str); - let source_units = gather_source_units(&lib_path, extension); - if !source_units.is_empty() { - environment_libraries.push(source_units); - } - } - }; - return environment_libraries; -} - - -/// Gather all source units at or descended from the given entry. -fn gather_source_units(path: &Path, extension: &str) -> Vec<SourceUnit> { - let mut source_units = Vec::new(); - if let Ok(entry) = Entry::from_path(path) { - match entry.entry_type { - EntryType::File => { - if let Ok(source) = SourceUnit::from_path(entry.path, extension) { - source_units.push(source); - } - } - EntryType::Directory => { - if let Ok(entries) = traverse_directory(entry.path) { - for entry in entries { - if let Ok(source) = SourceUnit::from_path(entry.path, extension) { - source_units.push(source); - } - } - } - } - } - }; - return source_units; -} - - -pub struct SourceUnit { - pub main: SourceFile, - pub head: Option<SourceFile>, - pub tail: Option<SourceFile>, -} - - -impl SourceUnit { - /// Load from a source file and an associated head and tail file. - pub fn from_path<P: Into<PathBuf>>(path: P, extension: &str) -> Result<Self, ParseError> { - let main_path = canonicalize_path(path); - let main_path_str = main_path.as_os_str().to_string_lossy().to_string(); - let head_extension = format!("head.{extension}"); - let tail_extension = format!("tail.{extension}"); - let is_head = main_path_str.ends_with(&head_extension); - let is_tail = main_path_str.ends_with(&tail_extension); - let is_not_main = !main_path_str.ends_with(extension); - if is_not_main || is_head || is_tail { return Err(ParseError::InvalidExtension); } - - let symbols = parse_symbols_from_file(&main_path)?; - let head_path = main_path.with_extension(head_extension); - let tail_path = main_path.with_extension(tail_extension); - - let main = SourceFile { path: main_path, symbols }; - let head = match parse_symbols_from_file(&head_path) { - Ok(symbols) => Some(SourceFile { path: head_path, symbols }), - Err(_) => None, - }; - let tail = match parse_symbols_from_file(&tail_path) { - Ok(symbols) => Some(SourceFile { path: tail_path, symbols }), - Err(_) => None, - }; - Ok( SourceUnit { main, head, tail } ) - } - - /// Load from a string of source code. - pub fn from_source_code<P: Into<PathBuf>>(source_code: String, path: P) -> Self { - let path = canonicalize_path(path); - let symbols = parse_symbols_from_source(source_code, Some(&path)); - Self { - main: SourceFile { path, symbols }, - head: None, - tail: None, - } - } -} - - -/// Read and parse all symbols from a source file. -fn parse_symbols_from_file(path: &Path) -> Result<Symbols, ParseError> { - let source = read_source_from_file(path)?; - Ok(parse_symbols_from_source(source, Some(path))) -} - - -/// Parse all symbols from a source code string. -fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbols { - use SyntacticTokenVariant as SynVar; - - let token_iter = SyntacticParser::from_source_code(&source_code, path); - let mut definitions = Vec::new(); - let mut references = Vec::new(); - - for token in token_iter { - let source = token.source; - match token.variant { - SynVar::LabelDefinition(name) => { - let variant = SymbolVariant::LabelDefinition; - definitions.push(Symbol { name, source, variant }); - }, - SynVar::MacroDefinition(name) => { - let variant = SymbolVariant::MacroDefinition; - definitions.push(Symbol { name, source, variant }); - } - SynVar::Symbol(name) => { - let variant = SymbolVariant::Reference; - references.push(Symbol { name, source, variant }); - }, - _ => (), - } - } - - Symbols { - definitions: Some(definitions), - references: Some(references), - source_code, - } -} - - -/// Attempt to read program source from a file. -pub fn read_source_from_file(path: &Path) -> Result<String, ParseError> { - match std::fs::read(&path) { - Ok(bytes) => match String::from_utf8(bytes) { - Ok(source) => Ok(source), - Err(_) => return Err(ParseError::InvalidUtf8), - } - Err(err) => return Err( match err.kind() { - std::io::ErrorKind::NotFound => ParseError::NotFound, - std::io::ErrorKind::PermissionDenied => ParseError::NotReadable, - std::io::ErrorKind::IsADirectory => ParseError::IsADirectory, - _ => ParseError::Unknown, - } ) - } -} - - -fn canonicalize_path<P: Into<PathBuf>>(path: P) -> PathBuf { - let pathbuf = path.into(); - match pathbuf.canonicalize() { - Ok(canonical) => canonical, - Err(_) => pathbuf, - } -} - - - -pub struct SourceFile { - pub path: PathBuf, - pub symbols: Symbols, -} - - -pub struct Symbols { - pub definitions: Option<Vec<Symbol>>, - pub references: Option<Vec<Symbol>>, - pub source_code: String, -} - - -pub struct Symbol { - pub name: String, - pub variant: SymbolVariant, - pub source: SourceSpan, -} - - -#[derive(PartialEq)] -pub enum SymbolVariant { - LabelDefinition, - MacroDefinition, - Reference, -} @@ -1,21 +1,245 @@ -#![feature(extract_if)] -#![feature(io_error_more)] -#![feature(map_try_insert)] +#![feature(path_add_extension)] +mod formats; +mod types; +mod stages; +pub use formats::*; +pub use types::*; +pub use stages::*; -mod gather_libraries; -mod symbol_resolver; +use assembler::*; +use log::*; +use switchboard::*; -pub use gather_libraries::*; -pub use symbol_resolver::*; +use std::io::Read; +use std::io::Write; -mod locators; -mod tokens; -mod translators; -pub use locators::*; -pub use tokens::*; -pub use translators::*; +pub const RETURN_MODE: u8 = 0x80; +pub const WIDE_MODE: u8 = 0x40; +pub const IMMEDIATE_MODE: u8 = 0x20; -mod print; -pub use print::*; + +pub fn assemble(mut args: Switchboard, invocation: &str) -> ! { + args.named("help").short('h'); + args.named("version"); + args.named("verbose").short('v'); + + if args.get("help").as_bool() { + print_help(invocation); + std::process::exit(0); + } + if args.get("version").as_bool() { + let name = env!("CARGO_PKG_NAME"); + let version = env!("CARGO_PKG_VERSION"); + eprintln!("{name} v{version}"); + eprintln!("Written by Ben Bridle."); + std::process::exit(0); + } + if args.get("verbose").as_bool() { + log::set_log_level(log::LogLevel::Info); + } + + args.positional("source"); + args.positional("destination"); + args.named("extension").default("brc"); + + args.named("no-libs"); + args.named("no-project-libs"); + args.named("no-env-libs"); + args.named("no-truncate"); + + args.named("format").default("raw"); + args.named("dry-run").short('n'); + args.named("tree"); + args.named("with-symbols"); + args.raise_errors(); + + let source_path = args.get("source").as_path_opt().map( + |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); + let destination_path = args.get("destination").as_path_opt(); + let extension = args.get("extension").as_string(); + let opt_extension = Some(extension.as_str()); + + let no_libs = args.get("no-libs").as_bool(); + let no_project_libs = args.get("no-project-libs").as_bool(); + let no_env_libs = args.get("no-env-libs").as_bool(); + let no_truncate = args.get("no-truncate").as_bool(); + + let format = Format::from_str(args.get("format").as_str()); + let dry_run = args.get("dry-run").as_bool(); + let print_tree = args.get("tree").as_bool(); + let export_symbols = args.get("with-symbols").as_bool(); + + // ----------------------------------------------------------------------- + + let mut compiler = new_compiler(); + + if let Some(path) = &source_path { + info!("Reading program source from {path:?}"); + compiler.root_from_path(path).unwrap_or_else(|err| fatal!("{err:?}: {path:?}")); + } else { + let mut source_code = String::new(); + info!("Reading program source from standard input"); + if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { + fatal!("Could not read from standard input\n{err:?}"); + } + compiler.root_from_string(source_code, "<standard input>") + }; + if compiler.error().is_some() && !no_libs && !no_project_libs { + compiler.include_libs_from_parent(opt_extension); + } + if compiler.error().is_some() && !no_libs && !no_env_libs { + compiler.include_libs_from_path_variable("BEDROCK_LIBS", opt_extension); + } + + if print_tree { + compiler.hierarchy().report() + } + if let Some(error) = compiler.error() { + error.report(); + std::process::exit(1); + } + + let merged_source = compiler.get_compiled_source().unwrap_or_else(|error| { + error.report(); + std::process::exit(1); + }); + + if !dry_run && format == Format::Source { + write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); + } + + // ----------------------------------------------------------------------- + + let path = Some("<merged source>"); + let syntactic = match parse_syntactic(&merged_source, path) { + Ok(tokens) => tokens, + Err(errors) => { + report_syntactic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let semantic = match parse_semantic(syntactic) { + Ok(tokens) => tokens, + Err(errors) => { + report_semantic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let program = match generate_bytecode(&semantic) { + Ok(program) => program, + Err(errors) => { + report_bytecode_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let AssembledProgram { mut bytecode, symbols } = program; + + let length = bytecode.len(); + let percentage = (length as f32 / 65536.0 * 100.0).round() as u16; + info!("Assembled program in {length} bytes ({percentage}% of maximum)"); + + if !no_truncate { + // Remove null bytes from end of bytecode. + while let Some(0) = bytecode.last() { + bytecode.pop(); + } + let new_length = bytecode.len(); + let difference = length - new_length; + if difference > 0 { + info!("Truncated program to {new_length} bytes (saved {difference} bytes)"); + } + } + + if !dry_run { + if export_symbols { + if let Some(path) = &destination_path { + let mut symbols_path = path.to_path_buf(); + symbols_path.add_extension("sym"); + let mut symbols_string = String::new(); + for symbol in &symbols { + let address = &symbol.address; + let name = &symbol.name; + let location = &symbol.source.location(); + symbols_string.push_str(&format!( + "{address:04x} {name} {location}\n" + )); + } + match std::fs::write(&symbols_path, symbols_string) { + Ok(_) => info!("Saved symbols to {symbols_path:?}"), + Err(err) => info!("Could not write symbols to {symbols_path:?}\n{err:?}"), + } + } + } + + let bytes = match format { + Format::Raw => bytecode, + Format::Clang => format_clang(&bytecode), + Format::Source => unreachable!("Source output is handled before full assembly"), + }; + write_bytes_and_exit(&bytes, destination_path.as_ref()); + } + std::process::exit(0); +} + + +fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { + match path { + Some(path) => match std::fs::write(path, bytes) { + Ok(_) => info!("Wrote output to {:?}", path.as_ref()), + Err(err) => fatal!("Could not write to {:?}\n{err:?}", path.as_ref()), + } + None => match std::io::stdout().write_all(bytes) { + Ok(_) => info!("Wrote output to standard output"), + Err(err) => fatal!("Could not write to standard output\n{err:?}"), + } + } + std::process::exit(0); +} + + +fn print_help(invocation: &str) { + eprintln!("\ +Usage: {invocation} [source] [destination] + +Assembler for the Bedrock computer system. + +Usage: + To assemble a Bedrock program from a source file and write to an output + file, run `br-asm [source] [destination]`, where [source] is the path + of the source file and [destination] is the path to write to. + + If [destination] is omitted, the assembled program will be written to + standard output. If [source] is omitted, the program source code will + be read from standard input. + +Environment variables: + BEDROCK_LIBS + A list of colon-separated paths that will be searched to find Bedrock + source code files to use as libraries when assembling a Bedrock program. + If a library file resolves an unresolved symbol in the program being + assembled, the library file will be merged into the program. + +Arguments: + [source] Bedrock source code file to assemble. + [destination] Destination path for assembler output. + +Switches: + --dry-run (-n) Assemble and show errors only, don't write any output + --extension=<ext> File extension to identify source files (default is 'brc') + --format=<fmt> Output format to use for assembled program (default is 'raw') + --no-project-libs Don't search for libraries in the source parent folder + --no-env-libs Don't search for libraries in the BEDROCK_LIBS path variable + --no-libs Combination of --no-project-libs and --no-env-libs + --no-truncate Don't remove trailing zero-bytes from the assembled program + --tree Show a tree diagram of all included library files + --with-symbols Also generate debug symbols file with extension '.sym' + --help (-h) Print this help information + --verbose, (-v) Print additional information + --version Print the program version and exit +"); +} diff --git a/src/locators.rs b/src/locators.rs deleted file mode 100644 index b7db1ee..0000000 --- a/src/locators.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod bytecode; -mod source; - -pub use bytecode::*; -pub use source::*; diff --git a/src/locators/bytecode.rs b/src/locators/bytecode.rs deleted file mode 100644 index 500e9f0..0000000 --- a/src/locators/bytecode.rs +++ /dev/null @@ -1,39 +0,0 @@ -pub struct BytecodeSpan { - /// The location of this span in the assembled bytecode. - pub location: BytecodeLocation, - /// The bytes which this span represents. - pub bytes: Vec<u8>, -} - - -impl Default for BytecodeSpan { - fn default() -> Self { - Self { - location: BytecodeLocation { - address: 0, - length: 0, - }, - bytes: Vec::new(), - } - } -} - - -#[derive(Clone, Copy)] -pub struct BytecodeLocation { - // Address of the first byte. - pub address: usize, - // Length as a number of bytes. - pub length: usize, -} - - -impl std::fmt::Display for BytecodeLocation { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "0x{:>04x}", self.address)?; - if self.length > 0 { - write!(f, "-0x{:>04x}", self.address + self.length)?; - } - Ok(()) - } -} diff --git a/src/locators/source.rs b/src/locators/source.rs deleted file mode 100644 index 20542e3..0000000 --- a/src/locators/source.rs +++ /dev/null @@ -1,75 +0,0 @@ -use std::path::PathBuf; - - -#[derive(Clone)] -pub struct SourceSpan { - /// The source characters which this span represents. - pub string: String, - /// The location of this span in the merged source file. - pub in_merged: SourceLocation, - /// The location of this span in the original source file. - pub in_source: Option<SourceLocation>, -} - -impl SourceSpan { - pub fn location(&self) -> &SourceLocation { - self.in_source.as_ref().unwrap_or(&self.in_merged) - } -} - - -#[derive(Clone)] -pub struct SourceLocation { - /// File path the source was loaded from. - pub path: Option<PathBuf>, - /// Position of the first character of the string. - pub start: Position, - /// Position of the final character of the string. - pub end: Position, -} - -impl std::fmt::Display for SourceLocation { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let y = self.start.line + 1; - let x = self.start.column + 1; - match &self.path { - Some(path) => write!(f, "{}:{y}:{x}", path.as_os_str().to_string_lossy()), - None => write!(f, "<unknown>:{y}:{x}"), - } - } -} - - -#[derive(Clone, Copy)] -pub struct Position { - /// The number of lines that precede this line in the file. - pub line: usize, - /// The number of characters that precede this character in the line. - pub column: usize, -} - -impl Position { - pub fn to_next_char(&mut self) { - self.column += 1; - } - - pub fn to_next_line(&mut self) { - self.line += 1; - self.column = 0; - } - - pub fn advance(&mut self, c: char) { - match c { - '\n' => self.to_next_line(), - _ => self.to_next_char(), - } - } -} - -impl std::fmt::Display for Position { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let y = self.line + 1; - let x = self.column + 1; - write!(f, "{y}:{x}") - } -} diff --git a/src/print.rs b/src/print.rs deleted file mode 100644 index 800a1d5..0000000 --- a/src/print.rs +++ /dev/null @@ -1,264 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; -use SemanticParseError as SemErr; -use SyntacticParseError as SynErr; - - -const NORMAL: &str = "\x1b[0m"; -const BOLD: &str = "\x1b[1m"; -const DIM: &str = "\x1b[2m"; -const WHITE: &str = "\x1b[37m"; -const RED: &str = "\x1b[31m"; -const YELLOW: &str = "\x1b[33m"; -const BLUE: &str = "\x1b[34m"; - - -pub struct Context<'a> { - pub source_code: &'a str, - pub source: &'a SourceSpan, -} - - -/// Print all errors found in the semantic tokens, including those inside macro -/// definitions. Returns true if at least one error was printed. -pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &str) -> bool { - let mut found_error = false; - for semantic_token in semantic_tokens { - match &semantic_token.variant { - SemVar::Error(err) => { - let context = Context { - source_code: source_code, - source: &semantic_token.source, - }; - found_error = true; - print_semantic_error(&err, context) - } - SemVar::MacroDefinition(definition) => { - for body_token in &definition.body_tokens { - if let SemVar::Error(err) = &body_token.variant { - let context = Context { - source_code: source_code, - source: &body_token.source, - }; - found_error = true; - print_semantic_error(err, context) - } - } - } - _ => (), - } - } - return found_error; -} - -fn print_semantic_error(error: &SemanticParseError, context: Context) { - let message = get_message_for_semantic_error(error); - print_error(&message, context); -} - -fn get_message_for_semantic_error(error: &SemanticParseError) -> String { - match error { - SemErr::LabelDefinitionInMacroDefinition => - format!("Labels cannot be defined inside a macro"), - SemErr::MacroDefinitionInMacroDefinition => - format!("Macros cannot be defined inside a macro"), - SemErr::StrayMacroTerminator => - format!("Macro definition terminator is missing a macro definition"), - SemErr::StrayBlockClose => - format!("Block was not opened, add a '{{' character to open"), - SemErr::UnclosedBlock => - format!("Block was not closed, add a '}}' character to close"), - SemErr::UndefinedSymbol(name) => - format!("Undefined symbol, no label or macro has been defined with the name '{name}'"), - SemErr::RedefinedSymbol((_, source)) => - format!("Redefined symbol, first defined at {}", source.location()), - SemErr::MacroInvocationBeforeDefinition((_, source)) => - format!("Macro used before definition, definition is at {}", source.location()), - SemErr:: SyntaxError(syntax_error) => match syntax_error { - SynErr::UnterminatedComment => - format!("Unclosed comment, add a ')' character to close"), - SynErr::UnterminatedRawString => - format!("Unclosed string, add a ' character to close"), - SynErr::UnterminatedNullString => - format!("Unclosed string, add a \" character to close"), - SynErr::InvalidPaddingValue(_) => - format!("Padding value must be two or four hexidecimal digits"), - } - } -} - - -pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool { - let mut found_error = false; - for reference in &resolver.unresolved { - found_error = true; - let message = format!( - "Undefined symbol, no label or macro has been defined with the name '{}'", - &reference.symbol.source.string, - ); - let source_code = resolver.get_source_code_for_tracked_symbol(reference); - let source = &reference.symbol.source; - print_error(&message, Context { source_code, source } ) - } - for redefinition in &resolver.redefinitions { - found_error = true; - let definition = resolver.definitions.get(redefinition.1).unwrap(); - let message = format!( - "Redefined symbol, first defined at {}", - &definition.symbol.source.in_merged, - ); - let source_code = resolver.get_source_code_for_tracked_symbol(&redefinition.0); - let source = &redefinition.0.symbol.source; - print_error(&message, Context { source_code, source } ) - } - return found_error; -} - - -/// The `ids` argument contains a list of the IDs of the source units which -/// cyclicly depend on one another. -pub fn print_cyclic_source_units(ids: &[usize], resolver: &SymbolResolver) { - eprintln!("{BOLD}{RED}[ERROR]{WHITE}: Some libraries contain a dependency cycle{NORMAL}"); - for id in ids { - if let Some(unit) = resolver.source_units.get(*id) { - let path = &unit.source_unit.main.path; - let path_str = path.as_os_str().to_string_lossy(); - if let Some(name_str) = get_unit_name(&unit.source_unit) { - eprintln!("{name_str}{NORMAL}{DIM} ({path_str}){NORMAL}"); - } else { - eprintln!("{path_str}"); - }; - // Print parents involved in dependency cycle. - for parent_id in &unit.parent_ids { - if !ids.contains(parent_id) { continue; } - if let Some(parent_unit) = resolver.source_units.get(*parent_id) { - let parent_path = &parent_unit.source_unit.main.path; - let parent_path_str = parent_path.as_os_str().to_string_lossy(); - let parent_name_str = match get_unit_name(&parent_unit.source_unit) { - Some(parent_name_str) => parent_name_str, - None => parent_path_str.to_string(), - }; - eprintln!(" => {parent_name_str} {DIM}({parent_path_str}){NORMAL}"); - } - } - } - } -} - - -pub fn print_error(message: &str, context: Context) { - print_source_issue(message, context, SourceIssueVariant::Error); -} - -pub fn print_warning(message: &str, context: Context) { - print_source_issue(message, context, SourceIssueVariant::Warning); -} - -fn print_source_issue(message: &str, context: Context, variant: SourceIssueVariant) { - let (label, colour) = match variant { - SourceIssueVariant::Warning => ("WARNING", YELLOW), - SourceIssueVariant::Error => ("ERROR", RED), - }; - - // Prepare variables. - let location = &context.source.in_merged; - let y = location.start.line + 1; - let digits = y.to_string().len(); - let arrow = "-->"; - let space = " "; - - // Print message and file path. - eprintln!("{BOLD}{colour}[{label}]{WHITE}: {message}{NORMAL}"); - eprintln!("{BLUE}{arrow:>w$}{NORMAL} {location}{NORMAL}", w=digits+3); - if let Some(source) = &context.source.in_source { - eprintln!("{BLUE}{arrow:>w$}{NORMAL} {source}{NORMAL}", w=digits+3); - } - - let start = location.start.column; - let end = location.end.column + 1; - - // Print source code line. - eprint!("{BLUE} {y} | {NORMAL}"); - let line = get_line_from_source_code(context.source_code, location.start.line); - for (i, c) in line.chars().enumerate() { - if i == start { eprint!("{colour}") } - if i == end { eprint!("{NORMAL}") } - eprint!("{c}"); - } - eprintln!("{NORMAL}"); - - // Print source code underline. - eprint!("{BLUE} {space:>w$} | {NORMAL}", w=digits); - for _ in 0..start { eprint!(" "); } - eprint!("{colour}"); - for _ in start..end { eprint!("^"); } - eprintln!("{NORMAL}"); -} - - -fn get_line_from_source_code(source_code: &str, line: usize) -> &str { - source_code.split('\n').nth(line).unwrap_or("<error reading line from source>") -} - - -enum SourceIssueVariant { - Warning, - Error, -} - - -/// Print a tree containing the name and path of each source unit. -pub fn print_source_tree(resolver: &SymbolResolver) { - eprintln!("."); - let len = resolver.root_unit_ids.len(); - for (i, id) in resolver.root_unit_ids.iter().enumerate() { - let end = i + 1 == len; - print_source_tree_leaf(resolver, *id, Vec::new(), end); - } - eprintln!(); -} - -fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec<bool>, end: bool) { - // A level entry is true if all entries in that level have been printed. - for level in &levels { - match level { - false => eprint!("│ "), - true => eprint!(" "), - } - } - // The end value is true if all siblings of this entry have been printed. - match end { - false => eprint!("├── "), - true => eprint!("└── "), - } - if let Some(unit) = resolver.source_units.get(id) { - let path_str = &unit.source_unit.main.path.as_os_str().to_string_lossy(); - if let Some(name_str) = get_unit_name(&unit.source_unit) { - eprint!("{name_str}{BLUE}"); - if unit.source_unit.head.is_some() { eprint!(" +head") } - if unit.source_unit.tail.is_some() { eprint!(" +tail") } - let mut unresolved = 0; - for symbol in &resolver.unresolved { - if symbol.source_id == id { unresolved += 1; } - } - if unresolved > 0 { eprint!("{RED} ({unresolved})"); } - eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}"); - } else { - eprintln!("{path_str}"); - } - levels.push(end); - let len = unit.child_ids.len(); - for (i, id) in unit.child_ids.iter().enumerate() { - let end = i + 1 == len; - print_source_tree_leaf(resolver, *id, levels.clone(), end); - } - } else { - eprintln!("<error loading source unit details>"); - } -} - - -fn get_unit_name(source_unit: &SourceUnit) -> Option<String> { - source_unit.main.path.file_name().map(|s| s.to_string_lossy().to_string()) -} diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..02cc739 --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,150 @@ +use crate::*; + +use indexmap::IndexMap; + + +/// Doesn't truncate trailing null bytes. +pub fn generate_bytecode(semantic: &Program) -> Result<AssembledProgram, Vec<Tracked<BytecodeError>>> { + let mut generator = BytecodeGenerator::new(&semantic.definitions); + generator.parse(&semantic.tokens, false); + generator.fill_slots(); + let mut symbols = Vec::new(); + for (name, information) in generator.labels { + let source = semantic.definitions.get(&name).unwrap().source.clone(); + let address = information.address; + symbols.push(AssembledSymbol { name, address, source }); + } + match generator.errors.is_empty() { + true => Ok(AssembledProgram { bytecode: generator.bytecode, symbols }), + false => Err(generator.errors), + } +} + + +pub struct BytecodeGenerator<'a> { + definitions: &'a IndexMap<String, Tracked<Definition>>, + labels: IndexMap<String, LabelInformation>, + stack: Vec<usize>, + bytecode: Vec<u8>, + errors: Vec<Tracked<BytecodeError>>, +} + +struct LabelInformation { + address: usize, + slots: Vec<usize>, +} + +impl<'a> BytecodeGenerator<'a> { + pub fn new(definitions: &'a IndexMap<String, Tracked<Definition>>) -> Self { + let mut labels = IndexMap::new(); + for (name, definition) in definitions { + if let DefinitionVariant::LabelDefinition = definition.variant { + // Use fake address for now. + let information = LabelInformation { address: 0, slots: Vec::new() }; + labels.insert(name.to_string(), information); + } + } + Self { + definitions, + labels, + stack: Vec::new(), + bytecode: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) { + macro_rules! byte { + ($byte:expr) => { self.bytecode.push($byte) }; + } + macro_rules! double { + ($double:expr) => {{ + let [high, low] = u16::to_be_bytes($double); + self.bytecode.push(high); self.bytecode.push(low); + }}; + } + + for token in tokens { + let i = self.bytecode.len(); + match &token.value { + SemanticToken::Literal(value) => match value { + Value::Byte(byte) => byte!(*byte), + Value::Double(double) => double!(*double), + } + SemanticToken::Pad(value) => { + self.bytecode.resize(i + usize::from(value), 0); + }, + SemanticToken::String(bytes) => { + self.bytecode.extend_from_slice(bytes) + }, + SemanticToken::Comment(_) => (), + SemanticToken::BlockOpen(_) => { + self.stack.push(i); + // Use a fake index for now. + double!(0); + } + SemanticToken::BlockClose(_) => { + if i > 0xFFFF { + let error = BytecodeError::InvalidBlockAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let Some(addr) = self.stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + let [high, low] = (i as u16).to_be_bytes(); + self.bytecode[addr] = high; + self.bytecode[addr+1] = low; + } + SemanticToken::Symbol(name) => { + if let Some(definition) = self.definitions.get(name) { + match &definition.variant { + DefinitionVariant::MacroDefinition(body) => { + self.parse(body, true); + } + DefinitionVariant::LabelDefinition => { + let information = self.labels.get_mut(name).unwrap(); + information.slots.push(i); + // Use a fake index for now. + double!(0); + } + } + } else { + unreachable!("Uncaught undefined symbol '{name}'"); + } + } + SemanticToken::Instruction(instruction) => { + byte!(instruction.value) + } + SemanticToken::LabelDefinition(name) => if in_macro { + unreachable!("Uncaught label definition in macro"); + } else { + if i > 0xFFFF { + let error = BytecodeError::InvalidLabelAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let information = self.labels.get_mut(name).unwrap(); + // Replace fake index with real index. + information.address = i; + } + SemanticToken::MacroDefinition{ .. } => if in_macro { + unreachable!("Uncaught macro definition in macro"); + } + } + } + + if !in_macro && !self.stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + } + + /// Fill each label slot with a real label address. + pub fn fill_slots(&mut self) { + for information in self.labels.values() { + let [high, low] = (information.address as u16).to_be_bytes(); + for addr in &information.slots { + self.bytecode[*addr] = high; + self.bytecode[*addr + 1] = low; + } + } + } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..902fcd7 --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,37 @@ +use crate::*; + + +pub struct AssembledProgram { + pub bytecode: Vec<u8>, + pub symbols: Vec<AssembledSymbol>, +} + +pub struct AssembledSymbol { + pub name: String, + pub address: usize, + pub source: SourceSpan, +} + +pub enum BytecodeError { + InvalidLabelAddress(usize), + InvalidBlockAddress(usize), +} + + +pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { + for error in errors { + report_bytecode_error(error, source_code); + } +} + + +fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + BytecodeError::InvalidLabelAddress(address) => + &format!("The label address exceeds 0xFFFF: 0x{address:X}"), + BytecodeError::InvalidBlockAddress(address) => + &format!("The block address exceeds 0xFFFF: 0x{address:X}"), + }; + report_source_issue(LogLevel::Error, &context, message); +} diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs new file mode 100644 index 0000000..97bf20c --- /dev/null +++ b/src/stages/compiler.rs @@ -0,0 +1,84 @@ +use crate::*; + +use assembler::SymbolRole::*; +use assembler::DefinitionType::*; + + +pub fn new_compiler() -> Compiler { + Compiler::new(parse_symbols, push_code) +} + + +/// Parse all symbols from a source code string. +pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { + let syntactic = match parse_syntactic(source_code, path) { + Ok(syntactic) => syntactic, + Err(_) => return None, + }; + Some(SymbolParser::new().parse(&syntactic)) +} + +/// Push source code to a source compilation string. +pub fn push_code(compilation: &mut String, source_file: &SourceFile) { + // Skip blank files. + let source_code = &source_file.source_code; + if source_code.chars().all(|c| c.is_whitespace()) { return; } + // Ensure that the previous section is followed by two newline characters. + if !compilation.is_empty() { + if !compilation.ends_with('\n') { compilation.push('\n'); } + if !compilation.ends_with("\n\n") { compilation.push('\n'); } + } + // Push a path comment and the source code. + let path_str = source_file.path.as_os_str().to_string_lossy(); + let path_comment = format!("(: {path_str} )\n"); + compilation.push_str(&path_comment); + compilation.push_str(&source_code); +} + + +// Extract symbol definitions from a list of syntactic tokens. +pub struct SymbolParser { + pub symbols: Vec<Symbol>, +} + +impl SymbolParser { + pub fn new() -> Self { + Self { + symbols: Vec::new(), + } + } + + fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { + let name = name.to_string(); + let namespace = Vec::new(); + let source = source.to_owned(); + self.symbols.push(Symbol { name, namespace, source, role }); + } + + pub fn parse(mut self, syntactic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> { + for token in syntactic { + match &token.value { + SyntacticToken::MacroDefinition(definition) => { + self.record_symbol( + &definition.name, + &definition.name.source, + Definition(MustPrecedeReference), + ); + for token in &definition.body { + if let SyntacticToken::Symbol(name) = &token.value { + self.record_symbol(&name, &token.source, Reference); + } + } + } + SyntacticToken::LabelDefinition(name) => { + self.record_symbol(&name, &token.source, Definition(CanFollowReference)); + } + SyntacticToken::Symbol(name) => { + self.record_symbol(&name, &token.source, Reference); + } + _ => (), + } + } + return self.symbols; + } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..76bda0d --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,26 @@ +mod compiler; +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod bytecode; +mod bytecode_tokens; +pub use compiler::*; +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! indent { + (0, $($tokens:tt)*) => {{ + println!($($tokens)*); + }}; + ($indent:expr, $($tokens:tt)*) => {{ + for _ in 0..$indent { print!(" "); } + println!($($tokens)*); + }}; +} diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..dc9709e --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,154 @@ +use crate::*; + +use std::str::FromStr; + +use indexmap::{IndexMap, IndexSet}; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> { + let mut errors = Vec::new(); + + // Record all label definitions and macro names up front. + let mut definitions = IndexMap::new(); + let mut macro_names = IndexSet::new(); + for token in &syntactic { + match &token.value { + SyntacticToken::LabelDefinition(name) => { + // Check if identifier is reserved. + if Instruction::from_str(&name).is_ok() { + let error = SemanticError::ReservedIdentifier(name.to_string()); + errors.push(Tracked::from(error, token.source.clone())); + } + // Use a fake index for now. + let definition = Definition::new(0, DefinitionVariant::LabelDefinition); + let tracked = Tracked::from(definition, token.source.clone()); + if let Some(_) = definitions.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + SyntacticToken::MacroDefinition(definition) => { + let name = &definition.name; + // Check if identifier is reserved. + if Instruction::from_str(&name).is_ok() { + let error = SemanticError::ReservedIdentifier(name.to_string()); + errors.push(Tracked::from(error, name.source.clone())); + } + if !macro_names.insert(name.clone()) { + unreachable!("Uncaught duplicate macro definition '{name}'") + } + } + _ => (), + } + } + + // Convert syntactic tokens to semantic tokens. + let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut stack = Vec::new(); + + for syn_token in syntactic { + let i = tokens.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + stack.push(i); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + // Replace fake index with real index. + tokens[k].value = SemanticToken::BlockOpen(i); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.references.push(i); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(name) => { + let definition = definitions.get_mut(&name).unwrap(); + // Replace fake index with real index. + definition.value.definition = i; + SemanticToken::LabelDefinition(name) + } + SyntacticToken::MacroDefinition(definition) => { + let name = definition.name.clone(); + let mut body: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut body_stack = Vec::new(); + for syn_token in definition.body { + let j = body.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + body_stack.push(j); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = body_stack.pop() else { + unreachable!("Uncaught unmatched block terminator in macro '{name}'"); + }; + // Replace fake index with real index. + body[k].value = SemanticToken::BlockOpen(j); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.deep_references.push((i, j)); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}' in macro '{name}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(label) => + unreachable!("Uncaught label definition '{label}' in macro '{name}'"), + SyntacticToken::MacroDefinition(definition) => + unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name), + }; + body.push(Tracked::from(sem_token, syn_token.source)); + } + + let variant = DefinitionVariant::MacroDefinition(body); + let source = definition.name.source.clone(); + let tracked = Tracked::from(Definition::new(i, variant), source); + if let Some(_) = definitions.insert(name.value.clone(), tracked) { + unreachable!("Uncaught duplicate definition '{name}'") + }; + if !body_stack.is_empty() { + unreachable!("Uncaught unterminated block in macro '{name}'"); + } + SemanticToken::MacroDefinition(name) + } + }; + tokens.push(Tracked::from(sem_token, syn_token.source)); + } + + if !stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + match errors.is_empty() { + true => Ok(Program { definitions, tokens }), + false => Err(errors), + } +} diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs new file mode 100644 index 0000000..c735828 --- /dev/null +++ b/src/stages/semantic_tokens.rs @@ -0,0 +1,97 @@ +use crate::*; + +use indexmap::IndexMap; + + +pub struct Program { + pub definitions: IndexMap<String, Tracked<Definition>>, + pub tokens: Vec<Tracked<SemanticToken>>, +} + +pub struct Definition { + pub variant: DefinitionVariant, + /// Index of definition token. + pub definition: usize, + /// Indices of symbols referencing this definition. + pub references: Vec<usize>, + /// Indices of references inside other definitions. + pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { + pub fn new(i: usize, variant: DefinitionVariant) -> Self { + Self { + variant, + definition: i, + references: Vec::new(), + deep_references: Vec::new(), + } + } +} + +pub enum DefinitionVariant { + LabelDefinition, + MacroDefinition(Vec<Tracked<SemanticToken>>), +} + +pub enum SemanticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen(usize), // index to matching block-close + BlockClose(usize), // index to matching block-open + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(Tracked<String>), +} + +pub enum SemanticError { + InvocationBeforeDefinition, + ReservedIdentifier(String), +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { + for error in errors { + report_semantic_error(error, source_code); + } +} + + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SemanticError::InvocationBeforeDefinition => + "Macro cannot be invoked before it has been defined", + SemanticError::ReservedIdentifier(name) => + &format!("Identifier '{name}' is reserved for a built-in instruction"), + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &IndexMap<String, Tracked<Definition>>) { + match token { + SemanticToken::Literal(value) => indent!(i, "Literal({value})"), + SemanticToken::Pad(value) => indent!(i, "Pad({value})"), + SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SemanticToken::Comment(_) => indent!(i, "Comment"), + SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"), + SemanticToken::BlockClose(pointer) => indent!(i, "BlockClose(*{pointer})"), + SemanticToken::Symbol(name) => indent!(i, "Symbol({name})"), + SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SemanticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SemanticToken::MacroDefinition(name) => { + indent!(i, "MacroDefinition({name})"); + if let Some(definition) = definitions.get(name.as_str()) { + if let DefinitionVariant::MacroDefinition(body) = &definition.variant { + for token in body { + print_semantic_token(i+1, token, definitions); + } + } + } + } + } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..59b8b95 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,211 @@ +use crate::*; + +use std::path::PathBuf; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), "") +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + t.add_delimiters(&['(',')','[',']','{','}',';']); + t.add_terminators(&[':']); + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + let mut label_name = label_name.to_string(); + + macro_rules! err { + ($error:expr) => {{ + err!($error, t.get_source()); + }}; + ($error:expr, $source:expr) => {{ + errors.push(Tracked::from($error, $source)); + continue; + }}; + } + + macro_rules! check_name { + ($name:expr) => {{ + check_name!($name, t.get_source()); + }}; + ($name:expr, $source:expr) => { + if $name.chars().count() > 63 { + let error = SyntacticError::InvalidIdentifier($name.clone()); + errors.push(Tracked::from(error, $source.clone())); + } + }; + } + + // Eat characters until the end character is found. + macro_rules! is_end { + ($end:expr) => { + |t: &mut Tokeniser| { + t.eat_char() == Some($end) + } + }; + } + + loop { + // Eat leading whitespace. + while let Some(c) = t.peek_char() { + match [' ', '\n', '\r', '\t'].contains(&c) { + true => t.eat_char(), + false => break, + }; + } + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let token = match c { + '"' => { + let source = t.get_source(); + match t.track_until(is_end!('"')) { + Some(string) => { + let mut bytes = string.into_bytes(); + bytes.push(0x00); + SyntacticToken::String(bytes) + } + None => err!(SyntacticError::UnterminatedNullString, source), + } + } + '\'' => { + let source = t.get_source(); + match t.track_until(is_end!('\'')) { + Some(string) => SyntacticToken::String(string.into_bytes()), + None => err!(SyntacticError::UnterminatedRawString, source), + } + } + '(' => { + let source = t.get_source(); + if let Some(string) = t.track_until(is_end!(')')) { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + continue; + } + } + SyntacticToken::Comment(string) + } else { + err!(SyntacticError::UnterminatedComment, source) + } + } + ')' => err!(SyntacticError::UnmatchedCommentTerminator), + '%' => { + let name = t.eat_token(); + let source = t.get_source(); + check_name!(name, source); + t.mark_child(); + if let Some(_) = t.track_until(is_end!(';')) { + let child = t.tokenise_child_span(); + match parse_body_from_tokeniser(child, &label_name) { + Ok(body) => { + let name = Tracked::from(name, source); + let definition = SyntacticMacroDefinition { name, body }; + SyntacticToken::MacroDefinition(definition) + } + Err(mut err) => { + errors.append(&mut err); + continue; + } + } + } else { + err!(SyntacticError::UnterminatedMacroDefinition, source); + } + } + ';' => err!(SyntacticError::UnmatchedMacroTerminator), + '{' => SyntacticToken::BlockOpen, + '}' => SyntacticToken::BlockClose, + '['|']' => continue, + '@' => { + label_name = t.eat_token(); + check_name!(label_name); + SyntacticToken::LabelDefinition(label_name.clone()) + } + '&' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::LabelDefinition(name) + } + '~' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::Symbol(name) + } + '#' => { + let token = t.eat_token(); + match token.parse::<Value>() { + Ok(value) => SyntacticToken::Pad(value), + Err(_) => err!(SyntacticError::InvalidPadValue), + } + }, + ':' => { + SyntacticToken::Instruction(Instruction { value: 0x21 }) + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Ok(value) = token.parse::<Value>() { + SyntacticToken::Literal(value) + } else if let Ok(instruction) = token.parse::<Instruction>() { + SyntacticToken::Instruction(instruction) + } else { + check_name!(token); + SyntacticToken::Symbol(token) + } + } + }; + + t.mark_end(); + let source = t.get_source(); + tokens.push(Tracked::from(token, source)); + } + + // Check that every block open matches a block close. + let mut stack = Vec::new(); + for token in &tokens { + match &token.value { + SyntacticToken::BlockOpen => stack.push(token.source.clone()), + SyntacticToken::BlockClose => if let None = stack.pop() { + let error = SyntacticError::UnmatchedBlockTerminator; + errors.push(Tracked::from(error, token.source.clone())); + } + _ => (), + } + } + for source in stack { + let error = SyntacticError::UnterminatedBlock; + errors.push(Tracked::from(error, source)); + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} + + +fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + for token in parse_syntactic_from_tokeniser(t, label_name)? { + match token.value { + SyntacticToken::LabelDefinition(_) => { + let error = SyntacticError::LabelDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + SyntacticToken::MacroDefinition(_) => { + let error = SyntacticError::MacroDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + _ => tokens.push(token), + }; + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..35afa80 --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,94 @@ +use crate::*; + + +pub enum SyntacticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen, + BlockClose, + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(SyntacticMacroDefinition), +} + +pub struct SyntacticMacroDefinition { + pub name: Tracked<String>, + pub body: Vec<Tracked<SyntacticToken>>, +} + +pub enum SyntacticError { + UnterminatedBlock, + UnterminatedComment, + UnterminatedRawString, + UnterminatedNullString, + UnterminatedMacroDefinition, + UnmatchedBlockTerminator, + UnmatchedCommentTerminator, + UnmatchedMacroTerminator, + InvalidPadValue, + InvalidIdentifier(String), + MacroDefinitionInMacroDefinition, + LabelDefinitionInMacroDefinition, +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { + for error in errors { + report_syntactic_error(error, source_code); + } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SyntacticError::UnterminatedBlock => + "Block was not terminated, add a '}' character to terminate", + SyntacticError::UnterminatedComment => + "Comment was not terminated, add a ')' character to terminate", + SyntacticError::UnterminatedRawString => + "String was not terminated, add a ' character to terminate", + SyntacticError::UnterminatedNullString => + "String was not terminated, add a '\"' character to terminate", + SyntacticError::UnterminatedMacroDefinition => + "Macro definition was not terminated, add a ';' character to terminate", + SyntacticError::UnmatchedBlockTerminator => + "Attempted to terminate a block, but no block was in progress", + SyntacticError::UnmatchedCommentTerminator => + "Attempted to terminate a comment, but no comment was in progress", + SyntacticError::UnmatchedMacroTerminator => + "Attempted to terminate a macro definition, but no macro definition was in progress", + SyntacticError::InvalidPadValue => + "The pad value must be two or four hexadecimal digits", + SyntacticError::InvalidIdentifier(name) => + &format!("An identifier cannot exceed 63 characters in length: {name}"), + SyntacticError::MacroDefinitionInMacroDefinition => + "A macro cannot be defined inside another macro", + SyntacticError::LabelDefinitionInMacroDefinition => + "A label cannot be defined inside a macro", + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { + match token { + SyntacticToken::Literal(value) => indent!(i, "Literal({value})"), + SyntacticToken::Pad(value) => indent!(i, "Pad({value})"), + SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SyntacticToken::Comment(_) => indent!(i, "Comment"), + SyntacticToken::BlockOpen => indent!(i, "BlockOpen"), + SyntacticToken::BlockClose => indent!(i, "BlockClose"), + SyntacticToken::Symbol(name) => indent!(i, "Symbol({name})"), + SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SyntacticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SyntacticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", definition.name); + for token in &definition.body { + print_syntactic_token(i+1, token); + } + } + } +} diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs deleted file mode 100644 index 0b89fb1..0000000 --- a/src/symbol_resolver.rs +++ /dev/null @@ -1,296 +0,0 @@ -use crate::*; - -use std::mem::take; - - -/// Resolve symbol references across source units. -pub struct SymbolResolver { - pub definitions: Vec<TrackedSymbol>, - /// All resolved references. - pub resolved: Vec<TrackedSymbol>, - /// All unresolved references. - pub unresolved: Vec<TrackedSymbol>, - /// Contains the `definitions` index of the original definition. - pub redefinitions: Vec<(TrackedSymbol, usize)>, - pub source_units: Vec<HeirarchicalSourceUnit>, - pub root_unit_ids: Vec<usize>, - pub unused_library_units: Vec<SourceUnit>, -} - - -impl SymbolResolver { - /// Construct a resolver from a root source unit. - pub fn from_source_unit(source_unit: SourceUnit) -> Self { - let mut new = Self { - definitions: Vec::new(), - resolved: Vec::new(), - unresolved: Vec::new(), - redefinitions: Vec::new(), - source_units: Vec::new(), - root_unit_ids: Vec::new(), - unused_library_units: Vec::new(), - }; - new.add_source_unit(source_unit, None); - return new; - } - - pub fn add_library_units(&mut self, mut source_units: Vec<SourceUnit>) { - self.unused_library_units.append(&mut source_units); - } - - pub fn resolve(&mut self) { - // Repeatedly test if any unused source unit resolves an unresolved symbol, - // breaking the loop when no new resolutions are found. - 'outer: loop { - for (i, source_unit) in self.unused_library_units.iter().enumerate() { - if let Some(id) = self.resolves_reference(&source_unit) { - let source_unit = self.unused_library_units.remove(i); - self.add_source_unit(source_unit, Some(id)); - continue 'outer; - } - } - break; - } - - // For every macro reference in every unit, find the ID of the unit which - // resolves that reference and add it to the .parent_ids field of the - // referencing unit. - for reference in &self.resolved { - let predicate = |d: &&TrackedSymbol| d.symbol.name == reference.symbol.name; - if let Some(definition) = self.definitions.iter().find(predicate) { - let is_self = reference.source_id == definition.source_id; - let is_label = definition.symbol.variant == SymbolVariant::LabelDefinition; - if is_self || is_label { continue; } - let referencing_unit = &mut self.source_units[reference.source_id]; - referencing_unit.parent_ids.push(definition.source_id); - }; - } - } - - /// Add a source unit to the resolver and link it to a parent unit. - pub fn add_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { - let source_id = self.source_units.len(); - - // Add all main symbols. - if let Some(definitions) = take(&mut source_unit.main.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Main); } - if let Some(references) = take(&mut source_unit.main.symbols.references) { - self.add_references(references, source_id, SourceRole::Main); } - - // Add all head symbols. - if let Some(head) = &mut source_unit.head { - if let Some(references) = take(&mut head.symbols.references) { - self.add_references(references, source_id, SourceRole::Head); } - if let Some(definitions) = take(&mut head.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Head); } - } - - // Add all tail symbols. - if let Some(tail) = &mut source_unit.tail { - if let Some(references) = take(&mut tail.symbols.references) { - self.add_references(references, source_id, SourceRole::Tail); } - if let Some(definitions) = take(&mut tail.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Tail); } - } - - if let Some(parent_id) = parent_id { - if let Some(parent_unit) = self.source_units.get_mut(parent_id) { - parent_unit.child_ids.push(source_id); - } - } else { - self.root_unit_ids.push(source_id); - } - - self.source_units.push( - HeirarchicalSourceUnit { - source_unit, - child_ids: Vec::new(), - parent_ids: Vec::new(), - } - ); - } - - fn add_references(&mut self, references: Vec<Symbol>, source_id: usize, source_role: SourceRole) { - for symbol in references { - let reference = TrackedSymbol { symbol, source_id, source_role }; - match self.definitions.contains(&reference) { - true => self.resolved.push(reference), - false => self.unresolved.push(reference), - } - } - } - - fn add_definitions(&mut self, definitions: Vec<Symbol>, source_id: usize, source_role: SourceRole) { - for symbol in definitions { - let predicate = |d: &TrackedSymbol| { &d.symbol.name == &symbol.name }; - if let Some(original) = self.definitions.iter().position(predicate) { - let definition = TrackedSymbol { symbol, source_id, source_role }; - let redefinition = (definition, original); - self.redefinitions.push(redefinition); - } else { - let predicate = |s: &mut TrackedSymbol| s.symbol.name == symbol.name; - for symbol in self.unresolved.extract_if(predicate) { - self.resolved.push(symbol); - } - self.unresolved.retain(|s| s.symbol.name != symbol.name); - let definition = TrackedSymbol { symbol, source_id, source_role }; - self.definitions.push(definition); - } - } - } - - /// Returns the ID of the owner of a symbol resolved by this unit. - pub fn resolves_reference(&self, source_unit: &SourceUnit) -> Option<usize> { - if let Some(definitions) = &source_unit.main.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - if let Some(head) = &source_unit.head { - if let Some(definitions) = &head.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - } - if let Some(tail) = &source_unit.tail { - if let Some(definitions) = &tail.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - } - return None; - } - - /// Returns the ID of the owner of a reference to one of these symbols. - fn source_id_of_unresolved(&self, symbols: &[Symbol]) -> Option<usize> { - for symbol in symbols { - let opt = self.unresolved.iter().find(|s| s.symbol.name == symbol.name); - if let Some(unresolved) = opt { - return Some(unresolved.source_id); - } - } - return None; - } - - pub fn get_source_code_for_tracked_symbol(&self, symbol: &TrackedSymbol) -> &str { - let source_unit = &self.source_units[symbol.source_id].source_unit; - match symbol.source_role { - SourceRole::Main => source_unit.main.symbols.source_code.as_str(), - SourceRole::Head => match &source_unit.head { - Some(head) => head.symbols.source_code.as_str(), - None => unreachable!("Failed to find source for token"), - } - SourceRole::Tail => match &source_unit.tail { - Some(tail) => tail.symbols.source_code.as_str(), - None => unreachable!("Failed to find source for token"), - } - } - } - - /// Create a source file by concatenating all source units. - /// If the source unit dependency graph contains a cycle, the IDs of the - /// source units involved in the cycle will be returned. - pub fn get_merged_source_code(&self) -> Result<String, Vec<usize>> { - // The ID of a given source unit will come after the IDs of all - // source units which define at least one symbol referenced in the - // given source unit. - let head_order = { - let mut included_source_ids: Vec<usize> = Vec::new(); - let mut remaining_source_ids: Vec<usize> = Vec::new(); - // Reverse the order so that the root unit is the last to be added. - for i in (0..self.source_units.len()).rev() { - remaining_source_ids.push(i); - } - - 'restart: while !remaining_source_ids.is_empty() { - 'next: for (i, id) in remaining_source_ids.iter().enumerate() { - let unit = &self.source_units[*id]; - for parent_id in &unit.parent_ids { - if !included_source_ids.contains(&parent_id) { - continue 'next; - } - } - included_source_ids.push(*id); - remaining_source_ids.remove(i); - continue 'restart; - } - // All remaining source units depend on at least one remaining - // source unit, indicating a dependency cycle. - return Err(remaining_source_ids); - } - included_source_ids - }; - - let mut source_code = String::new(); - - // Push head source code in macro-definition order. - for id in &head_order { - let source_unit = &self.source_units[*id]; - if let Some(head) = &source_unit.source_unit.head { - push_source_code_to_string(&mut source_code, head); - } - } - // Push main source code in source-added order. - for source_unit in self.source_units.iter() { - let main = &source_unit.source_unit.main; - push_source_code_to_string(&mut source_code, &main); - } - // Push tail source code in reverse source-added order. - for source_unit in self.source_units.iter().rev() { - if let Some(tail) = &source_unit.source_unit.tail { - push_source_code_to_string(&mut source_code, tail); - } - } - return Ok(source_code); - } -} - - -fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) { - // Don't push source code if it contains only whitespace. - let source_code = &source_file.symbols.source_code; - if source_code.chars().all(|c| c.is_whitespace()) { return; } - // Ensure that sections are separated by two newlines. - if !string.is_empty() { - if !string.ends_with('\n') { string.push('\n'); } - if !string.ends_with("\n\n") { string.push('\n'); } - } - // Write a path comment to the string. - let path_str = source_file.path.as_os_str().to_string_lossy(); - let path_comment = format!("(: {path_str} )\n"); - string.push_str(&path_comment); - string.push_str(&source_code); -} - - -pub struct HeirarchicalSourceUnit { - pub source_unit: SourceUnit, - /// IDs of units which were added to resolve symbol references this unit. - pub child_ids: Vec<usize>, - /// IDs of units which resolve macro references in this unit. - pub parent_ids: Vec<usize>, -} - - -pub struct TrackedSymbol { - pub symbol: Symbol, - pub source_id: usize, - pub source_role: SourceRole, -} - - -#[derive(Clone, Copy)] -pub enum SourceRole { - Main, - Head, - Tail, -} - - -impl PartialEq for TrackedSymbol { - fn eq(&self, other: &TrackedSymbol) -> bool { - self.symbol.name.eq(&other.symbol.name) - } -} diff --git a/src/tokens.rs b/src/tokens.rs deleted file mode 100644 index 81bf9d5..0000000 --- a/src/tokens.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod syntactic; -mod semantic; -mod instruction; -mod value; - -pub use syntactic::*; -pub use semantic::*; -pub use instruction::*; -pub use value::*; diff --git a/src/tokens/instruction.rs b/src/tokens/instruction.rs deleted file mode 100644 index d5fb3e5..0000000 --- a/src/tokens/instruction.rs +++ /dev/null @@ -1,170 +0,0 @@ -use Operation as Op; - - -pub struct Instruction { - pub value: u8, -} - - -impl Instruction { - pub fn operation(&self) -> Operation { - match self.value & 0x1f { - 0x00=>Op::HLT, 0x01=>Op::JMP, 0x02=>Op::JCN, 0x03=>Op::JCK, - 0x04=>Op::LDA, 0x05=>Op::STA, 0x06=>Op::LDD, 0x07=>Op::STD, - 0x08=>Op::PSH, 0x09=>Op::POP, 0x0a=>Op::CPY, 0x0b=>Op::SPL, - 0x0c=>Op::DUP, 0x0d=>Op::OVR, 0x0e=>Op::SWP, 0x0f=>Op::ROT, - 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, - 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, - 0x18=>Op::IOR, 0x19=>Op::XOR, 0x1a=>Op::AND, 0x1b=>Op::NOT, - 0x1c=>Op::SHF, 0x1d=>Op::SHC, 0x1e=>Op::TAL, 0x1f=>Op::REV, - _ => unreachable!(), - } - } - - pub fn return_mode(&self) -> bool { - self.value & 0x80 != 0 - } - - pub fn literal_mode(&self) -> bool { - self.value & 0x40 != 0 - } - - pub fn double_mode(&self) -> bool { - self.value & 0x20 != 0 - } -} - - -impl std::fmt::Display for Instruction { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", match self.value { - // Control operators - 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" , - 0x01=>"JMP",0x21=>"JMS" ,0x41=>"JMP:",0x61=>"JMS:" ,0x81=>"JMPr",0xA1=>"JMSr" ,0xC1=>"JMPr:",0xE1=>"JMSr:" , - 0x02=>"JCN",0x22=>"JCS" ,0x42=>"JCN:",0x62=>"JCS:" ,0x82=>"JCNr",0xA2=>"JCSr" ,0xC2=>"JCNr:",0xE2=>"JCSr:" , - 0x03=>"JCK",0x23=>"JCK*",0x43=>"JCK:",0x63=>"JCK*:",0x83=>"JCKr",0xA3=>"JCKr*",0xC3=>"JCKr:",0xE3=>"JCKr*:", - 0x04=>"LDA",0x24=>"LDA*",0x44=>"LDA:",0x64=>"LDA*:",0x84=>"LDAr",0xA4=>"LDAr*",0xC4=>"LDAr:",0xE4=>"LDAr*:", - 0x05=>"STA",0x25=>"STA*",0x45=>"STA:",0x65=>"STA*:",0x85=>"STAr",0xA5=>"STAr*",0xC5=>"STAr:",0xE5=>"STAr*:", - 0x06=>"LDD",0x26=>"LDD*",0x46=>"LDD:",0x66=>"LDD*:",0x86=>"LDDr",0xA6=>"LDDr*",0xC6=>"LDDr:",0xE6=>"LDDr*:", - 0x07=>"STD",0x27=>"STD*",0x47=>"STD:",0x67=>"STD*:",0x87=>"STDr",0xA7=>"STDr*",0xC7=>"STDr:",0xE7=>"STDr*:", - // Stack operators - 0x08=>"PSH",0x28=>"PSH*",0x48=>"PSH:",0x68=>"PSH*:",0x88=>"PSHr",0xA8=>"PSHr*",0xC8=>"PSHr:",0xE8=>"PSHr*:", - 0x09=>"POP",0x29=>"POP*",0x49=>"POP:",0x69=>"POP*:",0x89=>"POPr",0xA9=>"POPr*",0xC9=>"POPr:",0xE9=>"POPr*:", - 0x0A=>"CPY",0x2A=>"CPY*",0x4A=>"CPY:",0x6A=>"CPY*:",0x8A=>"CPYr",0xAA=>"CPYr*",0xCA=>"CPYr:",0xEA=>"CPYr*:", - 0x0B=>"SPL",0x2B=>"SPL*",0x4B=>"SPL:",0x6B=>"SPL*:",0x8B=>"SPLr",0xAB=>"SPLr*",0xCB=>"SPLr:",0xEB=>"SPLr*:", - 0x0C=>"DUP",0x2C=>"DUP*",0x4C=>"DUP:",0x6C=>"DUP*:",0x8C=>"DUPr",0xAC=>"DUPr*",0xCC=>"DUPr:",0xEC=>"DUPr*:", - 0x0D=>"OVR",0x2D=>"OVR*",0x4D=>"OVR:",0x6D=>"OVR*:",0x8D=>"OVRr",0xAD=>"OVRr*",0xCD=>"OVRr:",0xED=>"OVRr*:", - 0x0E=>"SWP",0x2E=>"SWP*",0x4E=>"SWP:",0x6E=>"SWP*:",0x8E=>"SWPr",0xAE=>"SWPr*",0xCE=>"SWPr:",0xEE=>"SWPr*:", - 0x0F=>"ROT",0x2F=>"ROT*",0x4F=>"ROT:",0x6F=>"ROT*:",0x8F=>"ROTr",0xAF=>"ROTr*",0xCF=>"ROTr:",0xEF=>"ROTr*:", - // Numeric operators - 0x10=>"ADD",0x30=>"ADD*",0x50=>"ADD:",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr*",0xD0=>"ADDr:",0xF0=>"ADDr*:", - 0x11=>"SUB",0x31=>"SUB*",0x51=>"SUB:",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr*",0xD1=>"SUBr:",0xF1=>"SUBr*:", - 0x12=>"INC",0x32=>"INC*",0x52=>"INC:",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr*",0xD2=>"INCr:",0xF2=>"INCr*:", - 0x13=>"DEC",0x33=>"DEC*",0x53=>"DEC:",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr*",0xD3=>"DECr:",0xF3=>"DECr*:", - 0x14=>"LTH",0x34=>"LTH*",0x54=>"LTH:",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr*",0xD4=>"LTHr:",0xF4=>"LTHr*:", - 0x15=>"GTH",0x35=>"GTH*",0x55=>"GTH:",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr*",0xD5=>"GTHr:",0xF5=>"GTHr*:", - 0x16=>"EQU",0x36=>"EQU*",0x56=>"EQU:",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr*",0xD6=>"EQUr:",0xF6=>"EQUr*:", - 0x17=>"NQK",0x37=>"NQK*",0x57=>"NQK:",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr*",0xD7=>"NQKr:",0xF7=>"NQKr*:", - // Bitwise operators - 0x18=>"IOR",0x38=>"IOR*",0x58=>"IOR:",0x78=>"IOR*:",0x98=>"IORr",0xB8=>"IORr*",0xD8=>"IORr:",0xF8=>"IORr*:", - 0x19=>"XOR",0x39=>"XOR*",0x59=>"XOR:",0x79=>"XOR*:",0x99=>"XORr",0xB9=>"XORr*",0xD9=>"XORr:",0xF9=>"XORr*:", - 0x1A=>"AND",0x3A=>"AND*",0x5A=>"AND:",0x7A=>"AND*:",0x9A=>"ANDr",0xBA=>"ANDr*",0xDA=>"ANDr:",0xFA=>"ANDr*:", - 0x1B=>"NOT",0x3B=>"NOT*",0x5B=>"NOT:",0x7B=>"NOT*:",0x9B=>"NOTr",0xBB=>"NOTr*",0xDB=>"NOTr:",0xFB=>"NOTr*:", - 0x1C=>"SHF",0x3C=>"SHF*",0x5C=>"SHF:",0x7C=>"SHF*:",0x9C=>"SHFr",0xBC=>"SHFr*",0xDC=>"SHFr:",0xFC=>"SHFr*:", - 0x1D=>"SHC",0x3D=>"SHC*",0x5D=>"SHC:",0x7D=>"SHC*:",0x9D=>"SHCr",0xBD=>"SHCr*",0xDD=>"SHCr:",0xFD=>"SHCr*:", - 0x1E=>"TAL",0x3E=>"TAL*",0x5E=>"TAL:",0x7E=>"TAL*:",0x9E=>"TALr",0xBE=>"TALr*",0xDE=>"TALr:",0xFE=>"TALr*:", - 0x1F=>"REV",0x3F=>"REV*",0x5F=>"REV:",0x7F=>"REV*:",0x9F=>"REVr",0xBF=>"REVr*",0xDF=>"REVr:",0xFF=>"REVr*:", - }) - } -} - - -impl std::str::FromStr for Instruction { - type Err = (); - - fn from_str(token: &str) -> Result<Self, Self::Err> { - Ok( Instruction { value: match token { - // Control operators - "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, - "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1, - "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2, - "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3, - "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4, - "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5, - "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6, - "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7, - // Stack operators - "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8, - "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9, - "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA, - "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB, - "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC, - "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED, - "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE, - "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF, - // Numeric operators - "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0, - "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1, - "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2, - "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3, - "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4, - "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5, - "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6, - "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7, - // Bitwise operators - "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8, - "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9, - "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA, - "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB, - "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC, - "SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD, - "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE, - "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF, - _ => return Err(()), - }}) - } -} - - -pub enum Operation { - HLT, JMP, JCN, JCK, - LDA, STA, LDD, STD, - PSH, POP, CPY, SPL, - DUP, OVR, SWP, ROT, - ADD, SUB, INC, DEC, - LTH, GTH, EQU, NQK, - IOR, XOR, AND, NOT, - SHF, SHC, TAL, REV, -} - - -impl From<Operation> for u8 { - fn from(operation: Operation) -> Self { - match operation { - Op::HLT=>0x00, Op::JMP=>0x01, Op::JCN=>0x02, Op::JCK=>0x03, - Op::LDA=>0x04, Op::STA=>0x05, Op::LDD=>0x06, Op::STD=>0x07, - Op::PSH=>0x08, Op::POP=>0x09, Op::CPY=>0x0a, Op::SPL=>0x0b, - Op::DUP=>0x0c, Op::OVR=>0x0d, Op::SWP=>0x0e, Op::ROT=>0x0f, - Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13, - Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17, - Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1a, Op::NOT=>0x1b, - Op::SHF=>0x1c, Op::SHC=>0x1d, Op::TAL=>0x1e, Op::REV=>0x1f, - } - } -} - - -impl std::fmt::Display for Operation { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", match self { - Op::HLT=>"HLT", Op::JMP=>"JMP", Op::JCN=>"JCN", Op::JCK=>"JCK", - Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD", - Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", Op::SPL=>"SPL", - Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT", - Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC", - Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK", - Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT", - Op::SHF=>"SHF", Op::SHC=>"SHC", Op::TAL=>"TAL", Op::REV=>"REV", - }) - } -} diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs deleted file mode 100644 index ac5179c..0000000 --- a/src/tokens/semantic.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub struct SemanticToken { - pub source: SourceSpan, - pub bytecode: BytecodeSpan, - pub variant: SemanticTokenVariant, -} - - -pub enum SemanticTokenVariant { - LabelDefinition(LabelDefinition), - MacroDefinition(MacroDefinition), - - /// Pointer to the matching label definition. - LabelReference(usize), - /// Pointer to the matching macro definition. - MacroInvocation(usize), - - Literal(Value), - Padding(Value), - Instruction(Instruction), - - Comment(String), - String(Vec<u8>), - - /// Pointer to the matching block close. - BlockOpen(usize), - /// Pointer to the matching block open. - BlockClose(usize), - MarkOpen, - MarkClose, - - Error(SemanticParseError), -} - -impl std::fmt::Debug for SemanticToken { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - match &self.variant { - SemVar::LabelDefinition(def) => write!(f, "LabelDefinition({})", def.name), - SemVar::MacroDefinition(def) => write!(f, "MacroDefinition({})", def.name), - SemVar::LabelReference(pointer) => write!(f, "LabelReference(*{pointer})"), - SemVar::MacroInvocation(pointer) => write!(f, "MacroInvocation(*{pointer})"), - SemVar::Literal(value) => write!(f, "Literal({value})"), - SemVar::Padding(value) => write!(f, "Padding({value})"), - SemVar::Instruction(instr) => write!(f, "Instruction(0x{:02x})", instr.value), - SemVar::Comment(comment) => write!(f, "Comment({comment})"), - SemVar::String(string) => write!(f, "String({})", String::from_utf8_lossy(&string)), - SemVar::BlockOpen(_) => write!(f, "BlockOpen"), - SemVar::BlockClose(_) => write!(f, "BlockClose"), - SemVar::MarkOpen => write!(f, "MarkOpen"), - SemVar::MarkClose => write!(f, "MarkClose"), - SemVar::Error(_) => write!(f, "Error"), - } - } -} - - -pub struct LabelDefinition { - /// The absolute name of the label or sublabel. - pub name: String, - /// List of pointers to label reference tokens. - pub references: Vec<usize>, -} - - -pub struct MacroDefinition { - pub name: String, - pub references: Vec<usize>, - pub body_tokens: Vec<SemanticToken>, -} - - -pub enum SemanticParseError { - LabelDefinitionInMacroDefinition, - MacroDefinitionInMacroDefinition, - - StrayMacroTerminator, - StrayBlockClose, - UnclosedBlock, - - UndefinedSymbol(String), - RedefinedSymbol((String, SourceSpan)), - - MacroInvocationBeforeDefinition((String, SourceSpan)), - - SyntaxError(SyntacticParseError) -} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs deleted file mode 100644 index 8684ed9..0000000 --- a/src/tokens/syntactic.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::*; - - -pub struct SyntacticToken { - /// Location of token in source files. - pub source: SourceSpan, - pub variant: SyntacticTokenVariant, -} - - -pub enum SyntacticTokenVariant { - LabelDefinition(String), - MacroDefinition(String), - MacroDefinitionTerminator, - - Literal(Value), - Padding(Value), - Instruction(Instruction), - - Comment(String), - String(Vec<u8>), - - BlockOpen, - BlockClose, - MarkOpen, - MarkClose, - - Symbol(String), - - Error(SyntacticParseError), -} - - -pub enum SyntacticParseError { - UnterminatedComment, - UnterminatedRawString, - UnterminatedNullString, - InvalidPaddingValue(String), -} diff --git a/src/translators.rs b/src/translators.rs deleted file mode 100644 index cce5633..0000000 --- a/src/translators.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod syntactic_parser; -mod semantic_parser; -mod bytecode_generator; -mod symbols_generator; - -pub use syntactic_parser::*; -pub use semantic_parser::*; -pub use bytecode_generator::*; -pub use symbols_generator::*; diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs deleted file mode 100644 index 956aca5..0000000 --- a/src/translators/bytecode_generator.rs +++ /dev/null @@ -1,131 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> { - let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens); - generator.generate() -} - - -/// Translate semantic tokens into bytecode. -struct BytecodeGenerator<'a> { - semantic_tokens: &'a mut [SemanticToken], - block_stack: Vec<usize>, - bytecode: Vec<u8>, - /// (address in bytecode, label definition token index) - label_references: Vec<(usize, usize)>, -} - -impl<'a> BytecodeGenerator<'a> { - pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self { - Self { - semantic_tokens, - block_stack: Vec::new(), - bytecode: Vec::new(), - label_references: Vec::new(), - } - } - - pub fn generate(mut self) -> Vec<u8> { - for i in 0..self.semantic_tokens.len() { - let address = self.bytecode.len(); - self.generate_bytecode_for_token(i, None); - self.semantic_tokens[i].bytecode = BytecodeSpan { - bytes: self.bytecode[address..].to_vec(), - location: BytecodeLocation { - address, - length: self.bytecode.len().saturating_sub(address), - } - }; - } - - // Replace blank label references in bytecode with real label addresses. - // The layer of indirection is necessary because the iteration borrows - // self immutably. - let mut insertions: Vec<(usize, u16)> = Vec::new(); - for (bytecode_address, token_pointer) in &self.label_references { - let label_token = &self.semantic_tokens[*token_pointer]; - // TODO: If greater than u16, print a warning. - let address_value = label_token.bytecode.location.address as u16; - insertions.push((*bytecode_address, address_value)); - } - for (bytecode_address, address_value) in insertions { - self.replace_address_in_bytecode(bytecode_address, address_value); - } - - // Strip trailing null bytes from the bytecode. - let mut length = self.bytecode.len(); - for (i, byte) in self.bytecode.iter().enumerate().rev() { - match *byte == 0 { - true => length = i, - false => break, - }; - } - self.bytecode.truncate(length); - - return self.bytecode; - } - - fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) { - macro_rules! push_byte { - ($byte:expr) => { self.bytecode.push($byte) }; } - macro_rules! push_double { - ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; } - macro_rules! pad { - ($len:expr) => { for _ in 0..$len { push_byte!(0); } } } - - let semantic_token = if let Some(macro_pointer) = macro_pointer { - let macro_definition = &self.semantic_tokens[macro_pointer]; - if let SemVar::MacroDefinition(def) = ¯o_definition.variant { - &def.body_tokens[pointer] - } else { unreachable!() } - } else { - &self.semantic_tokens[pointer] - }; - match &semantic_token.variant { - SemVar::MacroInvocation(pointer) => { - let macro_definition = &self.semantic_tokens[*pointer]; - if let SemVar::MacroDefinition(def) = ¯o_definition.variant { - let length = def.body_tokens.len(); - let macro_pointer = Some(*pointer); - for body_pointer in 0..length { - // Recurse, generate bytecode for each macro body token. - self.generate_bytecode_for_token(body_pointer, macro_pointer); - } - } else { unreachable!() } - } - SemVar::Literal(value) => match value { - Value::Byte(value) => push_byte!(*value), - Value::Double(value) => push_double!(value), - } - SemVar::Padding(value) => match value { - Value::Byte(value) => pad!(*value), - Value::Double(value) => pad!(*value), - } - SemVar::Instruction(instr) => push_byte!(instr.value), - SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes), - SemVar::LabelReference(pointer) => { - self.label_references.push((self.bytecode.len(), *pointer)); - push_double!(0u16); - } - SemVar::BlockOpen(_) => { - self.block_stack.push(self.bytecode.len()); - push_double!(0u16); - } - SemVar::BlockClose(_) => { - let bytecode_address = self.block_stack.pop().unwrap(); - // TODO: If greater than u16, print a warning. - let address_value = self.bytecode.len() as u16; - self.replace_address_in_bytecode(bytecode_address, address_value); - } - _ => (), - }; - } - - fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) { - let range = bytecode_address..bytecode_address+2; - self.bytecode[range].clone_from_slice(&address_value.to_be_bytes()); - } -} diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs deleted file mode 100644 index cb6a435..0000000 --- a/src/translators/semantic_parser.rs +++ /dev/null @@ -1,245 +0,0 @@ -use crate::*; - -use std::collections::HashMap; -use std::path::PathBuf; - -use SyntacticTokenVariant as SynVar; -use SemanticTokenVariant as SemVar; -use SemanticParseError as SemErr; - - -pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> { - let semantic_parser = SemanticParser::from_source_code(source_code, path); - semantic_parser.parse() -} - - -/// Translate syntactic tokens into semantic tokens. -struct SemanticParser { - labels: HashMap<String, Definition>, - macros: HashMap<String, Definition>, - syntactic_tokens: Vec<SyntacticToken>, - /// Index of the current outer token. - current_outer_index: usize, -} - -impl SemanticParser { - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { - let mut labels = HashMap::new(); - let mut macros = HashMap::new(); - let mut syntactic_tokens = Vec::new(); - - let parser = SyntacticParser::from_source_code(source_code, path); - for syntactic_token in parser { - let definition = Definition::new(syntactic_token.source.clone()); - match &syntactic_token.variant { - SynVar::LabelDefinition(name) => { - let _ = labels.try_insert(name.to_owned(), definition); - }, - SynVar::MacroDefinition(name) => { - let _ = macros.try_insert(name.to_owned(), definition); - }, - _ => (), - } - syntactic_tokens.push(syntactic_token); - } - - Self { - labels, - macros, - syntactic_tokens, - current_outer_index: 0, - } - } - - /// Parse syntactic tokens as semantic tokens. - pub fn parse(mut self) -> Vec<SemanticToken> { - let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens); - let mut syntactic = syntactic_tokens.into_iter(); - let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false); - - // Insert real label definition pointers into label reference tokens. - for definition in self.labels.values_mut() { - if let Some(definition_pointer) = definition.pointer { - // Insert definition pointer into reference tokens. - for reference_pointer in &definition.references { - let reference_token = &mut semantic_tokens[*reference_pointer]; - reference_token.variant = SemVar::LabelReference(definition_pointer); - } - // Insert reference pointers into definition token. - let definition_token = &mut semantic_tokens[definition_pointer]; - if let SemVar::LabelDefinition(ref mut def) = definition_token.variant { - def.references = std::mem::take(&mut definition.references); - } else { unreachable!() } - // Insert definition pointer into reference tokens inside macros. - for (outer, inner) in &definition.deep_references { - let macro_token = &mut semantic_tokens[*outer]; - if let SemVar::MacroDefinition(ref mut def) = macro_token.variant { - let reference_token = &mut def.body_tokens[*inner]; - reference_token.variant = SemVar::LabelReference(definition_pointer); - } else { unreachable!() } - } - // TODO: Record deep references in macro and label definitions? - } - } - - return semantic_tokens; - } - - fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken> - where I: Iterator<Item = SyntacticToken> - { - let mut semantic_tokens: Vec<SemanticToken> = Vec::new(); - let mut block_stack: Vec<usize> = Vec::new(); - - while let Some(syntactic_token) = parser.next() { - let current_index = semantic_tokens.len(); - if !in_macro { - self.current_outer_index = current_index; - } - - let semantic_token_variant = match syntactic_token.variant { - SynVar::LabelDefinition(name) => { - if in_macro { - SemVar::Error(SemErr::LabelDefinitionInMacroDefinition) - } else if let Some(definition) = self.macros.get(&name) { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else if let Some(definition) = self.labels.get_mut(&name) { - if definition.pointer.is_some() { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else { - definition.pointer = Some(current_index); - let references = Vec::new(); - SemVar::LabelDefinition(LabelDefinition { name, references }) - } - } else { - unreachable!() - } - } - SynVar::MacroDefinition(name) => { - if in_macro { - SemVar::Error(SemErr::MacroDefinitionInMacroDefinition) - } else if let Some(definition) = self.labels.get(&name) { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else if let Some(definition) = self.macros.get_mut(&name) { - if definition.pointer.is_some() { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else { - definition.pointer = Some(current_index); - let references = Vec::new(); - let body_tokens = self.pull_semantic_tokens(parser, true); - SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens }) - } - } else { - unreachable!() - } - } - SynVar::MacroDefinitionTerminator => if in_macro { - break; - } else { - SemVar::Error(SemErr::StrayMacroTerminator) - } - SynVar::Literal(value) => { - SemVar::Literal(value) - } - SynVar::Padding(value) => { - SemVar::Padding(value) - } - SynVar::Instruction(instr) => { - SemVar::Instruction(instr) - } - SynVar::Comment(comment) => { - SemVar::Comment(comment) - } - SynVar::String(bytes) => { - SemVar::String(bytes) - } - SynVar::BlockOpen => { - block_stack.push(current_index); - SemVar::BlockOpen(0) - } - SynVar::BlockClose => { - if let Some(pointer) = block_stack.pop() { - let open = &mut semantic_tokens[pointer]; - open.variant = SemVar::BlockOpen(current_index); - SemVar::BlockClose(pointer) - } else { - SemVar::Error(SemErr::StrayBlockClose) - } - } - SynVar::MarkOpen => { - SemVar::MarkOpen - } - SynVar::MarkClose => { - SemVar::MarkClose - } - SynVar::Symbol(name) => { - if let Some(definition) = self.labels.get_mut(&name) { - if in_macro { - let pointer = (self.current_outer_index, current_index); - definition.deep_references.push(pointer); - } else { - definition.references.push(current_index); - } - SemVar::LabelReference(0) - } else if let Some(definition) = self.macros.get_mut(&name) { - if let Some(pointer) = definition.pointer { - if !in_macro { definition.references.push(current_index); } - SemVar::MacroInvocation(pointer) - } else { - let source = definition.source.clone(); - SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source))) - } - } else { - SemVar::Error(SemErr::UndefinedSymbol(name)) - } - } - SynVar::Error(syntax_err) => { - SemVar::Error(SemErr::SyntaxError(syntax_err)) - } - }; - - let semantic_token = SemanticToken { - source: syntactic_token.source, - bytecode: BytecodeSpan::default(), - variant: semantic_token_variant, - }; - semantic_tokens.push(semantic_token); - } - - if in_macro { - //TODO: UnterminatedMacroDefinition - } - - // Replace each unclosed BlockOpen token with an error. - for block_pointer in block_stack { - semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock); - } - - return semantic_tokens; - } -} - - -struct Definition { - pub source: SourceSpan, - pub pointer: Option<usize>, - pub references: Vec<usize>, - /// (macro index, label reference index) - pub deep_references: Vec<(usize, usize)>, -} - -impl Definition { - pub fn new(source: SourceSpan) -> Self { - Self { - source, - pointer: None, - references: Vec::new(), - deep_references: Vec::new(), - } - } -} diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs deleted file mode 100644 index d30facd..0000000 --- a/src/translators/symbols_generator.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String { - let mut symbols = String::new(); - - for token in semantic_tokens { - if let SemVar::LabelDefinition(definition) = &token.variant { - let address = token.bytecode.location.address; - if address > 0xffff { break; } - let name = &definition.name; - let location = token.source.location(); - symbols.push_str(&format!("{address:04x} {name} {location}\n")); - } - } - - return symbols; -} diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs deleted file mode 100644 index 7279daf..0000000 --- a/src/translators/syntactic_parser.rs +++ /dev/null @@ -1,247 +0,0 @@ -use crate::*; - -use std::path::PathBuf; - - -/// Translate raw source code characters into syntactic tokens. -pub struct SyntacticParser { - /// Path of file from which the source was read. - path: Option<PathBuf>, - /// Path of the original source file. - source_path: Option<PathBuf>, - /// Position of the next character to be read. - position: Position, - /// Previous value of the position field. - prev_position: Position, - /// Line where the embedded source file begins. - source_line_start: usize, - /// Characters waiting to be parsed, in reverse order. - chars: Vec<char>, - /// The token currently being parsed. - token_source_string: String, - /// The name of the most recently parsed label. - label: String, -} - - -impl SyntacticParser { - /// Parse source code. - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { - Self { - path: path.map(|p| p.into()), - source_path: None, - position: Position { line: 0, column: 0 }, - prev_position: Position { line: 0, column: 0 }, - source_line_start: 0, - chars: source_code.chars().rev().collect(), - token_source_string: String::new(), - label: String::new(), - } - } - - /// Return the next character, keeping it on the queue. - fn peek_char(&self) -> Option<char> { - self.chars.last().copied() - } - - /// Return the next character, removing it from the queue. - fn eat_char(&mut self) -> Option<char> { - let option = self.chars.pop(); - if let Some(c) = option { - self.prev_position = self.position; - self.position.advance(c); - self.token_source_string.push(c); - } - return option; - } - - /// Remove the next character from the queue. - fn drop_char(&mut self) { - if let Some(c) = self.chars.pop() { - self.prev_position = self.position; - self.position.advance(c); - } - } - - /// Remove leading whitespace. - fn drop_whitespace(&mut self) { - while let Some(c) = self.peek_char() { - match c.is_whitespace() { - true => self.drop_char(), - false => break, - } - } - } - - /// Remove a full token from the queue. - fn eat_token(&mut self) -> String { - const DELIMITERS: [char; 13] = - ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; - let mut token = String::new(); - while let Some(peek) = self.peek_char() { - if peek.is_whitespace() || DELIMITERS.contains(&peek) { - break; - } - let c = self.eat_char().unwrap(); - token.push(c); - if c == ':' { - break; - } - } - token - } - - /// Return all characters until the delimiter, removing all returned - /// characters and the delimiter from the queue. Returns None if end - /// of source is reached before delimiter is found. - fn eat_to_delim(&mut self, delim: char) -> Option<String> { - let mut token = String::new(); - while let Some(c) = self.eat_char() { - self.token_source_string.push(c); - match c == delim { - true => return Some(token), - false => token.push(c), - } - } - return None; - } - - fn is_line_empty(&self) -> bool { - for c in self.chars.iter().rev() { - if *c == '\n' { - return true; - } - if !c.is_whitespace() { - return false - } - } - return false; - } -} - - -impl Iterator for SyntacticParser { - type Item = SyntacticToken; - - /// Sequentially parse tokens from the source code. - fn next(&mut self) -> Option<SyntacticToken> { - use SyntacticTokenVariant as SynVar; - use SyntacticParseError as SynErr; - - self.drop_whitespace(); - let start = self.position; - - let variant = match self.eat_char()? { - '@' => { - self.label = self.eat_token(); - SynVar::LabelDefinition(self.label.clone()) - } - '&' => { - let token = self.eat_token(); - let sublabel = format!("{}/{token}", self.label); - SynVar::LabelDefinition(sublabel) - } - '%' => SynVar::MacroDefinition(self.eat_token()), - ';' => SynVar::MacroDefinitionTerminator, - '[' => SynVar::MarkOpen, - ']' => SynVar::MarkClose, - '{' => SynVar::BlockOpen, - '}' => SynVar::BlockClose, - '(' => match self.eat_to_delim(')') { - Some(string) => SynVar::Comment(string), - None => SynVar::Error(SynErr::UnterminatedComment), - } - '\'' => match self.eat_to_delim('\'') { - Some(string) => SynVar::String(string.as_bytes().to_vec()), - None => SynVar::Error(SynErr::UnterminatedRawString), - } - '"' => match self.eat_to_delim('"') { - Some(string) => { - let mut bytes = string.as_bytes().to_vec(); - bytes.push(0x00); - SynVar::String(bytes) - } - None => SynVar::Error(SynErr::UnterminatedNullString), - } - '#' => { - let token = self.eat_token(); - match token.parse::<Value>() { - Ok(value) => SynVar::Padding(value), - Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), - } - }, - '~' => { - let token = self.eat_token(); - let symbol = format!("{}/{token}", self.label); - SynVar::Symbol(symbol) - } - ':' => SynVar::Symbol(String::from(':')), - c => { - let token = format!("{c}{}", self.eat_token()); - match token.parse::<Value>() { - Ok(value) => SynVar::Literal(value), - Err(_) => match token.parse::<Instruction>() { - Ok(instruction) => SynVar::Instruction(instruction), - Err(_) => SynVar::Symbol(token), - } - } - } - }; - - // Parse source path comments. - if let SynVar::Comment(comment) = &variant { - // Check that the comment fills the entire line. - if start.column == 0 && self.is_line_empty() { - if let Some(path) = comment.strip_prefix(": ") { - self.source_path = Some(PathBuf::from(path.trim())); - self.source_line_start = start.line + 1; - } - } - } - - // Find location in current merged file. - let in_merged = SourceLocation { - path: self.path.to_owned(), - start, - end: self.prev_position, - }; - - // Find location in original source file. - let in_source = if start.line >= self.source_line_start { - match &self.source_path { - Some(path) => { - let offset = self.source_line_start; - Some( SourceLocation { - path: Some(path.to_owned()), - start: Position { - line: in_merged.start.line.saturating_sub(offset), - column: in_merged.start.column, - }, - end: Position { - line: in_merged.end.line.saturating_sub(offset), - column: in_merged.end.column, - } - }) - } - None => None, - } - } else { - None - }; - - let string = std::mem::take(&mut self.token_source_string); - let source = SourceSpan { string, in_merged, in_source }; - Some( SyntacticToken { source, variant } ) - } -} - - -#[derive(Debug)] -pub enum ParseError { - InvalidExtension, - NotFound, - NotReadable, - IsADirectory, - InvalidUtf8, - Unknown, -} diff --git a/src/types/instruction.rs b/src/types/instruction.rs new file mode 100644 index 0000000..252fc68 --- /dev/null +++ b/src/types/instruction.rs @@ -0,0 +1,168 @@ +use crate::*; + +use Operation as Op; + + +pub struct Instruction { + pub value: u8, +} + +impl Instruction { + pub fn operation(&self) -> Operation { + match self.value & 0x1f { + 0x00=>Op::HLT, 0x01=>Op::PSH, 0x02=>Op::POP, 0x03=>Op::CPY, + 0x04=>Op::DUP, 0x05=>Op::OVR, 0x06=>Op::SWP, 0x07=>Op::ROT, + 0x08=>Op::JMP, 0x09=>Op::JMS, 0x0a=>Op::JCN, 0x0b=>Op::JCS, + 0x0c=>Op::LDA, 0x0d=>Op::STA, 0x0e=>Op::LDD, 0x0f=>Op::STD, + 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, + 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, + 0x18=>Op::SHL, 0x19=>Op::SHR, 0x1a=>Op::ROL, 0x1b=>Op::ROR, + 0x1c=>Op::IOR, 0x1d=>Op::XOR, 0x1e=>Op::AND, 0x1f=>Op::NOT, + _ => unreachable!(), + } + } + + pub fn return_mode(&self) -> bool { + self.value & RETURN_MODE != 0 + } + + pub fn wide_mode(&self) -> bool { + self.value & WIDE_MODE != 0 + } + + pub fn immediate_mode(&self) -> bool { + self.value & IMMEDIATE_MODE != 0 + } +} + +impl std::fmt::Display for Instruction { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self.value { + // Stack operators + 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" , + 0x01=>"PSH",0x21=>"PSH:",0x41=>"PSH*",0x61=>"PSH*:",0x81=>"PSHr",0xA1=>"PSHr:",0xC1=>"PSHr*",0xE1=>"PSHr*:", + 0x02=>"POP",0x22=>"POP:",0x42=>"POP*",0x62=>"POP*:",0x82=>"POPr",0xA2=>"POPr:",0xC2=>"POPr*",0xE2=>"POPr*:", + 0x03=>"CPY",0x23=>"CPY:",0x43=>"CPY*",0x63=>"CPY*:",0x83=>"CPYr",0xA3=>"CPYr:",0xC3=>"CPYr*",0xE3=>"CPYr*:", + 0x04=>"DUP",0x24=>"DUP:",0x44=>"DUP*",0x64=>"DUP*:",0x84=>"DUPr",0xA4=>"DUPr:",0xC4=>"DUPr*",0xE4=>"DUPr*:", + 0x05=>"OVR",0x25=>"OVR:",0x45=>"OVR*",0x65=>"OVR*:",0x85=>"OVRr",0xA5=>"OVRr:",0xC5=>"OVRr*",0xE5=>"OVRr*:", + 0x06=>"SWP",0x26=>"SWP:",0x46=>"SWP*",0x66=>"SWP*:",0x86=>"SWPr",0xA6=>"SWPr:",0xC6=>"SWPr*",0xE6=>"SWPr*:", + 0x07=>"ROT",0x27=>"ROT:",0x47=>"ROT*",0x67=>"ROT*:",0x87=>"ROTr",0xA7=>"ROTr:",0xC7=>"ROTr*",0xE7=>"ROTr*:", + // Control operators + 0x08=>"JMP",0x28=>"JMP:",0x48=>"JMP*",0x68=>"JMP*:",0x88=>"JMPr",0xA8=>"JMPr:",0xC8=>"JMPr*",0xE8=>"JMPr*:", + 0x09=>"JMS",0x29=>"JMS:",0x49=>"JMS*",0x69=>"JMS*:",0x89=>"JMSr",0xA9=>"JMSr:",0xC9=>"JMSr*",0xE9=>"JMSr*:", + 0x0A=>"JCN",0x2A=>"JCN:",0x4A=>"JCN*",0x6A=>"JCN*:",0x8A=>"JCNr",0xAA=>"JCNr:",0xCA=>"JCNr*",0xEA=>"JCNr*:", + 0x0B=>"JCS",0x2B=>"JCS:",0x4B=>"JCS*",0x6B=>"JCS*:",0x8B=>"JCSr",0xAB=>"JCSr:",0xCB=>"JCSr*",0xEB=>"JCSr*:", + 0x0C=>"LDA",0x2C=>"LDA:",0x4C=>"LDA*",0x6C=>"LDA*:",0x8C=>"LDAr",0xAC=>"LDAr:",0xCC=>"LDAr*",0xEC=>"LDAr*:", + 0x0D=>"STA",0x2D=>"STA:",0x4D=>"STA*",0x6D=>"STA*:",0x8D=>"STAr",0xAD=>"STAr:",0xCD=>"STAr*",0xED=>"STAr*:", + 0x0E=>"LDD",0x2E=>"LDD:",0x4E=>"LDD*",0x6E=>"LDD*:",0x8E=>"LDDr",0xAE=>"LDDr:",0xCE=>"LDDr*",0xEE=>"LDDr*:", + 0x0F=>"STD",0x2F=>"STD:",0x4F=>"STD*",0x6F=>"STD*:",0x8F=>"STDr",0xAF=>"STDr:",0xCF=>"STDr*",0xEF=>"STDr*:", + // Numeric operators + 0x10=>"ADD",0x30=>"ADD:",0x50=>"ADD*",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr:",0xD0=>"ADDr*",0xF0=>"ADDr*:", + 0x11=>"SUB",0x31=>"SUB:",0x51=>"SUB*",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr:",0xD1=>"SUBr*",0xF1=>"SUBr*:", + 0x12=>"INC",0x32=>"INC:",0x52=>"INC*",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr:",0xD2=>"INCr*",0xF2=>"INCr*:", + 0x13=>"DEC",0x33=>"DEC:",0x53=>"DEC*",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr:",0xD3=>"DECr*",0xF3=>"DECr*:", + 0x14=>"LTH",0x34=>"LTH:",0x54=>"LTH*",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr:",0xD4=>"LTHr*",0xF4=>"LTHr*:", + 0x15=>"GTH",0x35=>"GTH:",0x55=>"GTH*",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr:",0xD5=>"GTHr*",0xF5=>"GTHr*:", + 0x16=>"EQU",0x36=>"EQU:",0x56=>"EQU*",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr:",0xD6=>"EQUr*",0xF6=>"EQUr*:", + 0x17=>"NQK",0x37=>"NQK:",0x57=>"NQK*",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr:",0xD7=>"NQKr*",0xF7=>"NQKr*:", + // Bitwise operators + 0x18=>"SHL",0x38=>"SHL:",0x58=>"SHL*",0x78=>"SHL*:",0x98=>"SHLr",0xB8=>"SHLr:",0xD8=>"SHLr*",0xF8=>"SHLr*:", + 0x19=>"SHR",0x39=>"SHR:",0x59=>"SHR*",0x79=>"SHR*:",0x99=>"SHRr",0xB9=>"SHRr:",0xD9=>"SHRr*",0xF9=>"SHRr*:", + 0x1A=>"ROL",0x3A=>"ROL:",0x5A=>"ROL*",0x7A=>"ROL*:",0x9A=>"ROLr",0xBA=>"ROLr:",0xDA=>"ROLr*",0xFA=>"ROLr*:", + 0x1B=>"ROR",0x3B=>"ROR:",0x5B=>"ROR*",0x7B=>"ROR*:",0x9B=>"RORr",0xBB=>"RORr:",0xDB=>"RORr*",0xFB=>"RORr*:", + 0x1C=>"IOR",0x3C=>"IOR:",0x5C=>"IOR*",0x7C=>"IOR*:",0x9C=>"IORr",0xBC=>"IORr:",0xDC=>"IORr*",0xFC=>"IORr*:", + 0x1D=>"XOR",0x3D=>"XOR:",0x5D=>"XOR*",0x7D=>"XOR*:",0x9D=>"XORr",0xBD=>"XORr:",0xDD=>"XORr*",0xFD=>"XORr*:", + 0x1E=>"AND",0x3E=>"AND:",0x5E=>"AND*",0x7E=>"AND*:",0x9E=>"ANDr",0xBE=>"ANDr:",0xDE=>"ANDr*",0xFE=>"ANDr*:", + 0x1F=>"NOT",0x3F=>"NOT:",0x5F=>"NOT*",0x7F=>"NOT*:",0x9F=>"NOTr",0xBF=>"NOTr:",0xDF=>"NOTr*",0xFF=>"NOTr*:", + }) + } +} + +impl std::str::FromStr for Instruction { + type Err = (); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + Ok( Instruction { value: match token { + // Stack operators + "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, + "PSH"=>0x01,"PSH:"=>0x21,"PSH*"=>0x41,"PSH*:"=>0x61,"PSHr"=>0x81,"PSHr:"=>0xA1,"PSHr*"=>0xC1,"PSHr*:"=>0xE1, + ":"=>0x21, "*:"=>0x61, "r:"=>0xA1, "r*:"=>0xE1, + "POP"=>0x02,"POP:"=>0x22,"POP*"=>0x42,"POP*:"=>0x62,"POPr"=>0x82,"POPr:"=>0xA2,"POPr*"=>0xC2,"POPr*:"=>0xE2, + "CPY"=>0x03,"CPY:"=>0x23,"CPY*"=>0x43,"CPY*:"=>0x63,"CPYr"=>0x83,"CPYr:"=>0xA3,"CPYr*"=>0xC3,"CPYr*:"=>0xE3, + "DUP"=>0x04,"DUP:"=>0x24,"DUP*"=>0x44,"DUP*:"=>0x64,"DUPr"=>0x84,"DUPr:"=>0xA4,"DUPr*"=>0xC4,"DUPr*:"=>0xE4, + "OVR"=>0x05,"OVR:"=>0x25,"OVR*"=>0x45,"OVR*:"=>0x65,"OVRr"=>0x85,"OVRr:"=>0xA5,"OVRr*"=>0xC5,"OVRr*:"=>0xE5, + "SWP"=>0x06,"SWP:"=>0x26,"SWP*"=>0x46,"SWP*:"=>0x66,"SWPr"=>0x86,"SWPr:"=>0xA6,"SWPr*"=>0xC6,"SWPr*:"=>0xE6, + "ROT"=>0x07,"ROT:"=>0x27,"ROT*"=>0x47,"ROT*:"=>0x67,"ROTr"=>0x87,"ROTr:"=>0xA7,"ROTr*"=>0xC7,"ROTr*:"=>0xE7, + // Control operators + "JMP"=>0x08,"JMP:"=>0x28,"JMP*"=>0x48,"JMP*:"=>0x68,"JMPr"=>0x88,"JMPr:"=>0xA8,"JMPr*"=>0xC8,"JMPr*:"=>0xE8, + "JMS"=>0x09,"JMS:"=>0x29,"JMS*"=>0x49,"JMS*:"=>0x69,"JMSr"=>0x89,"JMSr:"=>0xA9,"JMSr*"=>0xC9,"JMSr*:"=>0xE9, + "JCN"=>0x0A,"JCN:"=>0x2A,"JCN*"=>0x4A,"JCN*:"=>0x6A,"JCNr"=>0x8A,"JCNr:"=>0xAA,"JCNr*"=>0xCA,"JCNr*:"=>0xEA, + "JCS"=>0x0B,"JCS:"=>0x2B,"JCS*"=>0x4B,"JCS*:"=>0x6B,"JCSr"=>0x8B,"JCSr:"=>0xAB,"JCSr*"=>0xCB,"JCSr*:"=>0xEB, + "LDA"=>0x0C,"LDA:"=>0x2C,"LDA*"=>0x4C,"LDA*:"=>0x6C,"LDAr"=>0x8C,"LDAr:"=>0xAC,"LDAr*"=>0xCC,"LDAr*:"=>0xEC, + "STA"=>0x0D,"STA:"=>0x2D,"STA*"=>0x4D,"STA*:"=>0x6D,"STAr"=>0x8D,"STAr:"=>0xAD,"STAr*"=>0xCD,"STAr*:"=>0xED, + "LDD"=>0x0E,"LDD:"=>0x2E,"LDD*"=>0x4E,"LDD*:"=>0x6E,"LDDr"=>0x8E,"LDDr:"=>0xAE,"LDDr*"=>0xCE,"LDDr*:"=>0xEE, + "STD"=>0x0F,"STD:"=>0x2F,"STD*"=>0x4F,"STD*:"=>0x6F,"STDr"=>0x8F,"STDr:"=>0xAF,"STDr*"=>0xCF,"STDr*:"=>0xEF, + // Numeric operators + "ADD"=>0x10,"ADD:"=>0x30,"ADD*"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr:"=>0xB0,"ADDr*"=>0xD0,"ADDr*:"=>0xF0, + "SUB"=>0x11,"SUB:"=>0x31,"SUB*"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr:"=>0xB1,"SUBr*"=>0xD1,"SUBr*:"=>0xF1, + "INC"=>0x12,"INC:"=>0x32,"INC*"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr:"=>0xB2,"INCr*"=>0xD2,"INCr*:"=>0xF2, + "DEC"=>0x13,"DEC:"=>0x33,"DEC*"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr:"=>0xB3,"DECr*"=>0xD3,"DECr*:"=>0xF3, + "LTH"=>0x14,"LTH:"=>0x34,"LTH*"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr:"=>0xB4,"LTHr*"=>0xD4,"LTHr*:"=>0xF4, + "GTH"=>0x15,"GTH:"=>0x35,"GTH*"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr:"=>0xB5,"GTHr*"=>0xD5,"GTHr*:"=>0xF5, + "EQU"=>0x16,"EQU:"=>0x36,"EQU*"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr:"=>0xB6,"EQUr*"=>0xD6,"EQUr*:"=>0xF6, + "NQK"=>0x17,"NQK:"=>0x37,"NQK*"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr:"=>0xB7,"NQKr*"=>0xD7,"NQKr*:"=>0xF7, + // Bitwise operators + "SHL"=>0x18,"SHL:"=>0x38,"SHL*"=>0x58,"SHL*:"=>0x78,"SHLr"=>0x98,"SHLr:"=>0xB8,"SHLr*"=>0xD8,"SHLr*:"=>0xF8, + "SHR"=>0x19,"SHR:"=>0x39,"SHR*"=>0x59,"SHR*:"=>0x79,"SHRr"=>0x99,"SHRr:"=>0xB9,"SHRr*"=>0xD9,"SHRr*:"=>0xF9, + "ROL"=>0x1A,"ROL:"=>0x3A,"ROL*"=>0x5A,"ROL*:"=>0x7A,"ROLr"=>0x9A,"ROLr:"=>0xBA,"ROLr*"=>0xDA,"ROLr*:"=>0xFA, + "ROR"=>0x1B,"ROR:"=>0x3B,"ROR*"=>0x5B,"ROR*:"=>0x7B,"RORr"=>0x9B,"RORr:"=>0xBB,"RORr*"=>0xDB,"RORr*:"=>0xFB, + "IOR"=>0x1C,"IOR:"=>0x3C,"IOR*"=>0x5C,"IOR*:"=>0x7C,"IORr"=>0x9C,"IORr:"=>0xBC,"IORr*"=>0xDC,"IORr*:"=>0xFC, + "XOR"=>0x1D,"XOR:"=>0x3D,"XOR*"=>0x5D,"XOR*:"=>0x7D,"XORr"=>0x9D,"XORr:"=>0xBD,"XORr*"=>0xDD,"XORr*:"=>0xFD, + "AND"=>0x1E,"AND:"=>0x3E,"AND*"=>0x5E,"AND*:"=>0x7E,"ANDr"=>0x9E,"ANDr:"=>0xBE,"ANDr*"=>0xDE,"ANDr*:"=>0xFE, + "NOT"=>0x1F,"NOT:"=>0x3F,"NOT*"=>0x5F,"NOT*:"=>0x7F,"NOTr"=>0x9F,"NOTr:"=>0xBF,"NOTr*"=>0xDF,"NOTr*:"=>0xFF, + _ => return Err(()), + }}) + } +} + + +pub enum Operation { + HLT, PSH, POP, CPY, + DUP, OVR, SWP, ROT, + JMP, JMS, JCN, JCS, + LDA, STA, LDD, STD, + ADD, SUB, INC, DEC, + LTH, GTH, EQU, NQK, + SHL, SHR, ROL, ROR, + IOR, XOR, AND, NOT, +} + +impl From<Operation> for u8 { + fn from(operation: Operation) -> Self { + match operation { + Op::HLT=>0x00, Op::PSH=>0x01, Op::POP=>0x02, Op::CPY=>0x03, + Op::DUP=>0x04, Op::OVR=>0x05, Op::SWP=>0x06, Op::ROT=>0x07, + Op::JMP=>0x08, Op::JMS=>0x09, Op::JCN=>0x0A, Op::JCS=>0x0B, + Op::LDA=>0x0C, Op::STA=>0x0D, Op::LDD=>0x0E, Op::STD=>0x0F, + Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13, + Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17, + Op::SHL=>0x1C, Op::SHR=>0x1D, Op::ROL=>0x1E, Op::ROR=>0x1F, + Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1A, Op::NOT=>0x1B, + } + } +} + +impl std::fmt::Display for Operation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self { + Op::HLT=>"HLT", Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", + Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT", + Op::JMP=>"JMP", Op::JMS=>"JMS", Op::JCN=>"JCN", Op::JCS=>"JCS", + Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD", + Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC", + Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK", + Op::SHL=>"SHL", Op::SHR=>"SHR", Op::ROL=>"ROL", Op::ROR=>"ROR", + Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT", + }) + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..8094cb1 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,4 @@ +mod instruction; +mod value; +pub use instruction::*; +pub use value::*; diff --git a/src/tokens/value.rs b/src/types/value.rs index e421bd5..fe82710 100644 --- a/src/tokens/value.rs +++ b/src/types/value.rs @@ -1,8 +1,24 @@ +#[derive(Clone, Copy)] pub enum Value { Byte(u8), Double(u16), } +impl From<Value> for usize { + fn from(value: Value) -> Self { + match value { + Value::Byte(byte) => byte.into(), + Value::Double(double) => double.into(), + } + } +} + +impl From<&Value> for usize { + fn from(value: &Value) -> Self { + (*value).into() + } +} + impl std::fmt::Display for Value { fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { match self { |
