diff options
Diffstat (limited to 'src')
31 files changed, 1321 insertions, 2006 deletions
diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs deleted file mode 100644 index 5cb962f..0000000 --- a/src/bin/bedrock-asm.rs +++ /dev/null @@ -1,154 +0,0 @@ -use bedrock_asm::*; - -use std::io::{Read, Write}; -use std::path::{Path, PathBuf}; - - -static mut VERBOSE: bool = false; - -macro_rules! verbose { - ($($tokens:tt)*) => { if unsafe { VERBOSE } { - eprint!("[INFO] "); eprintln!($($tokens)*); - } }; -} -macro_rules! error { - ($($tokens:tt)*) => {{ - eprint!("[ERROR] "); eprintln!($($tokens)*); std::process::exit(1); - }}; -} - - -fn main() { - let args = Arguments::from_env_or_exit(); - - // ----------------------------------------------------------------------- - // RESOLVE syntactic symbols - let ext = args.ext.unwrap_or(String::from("brc")); - let mut resolver = if let Some(path) = &args.source { - match SourceUnit::from_path(&path, &ext) { - Ok(source_unit) => SymbolResolver::from_source_unit(source_unit), - Err(err) => match err { - ParseError::InvalidExtension => error!( - "File {path:?} has invalid extension, must be '.{ext}'"), - ParseError::NotFound => error!( - "File {path:?} was not found"), - ParseError::InvalidUtf8 => error!( - "File {path:?} does not contain valid UTF-8 text"), - ParseError::NotReadable => error!( - "File {path:?} is not readable"), - ParseError::IsADirectory => error!( - "File {path:?} is a directory"), - ParseError::Unknown => error!( - "Unknown error while attempting to read from {path:?}") - } - } - } else { - let mut source_code = String::new(); - verbose!("Reading program source from standard input"); - if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { - eprintln!("Could not read from standard input, exiting."); - eprintln!("({err:?})"); - std::process::exit(1); - } - let path = "<standard input>"; - let source_unit = SourceUnit::from_source_code(source_code, path); - SymbolResolver::from_source_unit(source_unit) - }; - // Load project libraries. 
- if let Some(path) = &args.source { - if !args.no_libs && !args.no_project_libs { - let project_library = gather_project_libraries(path, &ext); - resolver.add_library_units(project_library); - } - } - // Load environment libraries. - if !args.no_libs && !args.no_env_libs { - for env_library in gather_environment_libraries(&ext) { - resolver.add_library_units(env_library); - } - } - resolver.resolve(); - - // ----------------------------------------------------------------------- - // PRINT information, generate merged source code - if args.tree { - print_source_tree(&resolver); - } - if print_resolver_errors(&resolver) { - std::process::exit(1); - }; - let merged_source = match resolver.get_merged_source_code() { - Ok(merged_source) => merged_source, - Err(ids) => { - print_cyclic_source_units(&ids, &resolver); - std::process::exit(1); - }, - }; - if args.resolve { - write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref()); - } - - // ----------------------------------------------------------------------- - // PARSE semantic tokens from merged source code - let path = Some("<merged source>"); - let mut semantic_tokens = generate_semantic_tokens(&merged_source, path); - if print_semantic_errors(&semantic_tokens, &merged_source) { - std::process::exit(1); - }; - - // ----------------------------------------------------------------------- - // GENERATE symbols file and bytecode - let bytecode = generate_bytecode(&mut semantic_tokens); - // let symbols = generate_symbols_file(&semantic_tokens); - write_bytes_and_exit(&bytecode, args.output.as_ref()); -} - - -fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! 
{ - if let Some(path) = path { - if let Err(err) = std::fs::write(path, bytes) { - eprintln!("Could not write to path {:?}, exiting.", path.as_ref()); - eprintln!("({err:?})"); - std::process::exit(1); - } - } else { - if let Err(err) = std::io::stdout().write_all(bytes) { - eprintln!("Could not write to standard output, exiting."); - eprintln!("({err:?})"); - std::process::exit(1); - } - } - std::process::exit(0); -} - - -xflags::xflags! { - cmd arguments { - /// Print additional debug information - optional --verbose - /// Print the assembler version and exit - optional --version - - - /// Bedrock source code file to assemble. - optional source: PathBuf - /// Destination path for assembler output. - optional output: PathBuf - /// File extension to identify source files. - optional ext: String - - /// Don't include libraries or resolve references. - optional --no-libs - /// Don't include project libraries - optional --no-project-libs - /// Don't include environment libraries. - optional --no-env-libs - - /// Show the resolved source file heirarchy - optional --tree - /// Assemble the program without saving any output - optional --check - /// Only return resolved source code. 
- optional --resolve - } -} diff --git a/src/bin/br-asm.rs b/src/bin/br-asm.rs new file mode 100644 index 0000000..e7a9230 --- /dev/null +++ b/src/bin/br-asm.rs @@ -0,0 +1,8 @@ +use bedrock_asm::*; +use switchboard::*; + + +fn main() { + let args = Switchboard::from_env(); + assemble(args, "br-asm"); +} diff --git a/src/formats/clang.rs b/src/formats/clang.rs new file mode 100644 index 0000000..524b501 --- /dev/null +++ b/src/formats/clang.rs @@ -0,0 +1,10 @@ +pub fn format_clang(bytecode: &[u8]) -> Vec<u8> { + let mut output = String::new(); + for chunk in bytecode.chunks(16) { + for byte in chunk { + output.push_str(&format!("0x{byte:02X}, ")); + } + output.push('\n'); + } + return output.into_bytes(); +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs new file mode 100644 index 0000000..79b1c51 --- /dev/null +++ b/src/formats/mod.rs @@ -0,0 +1,23 @@ +mod clang; +pub use clang::*; + +use crate::*; + + +#[derive(Clone, Copy, PartialEq)] +pub enum Format { + Raw, + Source, + Clang, +} + +impl Format { + pub fn from_str(string: &str) -> Self { + match string { + "raw" => Self::Raw, + "source" => Self::Source, + "c" => Self::Clang, + _ => fatal!("Unknown format '{string}', expected 'raw', 'c', or 'source'"), + } + } +} diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs deleted file mode 100644 index 0fd1131..0000000 --- a/src/gather_libraries.rs +++ /dev/null @@ -1,198 +0,0 @@ -use crate::*; - -use vagabond::*; - - -/// Gather all library units from the given path. -pub fn gather_project_libraries(path: &Path, extension: &str) -> Vec<SourceUnit> { - match path.parent() { - Some(parent_path) => gather_source_units(parent_path, extension), - None => Vec::new(), - } -} - - -/// Gather all library units from the paths specified in an environment variable. 
-pub fn gather_environment_libraries(extension: &str) -> Vec<Vec<SourceUnit>> { - let mut environment_libraries = Vec::new(); - if let Ok(lib_var) = std::env::var("BEDROCK_LIBS") { - for path_str in lib_var.split(":") { - let lib_path = PathBuf::from(path_str); - let source_units = gather_source_units(&lib_path, extension); - if !source_units.is_empty() { - environment_libraries.push(source_units); - } - } - }; - return environment_libraries; -} - - -/// Gather all source units at or descended from the given entry. -fn gather_source_units(path: &Path, extension: &str) -> Vec<SourceUnit> { - let mut source_units = Vec::new(); - if let Ok(entry) = Entry::from_path(path) { - match entry.entry_type { - EntryType::File => { - if let Ok(source) = SourceUnit::from_path(entry.path, extension) { - source_units.push(source); - } - } - EntryType::Directory => { - if let Ok(entries) = traverse_directory(entry.path) { - for entry in entries { - if let Ok(source) = SourceUnit::from_path(entry.path, extension) { - source_units.push(source); - } - } - } - } - } - }; - return source_units; -} - - -pub struct SourceUnit { - pub main: SourceFile, - pub head: Option<SourceFile>, - pub tail: Option<SourceFile>, -} - - -impl SourceUnit { - /// Load from a source file and an associated head and tail file. 
- pub fn from_path<P: Into<PathBuf>>(path: P, extension: &str) -> Result<Self, ParseError> { - let main_path = canonicalize_path(path); - let main_path_str = main_path.as_os_str().to_string_lossy().to_string(); - let head_extension = format!("head.{extension}"); - let tail_extension = format!("tail.{extension}"); - let is_head = main_path_str.ends_with(&head_extension); - let is_tail = main_path_str.ends_with(&tail_extension); - let is_not_main = !main_path_str.ends_with(extension); - if is_not_main || is_head || is_tail { return Err(ParseError::InvalidExtension); } - - let symbols = parse_symbols_from_file(&main_path)?; - let head_path = main_path.with_extension(head_extension); - let tail_path = main_path.with_extension(tail_extension); - - let main = SourceFile { path: main_path, symbols }; - let head = match parse_symbols_from_file(&head_path) { - Ok(symbols) => Some(SourceFile { path: head_path, symbols }), - Err(_) => None, - }; - let tail = match parse_symbols_from_file(&tail_path) { - Ok(symbols) => Some(SourceFile { path: tail_path, symbols }), - Err(_) => None, - }; - Ok( SourceUnit { main, head, tail } ) - } - - /// Load from a string of source code. - pub fn from_source_code<P: Into<PathBuf>>(source_code: String, path: P) -> Self { - let path = canonicalize_path(path); - let symbols = parse_symbols_from_source(source_code, Some(&path)); - Self { - main: SourceFile { path, symbols }, - head: None, - tail: None, - } - } -} - - -/// Read and parse all symbols from a source file. -fn parse_symbols_from_file(path: &Path) -> Result<Symbols, ParseError> { - let source = read_source_from_file(path)?; - Ok(parse_symbols_from_source(source, Some(path))) -} - - -/// Parse all symbols from a source code string. 
-fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbols { - use SyntacticTokenVariant as SynVar; - - let token_iter = SyntacticParser::from_source_code(&source_code, path); - let mut definitions = Vec::new(); - let mut references = Vec::new(); - - for token in token_iter { - let source = token.source; - match token.variant { - SynVar::LabelDefinition(name) => { - let variant = SymbolVariant::LabelDefinition; - definitions.push(Symbol { name, source, variant }); - }, - SynVar::MacroDefinition(name) => { - let variant = SymbolVariant::MacroDefinition; - definitions.push(Symbol { name, source, variant }); - } - SynVar::Symbol(name) => { - let variant = SymbolVariant::Reference; - references.push(Symbol { name, source, variant }); - }, - _ => (), - } - } - - Symbols { - definitions: Some(definitions), - references: Some(references), - source_code, - } -} - - -/// Attempt to read program source from a file. -pub fn read_source_from_file(path: &Path) -> Result<String, ParseError> { - match std::fs::read(&path) { - Ok(bytes) => match String::from_utf8(bytes) { - Ok(source) => Ok(source), - Err(_) => return Err(ParseError::InvalidUtf8), - } - Err(err) => return Err( match err.kind() { - std::io::ErrorKind::NotFound => ParseError::NotFound, - std::io::ErrorKind::PermissionDenied => ParseError::NotReadable, - std::io::ErrorKind::IsADirectory => ParseError::IsADirectory, - _ => ParseError::Unknown, - } ) - } -} - - -fn canonicalize_path<P: Into<PathBuf>>(path: P) -> PathBuf { - let pathbuf = path.into(); - match pathbuf.canonicalize() { - Ok(canonical) => canonical, - Err(_) => pathbuf, - } -} - - - -pub struct SourceFile { - pub path: PathBuf, - pub symbols: Symbols, -} - - -pub struct Symbols { - pub definitions: Option<Vec<Symbol>>, - pub references: Option<Vec<Symbol>>, - pub source_code: String, -} - - -pub struct Symbol { - pub name: String, - pub variant: SymbolVariant, - pub source: SourceSpan, -} - - -#[derive(PartialEq)] -pub enum 
SymbolVariant { - LabelDefinition, - MacroDefinition, - Reference, -} @@ -1,21 +1,245 @@ -#![feature(extract_if)] -#![feature(io_error_more)] -#![feature(map_try_insert)] +#![feature(path_add_extension)] +mod formats; +mod types; +mod stages; +pub use formats::*; +pub use types::*; +pub use stages::*; -mod gather_libraries; -mod symbol_resolver; +use assembler::*; +use log::*; +use switchboard::*; -pub use gather_libraries::*; -pub use symbol_resolver::*; +use std::io::Read; +use std::io::Write; -mod locators; -mod tokens; -mod translators; -pub use locators::*; -pub use tokens::*; -pub use translators::*; +pub const RETURN_MODE: u8 = 0x80; +pub const WIDE_MODE: u8 = 0x40; +pub const IMMEDIATE_MODE: u8 = 0x20; -mod print; -pub use print::*; + +pub fn assemble(mut args: Switchboard, invocation: &str) -> ! { + args.named("help").short('h'); + args.named("version"); + args.named("verbose").short('v'); + + if args.get("help").as_bool() { + print_help(invocation); + std::process::exit(0); + } + if args.get("version").as_bool() { + let name = env!("CARGO_PKG_NAME"); + let version = env!("CARGO_PKG_VERSION"); + eprintln!("{name} v{version}"); + eprintln!("Written by Ben Bridle."); + std::process::exit(0); + } + if args.get("verbose").as_bool() { + log::set_log_level(log::LogLevel::Info); + } + + args.positional("source"); + args.positional("destination"); + args.named("extension").default("brc"); + + args.named("no-libs"); + args.named("no-project-libs"); + args.named("no-env-libs"); + args.named("no-truncate"); + + args.named("format").default("raw"); + args.named("dry-run").short('n'); + args.named("tree"); + args.named("with-symbols"); + args.raise_errors(); + + let source_path = args.get("source").as_path_opt().map( + |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); + let destination_path = args.get("destination").as_path_opt(); + let extension = args.get("extension").as_string(); + let opt_extension = Some(extension.as_str()); + + let no_libs = 
args.get("no-libs").as_bool(); + let no_project_libs = args.get("no-project-libs").as_bool(); + let no_env_libs = args.get("no-env-libs").as_bool(); + let no_truncate = args.get("no-truncate").as_bool(); + + let format = Format::from_str(args.get("format").as_str()); + let dry_run = args.get("dry-run").as_bool(); + let print_tree = args.get("tree").as_bool(); + let export_symbols = args.get("with-symbols").as_bool(); + + // ----------------------------------------------------------------------- + + let mut compiler = new_compiler(); + + if let Some(path) = &source_path { + info!("Reading program source from {path:?}"); + compiler.root_from_path(path).unwrap_or_else(|err| fatal!("{err:?}: {path:?}")); + } else { + let mut source_code = String::new(); + info!("Reading program source from standard input"); + if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { + fatal!("Could not read from standard input\n{err:?}"); + } + compiler.root_from_string(source_code, "<standard input>") + }; + if compiler.error().is_some() && !no_libs && !no_project_libs { + compiler.include_libs_from_parent(opt_extension); + } + if compiler.error().is_some() && !no_libs && !no_env_libs { + compiler.include_libs_from_path_variable("BEDROCK_LIBS", opt_extension); + } + + if print_tree { + compiler.hierarchy().report() + } + if let Some(error) = compiler.error() { + error.report(); + std::process::exit(1); + } + + let merged_source = compiler.get_compiled_source().unwrap_or_else(|error| { + error.report(); + std::process::exit(1); + }); + + if !dry_run && format == Format::Source { + write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); + } + + // ----------------------------------------------------------------------- + + let path = Some("<merged source>"); + let syntactic = match parse_syntactic(&merged_source, path) { + Ok(tokens) => tokens, + Err(errors) => { + report_syntactic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let 
semantic = match parse_semantic(syntactic) { + Ok(tokens) => tokens, + Err(errors) => { + report_semantic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let program = match generate_bytecode(&semantic) { + Ok(program) => program, + Err(errors) => { + report_bytecode_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let AssembledProgram { mut bytecode, symbols } = program; + + let length = bytecode.len(); + let percentage = (length as f32 / 65536.0 * 100.0).round() as u16; + info!("Assembled program in {length} bytes ({percentage}% of maximum)"); + + if !no_truncate { + // Remove null bytes from end of bytecode. + while let Some(0) = bytecode.last() { + bytecode.pop(); + } + let new_length = bytecode.len(); + let difference = length - new_length; + if difference > 0 { + info!("Truncated program to {new_length} bytes (saved {difference} bytes)"); + } + } + + if !dry_run { + if export_symbols { + if let Some(path) = &destination_path { + let mut symbols_path = path.to_path_buf(); + symbols_path.add_extension("sym"); + let mut symbols_string = String::new(); + for symbol in &symbols { + let address = &symbol.address; + let name = &symbol.name; + let location = &symbol.source.location(); + symbols_string.push_str(&format!( + "{address:04x} {name} {location}\n" + )); + } + match std::fs::write(&symbols_path, symbols_string) { + Ok(_) => info!("Saved symbols to {symbols_path:?}"), + Err(err) => info!("Could not write symbols to {symbols_path:?}\n{err:?}"), + } + } + } + + let bytes = match format { + Format::Raw => bytecode, + Format::Clang => format_clang(&bytecode), + Format::Source => unreachable!("Source output is handled before full assembly"), + }; + write_bytes_and_exit(&bytes, destination_path.as_ref()); + } + std::process::exit(0); +} + + +fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! 
{ + match path { + Some(path) => match std::fs::write(path, bytes) { + Ok(_) => info!("Wrote output to {:?}", path.as_ref()), + Err(err) => fatal!("Could not write to {:?}\n{err:?}", path.as_ref()), + } + None => match std::io::stdout().write_all(bytes) { + Ok(_) => info!("Wrote output to standard output"), + Err(err) => fatal!("Could not write to standard output\n{err:?}"), + } + } + std::process::exit(0); +} + + +fn print_help(invocation: &str) { + eprintln!("\ +Usage: {invocation} [source] [destination] + +Assembler for the Bedrock computer system. + +Usage: + To assemble a Bedrock program from a source file and write to an output + file, run `br-asm [source] [destination]`, where [source] is the path + of the source file and [destination] is the path to write to. + + If [destination] is omitted, the assembled program will be written to + standard output. If [source] is omitted, the program source code will + be read from standard input. + +Environment variables: + BEDROCK_LIBS + A list of colon-separated paths that will be searched to find Bedrock + source code files to use as libraries when assembling a Bedrock program. + If a library file resolves an unresolved symbol in the program being + assembled, the library file will be merged into the program. + +Arguments: + [source] Bedrock source code file to assemble. + [destination] Destination path for assembler output. 
+ +Switches: + --dry-run (-n) Assemble and show errors only, don't write any output + --extension=<ext> File extension to identify source files (default is 'brc') + --format=<fmt> Output format to use for assembled program (default is 'raw') + --no-project-libs Don't search for libraries in the source parent folder + --no-env-libs Don't search for libraries in the BEDROCK_LIBS path variable + --no-libs Combination of --no-project-libs and --no-env-libs + --no-truncate Don't remove trailing zero-bytes from the assembled program + --tree Show a tree diagram of all included library files + --with-symbols Also generate debug symbols file with extension '.sym' + --help (-h) Print this help information + --verbose, (-v) Print additional information + --version Print the program version and exit +"); +} diff --git a/src/locators.rs b/src/locators.rs deleted file mode 100644 index b7db1ee..0000000 --- a/src/locators.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod bytecode; -mod source; - -pub use bytecode::*; -pub use source::*; diff --git a/src/locators/bytecode.rs b/src/locators/bytecode.rs deleted file mode 100644 index 500e9f0..0000000 --- a/src/locators/bytecode.rs +++ /dev/null @@ -1,39 +0,0 @@ -pub struct BytecodeSpan { - /// The location of this span in the assembled bytecode. - pub location: BytecodeLocation, - /// The bytes which this span represents. - pub bytes: Vec<u8>, -} - - -impl Default for BytecodeSpan { - fn default() -> Self { - Self { - location: BytecodeLocation { - address: 0, - length: 0, - }, - bytes: Vec::new(), - } - } -} - - -#[derive(Clone, Copy)] -pub struct BytecodeLocation { - // Address of the first byte. - pub address: usize, - // Length as a number of bytes. 
- pub length: usize, -} - - -impl std::fmt::Display for BytecodeLocation { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "0x{:>04x}", self.address)?; - if self.length > 0 { - write!(f, "-0x{:>04x}", self.address + self.length)?; - } - Ok(()) - } -} diff --git a/src/locators/source.rs b/src/locators/source.rs deleted file mode 100644 index 20542e3..0000000 --- a/src/locators/source.rs +++ /dev/null @@ -1,75 +0,0 @@ -use std::path::PathBuf; - - -#[derive(Clone)] -pub struct SourceSpan { - /// The source characters which this span represents. - pub string: String, - /// The location of this span in the merged source file. - pub in_merged: SourceLocation, - /// The location of this span in the original source file. - pub in_source: Option<SourceLocation>, -} - -impl SourceSpan { - pub fn location(&self) -> &SourceLocation { - self.in_source.as_ref().unwrap_or(&self.in_merged) - } -} - - -#[derive(Clone)] -pub struct SourceLocation { - /// File path the source was loaded from. - pub path: Option<PathBuf>, - /// Position of the first character of the string. - pub start: Position, - /// Position of the final character of the string. - pub end: Position, -} - -impl std::fmt::Display for SourceLocation { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let y = self.start.line + 1; - let x = self.start.column + 1; - match &self.path { - Some(path) => write!(f, "{}:{y}:{x}", path.as_os_str().to_string_lossy()), - None => write!(f, "<unknown>:{y}:{x}"), - } - } -} - - -#[derive(Clone, Copy)] -pub struct Position { - /// The number of lines that precede this line in the file. - pub line: usize, - /// The number of characters that precede this character in the line. 
- pub column: usize, -} - -impl Position { - pub fn to_next_char(&mut self) { - self.column += 1; - } - - pub fn to_next_line(&mut self) { - self.line += 1; - self.column = 0; - } - - pub fn advance(&mut self, c: char) { - match c { - '\n' => self.to_next_line(), - _ => self.to_next_char(), - } - } -} - -impl std::fmt::Display for Position { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - let y = self.line + 1; - let x = self.column + 1; - write!(f, "{y}:{x}") - } -} diff --git a/src/print.rs b/src/print.rs deleted file mode 100644 index 800a1d5..0000000 --- a/src/print.rs +++ /dev/null @@ -1,264 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; -use SemanticParseError as SemErr; -use SyntacticParseError as SynErr; - - -const NORMAL: &str = "\x1b[0m"; -const BOLD: &str = "\x1b[1m"; -const DIM: &str = "\x1b[2m"; -const WHITE: &str = "\x1b[37m"; -const RED: &str = "\x1b[31m"; -const YELLOW: &str = "\x1b[33m"; -const BLUE: &str = "\x1b[34m"; - - -pub struct Context<'a> { - pub source_code: &'a str, - pub source: &'a SourceSpan, -} - - -/// Print all errors found in the semantic tokens, including those inside macro -/// definitions. Returns true if at least one error was printed. 
-pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &str) -> bool { - let mut found_error = false; - for semantic_token in semantic_tokens { - match &semantic_token.variant { - SemVar::Error(err) => { - let context = Context { - source_code: source_code, - source: &semantic_token.source, - }; - found_error = true; - print_semantic_error(&err, context) - } - SemVar::MacroDefinition(definition) => { - for body_token in &definition.body_tokens { - if let SemVar::Error(err) = &body_token.variant { - let context = Context { - source_code: source_code, - source: &body_token.source, - }; - found_error = true; - print_semantic_error(err, context) - } - } - } - _ => (), - } - } - return found_error; -} - -fn print_semantic_error(error: &SemanticParseError, context: Context) { - let message = get_message_for_semantic_error(error); - print_error(&message, context); -} - -fn get_message_for_semantic_error(error: &SemanticParseError) -> String { - match error { - SemErr::LabelDefinitionInMacroDefinition => - format!("Labels cannot be defined inside a macro"), - SemErr::MacroDefinitionInMacroDefinition => - format!("Macros cannot be defined inside a macro"), - SemErr::StrayMacroTerminator => - format!("Macro definition terminator is missing a macro definition"), - SemErr::StrayBlockClose => - format!("Block was not opened, add a '{{' character to open"), - SemErr::UnclosedBlock => - format!("Block was not closed, add a '}}' character to close"), - SemErr::UndefinedSymbol(name) => - format!("Undefined symbol, no label or macro has been defined with the name '{name}'"), - SemErr::RedefinedSymbol((_, source)) => - format!("Redefined symbol, first defined at {}", source.location()), - SemErr::MacroInvocationBeforeDefinition((_, source)) => - format!("Macro used before definition, definition is at {}", source.location()), - SemErr:: SyntaxError(syntax_error) => match syntax_error { - SynErr::UnterminatedComment => - format!("Unclosed comment, add a ')' 
character to close"), - SynErr::UnterminatedRawString => - format!("Unclosed string, add a ' character to close"), - SynErr::UnterminatedNullString => - format!("Unclosed string, add a \" character to close"), - SynErr::InvalidPaddingValue(_) => - format!("Padding value must be two or four hexidecimal digits"), - } - } -} - - -pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool { - let mut found_error = false; - for reference in &resolver.unresolved { - found_error = true; - let message = format!( - "Undefined symbol, no label or macro has been defined with the name '{}'", - &reference.symbol.source.string, - ); - let source_code = resolver.get_source_code_for_tracked_symbol(reference); - let source = &reference.symbol.source; - print_error(&message, Context { source_code, source } ) - } - for redefinition in &resolver.redefinitions { - found_error = true; - let definition = resolver.definitions.get(redefinition.1).unwrap(); - let message = format!( - "Redefined symbol, first defined at {}", - &definition.symbol.source.in_merged, - ); - let source_code = resolver.get_source_code_for_tracked_symbol(&redefinition.0); - let source = &redefinition.0.symbol.source; - print_error(&message, Context { source_code, source } ) - } - return found_error; -} - - -/// The `ids` argument contains a list of the IDs of the source units which -/// cyclicly depend on one another. -pub fn print_cyclic_source_units(ids: &[usize], resolver: &SymbolResolver) { - eprintln!("{BOLD}{RED}[ERROR]{WHITE}: Some libraries contain a dependency cycle{NORMAL}"); - for id in ids { - if let Some(unit) = resolver.source_units.get(*id) { - let path = &unit.source_unit.main.path; - let path_str = path.as_os_str().to_string_lossy(); - if let Some(name_str) = get_unit_name(&unit.source_unit) { - eprintln!("{name_str}{NORMAL}{DIM} ({path_str}){NORMAL}"); - } else { - eprintln!("{path_str}"); - }; - // Print parents involved in dependency cycle. 
- for parent_id in &unit.parent_ids { - if !ids.contains(parent_id) { continue; } - if let Some(parent_unit) = resolver.source_units.get(*parent_id) { - let parent_path = &parent_unit.source_unit.main.path; - let parent_path_str = parent_path.as_os_str().to_string_lossy(); - let parent_name_str = match get_unit_name(&parent_unit.source_unit) { - Some(parent_name_str) => parent_name_str, - None => parent_path_str.to_string(), - }; - eprintln!(" => {parent_name_str} {DIM}({parent_path_str}){NORMAL}"); - } - } - } - } -} - - -pub fn print_error(message: &str, context: Context) { - print_source_issue(message, context, SourceIssueVariant::Error); -} - -pub fn print_warning(message: &str, context: Context) { - print_source_issue(message, context, SourceIssueVariant::Warning); -} - -fn print_source_issue(message: &str, context: Context, variant: SourceIssueVariant) { - let (label, colour) = match variant { - SourceIssueVariant::Warning => ("WARNING", YELLOW), - SourceIssueVariant::Error => ("ERROR", RED), - }; - - // Prepare variables. - let location = &context.source.in_merged; - let y = location.start.line + 1; - let digits = y.to_string().len(); - let arrow = "-->"; - let space = " "; - - // Print message and file path. - eprintln!("{BOLD}{colour}[{label}]{WHITE}: {message}{NORMAL}"); - eprintln!("{BLUE}{arrow:>w$}{NORMAL} {location}{NORMAL}", w=digits+3); - if let Some(source) = &context.source.in_source { - eprintln!("{BLUE}{arrow:>w$}{NORMAL} {source}{NORMAL}", w=digits+3); - } - - let start = location.start.column; - let end = location.end.column + 1; - - // Print source code line. - eprint!("{BLUE} {y} | {NORMAL}"); - let line = get_line_from_source_code(context.source_code, location.start.line); - for (i, c) in line.chars().enumerate() { - if i == start { eprint!("{colour}") } - if i == end { eprint!("{NORMAL}") } - eprint!("{c}"); - } - eprintln!("{NORMAL}"); - - // Print source code underline. 
- eprint!("{BLUE} {space:>w$} | {NORMAL}", w=digits); - for _ in 0..start { eprint!(" "); } - eprint!("{colour}"); - for _ in start..end { eprint!("^"); } - eprintln!("{NORMAL}"); -} - - -fn get_line_from_source_code(source_code: &str, line: usize) -> &str { - source_code.split('\n').nth(line).unwrap_or("<error reading line from source>") -} - - -enum SourceIssueVariant { - Warning, - Error, -} - - -/// Print a tree containing the name and path of each source unit. -pub fn print_source_tree(resolver: &SymbolResolver) { - eprintln!("."); - let len = resolver.root_unit_ids.len(); - for (i, id) in resolver.root_unit_ids.iter().enumerate() { - let end = i + 1 == len; - print_source_tree_leaf(resolver, *id, Vec::new(), end); - } - eprintln!(); -} - -fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec<bool>, end: bool) { - // A level entry is true if all entries in that level have been printed. - for level in &levels { - match level { - false => eprint!("│ "), - true => eprint!(" "), - } - } - // The end value is true if all siblings of this entry have been printed. 
- match end { - false => eprint!("├── "), - true => eprint!("└── "), - } - if let Some(unit) = resolver.source_units.get(id) { - let path_str = &unit.source_unit.main.path.as_os_str().to_string_lossy(); - if let Some(name_str) = get_unit_name(&unit.source_unit) { - eprint!("{name_str}{BLUE}"); - if unit.source_unit.head.is_some() { eprint!(" +head") } - if unit.source_unit.tail.is_some() { eprint!(" +tail") } - let mut unresolved = 0; - for symbol in &resolver.unresolved { - if symbol.source_id == id { unresolved += 1; } - } - if unresolved > 0 { eprint!("{RED} ({unresolved})"); } - eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}"); - } else { - eprintln!("{path_str}"); - } - levels.push(end); - let len = unit.child_ids.len(); - for (i, id) in unit.child_ids.iter().enumerate() { - let end = i + 1 == len; - print_source_tree_leaf(resolver, *id, levels.clone(), end); - } - } else { - eprintln!("<error loading source unit details>"); - } -} - - -fn get_unit_name(source_unit: &SourceUnit) -> Option<String> { - source_unit.main.path.file_name().map(|s| s.to_string_lossy().to_string()) -} diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..02cc739 --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,150 @@ +use crate::*; + +use indexmap::IndexMap; + + +/// Doesn't truncate trailing null bytes. 
+pub fn generate_bytecode(semantic: &Program) -> Result<AssembledProgram, Vec<Tracked<BytecodeError>>> { + let mut generator = BytecodeGenerator::new(&semantic.definitions); + generator.parse(&semantic.tokens, false); + generator.fill_slots(); + let mut symbols = Vec::new(); + for (name, information) in generator.labels { + let source = semantic.definitions.get(&name).unwrap().source.clone(); + let address = information.address; + symbols.push(AssembledSymbol { name, address, source }); + } + match generator.errors.is_empty() { + true => Ok(AssembledProgram { bytecode: generator.bytecode, symbols }), + false => Err(generator.errors), + } +} + + +pub struct BytecodeGenerator<'a> { + definitions: &'a IndexMap<String, Tracked<Definition>>, + labels: IndexMap<String, LabelInformation>, + stack: Vec<usize>, + bytecode: Vec<u8>, + errors: Vec<Tracked<BytecodeError>>, +} + +struct LabelInformation { + address: usize, + slots: Vec<usize>, +} + +impl<'a> BytecodeGenerator<'a> { + pub fn new(definitions: &'a IndexMap<String, Tracked<Definition>>) -> Self { + let mut labels = IndexMap::new(); + for (name, definition) in definitions { + if let DefinitionVariant::LabelDefinition = definition.variant { + // Use fake address for now. + let information = LabelInformation { address: 0, slots: Vec::new() }; + labels.insert(name.to_string(), information); + } + } + Self { + definitions, + labels, + stack: Vec::new(), + bytecode: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) { + macro_rules! byte { + ($byte:expr) => { self.bytecode.push($byte) }; + } + macro_rules! 
double { + ($double:expr) => {{ + let [high, low] = u16::to_be_bytes($double); + self.bytecode.push(high); self.bytecode.push(low); + }}; + } + + for token in tokens { + let i = self.bytecode.len(); + match &token.value { + SemanticToken::Literal(value) => match value { + Value::Byte(byte) => byte!(*byte), + Value::Double(double) => double!(*double), + } + SemanticToken::Pad(value) => { + self.bytecode.resize(i + usize::from(value), 0); + }, + SemanticToken::String(bytes) => { + self.bytecode.extend_from_slice(bytes) + }, + SemanticToken::Comment(_) => (), + SemanticToken::BlockOpen(_) => { + self.stack.push(i); + // Use a fake index for now. + double!(0); + } + SemanticToken::BlockClose(_) => { + if i > 0xFFFF { + let error = BytecodeError::InvalidBlockAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let Some(addr) = self.stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + let [high, low] = (i as u16).to_be_bytes(); + self.bytecode[addr] = high; + self.bytecode[addr+1] = low; + } + SemanticToken::Symbol(name) => { + if let Some(definition) = self.definitions.get(name) { + match &definition.variant { + DefinitionVariant::MacroDefinition(body) => { + self.parse(body, true); + } + DefinitionVariant::LabelDefinition => { + let information = self.labels.get_mut(name).unwrap(); + information.slots.push(i); + // Use a fake index for now. + double!(0); + } + } + } else { + unreachable!("Uncaught undefined symbol '{name}'"); + } + } + SemanticToken::Instruction(instruction) => { + byte!(instruction.value) + } + SemanticToken::LabelDefinition(name) => if in_macro { + unreachable!("Uncaught label definition in macro"); + } else { + if i > 0xFFFF { + let error = BytecodeError::InvalidLabelAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let information = self.labels.get_mut(name).unwrap(); + // Replace fake index with real index. 
+ information.address = i; + } + SemanticToken::MacroDefinition{ .. } => if in_macro { + unreachable!("Uncaught macro definition in macro"); + } + } + } + + if !in_macro && !self.stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + } + + /// Fill each label slot with a real label address. + pub fn fill_slots(&mut self) { + for information in self.labels.values() { + let [high, low] = (information.address as u16).to_be_bytes(); + for addr in &information.slots { + self.bytecode[*addr] = high; + self.bytecode[*addr + 1] = low; + } + } + } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..902fcd7 --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,37 @@ +use crate::*; + + +pub struct AssembledProgram { + pub bytecode: Vec<u8>, + pub symbols: Vec<AssembledSymbol>, +} + +pub struct AssembledSymbol { + pub name: String, + pub address: usize, + pub source: SourceSpan, +} + +pub enum BytecodeError { + InvalidLabelAddress(usize), + InvalidBlockAddress(usize), +} + + +pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { + for error in errors { + report_bytecode_error(error, source_code); + } +} + + +fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + BytecodeError::InvalidLabelAddress(address) => + &format!("The label address exceeds 0xFFFF: 0x{address:X}"), + BytecodeError::InvalidBlockAddress(address) => + &format!("The block address exceeds 0xFFFF: 0x{address:X}"), + }; + report_source_issue(LogLevel::Error, &context, message); +} diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs new file mode 100644 index 0000000..97bf20c --- /dev/null +++ b/src/stages/compiler.rs @@ -0,0 +1,84 @@ +use crate::*; + +use assembler::SymbolRole::*; +use assembler::DefinitionType::*; + + +pub fn new_compiler() -> 
Compiler { + Compiler::new(parse_symbols, push_code) +} + + +/// Parse all symbols from a source code string. +pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { + let syntactic = match parse_syntactic(source_code, path) { + Ok(syntactic) => syntactic, + Err(_) => return None, + }; + Some(SymbolParser::new().parse(&syntactic)) +} + +/// Push source code to a source compilation string. +pub fn push_code(compilation: &mut String, source_file: &SourceFile) { + // Skip blank files. + let source_code = &source_file.source_code; + if source_code.chars().all(|c| c.is_whitespace()) { return; } + // Ensure that the previous section is followed by two newline characters. + if !compilation.is_empty() { + if !compilation.ends_with('\n') { compilation.push('\n'); } + if !compilation.ends_with("\n\n") { compilation.push('\n'); } + } + // Push a path comment and the source code. + let path_str = source_file.path.as_os_str().to_string_lossy(); + let path_comment = format!("(: {path_str} )\n"); + compilation.push_str(&path_comment); + compilation.push_str(&source_code); +} + + +// Extract symbol definitions from a list of syntactic tokens. 
+pub struct SymbolParser { + pub symbols: Vec<Symbol>, +} + +impl SymbolParser { + pub fn new() -> Self { + Self { + symbols: Vec::new(), + } + } + + fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { + let name = name.to_string(); + let namespace = Vec::new(); + let source = source.to_owned(); + self.symbols.push(Symbol { name, namespace, source, role }); + } + + pub fn parse(mut self, syntactic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> { + for token in syntactic { + match &token.value { + SyntacticToken::MacroDefinition(definition) => { + self.record_symbol( + &definition.name, + &definition.name.source, + Definition(MustPrecedeReference), + ); + for token in &definition.body { + if let SyntacticToken::Symbol(name) = &token.value { + self.record_symbol(&name, &token.source, Reference); + } + } + } + SyntacticToken::LabelDefinition(name) => { + self.record_symbol(&name, &token.source, Definition(CanFollowReference)); + } + SyntacticToken::Symbol(name) => { + self.record_symbol(&name, &token.source, Reference); + } + _ => (), + } + } + return self.symbols; + } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..76bda0d --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,26 @@ +mod compiler; +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod bytecode; +mod bytecode_tokens; +pub use compiler::*; +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! 
indent { + (0, $($tokens:tt)*) => {{ + println!($($tokens)*); + }}; + ($indent:expr, $($tokens:tt)*) => {{ + for _ in 0..$indent { print!(" "); } + println!($($tokens)*); + }}; +} diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..dc9709e --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,154 @@ +use crate::*; + +use std::str::FromStr; + +use indexmap::{IndexMap, IndexSet}; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> { + let mut errors = Vec::new(); + + // Record all label definitions and macro names up front. + let mut definitions = IndexMap::new(); + let mut macro_names = IndexSet::new(); + for token in &syntactic { + match &token.value { + SyntacticToken::LabelDefinition(name) => { + // Check if identifier is reserved. + if Instruction::from_str(&name).is_ok() { + let error = SemanticError::ReservedIdentifier(name.to_string()); + errors.push(Tracked::from(error, token.source.clone())); + } + // Use a fake index for now. + let definition = Definition::new(0, DefinitionVariant::LabelDefinition); + let tracked = Tracked::from(definition, token.source.clone()); + if let Some(_) = definitions.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + SyntacticToken::MacroDefinition(definition) => { + let name = &definition.name; + // Check if identifier is reserved. + if Instruction::from_str(&name).is_ok() { + let error = SemanticError::ReservedIdentifier(name.to_string()); + errors.push(Tracked::from(error, name.source.clone())); + } + if !macro_names.insert(name.clone()) { + unreachable!("Uncaught duplicate macro definition '{name}'") + } + } + _ => (), + } + } + + // Convert syntactic tokens to semantic tokens. 
+ let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut stack = Vec::new(); + + for syn_token in syntactic { + let i = tokens.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + stack.push(i); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + // Replace fake index with real index. + tokens[k].value = SemanticToken::BlockOpen(i); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.references.push(i); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(name) => { + let definition = definitions.get_mut(&name).unwrap(); + // Replace fake index with real index. 
+ definition.value.definition = i; + SemanticToken::LabelDefinition(name) + } + SyntacticToken::MacroDefinition(definition) => { + let name = definition.name.clone(); + let mut body: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut body_stack = Vec::new(); + for syn_token in definition.body { + let j = body.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + body_stack.push(j); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = body_stack.pop() else { + unreachable!("Uncaught unmatched block terminator in macro '{name}'"); + }; + // Replace fake index with real index. + body[k].value = SemanticToken::BlockOpen(j); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.deep_references.push((i, j)); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}' in macro '{name}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(label) => + unreachable!("Uncaught label definition '{label}' in macro '{name}'"), + SyntacticToken::MacroDefinition(definition) => + unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name), + }; + body.push(Tracked::from(sem_token, syn_token.source)); + } + + let variant = DefinitionVariant::MacroDefinition(body); + let source = 
definition.name.source.clone(); + let tracked = Tracked::from(Definition::new(i, variant), source); + if let Some(_) = definitions.insert(name.value.clone(), tracked) { + unreachable!("Uncaught duplicate definition '{name}'") + }; + if !body_stack.is_empty() { + unreachable!("Uncaught unterminated block in macro '{name}'"); + } + SemanticToken::MacroDefinition(name) + } + }; + tokens.push(Tracked::from(sem_token, syn_token.source)); + } + + if !stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + match errors.is_empty() { + true => Ok(Program { definitions, tokens }), + false => Err(errors), + } +} diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs new file mode 100644 index 0000000..c735828 --- /dev/null +++ b/src/stages/semantic_tokens.rs @@ -0,0 +1,97 @@ +use crate::*; + +use indexmap::IndexMap; + + +pub struct Program { + pub definitions: IndexMap<String, Tracked<Definition>>, + pub tokens: Vec<Tracked<SemanticToken>>, +} + +pub struct Definition { + pub variant: DefinitionVariant, + /// Index of definition token. + pub definition: usize, + /// Indices of symbols referencing this definition. + pub references: Vec<usize>, + /// Indices of references inside other definitions. 
+ pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { + pub fn new(i: usize, variant: DefinitionVariant) -> Self { + Self { + variant, + definition: i, + references: Vec::new(), + deep_references: Vec::new(), + } + } +} + +pub enum DefinitionVariant { + LabelDefinition, + MacroDefinition(Vec<Tracked<SemanticToken>>), +} + +pub enum SemanticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen(usize), // index to matching block-close + BlockClose(usize), // index to matching block-open + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(Tracked<String>), +} + +pub enum SemanticError { + InvocationBeforeDefinition, + ReservedIdentifier(String), +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { + for error in errors { + report_semantic_error(error, source_code); + } +} + + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SemanticError::InvocationBeforeDefinition => + "Macro cannot be invoked before it has been defined", + SemanticError::ReservedIdentifier(name) => + &format!("Identifier '{name}' is reserved for a built-in instruction"), + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &IndexMap<String, Tracked<Definition>>) { + match token { + SemanticToken::Literal(value) => indent!(i, "Literal({value})"), + SemanticToken::Pad(value) => indent!(i, "Pad({value})"), + SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SemanticToken::Comment(_) => indent!(i, "Comment"), + SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"), + SemanticToken::BlockClose(pointer) => indent!(i, "BlockClose(*{pointer})"), + SemanticToken::Symbol(name) => 
indent!(i, "Symbol({name})"), + SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SemanticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SemanticToken::MacroDefinition(name) => { + indent!(i, "MacroDefinition({name})"); + if let Some(definition) = definitions.get(name.as_str()) { + if let DefinitionVariant::MacroDefinition(body) = &definition.variant { + for token in body { + print_semantic_token(i+1, token, definitions); + } + } + } + } + } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..59b8b95 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,211 @@ +use crate::*; + +use std::path::PathBuf; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), "") +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + t.add_delimiters(&['(',')','[',']','{','}',';']); + t.add_terminators(&[':']); + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + let mut label_name = label_name.to_string(); + + macro_rules! err { + ($error:expr) => {{ + err!($error, t.get_source()); + }}; + ($error:expr, $source:expr) => {{ + errors.push(Tracked::from($error, $source)); + continue; + }}; + } + + macro_rules! check_name { + ($name:expr) => {{ + check_name!($name, t.get_source()); + }}; + ($name:expr, $source:expr) => { + if $name.chars().count() > 63 { + let error = SyntacticError::InvalidIdentifier($name.clone()); + errors.push(Tracked::from(error, $source.clone())); + } + }; + } + + // Eat characters until the end character is found. + macro_rules! is_end { + ($end:expr) => { + |t: &mut Tokeniser| { + t.eat_char() == Some($end) + } + }; + } + + loop { + // Eat leading whitespace. 
+ while let Some(c) = t.peek_char() { + match [' ', '\n', '\r', '\t'].contains(&c) { + true => t.eat_char(), + false => break, + }; + } + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let token = match c { + '"' => { + let source = t.get_source(); + match t.track_until(is_end!('"')) { + Some(string) => { + let mut bytes = string.into_bytes(); + bytes.push(0x00); + SyntacticToken::String(bytes) + } + None => err!(SyntacticError::UnterminatedNullString, source), + } + } + '\'' => { + let source = t.get_source(); + match t.track_until(is_end!('\'')) { + Some(string) => SyntacticToken::String(string.into_bytes()), + None => err!(SyntacticError::UnterminatedRawString, source), + } + } + '(' => { + let source = t.get_source(); + if let Some(string) = t.track_until(is_end!(')')) { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + continue; + } + } + SyntacticToken::Comment(string) + } else { + err!(SyntacticError::UnterminatedComment, source) + } + } + ')' => err!(SyntacticError::UnmatchedCommentTerminator), + '%' => { + let name = t.eat_token(); + let source = t.get_source(); + check_name!(name, source); + t.mark_child(); + if let Some(_) = t.track_until(is_end!(';')) { + let child = t.tokenise_child_span(); + match parse_body_from_tokeniser(child, &label_name) { + Ok(body) => { + let name = Tracked::from(name, source); + let definition = SyntacticMacroDefinition { name, body }; + SyntacticToken::MacroDefinition(definition) + } + Err(mut err) => { + errors.append(&mut err); + continue; + } + } + } else { + err!(SyntacticError::UnterminatedMacroDefinition, source); + } + } + ';' => err!(SyntacticError::UnmatchedMacroTerminator), + '{' => SyntacticToken::BlockOpen, + '}' => SyntacticToken::BlockClose, + '['|']' => continue, + '@' => { + label_name 
= t.eat_token(); + check_name!(label_name); + SyntacticToken::LabelDefinition(label_name.clone()) + } + '&' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::LabelDefinition(name) + } + '~' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::Symbol(name) + } + '#' => { + let token = t.eat_token(); + match token.parse::<Value>() { + Ok(value) => SyntacticToken::Pad(value), + Err(_) => err!(SyntacticError::InvalidPadValue), + } + }, + ':' => { + SyntacticToken::Instruction(Instruction { value: 0x21 }) + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Ok(value) = token.parse::<Value>() { + SyntacticToken::Literal(value) + } else if let Ok(instruction) = token.parse::<Instruction>() { + SyntacticToken::Instruction(instruction) + } else { + check_name!(token); + SyntacticToken::Symbol(token) + } + } + }; + + t.mark_end(); + let source = t.get_source(); + tokens.push(Tracked::from(token, source)); + } + + // Check that every block open matches a block close. + let mut stack = Vec::new(); + for token in &tokens { + match &token.value { + SyntacticToken::BlockOpen => stack.push(token.source.clone()), + SyntacticToken::BlockClose => if let None = stack.pop() { + let error = SyntacticError::UnmatchedBlockTerminator; + errors.push(Tracked::from(error, token.source.clone())); + } + _ => (), + } + } + for source in stack { + let error = SyntacticError::UnterminatedBlock; + errors.push(Tracked::from(error, source)); + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} + + +fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + for token in parse_syntactic_from_tokeniser(t, label_name)? 
{ + match token.value { + SyntacticToken::LabelDefinition(_) => { + let error = SyntacticError::LabelDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + SyntacticToken::MacroDefinition(_) => { + let error = SyntacticError::MacroDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + _ => tokens.push(token), + }; + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..35afa80 --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,94 @@ +use crate::*; + + +pub enum SyntacticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen, + BlockClose, + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(SyntacticMacroDefinition), +} + +pub struct SyntacticMacroDefinition { + pub name: Tracked<String>, + pub body: Vec<Tracked<SyntacticToken>>, +} + +pub enum SyntacticError { + UnterminatedBlock, + UnterminatedComment, + UnterminatedRawString, + UnterminatedNullString, + UnterminatedMacroDefinition, + UnmatchedBlockTerminator, + UnmatchedCommentTerminator, + UnmatchedMacroTerminator, + InvalidPadValue, + InvalidIdentifier(String), + MacroDefinitionInMacroDefinition, + LabelDefinitionInMacroDefinition, +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { + for error in errors { + report_syntactic_error(error, source_code); + } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SyntacticError::UnterminatedBlock => + "Block was not terminated, add a '}' character to terminate", + SyntacticError::UnterminatedComment => + "Comment was not terminated, add a ')' character to terminate", 
+ SyntacticError::UnterminatedRawString => + "String was not terminated, add a ' character to terminate", + SyntacticError::UnterminatedNullString => + "String was not terminated, add a '\"' character to terminate", + SyntacticError::UnterminatedMacroDefinition => + "Macro definition was not terminated, add a ';' character to terminate", + SyntacticError::UnmatchedBlockTerminator => + "Attempted to terminate a block, but no block was in progress", + SyntacticError::UnmatchedCommentTerminator => + "Attempted to terminate a comment, but no comment was in progress", + SyntacticError::UnmatchedMacroTerminator => + "Attempted to terminate a macro definition, but no macro definition was in progress", + SyntacticError::InvalidPadValue => + "The pad value must be two or four hexadecimal digits", + SyntacticError::InvalidIdentifier(name) => + &format!("An identifier cannot exceed 63 characters in length: {name}"), + SyntacticError::MacroDefinitionInMacroDefinition => + "A macro cannot be defined inside another macro", + SyntacticError::LabelDefinitionInMacroDefinition => + "A label cannot be defined inside a macro", + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { + match token { + SyntacticToken::Literal(value) => indent!(i, "Literal({value})"), + SyntacticToken::Pad(value) => indent!(i, "Pad({value})"), + SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SyntacticToken::Comment(_) => indent!(i, "Comment"), + SyntacticToken::BlockOpen => indent!(i, "BlockOpen"), + SyntacticToken::BlockClose => indent!(i, "BlockClose"), + SyntacticToken::Symbol(name) => indent!(i, "Symbol({name})"), + SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SyntacticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SyntacticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", 
definition.name); + for token in &definition.body { + print_syntactic_token(i+1, token); + } + } + } +} diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs deleted file mode 100644 index 0b89fb1..0000000 --- a/src/symbol_resolver.rs +++ /dev/null @@ -1,296 +0,0 @@ -use crate::*; - -use std::mem::take; - - -/// Resolve symbol references across source units. -pub struct SymbolResolver { - pub definitions: Vec<TrackedSymbol>, - /// All resolved references. - pub resolved: Vec<TrackedSymbol>, - /// All unresolved references. - pub unresolved: Vec<TrackedSymbol>, - /// Contains the `definitions` index of the original definition. - pub redefinitions: Vec<(TrackedSymbol, usize)>, - pub source_units: Vec<HeirarchicalSourceUnit>, - pub root_unit_ids: Vec<usize>, - pub unused_library_units: Vec<SourceUnit>, -} - - -impl SymbolResolver { - /// Construct a resolver from a root source unit. - pub fn from_source_unit(source_unit: SourceUnit) -> Self { - let mut new = Self { - definitions: Vec::new(), - resolved: Vec::new(), - unresolved: Vec::new(), - redefinitions: Vec::new(), - source_units: Vec::new(), - root_unit_ids: Vec::new(), - unused_library_units: Vec::new(), - }; - new.add_source_unit(source_unit, None); - return new; - } - - pub fn add_library_units(&mut self, mut source_units: Vec<SourceUnit>) { - self.unused_library_units.append(&mut source_units); - } - - pub fn resolve(&mut self) { - // Repeatedly test if any unused source unit resolves an unresolved symbol, - // breaking the loop when no new resolutions are found. 
- 'outer: loop { - for (i, source_unit) in self.unused_library_units.iter().enumerate() { - if let Some(id) = self.resolves_reference(&source_unit) { - let source_unit = self.unused_library_units.remove(i); - self.add_source_unit(source_unit, Some(id)); - continue 'outer; - } - } - break; - } - - // For every macro reference in every unit, find the ID of the unit which - // resolves that reference and add it to the .parent_ids field of the - // referencing unit. - for reference in &self.resolved { - let predicate = |d: &&TrackedSymbol| d.symbol.name == reference.symbol.name; - if let Some(definition) = self.definitions.iter().find(predicate) { - let is_self = reference.source_id == definition.source_id; - let is_label = definition.symbol.variant == SymbolVariant::LabelDefinition; - if is_self || is_label { continue; } - let referencing_unit = &mut self.source_units[reference.source_id]; - referencing_unit.parent_ids.push(definition.source_id); - }; - } - } - - /// Add a source unit to the resolver and link it to a parent unit. - pub fn add_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { - let source_id = self.source_units.len(); - - // Add all main symbols. - if let Some(definitions) = take(&mut source_unit.main.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Main); } - if let Some(references) = take(&mut source_unit.main.symbols.references) { - self.add_references(references, source_id, SourceRole::Main); } - - // Add all head symbols. - if let Some(head) = &mut source_unit.head { - if let Some(references) = take(&mut head.symbols.references) { - self.add_references(references, source_id, SourceRole::Head); } - if let Some(definitions) = take(&mut head.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Head); } - } - - // Add all tail symbols. 
- if let Some(tail) = &mut source_unit.tail { - if let Some(references) = take(&mut tail.symbols.references) { - self.add_references(references, source_id, SourceRole::Tail); } - if let Some(definitions) = take(&mut tail.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Tail); } - } - - if let Some(parent_id) = parent_id { - if let Some(parent_unit) = self.source_units.get_mut(parent_id) { - parent_unit.child_ids.push(source_id); - } - } else { - self.root_unit_ids.push(source_id); - } - - self.source_units.push( - HeirarchicalSourceUnit { - source_unit, - child_ids: Vec::new(), - parent_ids: Vec::new(), - } - ); - } - - fn add_references(&mut self, references: Vec<Symbol>, source_id: usize, source_role: SourceRole) { - for symbol in references { - let reference = TrackedSymbol { symbol, source_id, source_role }; - match self.definitions.contains(&reference) { - true => self.resolved.push(reference), - false => self.unresolved.push(reference), - } - } - } - - fn add_definitions(&mut self, definitions: Vec<Symbol>, source_id: usize, source_role: SourceRole) { - for symbol in definitions { - let predicate = |d: &TrackedSymbol| { &d.symbol.name == &symbol.name }; - if let Some(original) = self.definitions.iter().position(predicate) { - let definition = TrackedSymbol { symbol, source_id, source_role }; - let redefinition = (definition, original); - self.redefinitions.push(redefinition); - } else { - let predicate = |s: &mut TrackedSymbol| s.symbol.name == symbol.name; - for symbol in self.unresolved.extract_if(predicate) { - self.resolved.push(symbol); - } - self.unresolved.retain(|s| s.symbol.name != symbol.name); - let definition = TrackedSymbol { symbol, source_id, source_role }; - self.definitions.push(definition); - } - } - } - - /// Returns the ID of the owner of a symbol resolved by this unit. 
- pub fn resolves_reference(&self, source_unit: &SourceUnit) -> Option<usize> { - if let Some(definitions) = &source_unit.main.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - if let Some(head) = &source_unit.head { - if let Some(definitions) = &head.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - } - if let Some(tail) = &source_unit.tail { - if let Some(definitions) = &tail.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - } - return None; - } - - /// Returns the ID of the owner of a reference to one of these symbols. - fn source_id_of_unresolved(&self, symbols: &[Symbol]) -> Option<usize> { - for symbol in symbols { - let opt = self.unresolved.iter().find(|s| s.symbol.name == symbol.name); - if let Some(unresolved) = opt { - return Some(unresolved.source_id); - } - } - return None; - } - - pub fn get_source_code_for_tracked_symbol(&self, symbol: &TrackedSymbol) -> &str { - let source_unit = &self.source_units[symbol.source_id].source_unit; - match symbol.source_role { - SourceRole::Main => source_unit.main.symbols.source_code.as_str(), - SourceRole::Head => match &source_unit.head { - Some(head) => head.symbols.source_code.as_str(), - None => unreachable!("Failed to find source for token"), - } - SourceRole::Tail => match &source_unit.tail { - Some(tail) => tail.symbols.source_code.as_str(), - None => unreachable!("Failed to find source for token"), - } - } - } - - /// Create a source file by concatenating all source units. - /// If the source unit dependency graph contains a cycle, the IDs of the - /// source units involved in the cycle will be returned. 
- pub fn get_merged_source_code(&self) -> Result<String, Vec<usize>> { - // The ID of a given source unit will come after the IDs of all - // source units which define at least one symbol referenced in the - // given source unit. - let head_order = { - let mut included_source_ids: Vec<usize> = Vec::new(); - let mut remaining_source_ids: Vec<usize> = Vec::new(); - // Reverse the order so that the root unit is the last to be added. - for i in (0..self.source_units.len()).rev() { - remaining_source_ids.push(i); - } - - 'restart: while !remaining_source_ids.is_empty() { - 'next: for (i, id) in remaining_source_ids.iter().enumerate() { - let unit = &self.source_units[*id]; - for parent_id in &unit.parent_ids { - if !included_source_ids.contains(&parent_id) { - continue 'next; - } - } - included_source_ids.push(*id); - remaining_source_ids.remove(i); - continue 'restart; - } - // All remaining source units depend on at least one remaining - // source unit, indicating a dependency cycle. - return Err(remaining_source_ids); - } - included_source_ids - }; - - let mut source_code = String::new(); - - // Push head source code in macro-definition order. - for id in &head_order { - let source_unit = &self.source_units[*id]; - if let Some(head) = &source_unit.source_unit.head { - push_source_code_to_string(&mut source_code, head); - } - } - // Push main source code in source-added order. - for source_unit in self.source_units.iter() { - let main = &source_unit.source_unit.main; - push_source_code_to_string(&mut source_code, &main); - } - // Push tail source code in reverse source-added order. - for source_unit in self.source_units.iter().rev() { - if let Some(tail) = &source_unit.source_unit.tail { - push_source_code_to_string(&mut source_code, tail); - } - } - return Ok(source_code); - } -} - - -fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) { - // Don't push source code if it contains only whitespace. 
- let source_code = &source_file.symbols.source_code; - if source_code.chars().all(|c| c.is_whitespace()) { return; } - // Ensure that sections are separated by two newlines. - if !string.is_empty() { - if !string.ends_with('\n') { string.push('\n'); } - if !string.ends_with("\n\n") { string.push('\n'); } - } - // Write a path comment to the string. - let path_str = source_file.path.as_os_str().to_string_lossy(); - let path_comment = format!("(: {path_str} )\n"); - string.push_str(&path_comment); - string.push_str(&source_code); -} - - -pub struct HeirarchicalSourceUnit { - pub source_unit: SourceUnit, - /// IDs of units which were added to resolve symbol references this unit. - pub child_ids: Vec<usize>, - /// IDs of units which resolve macro references in this unit. - pub parent_ids: Vec<usize>, -} - - -pub struct TrackedSymbol { - pub symbol: Symbol, - pub source_id: usize, - pub source_role: SourceRole, -} - - -#[derive(Clone, Copy)] -pub enum SourceRole { - Main, - Head, - Tail, -} - - -impl PartialEq for TrackedSymbol { - fn eq(&self, other: &TrackedSymbol) -> bool { - self.symbol.name.eq(&other.symbol.name) - } -} diff --git a/src/tokens.rs b/src/tokens.rs deleted file mode 100644 index 81bf9d5..0000000 --- a/src/tokens.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod syntactic; -mod semantic; -mod instruction; -mod value; - -pub use syntactic::*; -pub use semantic::*; -pub use instruction::*; -pub use value::*; diff --git a/src/tokens/instruction.rs b/src/tokens/instruction.rs deleted file mode 100644 index d5fb3e5..0000000 --- a/src/tokens/instruction.rs +++ /dev/null @@ -1,170 +0,0 @@ -use Operation as Op; - - -pub struct Instruction { - pub value: u8, -} - - -impl Instruction { - pub fn operation(&self) -> Operation { - match self.value & 0x1f { - 0x00=>Op::HLT, 0x01=>Op::JMP, 0x02=>Op::JCN, 0x03=>Op::JCK, - 0x04=>Op::LDA, 0x05=>Op::STA, 0x06=>Op::LDD, 0x07=>Op::STD, - 0x08=>Op::PSH, 0x09=>Op::POP, 0x0a=>Op::CPY, 0x0b=>Op::SPL, - 0x0c=>Op::DUP, 0x0d=>Op::OVR, 
0x0e=>Op::SWP, 0x0f=>Op::ROT, - 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, - 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, - 0x18=>Op::IOR, 0x19=>Op::XOR, 0x1a=>Op::AND, 0x1b=>Op::NOT, - 0x1c=>Op::SHF, 0x1d=>Op::SHC, 0x1e=>Op::TAL, 0x1f=>Op::REV, - _ => unreachable!(), - } - } - - pub fn return_mode(&self) -> bool { - self.value & 0x80 != 0 - } - - pub fn literal_mode(&self) -> bool { - self.value & 0x40 != 0 - } - - pub fn double_mode(&self) -> bool { - self.value & 0x20 != 0 - } -} - - -impl std::fmt::Display for Instruction { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", match self.value { - // Control operators - 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" , - 0x01=>"JMP",0x21=>"JMS" ,0x41=>"JMP:",0x61=>"JMS:" ,0x81=>"JMPr",0xA1=>"JMSr" ,0xC1=>"JMPr:",0xE1=>"JMSr:" , - 0x02=>"JCN",0x22=>"JCS" ,0x42=>"JCN:",0x62=>"JCS:" ,0x82=>"JCNr",0xA2=>"JCSr" ,0xC2=>"JCNr:",0xE2=>"JCSr:" , - 0x03=>"JCK",0x23=>"JCK*",0x43=>"JCK:",0x63=>"JCK*:",0x83=>"JCKr",0xA3=>"JCKr*",0xC3=>"JCKr:",0xE3=>"JCKr*:", - 0x04=>"LDA",0x24=>"LDA*",0x44=>"LDA:",0x64=>"LDA*:",0x84=>"LDAr",0xA4=>"LDAr*",0xC4=>"LDAr:",0xE4=>"LDAr*:", - 0x05=>"STA",0x25=>"STA*",0x45=>"STA:",0x65=>"STA*:",0x85=>"STAr",0xA5=>"STAr*",0xC5=>"STAr:",0xE5=>"STAr*:", - 0x06=>"LDD",0x26=>"LDD*",0x46=>"LDD:",0x66=>"LDD*:",0x86=>"LDDr",0xA6=>"LDDr*",0xC6=>"LDDr:",0xE6=>"LDDr*:", - 0x07=>"STD",0x27=>"STD*",0x47=>"STD:",0x67=>"STD*:",0x87=>"STDr",0xA7=>"STDr*",0xC7=>"STDr:",0xE7=>"STDr*:", - // Stack operators - 0x08=>"PSH",0x28=>"PSH*",0x48=>"PSH:",0x68=>"PSH*:",0x88=>"PSHr",0xA8=>"PSHr*",0xC8=>"PSHr:",0xE8=>"PSHr*:", - 0x09=>"POP",0x29=>"POP*",0x49=>"POP:",0x69=>"POP*:",0x89=>"POPr",0xA9=>"POPr*",0xC9=>"POPr:",0xE9=>"POPr*:", - 0x0A=>"CPY",0x2A=>"CPY*",0x4A=>"CPY:",0x6A=>"CPY*:",0x8A=>"CPYr",0xAA=>"CPYr*",0xCA=>"CPYr:",0xEA=>"CPYr*:", - 
0x0B=>"SPL",0x2B=>"SPL*",0x4B=>"SPL:",0x6B=>"SPL*:",0x8B=>"SPLr",0xAB=>"SPLr*",0xCB=>"SPLr:",0xEB=>"SPLr*:", - 0x0C=>"DUP",0x2C=>"DUP*",0x4C=>"DUP:",0x6C=>"DUP*:",0x8C=>"DUPr",0xAC=>"DUPr*",0xCC=>"DUPr:",0xEC=>"DUPr*:", - 0x0D=>"OVR",0x2D=>"OVR*",0x4D=>"OVR:",0x6D=>"OVR*:",0x8D=>"OVRr",0xAD=>"OVRr*",0xCD=>"OVRr:",0xED=>"OVRr*:", - 0x0E=>"SWP",0x2E=>"SWP*",0x4E=>"SWP:",0x6E=>"SWP*:",0x8E=>"SWPr",0xAE=>"SWPr*",0xCE=>"SWPr:",0xEE=>"SWPr*:", - 0x0F=>"ROT",0x2F=>"ROT*",0x4F=>"ROT:",0x6F=>"ROT*:",0x8F=>"ROTr",0xAF=>"ROTr*",0xCF=>"ROTr:",0xEF=>"ROTr*:", - // Numeric operators - 0x10=>"ADD",0x30=>"ADD*",0x50=>"ADD:",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr*",0xD0=>"ADDr:",0xF0=>"ADDr*:", - 0x11=>"SUB",0x31=>"SUB*",0x51=>"SUB:",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr*",0xD1=>"SUBr:",0xF1=>"SUBr*:", - 0x12=>"INC",0x32=>"INC*",0x52=>"INC:",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr*",0xD2=>"INCr:",0xF2=>"INCr*:", - 0x13=>"DEC",0x33=>"DEC*",0x53=>"DEC:",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr*",0xD3=>"DECr:",0xF3=>"DECr*:", - 0x14=>"LTH",0x34=>"LTH*",0x54=>"LTH:",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr*",0xD4=>"LTHr:",0xF4=>"LTHr*:", - 0x15=>"GTH",0x35=>"GTH*",0x55=>"GTH:",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr*",0xD5=>"GTHr:",0xF5=>"GTHr*:", - 0x16=>"EQU",0x36=>"EQU*",0x56=>"EQU:",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr*",0xD6=>"EQUr:",0xF6=>"EQUr*:", - 0x17=>"NQK",0x37=>"NQK*",0x57=>"NQK:",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr*",0xD7=>"NQKr:",0xF7=>"NQKr*:", - // Bitwise operators - 0x18=>"IOR",0x38=>"IOR*",0x58=>"IOR:",0x78=>"IOR*:",0x98=>"IORr",0xB8=>"IORr*",0xD8=>"IORr:",0xF8=>"IORr*:", - 0x19=>"XOR",0x39=>"XOR*",0x59=>"XOR:",0x79=>"XOR*:",0x99=>"XORr",0xB9=>"XORr*",0xD9=>"XORr:",0xF9=>"XORr*:", - 0x1A=>"AND",0x3A=>"AND*",0x5A=>"AND:",0x7A=>"AND*:",0x9A=>"ANDr",0xBA=>"ANDr*",0xDA=>"ANDr:",0xFA=>"ANDr*:", - 0x1B=>"NOT",0x3B=>"NOT*",0x5B=>"NOT:",0x7B=>"NOT*:",0x9B=>"NOTr",0xBB=>"NOTr*",0xDB=>"NOTr:",0xFB=>"NOTr*:", - 
0x1C=>"SHF",0x3C=>"SHF*",0x5C=>"SHF:",0x7C=>"SHF*:",0x9C=>"SHFr",0xBC=>"SHFr*",0xDC=>"SHFr:",0xFC=>"SHFr*:", - 0x1D=>"SHC",0x3D=>"SHC*",0x5D=>"SHC:",0x7D=>"SHC*:",0x9D=>"SHCr",0xBD=>"SHCr*",0xDD=>"SHCr:",0xFD=>"SHCr*:", - 0x1E=>"TAL",0x3E=>"TAL*",0x5E=>"TAL:",0x7E=>"TAL*:",0x9E=>"TALr",0xBE=>"TALr*",0xDE=>"TALr:",0xFE=>"TALr*:", - 0x1F=>"REV",0x3F=>"REV*",0x5F=>"REV:",0x7F=>"REV*:",0x9F=>"REVr",0xBF=>"REVr*",0xDF=>"REVr:",0xFF=>"REVr*:", - }) - } -} - - -impl std::str::FromStr for Instruction { - type Err = (); - - fn from_str(token: &str) -> Result<Self, Self::Err> { - Ok( Instruction { value: match token { - // Control operators - "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, - "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1, - "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2, - "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3, - "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4, - "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5, - "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6, - "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7, - // Stack operators - "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8, - "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9, - "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA, - "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB, - 
"DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC, - "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED, - "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE, - "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF, - // Numeric operators - "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0, - "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1, - "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2, - "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3, - "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4, - "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5, - "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6, - "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7, - // Bitwise operators - "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8, - "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9, - "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA, - "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB, - "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC, - 
"SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD, - "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE, - "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF, - _ => return Err(()), - }}) - } -} - - -pub enum Operation { - HLT, JMP, JCN, JCK, - LDA, STA, LDD, STD, - PSH, POP, CPY, SPL, - DUP, OVR, SWP, ROT, - ADD, SUB, INC, DEC, - LTH, GTH, EQU, NQK, - IOR, XOR, AND, NOT, - SHF, SHC, TAL, REV, -} - - -impl From<Operation> for u8 { - fn from(operation: Operation) -> Self { - match operation { - Op::HLT=>0x00, Op::JMP=>0x01, Op::JCN=>0x02, Op::JCK=>0x03, - Op::LDA=>0x04, Op::STA=>0x05, Op::LDD=>0x06, Op::STD=>0x07, - Op::PSH=>0x08, Op::POP=>0x09, Op::CPY=>0x0a, Op::SPL=>0x0b, - Op::DUP=>0x0c, Op::OVR=>0x0d, Op::SWP=>0x0e, Op::ROT=>0x0f, - Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13, - Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17, - Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1a, Op::NOT=>0x1b, - Op::SHF=>0x1c, Op::SHC=>0x1d, Op::TAL=>0x1e, Op::REV=>0x1f, - } - } -} - - -impl std::fmt::Display for Operation { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", match self { - Op::HLT=>"HLT", Op::JMP=>"JMP", Op::JCN=>"JCN", Op::JCK=>"JCK", - Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD", - Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", Op::SPL=>"SPL", - Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT", - Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC", - Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK", - Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT", - Op::SHF=>"SHF", Op::SHC=>"SHC", Op::TAL=>"TAL", Op::REV=>"REV", - }) - } -} diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs deleted file mode 100644 index ac5179c..0000000 --- 
a/src/tokens/semantic.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub struct SemanticToken { - pub source: SourceSpan, - pub bytecode: BytecodeSpan, - pub variant: SemanticTokenVariant, -} - - -pub enum SemanticTokenVariant { - LabelDefinition(LabelDefinition), - MacroDefinition(MacroDefinition), - - /// Pointer to the matching label definition. - LabelReference(usize), - /// Pointer to the matching macro definition. - MacroInvocation(usize), - - Literal(Value), - Padding(Value), - Instruction(Instruction), - - Comment(String), - String(Vec<u8>), - - /// Pointer to the matching block close. - BlockOpen(usize), - /// Pointer to the matching block open. - BlockClose(usize), - MarkOpen, - MarkClose, - - Error(SemanticParseError), -} - -impl std::fmt::Debug for SemanticToken { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - match &self.variant { - SemVar::LabelDefinition(def) => write!(f, "LabelDefinition({})", def.name), - SemVar::MacroDefinition(def) => write!(f, "MacroDefinition({})", def.name), - SemVar::LabelReference(pointer) => write!(f, "LabelReference(*{pointer})"), - SemVar::MacroInvocation(pointer) => write!(f, "MacroInvocation(*{pointer})"), - SemVar::Literal(value) => write!(f, "Literal({value})"), - SemVar::Padding(value) => write!(f, "Padding({value})"), - SemVar::Instruction(instr) => write!(f, "Instruction(0x{:02x})", instr.value), - SemVar::Comment(comment) => write!(f, "Comment({comment})"), - SemVar::String(string) => write!(f, "String({})", String::from_utf8_lossy(&string)), - SemVar::BlockOpen(_) => write!(f, "BlockOpen"), - SemVar::BlockClose(_) => write!(f, "BlockClose"), - SemVar::MarkOpen => write!(f, "MarkOpen"), - SemVar::MarkClose => write!(f, "MarkClose"), - SemVar::Error(_) => write!(f, "Error"), - } - } -} - - -pub struct LabelDefinition { - /// The absolute name of the label or sublabel. - pub name: String, - /// List of pointers to label reference tokens. 
- pub references: Vec<usize>, -} - - -pub struct MacroDefinition { - pub name: String, - pub references: Vec<usize>, - pub body_tokens: Vec<SemanticToken>, -} - - -pub enum SemanticParseError { - LabelDefinitionInMacroDefinition, - MacroDefinitionInMacroDefinition, - - StrayMacroTerminator, - StrayBlockClose, - UnclosedBlock, - - UndefinedSymbol(String), - RedefinedSymbol((String, SourceSpan)), - - MacroInvocationBeforeDefinition((String, SourceSpan)), - - SyntaxError(SyntacticParseError) -} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs deleted file mode 100644 index 8684ed9..0000000 --- a/src/tokens/syntactic.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::*; - - -pub struct SyntacticToken { - /// Location of token in source files. - pub source: SourceSpan, - pub variant: SyntacticTokenVariant, -} - - -pub enum SyntacticTokenVariant { - LabelDefinition(String), - MacroDefinition(String), - MacroDefinitionTerminator, - - Literal(Value), - Padding(Value), - Instruction(Instruction), - - Comment(String), - String(Vec<u8>), - - BlockOpen, - BlockClose, - MarkOpen, - MarkClose, - - Symbol(String), - - Error(SyntacticParseError), -} - - -pub enum SyntacticParseError { - UnterminatedComment, - UnterminatedRawString, - UnterminatedNullString, - InvalidPaddingValue(String), -} diff --git a/src/translators.rs b/src/translators.rs deleted file mode 100644 index cce5633..0000000 --- a/src/translators.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod syntactic_parser; -mod semantic_parser; -mod bytecode_generator; -mod symbols_generator; - -pub use syntactic_parser::*; -pub use semantic_parser::*; -pub use bytecode_generator::*; -pub use symbols_generator::*; diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs deleted file mode 100644 index 956aca5..0000000 --- a/src/translators/bytecode_generator.rs +++ /dev/null @@ -1,131 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub fn generate_bytecode(semantic_tokens: &mut 
[SemanticToken]) -> Vec<u8> { - let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens); - generator.generate() -} - - -/// Translate semantic tokens into bytecode. -struct BytecodeGenerator<'a> { - semantic_tokens: &'a mut [SemanticToken], - block_stack: Vec<usize>, - bytecode: Vec<u8>, - /// (address in bytecode, label definition token index) - label_references: Vec<(usize, usize)>, -} - -impl<'a> BytecodeGenerator<'a> { - pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self { - Self { - semantic_tokens, - block_stack: Vec::new(), - bytecode: Vec::new(), - label_references: Vec::new(), - } - } - - pub fn generate(mut self) -> Vec<u8> { - for i in 0..self.semantic_tokens.len() { - let address = self.bytecode.len(); - self.generate_bytecode_for_token(i, None); - self.semantic_tokens[i].bytecode = BytecodeSpan { - bytes: self.bytecode[address..].to_vec(), - location: BytecodeLocation { - address, - length: self.bytecode.len().saturating_sub(address), - } - }; - } - - // Replace blank label references in bytecode with real label addresses. - // The layer of indirection is necessary because the iteration borrows - // self immutably. - let mut insertions: Vec<(usize, u16)> = Vec::new(); - for (bytecode_address, token_pointer) in &self.label_references { - let label_token = &self.semantic_tokens[*token_pointer]; - // TODO: If greater than u16, print a warning. - let address_value = label_token.bytecode.location.address as u16; - insertions.push((*bytecode_address, address_value)); - } - for (bytecode_address, address_value) in insertions { - self.replace_address_in_bytecode(bytecode_address, address_value); - } - - // Strip trailing null bytes from the bytecode. 
- let mut length = self.bytecode.len(); - for (i, byte) in self.bytecode.iter().enumerate().rev() { - match *byte == 0 { - true => length = i, - false => break, - }; - } - self.bytecode.truncate(length); - - return self.bytecode; - } - - fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) { - macro_rules! push_byte { - ($byte:expr) => { self.bytecode.push($byte) }; } - macro_rules! push_double { - ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; } - macro_rules! pad { - ($len:expr) => { for _ in 0..$len { push_byte!(0); } } } - - let semantic_token = if let Some(macro_pointer) = macro_pointer { - let macro_definition = &self.semantic_tokens[macro_pointer]; - if let SemVar::MacroDefinition(def) = ¯o_definition.variant { - &def.body_tokens[pointer] - } else { unreachable!() } - } else { - &self.semantic_tokens[pointer] - }; - match &semantic_token.variant { - SemVar::MacroInvocation(pointer) => { - let macro_definition = &self.semantic_tokens[*pointer]; - if let SemVar::MacroDefinition(def) = ¯o_definition.variant { - let length = def.body_tokens.len(); - let macro_pointer = Some(*pointer); - for body_pointer in 0..length { - // Recurse, generate bytecode for each macro body token. 
- self.generate_bytecode_for_token(body_pointer, macro_pointer); - } - } else { unreachable!() } - } - SemVar::Literal(value) => match value { - Value::Byte(value) => push_byte!(*value), - Value::Double(value) => push_double!(value), - } - SemVar::Padding(value) => match value { - Value::Byte(value) => pad!(*value), - Value::Double(value) => pad!(*value), - } - SemVar::Instruction(instr) => push_byte!(instr.value), - SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes), - SemVar::LabelReference(pointer) => { - self.label_references.push((self.bytecode.len(), *pointer)); - push_double!(0u16); - } - SemVar::BlockOpen(_) => { - self.block_stack.push(self.bytecode.len()); - push_double!(0u16); - } - SemVar::BlockClose(_) => { - let bytecode_address = self.block_stack.pop().unwrap(); - // TODO: If greater than u16, print a warning. - let address_value = self.bytecode.len() as u16; - self.replace_address_in_bytecode(bytecode_address, address_value); - } - _ => (), - }; - } - - fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) { - let range = bytecode_address..bytecode_address+2; - self.bytecode[range].clone_from_slice(&address_value.to_be_bytes()); - } -} diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs deleted file mode 100644 index cb6a435..0000000 --- a/src/translators/semantic_parser.rs +++ /dev/null @@ -1,245 +0,0 @@ -use crate::*; - -use std::collections::HashMap; -use std::path::PathBuf; - -use SyntacticTokenVariant as SynVar; -use SemanticTokenVariant as SemVar; -use SemanticParseError as SemErr; - - -pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> { - let semantic_parser = SemanticParser::from_source_code(source_code, path); - semantic_parser.parse() -} - - -/// Translate syntactic tokens into semantic tokens. 
-struct SemanticParser { - labels: HashMap<String, Definition>, - macros: HashMap<String, Definition>, - syntactic_tokens: Vec<SyntacticToken>, - /// Index of the current outer token. - current_outer_index: usize, -} - -impl SemanticParser { - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { - let mut labels = HashMap::new(); - let mut macros = HashMap::new(); - let mut syntactic_tokens = Vec::new(); - - let parser = SyntacticParser::from_source_code(source_code, path); - for syntactic_token in parser { - let definition = Definition::new(syntactic_token.source.clone()); - match &syntactic_token.variant { - SynVar::LabelDefinition(name) => { - let _ = labels.try_insert(name.to_owned(), definition); - }, - SynVar::MacroDefinition(name) => { - let _ = macros.try_insert(name.to_owned(), definition); - }, - _ => (), - } - syntactic_tokens.push(syntactic_token); - } - - Self { - labels, - macros, - syntactic_tokens, - current_outer_index: 0, - } - } - - /// Parse syntactic tokens as semantic tokens. - pub fn parse(mut self) -> Vec<SemanticToken> { - let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens); - let mut syntactic = syntactic_tokens.into_iter(); - let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false); - - // Insert real label definition pointers into label reference tokens. - for definition in self.labels.values_mut() { - if let Some(definition_pointer) = definition.pointer { - // Insert definition pointer into reference tokens. - for reference_pointer in &definition.references { - let reference_token = &mut semantic_tokens[*reference_pointer]; - reference_token.variant = SemVar::LabelReference(definition_pointer); - } - // Insert reference pointers into definition token. 
- let definition_token = &mut semantic_tokens[definition_pointer]; - if let SemVar::LabelDefinition(ref mut def) = definition_token.variant { - def.references = std::mem::take(&mut definition.references); - } else { unreachable!() } - // Insert definition pointer into reference tokens inside macros. - for (outer, inner) in &definition.deep_references { - let macro_token = &mut semantic_tokens[*outer]; - if let SemVar::MacroDefinition(ref mut def) = macro_token.variant { - let reference_token = &mut def.body_tokens[*inner]; - reference_token.variant = SemVar::LabelReference(definition_pointer); - } else { unreachable!() } - } - // TODO: Record deep references in macro and label definitions? - } - } - - return semantic_tokens; - } - - fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken> - where I: Iterator<Item = SyntacticToken> - { - let mut semantic_tokens: Vec<SemanticToken> = Vec::new(); - let mut block_stack: Vec<usize> = Vec::new(); - - while let Some(syntactic_token) = parser.next() { - let current_index = semantic_tokens.len(); - if !in_macro { - self.current_outer_index = current_index; - } - - let semantic_token_variant = match syntactic_token.variant { - SynVar::LabelDefinition(name) => { - if in_macro { - SemVar::Error(SemErr::LabelDefinitionInMacroDefinition) - } else if let Some(definition) = self.macros.get(&name) { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else if let Some(definition) = self.labels.get_mut(&name) { - if definition.pointer.is_some() { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else { - definition.pointer = Some(current_index); - let references = Vec::new(); - SemVar::LabelDefinition(LabelDefinition { name, references }) - } - } else { - unreachable!() - } - } - SynVar::MacroDefinition(name) => { - if in_macro { - SemVar::Error(SemErr::MacroDefinitionInMacroDefinition) - } else 
if let Some(definition) = self.labels.get(&name) { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else if let Some(definition) = self.macros.get_mut(&name) { - if definition.pointer.is_some() { - let source = definition.source.clone(); - SemVar::Error(SemErr::RedefinedSymbol((name, source))) - } else { - definition.pointer = Some(current_index); - let references = Vec::new(); - let body_tokens = self.pull_semantic_tokens(parser, true); - SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens }) - } - } else { - unreachable!() - } - } - SynVar::MacroDefinitionTerminator => if in_macro { - break; - } else { - SemVar::Error(SemErr::StrayMacroTerminator) - } - SynVar::Literal(value) => { - SemVar::Literal(value) - } - SynVar::Padding(value) => { - SemVar::Padding(value) - } - SynVar::Instruction(instr) => { - SemVar::Instruction(instr) - } - SynVar::Comment(comment) => { - SemVar::Comment(comment) - } - SynVar::String(bytes) => { - SemVar::String(bytes) - } - SynVar::BlockOpen => { - block_stack.push(current_index); - SemVar::BlockOpen(0) - } - SynVar::BlockClose => { - if let Some(pointer) = block_stack.pop() { - let open = &mut semantic_tokens[pointer]; - open.variant = SemVar::BlockOpen(current_index); - SemVar::BlockClose(pointer) - } else { - SemVar::Error(SemErr::StrayBlockClose) - } - } - SynVar::MarkOpen => { - SemVar::MarkOpen - } - SynVar::MarkClose => { - SemVar::MarkClose - } - SynVar::Symbol(name) => { - if let Some(definition) = self.labels.get_mut(&name) { - if in_macro { - let pointer = (self.current_outer_index, current_index); - definition.deep_references.push(pointer); - } else { - definition.references.push(current_index); - } - SemVar::LabelReference(0) - } else if let Some(definition) = self.macros.get_mut(&name) { - if let Some(pointer) = definition.pointer { - if !in_macro { definition.references.push(current_index); } - SemVar::MacroInvocation(pointer) - } else { - let 
source = definition.source.clone(); - SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source))) - } - } else { - SemVar::Error(SemErr::UndefinedSymbol(name)) - } - } - SynVar::Error(syntax_err) => { - SemVar::Error(SemErr::SyntaxError(syntax_err)) - } - }; - - let semantic_token = SemanticToken { - source: syntactic_token.source, - bytecode: BytecodeSpan::default(), - variant: semantic_token_variant, - }; - semantic_tokens.push(semantic_token); - } - - if in_macro { - //TODO: UnterminatedMacroDefinition - } - - // Replace each unclosed BlockOpen token with an error. - for block_pointer in block_stack { - semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock); - } - - return semantic_tokens; - } -} - - -struct Definition { - pub source: SourceSpan, - pub pointer: Option<usize>, - pub references: Vec<usize>, - /// (macro index, label reference index) - pub deep_references: Vec<(usize, usize)>, -} - -impl Definition { - pub fn new(source: SourceSpan) -> Self { - Self { - source, - pointer: None, - references: Vec::new(), - deep_references: Vec::new(), - } - } -} diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs deleted file mode 100644 index d30facd..0000000 --- a/src/translators/symbols_generator.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String { - let mut symbols = String::new(); - - for token in semantic_tokens { - if let SemVar::LabelDefinition(definition) = &token.variant { - let address = token.bytecode.location.address; - if address > 0xffff { break; } - let name = &definition.name; - let location = token.source.location(); - symbols.push_str(&format!("{address:04x} {name} {location}\n")); - } - } - - return symbols; -} diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs deleted file mode 100644 index 7279daf..0000000 --- 
a/src/translators/syntactic_parser.rs +++ /dev/null @@ -1,247 +0,0 @@ -use crate::*; - -use std::path::PathBuf; - - -/// Translate raw source code characters into syntactic tokens. -pub struct SyntacticParser { - /// Path of file from which the source was read. - path: Option<PathBuf>, - /// Path of the original source file. - source_path: Option<PathBuf>, - /// Position of the next character to be read. - position: Position, - /// Previous value of the position field. - prev_position: Position, - /// Line where the embedded source file begins. - source_line_start: usize, - /// Characters waiting to be parsed, in reverse order. - chars: Vec<char>, - /// The token currently being parsed. - token_source_string: String, - /// The name of the most recently parsed label. - label: String, -} - - -impl SyntacticParser { - /// Parse source code. - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { - Self { - path: path.map(|p| p.into()), - source_path: None, - position: Position { line: 0, column: 0 }, - prev_position: Position { line: 0, column: 0 }, - source_line_start: 0, - chars: source_code.chars().rev().collect(), - token_source_string: String::new(), - label: String::new(), - } - } - - /// Return the next character, keeping it on the queue. - fn peek_char(&self) -> Option<char> { - self.chars.last().copied() - } - - /// Return the next character, removing it from the queue. - fn eat_char(&mut self) -> Option<char> { - let option = self.chars.pop(); - if let Some(c) = option { - self.prev_position = self.position; - self.position.advance(c); - self.token_source_string.push(c); - } - return option; - } - - /// Remove the next character from the queue. - fn drop_char(&mut self) { - if let Some(c) = self.chars.pop() { - self.prev_position = self.position; - self.position.advance(c); - } - } - - /// Remove leading whitespace. 
- fn drop_whitespace(&mut self) { - while let Some(c) = self.peek_char() { - match c.is_whitespace() { - true => self.drop_char(), - false => break, - } - } - } - - /// Remove a full token from the queue. - fn eat_token(&mut self) -> String { - const DELIMITERS: [char; 13] = - ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; - let mut token = String::new(); - while let Some(peek) = self.peek_char() { - if peek.is_whitespace() || DELIMITERS.contains(&peek) { - break; - } - let c = self.eat_char().unwrap(); - token.push(c); - if c == ':' { - break; - } - } - token - } - - /// Return all characters until the delimiter, removing all returned - /// characters and the delimiter from the queue. Returns None if end - /// of source is reached before delimiter is found. - fn eat_to_delim(&mut self, delim: char) -> Option<String> { - let mut token = String::new(); - while let Some(c) = self.eat_char() { - self.token_source_string.push(c); - match c == delim { - true => return Some(token), - false => token.push(c), - } - } - return None; - } - - fn is_line_empty(&self) -> bool { - for c in self.chars.iter().rev() { - if *c == '\n' { - return true; - } - if !c.is_whitespace() { - return false - } - } - return false; - } -} - - -impl Iterator for SyntacticParser { - type Item = SyntacticToken; - - /// Sequentially parse tokens from the source code. - fn next(&mut self) -> Option<SyntacticToken> { - use SyntacticTokenVariant as SynVar; - use SyntacticParseError as SynErr; - - self.drop_whitespace(); - let start = self.position; - - let variant = match self.eat_char()? 
{ - '@' => { - self.label = self.eat_token(); - SynVar::LabelDefinition(self.label.clone()) - } - '&' => { - let token = self.eat_token(); - let sublabel = format!("{}/{token}", self.label); - SynVar::LabelDefinition(sublabel) - } - '%' => SynVar::MacroDefinition(self.eat_token()), - ';' => SynVar::MacroDefinitionTerminator, - '[' => SynVar::MarkOpen, - ']' => SynVar::MarkClose, - '{' => SynVar::BlockOpen, - '}' => SynVar::BlockClose, - '(' => match self.eat_to_delim(')') { - Some(string) => SynVar::Comment(string), - None => SynVar::Error(SynErr::UnterminatedComment), - } - '\'' => match self.eat_to_delim('\'') { - Some(string) => SynVar::String(string.as_bytes().to_vec()), - None => SynVar::Error(SynErr::UnterminatedRawString), - } - '"' => match self.eat_to_delim('"') { - Some(string) => { - let mut bytes = string.as_bytes().to_vec(); - bytes.push(0x00); - SynVar::String(bytes) - } - None => SynVar::Error(SynErr::UnterminatedNullString), - } - '#' => { - let token = self.eat_token(); - match token.parse::<Value>() { - Ok(value) => SynVar::Padding(value), - Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), - } - }, - '~' => { - let token = self.eat_token(); - let symbol = format!("{}/{token}", self.label); - SynVar::Symbol(symbol) - } - ':' => SynVar::Symbol(String::from(':')), - c => { - let token = format!("{c}{}", self.eat_token()); - match token.parse::<Value>() { - Ok(value) => SynVar::Literal(value), - Err(_) => match token.parse::<Instruction>() { - Ok(instruction) => SynVar::Instruction(instruction), - Err(_) => SynVar::Symbol(token), - } - } - } - }; - - // Parse source path comments. - if let SynVar::Comment(comment) = &variant { - // Check that the comment fills the entire line. - if start.column == 0 && self.is_line_empty() { - if let Some(path) = comment.strip_prefix(": ") { - self.source_path = Some(PathBuf::from(path.trim())); - self.source_line_start = start.line + 1; - } - } - } - - // Find location in current merged file. 
- let in_merged = SourceLocation { - path: self.path.to_owned(), - start, - end: self.prev_position, - }; - - // Find location in original source file. - let in_source = if start.line >= self.source_line_start { - match &self.source_path { - Some(path) => { - let offset = self.source_line_start; - Some( SourceLocation { - path: Some(path.to_owned()), - start: Position { - line: in_merged.start.line.saturating_sub(offset), - column: in_merged.start.column, - }, - end: Position { - line: in_merged.end.line.saturating_sub(offset), - column: in_merged.end.column, - } - }) - } - None => None, - } - } else { - None - }; - - let string = std::mem::take(&mut self.token_source_string); - let source = SourceSpan { string, in_merged, in_source }; - Some( SyntacticToken { source, variant } ) - } -} - - -#[derive(Debug)] -pub enum ParseError { - InvalidExtension, - NotFound, - NotReadable, - IsADirectory, - InvalidUtf8, - Unknown, -} diff --git a/src/types/instruction.rs b/src/types/instruction.rs new file mode 100644 index 0000000..252fc68 --- /dev/null +++ b/src/types/instruction.rs @@ -0,0 +1,168 @@ +use crate::*; + +use Operation as Op; + + +pub struct Instruction { + pub value: u8, +} + +impl Instruction { + pub fn operation(&self) -> Operation { + match self.value & 0x1f { + 0x00=>Op::HLT, 0x01=>Op::PSH, 0x02=>Op::POP, 0x03=>Op::CPY, + 0x04=>Op::DUP, 0x05=>Op::OVR, 0x06=>Op::SWP, 0x07=>Op::ROT, + 0x08=>Op::JMP, 0x09=>Op::JMS, 0x0a=>Op::JCN, 0x0b=>Op::JCS, + 0x0c=>Op::LDA, 0x0d=>Op::STA, 0x0e=>Op::LDD, 0x0f=>Op::STD, + 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, + 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, + 0x18=>Op::SHL, 0x19=>Op::SHR, 0x1a=>Op::ROL, 0x1b=>Op::ROR, + 0x1c=>Op::IOR, 0x1d=>Op::XOR, 0x1e=>Op::AND, 0x1f=>Op::NOT, + _ => unreachable!(), + } + } + + pub fn return_mode(&self) -> bool { + self.value & RETURN_MODE != 0 + } + + pub fn wide_mode(&self) -> bool { + self.value & WIDE_MODE != 0 + } + + pub fn immediate_mode(&self) -> 
bool { + self.value & IMMEDIATE_MODE != 0 + } +} + +impl std::fmt::Display for Instruction { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self.value { + // Stack operators + 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" , + 0x01=>"PSH",0x21=>"PSH:",0x41=>"PSH*",0x61=>"PSH*:",0x81=>"PSHr",0xA1=>"PSHr:",0xC1=>"PSHr*",0xE1=>"PSHr*:", + 0x02=>"POP",0x22=>"POP:",0x42=>"POP*",0x62=>"POP*:",0x82=>"POPr",0xA2=>"POPr:",0xC2=>"POPr*",0xE2=>"POPr*:", + 0x03=>"CPY",0x23=>"CPY:",0x43=>"CPY*",0x63=>"CPY*:",0x83=>"CPYr",0xA3=>"CPYr:",0xC3=>"CPYr*",0xE3=>"CPYr*:", + 0x04=>"DUP",0x24=>"DUP:",0x44=>"DUP*",0x64=>"DUP*:",0x84=>"DUPr",0xA4=>"DUPr:",0xC4=>"DUPr*",0xE4=>"DUPr*:", + 0x05=>"OVR",0x25=>"OVR:",0x45=>"OVR*",0x65=>"OVR*:",0x85=>"OVRr",0xA5=>"OVRr:",0xC5=>"OVRr*",0xE5=>"OVRr*:", + 0x06=>"SWP",0x26=>"SWP:",0x46=>"SWP*",0x66=>"SWP*:",0x86=>"SWPr",0xA6=>"SWPr:",0xC6=>"SWPr*",0xE6=>"SWPr*:", + 0x07=>"ROT",0x27=>"ROT:",0x47=>"ROT*",0x67=>"ROT*:",0x87=>"ROTr",0xA7=>"ROTr:",0xC7=>"ROTr*",0xE7=>"ROTr*:", + // Control operators + 0x08=>"JMP",0x28=>"JMP:",0x48=>"JMP*",0x68=>"JMP*:",0x88=>"JMPr",0xA8=>"JMPr:",0xC8=>"JMPr*",0xE8=>"JMPr*:", + 0x09=>"JMS",0x29=>"JMS:",0x49=>"JMS*",0x69=>"JMS*:",0x89=>"JMSr",0xA9=>"JMSr:",0xC9=>"JMSr*",0xE9=>"JMSr*:", + 0x0A=>"JCN",0x2A=>"JCN:",0x4A=>"JCN*",0x6A=>"JCN*:",0x8A=>"JCNr",0xAA=>"JCNr:",0xCA=>"JCNr*",0xEA=>"JCNr*:", + 0x0B=>"JCS",0x2B=>"JCS:",0x4B=>"JCS*",0x6B=>"JCS*:",0x8B=>"JCSr",0xAB=>"JCSr:",0xCB=>"JCSr*",0xEB=>"JCSr*:", + 0x0C=>"LDA",0x2C=>"LDA:",0x4C=>"LDA*",0x6C=>"LDA*:",0x8C=>"LDAr",0xAC=>"LDAr:",0xCC=>"LDAr*",0xEC=>"LDAr*:", + 0x0D=>"STA",0x2D=>"STA:",0x4D=>"STA*",0x6D=>"STA*:",0x8D=>"STAr",0xAD=>"STAr:",0xCD=>"STAr*",0xED=>"STAr*:", + 0x0E=>"LDD",0x2E=>"LDD:",0x4E=>"LDD*",0x6E=>"LDD*:",0x8E=>"LDDr",0xAE=>"LDDr:",0xCE=>"LDDr*",0xEE=>"LDDr*:", + 
0x0F=>"STD",0x2F=>"STD:",0x4F=>"STD*",0x6F=>"STD*:",0x8F=>"STDr",0xAF=>"STDr:",0xCF=>"STDr*",0xEF=>"STDr*:", + // Numeric operators + 0x10=>"ADD",0x30=>"ADD:",0x50=>"ADD*",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr:",0xD0=>"ADDr*",0xF0=>"ADDr*:", + 0x11=>"SUB",0x31=>"SUB:",0x51=>"SUB*",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr:",0xD1=>"SUBr*",0xF1=>"SUBr*:", + 0x12=>"INC",0x32=>"INC:",0x52=>"INC*",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr:",0xD2=>"INCr*",0xF2=>"INCr*:", + 0x13=>"DEC",0x33=>"DEC:",0x53=>"DEC*",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr:",0xD3=>"DECr*",0xF3=>"DECr*:", + 0x14=>"LTH",0x34=>"LTH:",0x54=>"LTH*",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr:",0xD4=>"LTHr*",0xF4=>"LTHr*:", + 0x15=>"GTH",0x35=>"GTH:",0x55=>"GTH*",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr:",0xD5=>"GTHr*",0xF5=>"GTHr*:", + 0x16=>"EQU",0x36=>"EQU:",0x56=>"EQU*",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr:",0xD6=>"EQUr*",0xF6=>"EQUr*:", + 0x17=>"NQK",0x37=>"NQK:",0x57=>"NQK*",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr:",0xD7=>"NQKr*",0xF7=>"NQKr*:", + // Bitwise operators + 0x18=>"SHL",0x38=>"SHL:",0x58=>"SHL*",0x78=>"SHL*:",0x98=>"SHLr",0xB8=>"SHLr:",0xD8=>"SHLr*",0xF8=>"SHLr*:", + 0x19=>"SHR",0x39=>"SHR:",0x59=>"SHR*",0x79=>"SHR*:",0x99=>"SHRr",0xB9=>"SHRr:",0xD9=>"SHRr*",0xF9=>"SHRr*:", + 0x1A=>"ROL",0x3A=>"ROL:",0x5A=>"ROL*",0x7A=>"ROL*:",0x9A=>"ROLr",0xBA=>"ROLr:",0xDA=>"ROLr*",0xFA=>"ROLr*:", + 0x1B=>"ROR",0x3B=>"ROR:",0x5B=>"ROR*",0x7B=>"ROR*:",0x9B=>"RORr",0xBB=>"RORr:",0xDB=>"RORr*",0xFB=>"RORr*:", + 0x1C=>"IOR",0x3C=>"IOR:",0x5C=>"IOR*",0x7C=>"IOR*:",0x9C=>"IORr",0xBC=>"IORr:",0xDC=>"IORr*",0xFC=>"IORr*:", + 0x1D=>"XOR",0x3D=>"XOR:",0x5D=>"XOR*",0x7D=>"XOR*:",0x9D=>"XORr",0xBD=>"XORr:",0xDD=>"XORr*",0xFD=>"XORr*:", + 0x1E=>"AND",0x3E=>"AND:",0x5E=>"AND*",0x7E=>"AND*:",0x9E=>"ANDr",0xBE=>"ANDr:",0xDE=>"ANDr*",0xFE=>"ANDr*:", + 0x1F=>"NOT",0x3F=>"NOT:",0x5F=>"NOT*",0x7F=>"NOT*:",0x9F=>"NOTr",0xBF=>"NOTr:",0xDF=>"NOTr*",0xFF=>"NOTr*:", + }) + } +} + +impl std::str::FromStr for Instruction { + type Err = 
(); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + Ok( Instruction { value: match token { + // Stack operators + "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, + "PSH"=>0x01,"PSH:"=>0x21,"PSH*"=>0x41,"PSH*:"=>0x61,"PSHr"=>0x81,"PSHr:"=>0xA1,"PSHr*"=>0xC1,"PSHr*:"=>0xE1, + ":"=>0x21, "*:"=>0x61, "r:"=>0xA1, "r*:"=>0xE1, + "POP"=>0x02,"POP:"=>0x22,"POP*"=>0x42,"POP*:"=>0x62,"POPr"=>0x82,"POPr:"=>0xA2,"POPr*"=>0xC2,"POPr*:"=>0xE2, + "CPY"=>0x03,"CPY:"=>0x23,"CPY*"=>0x43,"CPY*:"=>0x63,"CPYr"=>0x83,"CPYr:"=>0xA3,"CPYr*"=>0xC3,"CPYr*:"=>0xE3, + "DUP"=>0x04,"DUP:"=>0x24,"DUP*"=>0x44,"DUP*:"=>0x64,"DUPr"=>0x84,"DUPr:"=>0xA4,"DUPr*"=>0xC4,"DUPr*:"=>0xE4, + "OVR"=>0x05,"OVR:"=>0x25,"OVR*"=>0x45,"OVR*:"=>0x65,"OVRr"=>0x85,"OVRr:"=>0xA5,"OVRr*"=>0xC5,"OVRr*:"=>0xE5, + "SWP"=>0x06,"SWP:"=>0x26,"SWP*"=>0x46,"SWP*:"=>0x66,"SWPr"=>0x86,"SWPr:"=>0xA6,"SWPr*"=>0xC6,"SWPr*:"=>0xE6, + "ROT"=>0x07,"ROT:"=>0x27,"ROT*"=>0x47,"ROT*:"=>0x67,"ROTr"=>0x87,"ROTr:"=>0xA7,"ROTr*"=>0xC7,"ROTr*:"=>0xE7, + // Control operators + "JMP"=>0x08,"JMP:"=>0x28,"JMP*"=>0x48,"JMP*:"=>0x68,"JMPr"=>0x88,"JMPr:"=>0xA8,"JMPr*"=>0xC8,"JMPr*:"=>0xE8, + "JMS"=>0x09,"JMS:"=>0x29,"JMS*"=>0x49,"JMS*:"=>0x69,"JMSr"=>0x89,"JMSr:"=>0xA9,"JMSr*"=>0xC9,"JMSr*:"=>0xE9, + "JCN"=>0x0A,"JCN:"=>0x2A,"JCN*"=>0x4A,"JCN*:"=>0x6A,"JCNr"=>0x8A,"JCNr:"=>0xAA,"JCNr*"=>0xCA,"JCNr*:"=>0xEA, + "JCS"=>0x0B,"JCS:"=>0x2B,"JCS*"=>0x4B,"JCS*:"=>0x6B,"JCSr"=>0x8B,"JCSr:"=>0xAB,"JCSr*"=>0xCB,"JCSr*:"=>0xEB, + "LDA"=>0x0C,"LDA:"=>0x2C,"LDA*"=>0x4C,"LDA*:"=>0x6C,"LDAr"=>0x8C,"LDAr:"=>0xAC,"LDAr*"=>0xCC,"LDAr*:"=>0xEC, + "STA"=>0x0D,"STA:"=>0x2D,"STA*"=>0x4D,"STA*:"=>0x6D,"STAr"=>0x8D,"STAr:"=>0xAD,"STAr*"=>0xCD,"STAr*:"=>0xED, + "LDD"=>0x0E,"LDD:"=>0x2E,"LDD*"=>0x4E,"LDD*:"=>0x6E,"LDDr"=>0x8E,"LDDr:"=>0xAE,"LDDr*"=>0xCE,"LDDr*:"=>0xEE, + "STD"=>0x0F,"STD:"=>0x2F,"STD*"=>0x4F,"STD*:"=>0x6F,"STDr"=>0x8F,"STDr:"=>0xAF,"STDr*"=>0xCF,"STDr*:"=>0xEF, + // Numeric operators + 
"ADD"=>0x10,"ADD:"=>0x30,"ADD*"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr:"=>0xB0,"ADDr*"=>0xD0,"ADDr*:"=>0xF0, + "SUB"=>0x11,"SUB:"=>0x31,"SUB*"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr:"=>0xB1,"SUBr*"=>0xD1,"SUBr*:"=>0xF1, + "INC"=>0x12,"INC:"=>0x32,"INC*"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr:"=>0xB2,"INCr*"=>0xD2,"INCr*:"=>0xF2, + "DEC"=>0x13,"DEC:"=>0x33,"DEC*"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr:"=>0xB3,"DECr*"=>0xD3,"DECr*:"=>0xF3, + "LTH"=>0x14,"LTH:"=>0x34,"LTH*"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr:"=>0xB4,"LTHr*"=>0xD4,"LTHr*:"=>0xF4, + "GTH"=>0x15,"GTH:"=>0x35,"GTH*"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr:"=>0xB5,"GTHr*"=>0xD5,"GTHr*:"=>0xF5, + "EQU"=>0x16,"EQU:"=>0x36,"EQU*"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr:"=>0xB6,"EQUr*"=>0xD6,"EQUr*:"=>0xF6, + "NQK"=>0x17,"NQK:"=>0x37,"NQK*"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr:"=>0xB7,"NQKr*"=>0xD7,"NQKr*:"=>0xF7, + // Bitwise operators + "SHL"=>0x18,"SHL:"=>0x38,"SHL*"=>0x58,"SHL*:"=>0x78,"SHLr"=>0x98,"SHLr:"=>0xB8,"SHLr*"=>0xD8,"SHLr*:"=>0xF8, + "SHR"=>0x19,"SHR:"=>0x39,"SHR*"=>0x59,"SHR*:"=>0x79,"SHRr"=>0x99,"SHRr:"=>0xB9,"SHRr*"=>0xD9,"SHRr*:"=>0xF9, + "ROL"=>0x1A,"ROL:"=>0x3A,"ROL*"=>0x5A,"ROL*:"=>0x7A,"ROLr"=>0x9A,"ROLr:"=>0xBA,"ROLr*"=>0xDA,"ROLr*:"=>0xFA, + "ROR"=>0x1B,"ROR:"=>0x3B,"ROR*"=>0x5B,"ROR*:"=>0x7B,"RORr"=>0x9B,"RORr:"=>0xBB,"RORr*"=>0xDB,"RORr*:"=>0xFB, + "IOR"=>0x1C,"IOR:"=>0x3C,"IOR*"=>0x5C,"IOR*:"=>0x7C,"IORr"=>0x9C,"IORr:"=>0xBC,"IORr*"=>0xDC,"IORr*:"=>0xFC, + "XOR"=>0x1D,"XOR:"=>0x3D,"XOR*"=>0x5D,"XOR*:"=>0x7D,"XORr"=>0x9D,"XORr:"=>0xBD,"XORr*"=>0xDD,"XORr*:"=>0xFD, + "AND"=>0x1E,"AND:"=>0x3E,"AND*"=>0x5E,"AND*:"=>0x7E,"ANDr"=>0x9E,"ANDr:"=>0xBE,"ANDr*"=>0xDE,"ANDr*:"=>0xFE, + "NOT"=>0x1F,"NOT:"=>0x3F,"NOT*"=>0x5F,"NOT*:"=>0x7F,"NOTr"=>0x9F,"NOTr:"=>0xBF,"NOTr*"=>0xDF,"NOTr*:"=>0xFF, + _ => return Err(()), + }}) + } +} + + +pub enum Operation { + HLT, PSH, POP, CPY, + DUP, OVR, SWP, ROT, + JMP, JMS, JCN, JCS, + LDA, STA, LDD, STD, + ADD, SUB, INC, DEC, + LTH, GTH, EQU, NQK, + SHL, 
/// The 32 base operations, listed in opcode order.
pub enum Operation {
    HLT, PSH, POP, CPY,
    DUP, OVR, SWP, ROT,
    JMP, JMS, JCN, JCS,
    LDA, STA, LDD, STD,
    ADD, SUB, INC, DEC,
    LTH, GTH, EQU, NQK,
    SHL, SHR, ROL, ROR,
    IOR, XOR, AND, NOT,
}

impl From<Operation> for u8 {
    /// Return the five-bit opcode of `operation`.
    ///
    /// This is the inverse of the low-five-bit decoding used for instruction
    /// bytes, where 0x18..=0x1B are SHL/SHR/ROL/ROR and 0x1C..=0x1F are
    /// IOR/XOR/AND/NOT.
    fn from(operation: Operation) -> Self {
        match operation {
            Operation::HLT => 0x00, Operation::PSH => 0x01,
            Operation::POP => 0x02, Operation::CPY => 0x03,
            Operation::DUP => 0x04, Operation::OVR => 0x05,
            Operation::SWP => 0x06, Operation::ROT => 0x07,
            Operation::JMP => 0x08, Operation::JMS => 0x09,
            Operation::JCN => 0x0A, Operation::JCS => 0x0B,
            Operation::LDA => 0x0C, Operation::STA => 0x0D,
            Operation::LDD => 0x0E, Operation::STD => 0x0F,
            Operation::ADD => 0x10, Operation::SUB => 0x11,
            Operation::INC => 0x12, Operation::DEC => 0x13,
            Operation::LTH => 0x14, Operation::GTH => 0x15,
            Operation::EQU => 0x16, Operation::NQK => 0x17,
            Operation::SHL => 0x18, Operation::SHR => 0x19,
            Operation::ROL => 0x1A, Operation::ROR => 0x1B,
            Operation::IOR => 0x1C, Operation::XOR => 0x1D,
            Operation::AND => 0x1E, Operation::NOT => 0x1F,
        }
    }
}

impl std::fmt::Display for Operation {
    /// Write the three-letter mnemonic of the operation.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        let mnemonic = match self {
            Operation::HLT => "HLT", Operation::PSH => "PSH",
            Operation::POP => "POP", Operation::CPY => "CPY",
            Operation::DUP => "DUP", Operation::OVR => "OVR",
            Operation::SWP => "SWP", Operation::ROT => "ROT",
            Operation::JMP => "JMP", Operation::JMS => "JMS",
            Operation::JCN => "JCN", Operation::JCS => "JCS",
            Operation::LDA => "LDA", Operation::STA => "STA",
            Operation::LDD => "LDD", Operation::STD => "STD",
            Operation::ADD => "ADD", Operation::SUB => "SUB",
            Operation::INC => "INC", Operation::DEC => "DEC",
            Operation::LTH => "LTH", Operation::GTH => "GTH",
            Operation::EQU => "EQU", Operation::NQK => "NQK",
            Operation::SHL => "SHL", Operation::SHR => "SHR",
            Operation::ROL => "ROL", Operation::ROR => "ROR",
            Operation::IOR => "IOR", Operation::XOR => "XOR",
            Operation::AND => "AND", Operation::NOT => "NOT",
        };
        write!(f, "{}", mnemonic)
    }
}


// File: src/types/mod.rs (added by this commit) declares `mod instruction;`
// and `mod value;` and re-exports both with `pub use instruction::*;` and
// `pub use value::*;`.

// File: src/types/value.rs (moved from src/tokens/value.rs by this commit)

/// A numeric value that is either one byte or one 16-bit double.
#[derive(Clone, Copy)]
pub enum Value {
    Byte(u8),
    Double(u16),
}

impl From<Value> for usize {
    /// Widen the contained number to `usize`; both variants convert losslessly.
    fn from(value: Value) -> Self {
        match value {
            Value::Byte(byte) => usize::from(byte),
            Value::Double(double) => usize::from(double),
        }
    }
}

impl From<&Value> for usize {
    /// Same as the by-value conversion; `Value` is `Copy`.
    fn from(value: &Value) -> Self {
        usize::from(*value)
    }
}

// The listing continues with `impl std::fmt::Display for Value`, which is cut
// off at the end of this chunk.
