Diffstat (limited to 'src')
-rw-r--r-- | src/bin/br-asm.rs             |  27
-rw-r--r-- | src/formats/clang.rs          |  10
-rw-r--r-- | src/formats/mod.rs            |  23
-rw-r--r-- | src/lib.rs                    | 230
-rw-r--r-- | src/stages/bytecode.rs        | 158
-rw-r--r-- | src/stages/bytecode_tokens.rs |  37
-rw-r--r-- | src/stages/compiler.rs        |  84
-rw-r--r-- | src/stages/mod.rs             |  26
-rw-r--r-- | src/stages/semantic.rs        | 141
-rw-r--r-- | src/stages/semantic_tokens.rs |  94
-rw-r--r-- | src/stages/syntactic.rs       | 220
-rw-r--r-- | src/stages/syntactic_tokens.rs | 94
-rw-r--r-- | src/types/instruction.rs      | 168
-rw-r--r-- | src/types/mod.rs              |   4
-rw-r--r-- | src/types/value.rs            |  48
15 files changed, 1364 insertions, 0 deletions
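The diff below adds a three-stage assembly pipeline (syntactic parsing, semantic analysis, bytecode generation) driven by assemble() in src/lib.rs. As a rough sketch of how the exported stage functions chain together when the crate is used as a library (assuming the bedrock_asm crate name from src/bin/br-asm.rs; the report_*_errors helpers used by assemble() are omitted here):

use bedrock_asm::*;

// Hypothetical helper, not part of this commit: run the three stages over an
// already-merged source string and return the raw bytecode on success.
fn assemble_merged(merged_source: &str) -> Option<Vec<u8>> {
    // Stage 1: source text to syntactic tokens.
    let syntactic = parse_syntactic(merged_source, Some("<merged source>")).ok()?;
    // Stage 2: syntactic tokens to semantic tokens plus definitions.
    let semantic = parse_semantic(syntactic).ok()?;
    // Stage 3: semantic tokens to bytecode and debug symbols.
    let AssembledProgram { bytecode, symbols: _ } = generate_bytecode(&semantic).ok()?;
    Some(bytecode)
}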
diff --git a/src/bin/br-asm.rs b/src/bin/br-asm.rs new file mode 100644 index 0000000..ced4245 --- /dev/null +++ b/src/bin/br-asm.rs @@ -0,0 +1,27 @@ +use bedrock_asm::*; +use switchboard::*; + + +fn main() { + let mut args = Switchboard::from_env(); + args.named("version"); + args.named("verbose").short('v'); + + if args.get("version").as_bool() { + print_version(); + } + if args.get("verbose").as_bool() { + log::set_log_level(log::LogLevel::Info); + } + + assemble(args, "br-asm"); +} + + +fn print_version() -> ! { + let name = env!("CARGO_PKG_NAME"); + let version = env!("CARGO_PKG_VERSION"); + eprintln!("{name} v{version}"); + eprintln!("Written by Ben Bridle."); + std::process::exit(0); +} diff --git a/src/formats/clang.rs b/src/formats/clang.rs new file mode 100644 index 0000000..524b501 --- /dev/null +++ b/src/formats/clang.rs @@ -0,0 +1,10 @@ +pub fn format_clang(bytecode: &[u8]) -> Vec<u8> { + let mut output = String::new(); + for chunk in bytecode.chunks(16) { + for byte in chunk { + output.push_str(&format!("0x{byte:02X}, ")); + } + output.push('\n'); + } + return output.into_bytes(); +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs new file mode 100644 index 0000000..79b1c51 --- /dev/null +++ b/src/formats/mod.rs @@ -0,0 +1,23 @@ +mod clang; +pub use clang::*; + +use crate::*; + + +#[derive(Clone, Copy, PartialEq)] +pub enum Format { + Raw, + Source, + Clang, +} + +impl Format { + pub fn from_str(string: &str) -> Self { + match string { + "raw" => Self::Raw, + "source" => Self::Source, + "c" => Self::Clang, + _ => fatal!("Unknown format '{string}', expected 'raw', 'c', or 'source'"), + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..d45d449 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,230 @@ +#![feature(path_add_extension)] + +mod formats; +mod types; +mod stages; +pub use formats::*; +pub use types::*; +pub use stages::*; + +use assembler::*; +use log::*; +use switchboard::*; + +use std::io::Read; +use std::io::Write; + + +pub const RETURN_MODE: u8 = 0x80; +pub const WIDE_MODE: u8 = 0x40; +pub const IMMEDIATE_MODE: u8 = 0x20; + + +pub fn assemble(mut args: Switchboard, invocation: &str) { + args.positional("source"); + args.positional("destination"); + args.named("extension").default("brc"); + + args.named("no-libs"); + args.named("no-project-libs"); + args.named("no-env-libs"); + args.named("no-truncate"); + + args.named("format").default("raw"); + args.named("dry-run").short('n'); + args.named("tree"); + args.named("with-symbols"); + args.named("help").short('h'); + args.raise_errors(); + + if args.get("help").as_bool() { + print_help(invocation); + std::process::exit(0); + } + + let source_path = args.get("source").as_path_opt().map( + |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); + let destination_path = args.get("destination").as_path_opt(); + let extension = args.get("extension").as_string(); + let opt_extension = Some(extension.as_str()); + + let no_libs = args.get("no-libs").as_bool(); + let no_project_libs = args.get("no-project-libs").as_bool(); + let no_env_libs = args.get("no-env-libs").as_bool(); + let no_truncate = args.get("no-truncate").as_bool(); + + let format = Format::from_str(args.get("format").as_str()); + let dry_run = args.get("dry-run").as_bool(); + let print_tree = args.get("tree").as_bool(); + let export_symbols = args.get("with-symbols").as_bool(); + + // ----------------------------------------------------------------------- + + let mut compiler = new_compiler(); + + if let Some(path) = 
&source_path { + info!("Reading program source from {path:?}"); + compiler.root_from_path(path).unwrap_or_else(|err| fatal!("{err:?}: {path:?}")); + } else { + let mut source_code = String::new(); + info!("Reading program source from standard input"); + if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { + fatal!("Could not read from standard input\n{err:?}"); + } + compiler.root_from_string(source_code, "<standard input>") + }; + if compiler.error().is_some() && !no_libs && !no_project_libs { + compiler.include_libs_from_parent(opt_extension); + } + if compiler.error().is_some() && !no_libs && !no_env_libs { + compiler.include_libs_from_path_variable("BEDROCK_LIBS", opt_extension); + } + + if print_tree { + compiler.hierarchy().report() + } + if let Some(error) = compiler.error() { + error.report(); + std::process::exit(1); + } + + let merged_source = compiler.get_compiled_source().unwrap_or_else(|error| { + error.report(); + std::process::exit(1); + }); + + if !dry_run && format == Format::Source { + write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); + } + + // ----------------------------------------------------------------------- + + let path = Some("<merged source>"); + let syntactic = match parse_syntactic(&merged_source, path) { + Ok(tokens) => tokens, + Err(errors) => { + report_syntactic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let semantic = match parse_semantic(syntactic) { + Ok(tokens) => tokens, + Err(errors) => { + report_semantic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let program = match generate_bytecode(&semantic) { + Ok(program) => program, + Err(errors) => { + report_bytecode_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let AssembledProgram { mut bytecode, symbols } = program; + + let length = bytecode.len(); + let percentage = (length as f32 / 65536.0 * 100.0).round() as u16; + info!("Assembled program in {length} bytes ({percentage}% of maximum)"); + + if !no_truncate { + // Remove null bytes from end of bytecode. + while let Some(0) = bytecode.last() { + bytecode.pop(); + } + let difference = length - bytecode.len(); + if difference > 0 { + info!("Truncated program to {length} bytes (saved {difference} bytes)"); + } + } + + if !dry_run { + if export_symbols { + if let Some(path) = &destination_path { + let mut symbols_path = path.to_path_buf(); + symbols_path.add_extension("sym"); + let mut symbols_string = String::new(); + for symbol in &symbols { + let address = &symbol.address; + let name = &symbol.name; + let location = &symbol.source.location(); + symbols_string.push_str(&format!( + "{address:04x} {name} {location}\n" + )); + } + match std::fs::write(&symbols_path, symbols_string) { + Ok(_) => info!("Saved symbols to {symbols_path:?}"), + Err(err) => info!("Could not write symbols to {symbols_path:?}\n{err:?}"), + } + } + } + + let bytes = match format { + Format::Raw => bytecode, + Format::Clang => format_clang(&bytecode), + Format::Source => unreachable!("Source output is handled before full assembly"), + }; + write_bytes_and_exit(&bytes, destination_path.as_ref()); + } +} + + +fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! 
{ + match path { + Some(path) => match std::fs::write(path, bytes) { + Ok(_) => info!("Wrote output to {:?}", path.as_ref()), + Err(err) => fatal!("Could not write to {:?}\n{err:?}", path.as_ref()), + } + None => match std::io::stdout().write_all(bytes) { + Ok(_) => info!("Wrote output to standard output"), + Err(err) => fatal!("Could not write to standard output\n{err:?}"), + } + } + std::process::exit(0); +} + + +fn print_help(invocation: &str) { + eprintln!("\ +Usage: {invocation} [source] [destination] + +Convert Bedrock source code into an assembled Bedrock program. + +Usage: + To assemble a Bedrock program from a source file and write to an output + file, run `br-asm [source] [destination]`, where [source] is the path + of the source file and [destination] is the path to write to. + + If [destination] is omitted, the assembled program will be written to + standard output. If [source] is omitted, the program source code will + be read from standard input. + +Environment variables: + BEDROCK_LIBS + A list of colon-separated paths which will be searched to find Bedrock + source code files to use as libraries when assembling a Bedrock program. + If a library file resolves an unresolved symbol in the program being + assembled, the library file will be merged into the program. + +Arguments: + [source] Bedrock source code file to assemble. + [destination] Destination path for assembler output. + +Switches: + --dry-run (-n) Assemble and show errors only, don't write any output + --extension=<ext> File extension to identify source files (default is 'brc') + --format=<fmt> Output format to use for assembled program (default is 'raw') + --no-project-libs Don't search for libraries in the source parent folder + --no-env-libs Don't search for libraries in the BEDROCK_LIBS path variable + --no-libs Combination of --no-project-libs and --no-env-libs + --no-truncate Don't remove trailing zero-bytes from the assembled program + --tree Show a tree diagram of all included library files + --with-symbols Also generate debug symbols file with extension '.sym' + --help (-h) Print this help information + --verbose, (-v) Print additional information + --version Print the program version and exit +"); +} diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..6878c42 --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,158 @@ +use crate::*; + +use std::collections::HashMap; + + +/// Doesn't truncate trailing null bytes. 
+pub fn generate_bytecode(semantic: &Program) -> Result<AssembledProgram, Vec<Tracked<BytecodeError>>> { + let mut generator = BytecodeGenerator::new(&semantic.definitions); + generator.parse(&semantic.tokens, false); + generator.fill_slots(); + let mut symbols = Vec::new(); + for (name, information) in generator.labels { + let source = semantic.definitions.get(&name).unwrap().source.clone(); + let address = information.address; + symbols.push(AssembledSymbol { name, address, source }); + } + + match generator.errors.is_empty() { + true => Ok( + AssembledProgram { + bytecode: generator.bytecode, + symbols, + } + ), + false => Err(generator.errors), + } +} + + +pub struct BytecodeGenerator<'a> { + definitions: &'a HashMap<String, Tracked<Definition>>, + labels: HashMap<String, LabelInformation>, + stack: Vec<usize>, + bytecode: Vec<u8>, + errors: Vec<Tracked<BytecodeError>>, +} + +struct LabelInformation { + address: usize, + slots: Vec<usize>, +} + +impl<'a> BytecodeGenerator<'a> { + pub fn new(definitions: &'a HashMap<String, Tracked<Definition>>) -> Self { + let mut labels = HashMap::new(); + for (name, definition) in definitions { + if let DefinitionVariant::LabelDefinition = definition.variant { + // Use fake address for now. + let information = LabelInformation { address: 0, slots: Vec::new() }; + labels.insert(name.to_string(), information); + } + } + Self { + definitions, + labels, + stack: Vec::new(), + bytecode: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) { + macro_rules! byte { + ($byte:expr) => { + self.bytecode.push($byte) + }; + } + macro_rules! double { + ($double:expr) => {{ + let [high, low] = u16::to_be_bytes($double); + self.bytecode.push(high); self.bytecode.push(low); + }}; + } + + for token in tokens { + let i = self.bytecode.len(); + match &token.value { + SemanticToken::Literal(value) => match value { + Value::Byte(byte) => byte!(*byte), + Value::Double(double) => double!(*double), + } + SemanticToken::Pad(value) => { + self.bytecode.resize(i + usize::from(value), 0); + }, + SemanticToken::String(bytes) => { + self.bytecode.extend_from_slice(bytes) + }, + SemanticToken::Comment(_) => (), + SemanticToken::BlockOpen(_) => { + self.stack.push(i); + // Use a fake index for now. + double!(0); + } + SemanticToken::BlockClose(_) => { + if i > 0xFFFF { + let error = BytecodeError::InvalidBlockAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let Some(addr) = self.stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + let [high, low] = (i as u16).to_be_bytes(); + self.bytecode[addr] = high; + self.bytecode[addr+1] = low; + } + SemanticToken::Symbol(name) => { + if let Some(definition) = self.definitions.get(name) { + match &definition.variant { + DefinitionVariant::MacroDefinition(body) => { + self.parse(body, true); + } + DefinitionVariant::LabelDefinition => { + let information = self.labels.get_mut(name).unwrap(); + information.slots.push(i); + // Use a fake index for now. 
+ double!(0); + } + } + } else { + unreachable!("Uncaught undefined symbol '{name}'"); + } + } + SemanticToken::Instruction(instruction) => { + byte!(instruction.value) + } + SemanticToken::LabelDefinition(name) => if in_macro { + unreachable!("Uncaught label definition in macro"); + } else { + if i > 0xFFFF { + let error = BytecodeError::InvalidLabelAddress(i); + self.errors.push(Tracked::from(error, token.source.clone())); + } + let information = self.labels.get_mut(name).unwrap(); + // Replace fake index with real index. + information.address = i; + } + SemanticToken::MacroDefinition{ .. } => if in_macro { + unreachable!("Uncaught macro definition in macro"); + } + } + } + + if !self.stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + } + + /// Fill each label slot with a real label address. + pub fn fill_slots(&mut self) { + for information in self.labels.values() { + let [high, low] = (information.address as u16).to_be_bytes(); + for addr in &information.slots { + self.bytecode[*addr] = high; + self.bytecode[*addr + 1] = low; + } + } + } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..902fcd7 --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,37 @@ +use crate::*; + + +pub struct AssembledProgram { + pub bytecode: Vec<u8>, + pub symbols: Vec<AssembledSymbol>, +} + +pub struct AssembledSymbol { + pub name: String, + pub address: usize, + pub source: SourceSpan, +} + +pub enum BytecodeError { + InvalidLabelAddress(usize), + InvalidBlockAddress(usize), +} + + +pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { + for error in errors { + report_bytecode_error(error, source_code); + } +} + + +fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + BytecodeError::InvalidLabelAddress(address) => + &format!("The label address exceeds 0xFFFF: 0x{address:X}"), + BytecodeError::InvalidBlockAddress(address) => + &format!("The block address exceeds 0xFFFF: 0x{address:X}"), + }; + report_source_issue(LogLevel::Error, &context, message); +} diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs new file mode 100644 index 0000000..97bf20c --- /dev/null +++ b/src/stages/compiler.rs @@ -0,0 +1,84 @@ +use crate::*; + +use assembler::SymbolRole::*; +use assembler::DefinitionType::*; + + +pub fn new_compiler() -> Compiler { + Compiler::new(parse_symbols, push_code) +} + + +/// Parse all symbols from a source code string. +pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { + let syntactic = match parse_syntactic(source_code, path) { + Ok(syntactic) => syntactic, + Err(_) => return None, + }; + Some(SymbolParser::new().parse(&syntactic)) +} + +/// Push source code to a source compilation string. +pub fn push_code(compilation: &mut String, source_file: &SourceFile) { + // Skip blank files. + let source_code = &source_file.source_code; + if source_code.chars().all(|c| c.is_whitespace()) { return; } + // Ensure that the previous section is followed by two newline characters. + if !compilation.is_empty() { + if !compilation.ends_with('\n') { compilation.push('\n'); } + if !compilation.ends_with("\n\n") { compilation.push('\n'); } + } + // Push a path comment and the source code. 
+ let path_str = source_file.path.as_os_str().to_string_lossy(); + let path_comment = format!("(: {path_str} )\n"); + compilation.push_str(&path_comment); + compilation.push_str(&source_code); +} + + +// Extract symbol definitions from a list of syntactic tokens. +pub struct SymbolParser { + pub symbols: Vec<Symbol>, +} + +impl SymbolParser { + pub fn new() -> Self { + Self { + symbols: Vec::new(), + } + } + + fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { + let name = name.to_string(); + let namespace = Vec::new(); + let source = source.to_owned(); + self.symbols.push(Symbol { name, namespace, source, role }); + } + + pub fn parse(mut self, syntactic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> { + for token in syntactic { + match &token.value { + SyntacticToken::MacroDefinition(definition) => { + self.record_symbol( + &definition.name, + &definition.name.source, + Definition(MustPrecedeReference), + ); + for token in &definition.body { + if let SyntacticToken::Symbol(name) = &token.value { + self.record_symbol(&name, &token.source, Reference); + } + } + } + SyntacticToken::LabelDefinition(name) => { + self.record_symbol(&name, &token.source, Definition(CanFollowReference)); + } + SyntacticToken::Symbol(name) => { + self.record_symbol(&name, &token.source, Reference); + } + _ => (), + } + } + return self.symbols; + } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..76bda0d --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,26 @@ +mod compiler; +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod bytecode; +mod bytecode_tokens; +pub use compiler::*; +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! indent { + (0, $($tokens:tt)*) => {{ + println!($($tokens)*); + }}; + ($indent:expr, $($tokens:tt)*) => {{ + for _ in 0..$indent { print!(" "); } + println!($($tokens)*); + }}; +} diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..f2774a4 --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,141 @@ +use crate::*; + +use std::collections::{HashMap, HashSet}; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> { + // Record all label definitions and macro names up front. + let mut definitions = HashMap::new(); + let mut macro_names = HashSet::new(); + for token in &syntactic { + match &token.value { + SyntacticToken::LabelDefinition(name) => { + // Use a fake index for now. + let definition = Definition::new(0, DefinitionVariant::LabelDefinition); + let tracked = Tracked::from(definition, token.source.clone()); + if let Some(_) = definitions.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + SyntacticToken::MacroDefinition(definition) => { + let name = &definition.name; + if !macro_names.insert(name.clone()) { + unreachable!("Uncaught duplicate macro definition '{name}'") + } + } + _ => (), + } + } + + // Convert syntactic tokens to semantic tokens. 
+ let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut errors = Vec::new(); + let mut stack = Vec::new(); + + for syn_token in syntactic { + let i = tokens.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + stack.push(i); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = stack.pop() else { + unreachable!("Uncaught unmatched block terminator"); + }; + // Replace fake index with real index. + tokens[k].value = SemanticToken::BlockOpen(i); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.references.push(i); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(name) => { + let definition = definitions.get_mut(&name).unwrap(); + // Replace fake index with real index. + definition.value.definition = i; + SemanticToken::LabelDefinition(name) + } + SyntacticToken::MacroDefinition(definition) => { + let name = definition.name.clone(); + let mut body: Vec<Tracked<SemanticToken>> = Vec::new(); + let mut body_stack = Vec::new(); + for syn_token in definition.body { + let j = body.len(); + let sem_token = match syn_token.value { + SyntacticToken::Literal(value) => SemanticToken::Literal(value), + SyntacticToken::Pad(value) => SemanticToken::Pad(value), + SyntacticToken::String(bytes) => SemanticToken::String(bytes), + SyntacticToken::Comment(string) => SemanticToken::Comment(string), + SyntacticToken::BlockOpen => { + body_stack.push(j); + // Use a fake index for now. + SemanticToken::BlockOpen(0) + } + SyntacticToken::BlockClose => { + let Some(k) = body_stack.pop() else { + unreachable!("Uncaught unmatched block terminator in macro '{name}'"); + }; + // Replace fake index with real index. 
+ body[k].value = SemanticToken::BlockOpen(j); + SemanticToken::BlockClose(k) + } + SyntacticToken::Symbol(symbol) => { + if let Some(definition) = definitions.get_mut(&symbol) { + definition.value.deep_references.push((i, j)); + } else if let Some(definition) = macro_names.get(&symbol) { + let error = SemanticError::InvocationBeforeDefinition; + let source = syn_token.source.wrap(definition.source.clone()); + errors.push(Tracked::from(error, source)); + } else { + unreachable!("Uncaught undefined symbol '{symbol}' in macro '{name}'"); + }; + SemanticToken::Symbol(symbol) + } + SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), + SyntacticToken::LabelDefinition(label) => + unreachable!("Uncaught label definition '{label}' in macro '{name}'"), + SyntacticToken::MacroDefinition(definition) => + unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name), + }; + body.push(Tracked::from(sem_token, syn_token.source)); + } + + let variant = DefinitionVariant::MacroDefinition(body); + let source = definition.name.source.clone(); + let tracked = Tracked::from(Definition::new(i, variant), source); + if let Some(_) = definitions.insert(name.value.clone(), tracked) { + unreachable!("Uncaught duplicate definition '{name}'") + }; + if !body_stack.is_empty() { + unreachable!("Uncaught unterminated block in macro '{name}'"); + } + SemanticToken::MacroDefinition(name) + } + }; + tokens.push(Tracked::from(sem_token, syn_token.source)); + } + + if !stack.is_empty() { + unreachable!("Uncaught unterminated block"); + } + match errors.is_empty() { + true => Ok(Program { definitions, tokens }), + false => Err(errors), + } +} diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs new file mode 100644 index 0000000..fe49c26 --- /dev/null +++ b/src/stages/semantic_tokens.rs @@ -0,0 +1,94 @@ +use crate::*; + +use std::collections::HashMap; + + +pub struct Program { + pub definitions: HashMap<String, Tracked<Definition>>, + pub tokens: Vec<Tracked<SemanticToken>>, +} + +pub struct Definition { + pub variant: DefinitionVariant, + /// Index of definition token. + pub definition: usize, + /// Indices of symbols referencing this definition. + pub references: Vec<usize>, + /// Indices of references inside other definitions. 
+ pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { + pub fn new(i: usize, variant: DefinitionVariant) -> Self { + Self { + variant, + definition: i, + references: Vec::new(), + deep_references: Vec::new(), + } + } +} + +pub enum DefinitionVariant { + LabelDefinition, + MacroDefinition(Vec<Tracked<SemanticToken>>), +} + +pub enum SemanticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen(usize), // index to matching block-close + BlockClose(usize), // index to matching block-open + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(Tracked<String>), +} + +pub enum SemanticError { + InvocationBeforeDefinition, +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { + for error in errors { + report_semantic_error(error, source_code); + } +} + + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SemanticError::InvocationBeforeDefinition => + "Macro cannot be invoked before it has been defined", + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &HashMap<String, Tracked<Definition>>) { + match token { + SemanticToken::Literal(value) => indent!(i, "Literal({value})"), + SemanticToken::Pad(value) => indent!(i, "Pad({value})"), + SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SemanticToken::Comment(_) => indent!(i, "Comment"), + SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"), + SemanticToken::BlockClose(pointer) => indent!(i, "BlockClose(*{pointer})"), + SemanticToken::Symbol(name) => indent!(i, "Symbol({name})"), + SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SemanticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SemanticToken::MacroDefinition(name) => { + indent!(i, "MacroDefinition({name})"); + if let Some(definition) = definitions.get(name.as_str()) { + if let DefinitionVariant::MacroDefinition(body) = &definition.variant { + for token in body { + print_semantic_token(i+1, token, definitions); + } + } + } + } + } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..6453ae0 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,220 @@ +use crate::*; + +use std::path::PathBuf; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), "") +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + t.add_delimiters(&['(',')','[',']','{','}',';']); + t.add_terminators(&[':']); + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + let mut label_name = label_name.to_string(); + + macro_rules! err { + ($error:expr) => {{ + err!($error, t.get_source()); + }}; + ($error:expr, $source:expr) => {{ + errors.push(Tracked::from($error, $source)); + continue; + }}; + } + + macro_rules! 
check_name { + ($name:expr) => {{ + check_name!($name, t.get_source()); + }}; + ($name:expr, $source:expr) => { + if $name.chars().count() > 63 { + let error = SyntacticError::InvalidIdentifier($name.clone()); + errors.push(Tracked::from(error, $source.clone())); + } + }; + } + + // Eat characters until the end character is found. + macro_rules! is_any_end { + ($end:expr) => { + |t: &mut Tokeniser| { + t.eat_char() == Some($end) + } + }; + } + + // Eat characters until the end character is found without a preceding back-slash. + macro_rules! is_plain_end { + ($end:expr) => { + |t: &mut Tokeniser| { + t.eat_if(concat!('\\', $end)).is_some() || t.eat_char() == Some($end) + } + }; + } + + loop { + // Eat leading whitespace. + while let Some(c) = t.peek_char() { + match [' ', '\n', '\r', '\t'].contains(&c) { + true => t.eat_char(), + false => break, + }; + } + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let token = match c { + '"' => { + let source = t.get_source(); + match t.track_until(is_plain_end!('"')) { + Some(string) => { + let mut bytes = string.into_bytes(); + bytes.push(0x00); + SyntacticToken::String(bytes) + } + None => err!(SyntacticError::UnterminatedNullString, source), + } + } + '\'' => { + let source = t.get_source(); + match t.track_until(is_plain_end!('\'')) { + Some(string) => SyntacticToken::String(string.into_bytes()), + None => err!(SyntacticError::UnterminatedRawString, source), + } + } + '(' => { + let source = t.get_source(); + if let Some(string) = t.track_until(is_any_end!(')')) { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + continue; + } + } + SyntacticToken::Comment(string) + } else { + err!(SyntacticError::UnterminatedComment, source) + } + } + ')' => err!(SyntacticError::UnmatchedCommentTerminator), + '%' => { + let name = t.eat_token(); + let source = t.get_source(); + check_name!(name, source); + t.mark_child(); + if let Some(_) = t.track_until(is_any_end!(';')) { + let child = t.tokenise_child_span(); + match parse_body_from_tokeniser(child, &label_name) { + Ok(body) => { + let name = Tracked::from(name, source); + let definition = SyntacticMacroDefinition { name, body }; + SyntacticToken::MacroDefinition(definition) + } + Err(mut err) => { + errors.append(&mut err); + continue; + } + } + } else { + err!(SyntacticError::UnterminatedMacroDefinition, source); + } + } + ';' => err!(SyntacticError::UnmatchedMacroTerminator), + '{' => SyntacticToken::BlockOpen, + '}' => SyntacticToken::BlockClose, + '['|']' => continue, + '@' => { + label_name = t.eat_token(); + check_name!(label_name); + SyntacticToken::LabelDefinition(label_name.clone()) + } + '&' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::LabelDefinition(name) + } + '~' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::Symbol(name) + } + '#' => { + let token = t.eat_token(); + match token.parse::<Value>() { + Ok(value) => SyntacticToken::Pad(value), + Err(_) => err!(SyntacticError::InvalidPadValue), + } + }, + ':' => { + SyntacticToken::Symbol(String::from(':')) + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Ok(value) = token.parse::<Value>() { + SyntacticToken::Literal(value) + } else if let Ok(instruction) = token.parse::<Instruction>() { + 
SyntacticToken::Instruction(instruction) + } else { + check_name!(token); + SyntacticToken::Symbol(token) + } + } + }; + + t.mark_end(); + let source = t.get_source(); + tokens.push(Tracked::from(token, source)); + } + + // Check that every block open matches a block close. + let mut stack = Vec::new(); + for token in &tokens { + match &token.value { + SyntacticToken::BlockOpen => stack.push(token.source.clone()), + SyntacticToken::BlockClose => if let None = stack.pop() { + let error = SyntacticError::UnmatchedBlockTerminator; + errors.push(Tracked::from(error, token.source.clone())); + } + _ => (), + } + } + for source in stack { + let error = SyntacticError::UnterminatedBlock; + errors.push(Tracked::from(error, source)); + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} + + +fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + for token in parse_syntactic_from_tokeniser(t, label_name)? { + match token.value { + SyntacticToken::LabelDefinition(_) => { + let error = SyntacticError::LabelDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + SyntacticToken::MacroDefinition(_) => { + let error = SyntacticError::MacroDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + _ => tokens.push(token), + }; + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..2a95967 --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,94 @@ +use crate::*; + + +pub enum SyntacticToken { + Literal(Value), + Pad(Value), + String(Vec<u8>), + Comment(String), + BlockOpen, + BlockClose, + Symbol(String), + Instruction(Instruction), + LabelDefinition(String), + MacroDefinition(SyntacticMacroDefinition), +} + +pub struct SyntacticMacroDefinition { + pub name: Tracked<String>, + pub body: Vec<Tracked<SyntacticToken>>, +} + +pub enum SyntacticError { + UnterminatedBlock, + UnterminatedComment, + UnterminatedRawString, + UnterminatedNullString, + UnterminatedMacroDefinition, + UnmatchedBlockTerminator, + UnmatchedCommentTerminator, + UnmatchedMacroTerminator, + InvalidPadValue, + InvalidIdentifier(String), + MacroDefinitionInMacroDefinition, + LabelDefinitionInMacroDefinition, +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { + for error in errors { + report_syntactic_error(error, source_code); + } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SyntacticError::UnterminatedBlock => + "Block was not terminated, add a '}' character to terminate", + SyntacticError::UnterminatedComment => + "Comment was not terminated, add a ')' character to terminate", + SyntacticError::UnterminatedRawString => + "String was not terminated, add a ' character to terminate", + SyntacticError::UnterminatedNullString => + "String was not terminated, add a '\"' character to terminate", + SyntacticError::UnterminatedMacroDefinition => + "Macro definition was not terminated, add a ';' character to terminate", + SyntacticError::UnmatchedBlockTerminator => + "Attempted to terminate a block, but no block was in progress", + 
SyntacticError::UnmatchedCommentTerminator => + "Attempted to terminate a comment, but no comment was in progress", + SyntacticError::UnmatchedMacroTerminator => + "Attempted to terminate a macro definition, but no macro definition was in progress", + SyntacticError::InvalidPadValue => + "The pad value must be two or four hexadecimal digits", + SyntacticError::InvalidIdentifier(name) => + &format!("An identifier cannot exceed 63 characters in length: {name}"), + SyntacticError::MacroDefinitionInMacroDefinition => + "A macro cannot be defined inside another macro", + SyntacticError::LabelDefinitionInMacroDefinition => + "A label cannot be defined inside a macro", + }; + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { + match token { + SyntacticToken::Literal(value) => indent!(i, "Literal({value})"), + SyntacticToken::Pad(value) => indent!(i, "Pad({value})"), + SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + SyntacticToken::Comment(_) => indent!(i, "Comment"), + SyntacticToken::BlockOpen => indent!(i, "BlockOpen"), + SyntacticToken::BlockClose => indent!(i, "BlockClose"), + SyntacticToken::Symbol(name) => indent!(i, "Symbol({name})"), + SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), + SyntacticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), + SyntacticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", definition.name); + for token in &definition.body { + print_syntactic_token(i+1, token); + } + } + } +} diff --git a/src/types/instruction.rs b/src/types/instruction.rs new file mode 100644 index 0000000..252fc68 --- /dev/null +++ b/src/types/instruction.rs @@ -0,0 +1,168 @@ +use crate::*; + +use Operation as Op; + + +pub struct Instruction { + pub value: u8, +} + +impl Instruction { + pub fn operation(&self) -> Operation { + match self.value & 0x1f { + 0x00=>Op::HLT, 0x01=>Op::PSH, 0x02=>Op::POP, 0x03=>Op::CPY, + 0x04=>Op::DUP, 0x05=>Op::OVR, 0x06=>Op::SWP, 0x07=>Op::ROT, + 0x08=>Op::JMP, 0x09=>Op::JMS, 0x0a=>Op::JCN, 0x0b=>Op::JCS, + 0x0c=>Op::LDA, 0x0d=>Op::STA, 0x0e=>Op::LDD, 0x0f=>Op::STD, + 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, + 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, + 0x18=>Op::SHL, 0x19=>Op::SHR, 0x1a=>Op::ROL, 0x1b=>Op::ROR, + 0x1c=>Op::IOR, 0x1d=>Op::XOR, 0x1e=>Op::AND, 0x1f=>Op::NOT, + _ => unreachable!(), + } + } + + pub fn return_mode(&self) -> bool { + self.value & RETURN_MODE != 0 + } + + pub fn wide_mode(&self) -> bool { + self.value & WIDE_MODE != 0 + } + + pub fn immediate_mode(&self) -> bool { + self.value & IMMEDIATE_MODE != 0 + } +} + +impl std::fmt::Display for Instruction { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self.value { + // Stack operators + 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" , + 0x01=>"PSH",0x21=>"PSH:",0x41=>"PSH*",0x61=>"PSH*:",0x81=>"PSHr",0xA1=>"PSHr:",0xC1=>"PSHr*",0xE1=>"PSHr*:", + 0x02=>"POP",0x22=>"POP:",0x42=>"POP*",0x62=>"POP*:",0x82=>"POPr",0xA2=>"POPr:",0xC2=>"POPr*",0xE2=>"POPr*:", + 0x03=>"CPY",0x23=>"CPY:",0x43=>"CPY*",0x63=>"CPY*:",0x83=>"CPYr",0xA3=>"CPYr:",0xC3=>"CPYr*",0xE3=>"CPYr*:", + 0x04=>"DUP",0x24=>"DUP:",0x44=>"DUP*",0x64=>"DUP*:",0x84=>"DUPr",0xA4=>"DUPr:",0xC4=>"DUPr*",0xE4=>"DUPr*:", +
0x05=>"OVR",0x25=>"OVR:",0x45=>"OVR*",0x65=>"OVR*:",0x85=>"OVRr",0xA5=>"OVRr:",0xC5=>"OVRr*",0xE5=>"OVRr*:", + 0x06=>"SWP",0x26=>"SWP:",0x46=>"SWP*",0x66=>"SWP*:",0x86=>"SWPr",0xA6=>"SWPr:",0xC6=>"SWPr*",0xE6=>"SWPr*:", + 0x07=>"ROT",0x27=>"ROT:",0x47=>"ROT*",0x67=>"ROT*:",0x87=>"ROTr",0xA7=>"ROTr:",0xC7=>"ROTr*",0xE7=>"ROTr*:", + // Control operators + 0x08=>"JMP",0x28=>"JMP:",0x48=>"JMP*",0x68=>"JMP*:",0x88=>"JMPr",0xA8=>"JMPr:",0xC8=>"JMPr*",0xE8=>"JMPr*:", + 0x09=>"JMS",0x29=>"JMS:",0x49=>"JMS*",0x69=>"JMS*:",0x89=>"JMSr",0xA9=>"JMSr:",0xC9=>"JMSr*",0xE9=>"JMSr*:", + 0x0A=>"JCN",0x2A=>"JCN:",0x4A=>"JCN*",0x6A=>"JCN*:",0x8A=>"JCNr",0xAA=>"JCNr:",0xCA=>"JCNr*",0xEA=>"JCNr*:", + 0x0B=>"JCS",0x2B=>"JCS:",0x4B=>"JCS*",0x6B=>"JCS*:",0x8B=>"JCSr",0xAB=>"JCSr:",0xCB=>"JCSr*",0xEB=>"JCSr*:", + 0x0C=>"LDA",0x2C=>"LDA:",0x4C=>"LDA*",0x6C=>"LDA*:",0x8C=>"LDAr",0xAC=>"LDAr:",0xCC=>"LDAr*",0xEC=>"LDAr*:", + 0x0D=>"STA",0x2D=>"STA:",0x4D=>"STA*",0x6D=>"STA*:",0x8D=>"STAr",0xAD=>"STAr:",0xCD=>"STAr*",0xED=>"STAr*:", + 0x0E=>"LDD",0x2E=>"LDD:",0x4E=>"LDD*",0x6E=>"LDD*:",0x8E=>"LDDr",0xAE=>"LDDr:",0xCE=>"LDDr*",0xEE=>"LDDr*:", + 0x0F=>"STD",0x2F=>"STD:",0x4F=>"STD*",0x6F=>"STD*:",0x8F=>"STDr",0xAF=>"STDr:",0xCF=>"STDr*",0xEF=>"STDr*:", + // Numeric operators + 0x10=>"ADD",0x30=>"ADD:",0x50=>"ADD*",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr:",0xD0=>"ADDr*",0xF0=>"ADDr*:", + 0x11=>"SUB",0x31=>"SUB:",0x51=>"SUB*",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr:",0xD1=>"SUBr*",0xF1=>"SUBr*:", + 0x12=>"INC",0x32=>"INC:",0x52=>"INC*",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr:",0xD2=>"INCr*",0xF2=>"INCr*:", + 0x13=>"DEC",0x33=>"DEC:",0x53=>"DEC*",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr:",0xD3=>"DECr*",0xF3=>"DECr*:", + 0x14=>"LTH",0x34=>"LTH:",0x54=>"LTH*",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr:",0xD4=>"LTHr*",0xF4=>"LTHr*:", + 0x15=>"GTH",0x35=>"GTH:",0x55=>"GTH*",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr:",0xD5=>"GTHr*",0xF5=>"GTHr*:", + 0x16=>"EQU",0x36=>"EQU:",0x56=>"EQU*",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr:",0xD6=>"EQUr*",0xF6=>"EQUr*:", + 0x17=>"NQK",0x37=>"NQK:",0x57=>"NQK*",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr:",0xD7=>"NQKr*",0xF7=>"NQKr*:", + // Bitwise operators + 0x18=>"SHL",0x38=>"SHL:",0x58=>"SHL*",0x78=>"SHL*:",0x98=>"SHLr",0xB8=>"SHLr:",0xD8=>"SHLr*",0xF8=>"SHLr*:", + 0x19=>"SHR",0x39=>"SHR:",0x59=>"SHR*",0x79=>"SHR*:",0x99=>"SHRr",0xB9=>"SHRr:",0xD9=>"SHRr*",0xF9=>"SHRr*:", + 0x1A=>"ROL",0x3A=>"ROL:",0x5A=>"ROL*",0x7A=>"ROL*:",0x9A=>"ROLr",0xBA=>"ROLr:",0xDA=>"ROLr*",0xFA=>"ROLr*:", + 0x1B=>"ROR",0x3B=>"ROR:",0x5B=>"ROR*",0x7B=>"ROR*:",0x9B=>"RORr",0xBB=>"RORr:",0xDB=>"RORr*",0xFB=>"RORr*:", + 0x1C=>"IOR",0x3C=>"IOR:",0x5C=>"IOR*",0x7C=>"IOR*:",0x9C=>"IORr",0xBC=>"IORr:",0xDC=>"IORr*",0xFC=>"IORr*:", + 0x1D=>"XOR",0x3D=>"XOR:",0x5D=>"XOR*",0x7D=>"XOR*:",0x9D=>"XORr",0xBD=>"XORr:",0xDD=>"XORr*",0xFD=>"XORr*:", + 0x1E=>"AND",0x3E=>"AND:",0x5E=>"AND*",0x7E=>"AND*:",0x9E=>"ANDr",0xBE=>"ANDr:",0xDE=>"ANDr*",0xFE=>"ANDr*:", + 0x1F=>"NOT",0x3F=>"NOT:",0x5F=>"NOT*",0x7F=>"NOT*:",0x9F=>"NOTr",0xBF=>"NOTr:",0xDF=>"NOTr*",0xFF=>"NOTr*:", + }) + } +} + +impl std::str::FromStr for Instruction { + type Err = (); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + Ok( Instruction { value: match token { + // Stack operators + "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0, + "PSH"=>0x01,"PSH:"=>0x21,"PSH*"=>0x41,"PSH*:"=>0x61,"PSHr"=>0x81,"PSHr:"=>0xA1,"PSHr*"=>0xC1,"PSHr*:"=>0xE1, + ":"=>0x21, "*:"=>0x61, "r:"=>0xA1, "r*:"=>0xE1, + 
"POP"=>0x02,"POP:"=>0x22,"POP*"=>0x42,"POP*:"=>0x62,"POPr"=>0x82,"POPr:"=>0xA2,"POPr*"=>0xC2,"POPr*:"=>0xE2, + "CPY"=>0x03,"CPY:"=>0x23,"CPY*"=>0x43,"CPY*:"=>0x63,"CPYr"=>0x83,"CPYr:"=>0xA3,"CPYr*"=>0xC3,"CPYr*:"=>0xE3, + "DUP"=>0x04,"DUP:"=>0x24,"DUP*"=>0x44,"DUP*:"=>0x64,"DUPr"=>0x84,"DUPr:"=>0xA4,"DUPr*"=>0xC4,"DUPr*:"=>0xE4, + "OVR"=>0x05,"OVR:"=>0x25,"OVR*"=>0x45,"OVR*:"=>0x65,"OVRr"=>0x85,"OVRr:"=>0xA5,"OVRr*"=>0xC5,"OVRr*:"=>0xE5, + "SWP"=>0x06,"SWP:"=>0x26,"SWP*"=>0x46,"SWP*:"=>0x66,"SWPr"=>0x86,"SWPr:"=>0xA6,"SWPr*"=>0xC6,"SWPr*:"=>0xE6, + "ROT"=>0x07,"ROT:"=>0x27,"ROT*"=>0x47,"ROT*:"=>0x67,"ROTr"=>0x87,"ROTr:"=>0xA7,"ROTr*"=>0xC7,"ROTr*:"=>0xE7, + // Control operators + "JMP"=>0x08,"JMP:"=>0x28,"JMP*"=>0x48,"JMP*:"=>0x68,"JMPr"=>0x88,"JMPr:"=>0xA8,"JMPr*"=>0xC8,"JMPr*:"=>0xE8, + "JMS"=>0x09,"JMS:"=>0x29,"JMS*"=>0x49,"JMS*:"=>0x69,"JMSr"=>0x89,"JMSr:"=>0xA9,"JMSr*"=>0xC9,"JMSr*:"=>0xE9, + "JCN"=>0x0A,"JCN:"=>0x2A,"JCN*"=>0x4A,"JCN*:"=>0x6A,"JCNr"=>0x8A,"JCNr:"=>0xAA,"JCNr*"=>0xCA,"JCNr*:"=>0xEA, + "JCS"=>0x0B,"JCS:"=>0x2B,"JCS*"=>0x4B,"JCS*:"=>0x6B,"JCSr"=>0x8B,"JCSr:"=>0xAB,"JCSr*"=>0xCB,"JCSr*:"=>0xEB, + "LDA"=>0x0C,"LDA:"=>0x2C,"LDA*"=>0x4C,"LDA*:"=>0x6C,"LDAr"=>0x8C,"LDAr:"=>0xAC,"LDAr*"=>0xCC,"LDAr*:"=>0xEC, + "STA"=>0x0D,"STA:"=>0x2D,"STA*"=>0x4D,"STA*:"=>0x6D,"STAr"=>0x8D,"STAr:"=>0xAD,"STAr*"=>0xCD,"STAr*:"=>0xED, + "LDD"=>0x0E,"LDD:"=>0x2E,"LDD*"=>0x4E,"LDD*:"=>0x6E,"LDDr"=>0x8E,"LDDr:"=>0xAE,"LDDr*"=>0xCE,"LDDr*:"=>0xEE, + "STD"=>0x0F,"STD:"=>0x2F,"STD*"=>0x4F,"STD*:"=>0x6F,"STDr"=>0x8F,"STDr:"=>0xAF,"STDr*"=>0xCF,"STDr*:"=>0xEF, + // Numeric operators + "ADD"=>0x10,"ADD:"=>0x30,"ADD*"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr:"=>0xB0,"ADDr*"=>0xD0,"ADDr*:"=>0xF0, + "SUB"=>0x11,"SUB:"=>0x31,"SUB*"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr:"=>0xB1,"SUBr*"=>0xD1,"SUBr*:"=>0xF1, + "INC"=>0x12,"INC:"=>0x32,"INC*"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr:"=>0xB2,"INCr*"=>0xD2,"INCr*:"=>0xF2, + "DEC"=>0x13,"DEC:"=>0x33,"DEC*"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr:"=>0xB3,"DECr*"=>0xD3,"DECr*:"=>0xF3, + "LTH"=>0x14,"LTH:"=>0x34,"LTH*"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr:"=>0xB4,"LTHr*"=>0xD4,"LTHr*:"=>0xF4, + "GTH"=>0x15,"GTH:"=>0x35,"GTH*"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr:"=>0xB5,"GTHr*"=>0xD5,"GTHr*:"=>0xF5, + "EQU"=>0x16,"EQU:"=>0x36,"EQU*"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr:"=>0xB6,"EQUr*"=>0xD6,"EQUr*:"=>0xF6, + "NQK"=>0x17,"NQK:"=>0x37,"NQK*"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr:"=>0xB7,"NQKr*"=>0xD7,"NQKr*:"=>0xF7, + // Bitwise operators + "SHL"=>0x18,"SHL:"=>0x38,"SHL*"=>0x58,"SHL*:"=>0x78,"SHLr"=>0x98,"SHLr:"=>0xB8,"SHLr*"=>0xD8,"SHLr*:"=>0xF8, + "SHR"=>0x19,"SHR:"=>0x39,"SHR*"=>0x59,"SHR*:"=>0x79,"SHRr"=>0x99,"SHRr:"=>0xB9,"SHRr*"=>0xD9,"SHRr*:"=>0xF9, + "ROL"=>0x1A,"ROL:"=>0x3A,"ROL*"=>0x5A,"ROL*:"=>0x7A,"ROLr"=>0x9A,"ROLr:"=>0xBA,"ROLr*"=>0xDA,"ROLr*:"=>0xFA, + "ROR"=>0x1B,"ROR:"=>0x3B,"ROR*"=>0x5B,"ROR*:"=>0x7B,"RORr"=>0x9B,"RORr:"=>0xBB,"RORr*"=>0xDB,"RORr*:"=>0xFB, + "IOR"=>0x1C,"IOR:"=>0x3C,"IOR*"=>0x5C,"IOR*:"=>0x7C,"IORr"=>0x9C,"IORr:"=>0xBC,"IORr*"=>0xDC,"IORr*:"=>0xFC, + "XOR"=>0x1D,"XOR:"=>0x3D,"XOR*"=>0x5D,"XOR*:"=>0x7D,"XORr"=>0x9D,"XORr:"=>0xBD,"XORr*"=>0xDD,"XORr*:"=>0xFD, + "AND"=>0x1E,"AND:"=>0x3E,"AND*"=>0x5E,"AND*:"=>0x7E,"ANDr"=>0x9E,"ANDr:"=>0xBE,"ANDr*"=>0xDE,"ANDr*:"=>0xFE, + "NOT"=>0x1F,"NOT:"=>0x3F,"NOT*"=>0x5F,"NOT*:"=>0x7F,"NOTr"=>0x9F,"NOTr:"=>0xBF,"NOTr*"=>0xDF,"NOTr*:"=>0xFF, + _ => return Err(()), + }}) + } +} + + +pub enum Operation { + HLT, PSH, POP, CPY, + DUP, OVR, SWP, ROT, + JMP, JMS, JCN, JCS, + LDA, STA, LDD, STD, + ADD, 
SUB, INC, DEC, + LTH, GTH, EQU, NQK, + SHL, SHR, ROL, ROR, + IOR, XOR, AND, NOT, +} + +impl From<Operation> for u8 { + fn from(operation: Operation) -> Self { + match operation { + Op::HLT=>0x00, Op::PSH=>0x01, Op::POP=>0x02, Op::CPY=>0x03, + Op::DUP=>0x04, Op::OVR=>0x05, Op::SWP=>0x06, Op::ROT=>0x07, + Op::JMP=>0x08, Op::JMS=>0x09, Op::JCN=>0x0A, Op::JCS=>0x0B, + Op::LDA=>0x0C, Op::STA=>0x0D, Op::LDD=>0x0E, Op::STD=>0x0F, + Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13, + Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17, + Op::SHL=>0x18, Op::SHR=>0x19, Op::ROL=>0x1A, Op::ROR=>0x1B, + Op::IOR=>0x1C, Op::XOR=>0x1D, Op::AND=>0x1E, Op::NOT=>0x1F, + } + } +} + +impl std::fmt::Display for Operation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", match self { + Op::HLT=>"HLT", Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", + Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT", + Op::JMP=>"JMP", Op::JMS=>"JMS", Op::JCN=>"JCN", Op::JCS=>"JCS", + Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD", + Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC", + Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK", + Op::SHL=>"SHL", Op::SHR=>"SHR", Op::ROL=>"ROL", Op::ROR=>"ROR", + Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT", + }) + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..8094cb1 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,4 @@ +mod instruction; +mod value; +pub use instruction::*; +pub use value::*; diff --git a/src/types/value.rs b/src/types/value.rs new file mode 100644 index 0000000..fe82710 --- /dev/null +++ b/src/types/value.rs @@ -0,0 +1,48 @@ +#[derive(Clone, Copy)] +pub enum Value { + Byte(u8), + Double(u16), +} + +impl From<Value> for usize { + fn from(value: Value) -> Self { + match value { + Value::Byte(byte) => byte.into(), + Value::Double(double) => double.into(), + } + } +} + +impl From<&Value> for usize { + fn from(value: &Value) -> Self { + (*value).into() + } +} + +impl std::fmt::Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match self { + Self::Byte(value) => write!(f, "0x{value:02x}"), + Self::Double(value) => write!(f, "0x{value:04x}"), + } + } +} + + +impl std::str::FromStr for Value { + type Err = (); + + fn from_str(token: &str) -> Result<Self, Self::Err> { + match token.len() { + 2 => match u8::from_str_radix(&token, 16) { + Ok(value) => Ok(Value::Byte(value)), + Err(_) => Err(()), + } + 4 => match u16::from_str_radix(&token, 16) { + Ok(value) => Ok(Value::Double(value)), + Err(_) => Err(()), + } + _ => Err(()), + } + } +}
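As a usage note for the new types (a minimal sketch, not part of this commit, again assuming the bedrock_asm crate is available): mnemonics parse into Instruction with the mode flags folded into the upper bits, and hexadecimal tokens parse into Value by length.

use bedrock_asm::*;

fn value_and_instruction_examples() {
    // "ADD*:" is ADD (0x10) with the wide (0x40) and immediate (0x20) bits set: 0x70.
    let add: Instruction = "ADD*:".parse().unwrap();
    assert_eq!(add.value, 0x70);
    assert!(add.wide_mode() && add.immediate_mode() && !add.return_mode());

    // Two hex digits parse as a byte literal, four digits as a double; other lengths are rejected.
    assert!(matches!("4c".parse::<Value>(), Ok(Value::Byte(0x4c))));
    assert!(matches!("1234".parse::<Value>(), Ok(Value::Double(0x1234))));
    assert!("123".parse::<Value>().is_err());
}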