diff options
| author | Ben Bridle <bridle.benjamin@gmail.com> | 2025-07-03 14:56:25 +1200 | 
|---|---|---|
| committer | Ben Bridle <ben@derelict.engineering> | 2025-07-03 21:22:31 +1200 | 
| commit | 21101c197643836184e754c60d5075ee5a2d3cdf (patch) | |
| tree | cc4533251bfce394e913009473b7dd49b3bbb1ac /src | |
| download | bedrock-asm-21101c197643836184e754c60d5075ee5a2d3cdf.zip | |
Initial commit
Diffstat (limited to 'src')
| -rw-r--r-- | src/bin/br-asm.rs | 8 | ||||
| -rw-r--r-- | src/formats/clang.rs | 10 | ||||
| -rw-r--r-- | src/formats/mod.rs | 23 | ||||
| -rw-r--r-- | src/lib.rs | 244 | ||||
| -rw-r--r-- | src/stages/bytecode.rs | 150 | ||||
| -rw-r--r-- | src/stages/bytecode_tokens.rs | 37 | ||||
| -rw-r--r-- | src/stages/compiler.rs | 84 | ||||
| -rw-r--r-- | src/stages/mod.rs | 26 | ||||
| -rw-r--r-- | src/stages/semantic.rs | 141 | ||||
| -rw-r--r-- | src/stages/semantic_tokens.rs | 94 | ||||
| -rw-r--r-- | src/stages/syntactic.rs | 211 | ||||
| -rw-r--r-- | src/stages/syntactic_tokens.rs | 94 | ||||
| -rw-r--r-- | src/types/instruction.rs | 168 | ||||
| -rw-r--r-- | src/types/mod.rs | 4 | ||||
| -rw-r--r-- | src/types/value.rs | 48 | 
15 files changed, 1342 insertions, 0 deletions
| diff --git a/src/bin/br-asm.rs b/src/bin/br-asm.rs new file mode 100644 index 0000000..e7a9230 --- /dev/null +++ b/src/bin/br-asm.rs @@ -0,0 +1,8 @@ +use bedrock_asm::*; +use switchboard::*; + + +fn main() { +    let args = Switchboard::from_env(); +    assemble(args, "br-asm"); +} diff --git a/src/formats/clang.rs b/src/formats/clang.rs new file mode 100644 index 0000000..524b501 --- /dev/null +++ b/src/formats/clang.rs @@ -0,0 +1,10 @@ +pub fn format_clang(bytecode: &[u8]) -> Vec<u8> { +    let mut output = String::new(); +    for chunk in bytecode.chunks(16) { +        for byte in chunk { +            output.push_str(&format!("0x{byte:02X}, ")); +        } +        output.push('\n'); +    } +    return output.into_bytes(); +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs new file mode 100644 index 0000000..79b1c51 --- /dev/null +++ b/src/formats/mod.rs @@ -0,0 +1,23 @@ +mod clang; +pub use clang::*; + +use crate::*; + + +#[derive(Clone, Copy, PartialEq)] +pub enum Format { +    Raw, +    Source, +    Clang, +} + +impl Format { +    pub fn from_str(string: &str) -> Self { +        match string { +            "raw" => Self::Raw, +            "source" => Self::Source, +            "c" => Self::Clang, +            _ => fatal!("Unknown format '{string}', expected 'raw', 'c', or 'source'"), +        } +    } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..3f7bf59 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,244 @@ +#![feature(path_add_extension)] + +mod formats; +mod types; +mod stages; +pub use formats::*; +pub use types::*; +pub use stages::*; + +use assembler::*; +use log::*; +use switchboard::*; + +use std::io::Read; +use std::io::Write; + + +pub const RETURN_MODE:    u8 = 0x80; +pub const WIDE_MODE:      u8 = 0x40; +pub const IMMEDIATE_MODE: u8 = 0x20; + + +pub fn assemble(mut args: Switchboard, invocation: &str) -> ! 
{ +    args.named("help").short('h'); +    args.named("version"); +    args.named("verbose").short('v'); + +    if args.get("help").as_bool() { +        print_help(invocation); +        std::process::exit(0); +    } +    if args.get("version").as_bool() { +        let name = env!("CARGO_PKG_NAME"); +        let version = env!("CARGO_PKG_VERSION"); +        eprintln!("{name} v{version}"); +        eprintln!("Written by Ben Bridle."); +        std::process::exit(0); +    } +    if args.get("verbose").as_bool() { +        log::set_log_level(log::LogLevel::Info); +    } + +    args.positional("source"); +    args.positional("destination"); +    args.named("extension").default("brc"); + +    args.named("no-libs"); +    args.named("no-project-libs"); +    args.named("no-env-libs"); +    args.named("no-truncate"); + +    args.named("format").default("raw"); +    args.named("dry-run").short('n'); +    args.named("tree"); +    args.named("with-symbols"); +    args.raise_errors(); + +    let source_path        = args.get("source").as_path_opt().map( +        |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); +    let destination_path   = args.get("destination").as_path_opt(); +    let extension          = args.get("extension").as_string(); +    let opt_extension      = Some(extension.as_str()); + +    let no_libs            = args.get("no-libs").as_bool(); +    let no_project_libs    = args.get("no-project-libs").as_bool(); +    let no_env_libs        = args.get("no-env-libs").as_bool(); +    let no_truncate        = args.get("no-truncate").as_bool(); + +    let format             = Format::from_str(args.get("format").as_str()); +    let dry_run            = args.get("dry-run").as_bool(); +    let print_tree         = args.get("tree").as_bool(); +    let export_symbols     = args.get("with-symbols").as_bool(); + +    // ----------------------------------------------------------------------- + +    let mut compiler = new_compiler(); + +    if let Some(path) = 
&source_path { +        info!("Reading program source from {path:?}"); +        compiler.root_from_path(path).unwrap_or_else(|err| fatal!("{err:?}: {path:?}")); +    } else { +        let mut source_code = String::new(); +        info!("Reading program source from standard input"); +        if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { +            fatal!("Could not read from standard input\n{err:?}"); +        } +        compiler.root_from_string(source_code, "<standard input>") +    }; +    if compiler.error().is_some() && !no_libs && !no_project_libs { +        compiler.include_libs_from_parent(opt_extension); +    } +    if compiler.error().is_some() && !no_libs && !no_env_libs { +        compiler.include_libs_from_path_variable("BEDROCK_LIBS", opt_extension); +    } + +    if print_tree { +        compiler.hierarchy().report() +    } +    if let Some(error) = compiler.error() { +        error.report(); +        std::process::exit(1); +    } + +    let merged_source = compiler.get_compiled_source().unwrap_or_else(|error| { +        error.report(); +        std::process::exit(1); +    }); + +    if !dry_run && format == Format::Source { +        write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); +    } + +    // ----------------------------------------------------------------------- + +    let path = Some("<merged source>"); +    let syntactic = match parse_syntactic(&merged_source, path) { +        Ok(tokens) => tokens, +        Err(errors) => { +            report_syntactic_errors(&errors, &merged_source); +            std::process::exit(1); +        } +    }; + +    let semantic = match parse_semantic(syntactic) { +        Ok(tokens) => tokens, +        Err(errors) => { +            report_semantic_errors(&errors, &merged_source); +            std::process::exit(1); +        } +    }; + +    let program = match generate_bytecode(&semantic) { +        Ok(program) => program, +        Err(errors) => { +            
report_bytecode_errors(&errors, &merged_source); +            std::process::exit(1); +        } +    }; + +    let AssembledProgram { mut bytecode, symbols } = program; + +    let length = bytecode.len(); +    let percentage = (length as f32 / 65536.0 * 100.0).round() as u16; +    info!("Assembled program in {length} bytes ({percentage}% of maximum)"); + +    if !no_truncate { +        // Remove null bytes from end of bytecode. +        while let Some(0) = bytecode.last() { +            bytecode.pop(); +        } +        let difference = length - bytecode.len(); +        if difference > 0 { +            info!("Truncated program to {length} bytes (saved {difference} bytes)"); +        } +    } + +    if !dry_run { +        if export_symbols { +            if let Some(path) = &destination_path { +                let mut symbols_path = path.to_path_buf(); +                symbols_path.add_extension("sym"); +                let mut symbols_string = String::new(); +                for symbol in &symbols { +                    let address = &symbol.address; +                    let name = &symbol.name; +                    let location = &symbol.source.location(); +                    symbols_string.push_str(&format!( +                        "{address:04x} {name} {location}\n" +                    )); +                } +                match std::fs::write(&symbols_path, symbols_string) { +                    Ok(_) => info!("Saved symbols to {symbols_path:?}"), +                    Err(err) => info!("Could not write symbols to {symbols_path:?}\n{err:?}"), +                } +            } +        } + +        let bytes = match format { +            Format::Raw => bytecode, +            Format::Clang => format_clang(&bytecode), +            Format::Source => unreachable!("Source output is handled before full assembly"), +        }; +        write_bytes_and_exit(&bytes, destination_path.as_ref()); +    } +    std::process::exit(0); +} + + +fn write_bytes_and_exit<P: 
AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { +    match path { +        Some(path) => match std::fs::write(path, bytes) { +            Ok(_) => info!("Wrote output to {:?}", path.as_ref()), +            Err(err) => fatal!("Could not write to {:?}\n{err:?}", path.as_ref()), +        } +        None => match std::io::stdout().write_all(bytes) { +            Ok(_) => info!("Wrote output to standard output"), +            Err(err) => fatal!("Could not write to standard output\n{err:?}"), +        } +    } +    std::process::exit(0); +} + + +fn print_help(invocation: &str) { +    eprintln!("\ +Usage: {invocation} [source] [destination] + +Assembler for the Bedrock computer system. + +Usage: +  To assemble a Bedrock program from a source file and write to an output +  file, run `br-asm [source] [destination]`, where [source] is the path +  of the source file and [destination] is the path to write to. + +  If [destination] is omitted, the assembled program will be written to +  standard output. If [source] is omitted, the program source code will +  be read from standard input. + +Environment variables: +  BEDROCK_LIBS +    A list of colon-separated paths that will be searched to find Bedrock +    source code files to use as libraries when assembling a Bedrock program. +    If a library file resolves an unresolved symbol in the program being +    assembled, the library file will be merged into the program. + +Arguments: +  [source]               Bedrock source code file to assemble. +  [destination]          Destination path for assembler output. 
+ +Switches: +  --dry-run        (-n)  Assemble and show errors only, don't write any output +  --extension=<ext>      File extension to identify source files (default is 'brc') +  --format=<fmt>         Output format to use for assembled program (default is 'raw') +  --no-project-libs      Don't search for libraries in the source parent folder +  --no-env-libs          Don't search for libraries in the BEDROCK_LIBS path variable +  --no-libs              Combination of --no-project-libs and --no-env-libs +  --no-truncate          Don't remove trailing zero-bytes from the assembled program +  --tree                 Show a tree diagram of all included library files +  --with-symbols         Also generate debug symbols file with extension '.sym' +  --help           (-h)  Print this help information +  --verbose,       (-v)  Print additional information +  --version              Print the program version and exit +"); +} diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..f0b99df --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,150 @@ +use crate::*; + +use std::collections::HashMap; + + +/// Doesn't truncate trailing null bytes. 
+pub fn generate_bytecode(semantic: &Program) -> Result<AssembledProgram, Vec<Tracked<BytecodeError>>> { +    let mut generator = BytecodeGenerator::new(&semantic.definitions); +    generator.parse(&semantic.tokens, false); +    generator.fill_slots(); +    let mut symbols = Vec::new(); +    for (name, information) in generator.labels { +        let source = semantic.definitions.get(&name).unwrap().source.clone(); +        let address = information.address; +        symbols.push(AssembledSymbol { name, address, source }); +    } +    match generator.errors.is_empty() { +        true => Ok(AssembledProgram { bytecode: generator.bytecode, symbols }), +        false => Err(generator.errors), +    } +} + + +pub struct BytecodeGenerator<'a> { +    definitions: &'a HashMap<String, Tracked<Definition>>, +    labels: HashMap<String, LabelInformation>, +    stack: Vec<usize>, +    bytecode: Vec<u8>, +    errors: Vec<Tracked<BytecodeError>>, +} + +struct LabelInformation { +    address: usize, +    slots: Vec<usize>, +} + +impl<'a> BytecodeGenerator<'a> { +    pub fn new(definitions: &'a HashMap<String, Tracked<Definition>>) -> Self { +        let mut labels = HashMap::new(); +        for (name, definition) in definitions { +            if let DefinitionVariant::LabelDefinition = definition.variant { +                // Use fake address for now. +                let information = LabelInformation { address: 0, slots: Vec::new() }; +                labels.insert(name.to_string(), information); +            } +        } +        Self { +            definitions, +            labels, +            stack: Vec::new(), +            bytecode: Vec::new(), +            errors: Vec::new(), +        } +    } + +    pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) { +        macro_rules! byte { +            ($byte:expr) => { self.bytecode.push($byte) }; +        } +        macro_rules! 
double { +            ($double:expr) => {{ +                let [high, low] = u16::to_be_bytes($double); +                self.bytecode.push(high); self.bytecode.push(low); +            }}; +        } + +        for token in tokens { +            let i = self.bytecode.len(); +            match &token.value { +                SemanticToken::Literal(value) => match value { +                    Value::Byte(byte) => byte!(*byte), +                    Value::Double(double) => double!(*double), +                } +                SemanticToken::Pad(value) => { +                    self.bytecode.resize(i + usize::from(value), 0); +                }, +                SemanticToken::String(bytes) => { +                    self.bytecode.extend_from_slice(bytes) +                }, +                SemanticToken::Comment(_) => (), +                SemanticToken::BlockOpen(_) => { +                    self.stack.push(i); +                    // Use a fake index for now. +                    double!(0); +                } +                SemanticToken::BlockClose(_) => { +                    if i > 0xFFFF { +                        let error = BytecodeError::InvalidBlockAddress(i); +                        self.errors.push(Tracked::from(error, token.source.clone())); +                    } +                    let Some(addr) = self.stack.pop() else { +                        unreachable!("Uncaught unmatched block terminator"); +                    }; +                    let [high, low] = (i as u16).to_be_bytes(); +                    self.bytecode[addr] = high; +                    self.bytecode[addr+1] = low; +                } +                SemanticToken::Symbol(name) => { +                    if let Some(definition) = self.definitions.get(name) { +                        match &definition.variant { +                            DefinitionVariant::MacroDefinition(body) => { +                                self.parse(body, true); +                            } +           
                 DefinitionVariant::LabelDefinition => { +                                let information = self.labels.get_mut(name).unwrap(); +                                information.slots.push(i); +                                // Use a fake index for now. +                                double!(0); +                            } +                        } +                    } else { +                        unreachable!("Uncaught undefined symbol '{name}'"); +                    } +                } +                SemanticToken::Instruction(instruction) => { +                    byte!(instruction.value) +                } +                SemanticToken::LabelDefinition(name) => if in_macro { +                    unreachable!("Uncaught label definition in macro"); +                } else { +                    if i > 0xFFFF { +                        let error = BytecodeError::InvalidLabelAddress(i); +                        self.errors.push(Tracked::from(error, token.source.clone())); +                    } +                    let information = self.labels.get_mut(name).unwrap(); +                    // Replace fake index with real index. +                    information.address = i; +                } +                SemanticToken::MacroDefinition{ .. } => if in_macro { +                    unreachable!("Uncaught macro definition in macro"); +                } +            } +        } + +        if !in_macro && !self.stack.is_empty() { +            unreachable!("Uncaught unterminated block"); +        } +    } + +    /// Fill each label slot with a real label address. 
+    pub fn fill_slots(&mut self) { +        for information in self.labels.values() { +            let [high, low] = (information.address as u16).to_be_bytes(); +            for addr in &information.slots { +                self.bytecode[*addr] = high; +                self.bytecode[*addr + 1] = low; +            } +        } +    } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..902fcd7 --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,37 @@ +use crate::*; + + +pub struct AssembledProgram { +    pub bytecode: Vec<u8>, +    pub symbols: Vec<AssembledSymbol>, +} + +pub struct AssembledSymbol { +    pub name: String, +    pub address: usize, +    pub source: SourceSpan, +} + +pub enum BytecodeError { +    InvalidLabelAddress(usize), +    InvalidBlockAddress(usize), +} + + +pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { +    for error in errors { +        report_bytecode_error(error, source_code); +    } +} + + +fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        BytecodeError::InvalidLabelAddress(address) => +            &format!("The label address exceeds 0xFFFF: 0x{address:X}"), +        BytecodeError::InvalidBlockAddress(address) => +            &format!("The block address exceeds 0xFFFF: 0x{address:X}"), +    }; +    report_source_issue(LogLevel::Error, &context, message); +} diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs new file mode 100644 index 0000000..97bf20c --- /dev/null +++ b/src/stages/compiler.rs @@ -0,0 +1,84 @@ +use crate::*; + +use assembler::SymbolRole::*; +use assembler::DefinitionType::*; + + +pub fn new_compiler() -> Compiler { +    Compiler::new(parse_symbols, push_code) +} + + +/// Parse all symbols from a source code string. 
+pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { +    let syntactic = match parse_syntactic(source_code, path) { +        Ok(syntactic) => syntactic, +        Err(_) => return None, +    }; +    Some(SymbolParser::new().parse(&syntactic)) +} + +/// Push source code to a source compilation string. +pub fn push_code(compilation: &mut String, source_file: &SourceFile) { +    // Skip blank files. +    let source_code = &source_file.source_code; +    if source_code.chars().all(|c| c.is_whitespace()) { return; } +    // Ensure that the previous section is followed by two newline characters. +    if !compilation.is_empty() { +        if !compilation.ends_with('\n') { compilation.push('\n'); } +        if !compilation.ends_with("\n\n") { compilation.push('\n'); } +    } +    // Push a path comment and the source code. +    let path_str = source_file.path.as_os_str().to_string_lossy(); +    let path_comment = format!("(: {path_str} )\n"); +    compilation.push_str(&path_comment); +    compilation.push_str(&source_code); +} + + +// Extract symbol definitions from a list of syntactic tokens. 
+pub struct SymbolParser { +    pub symbols: Vec<Symbol>, +} + +impl SymbolParser { +    pub fn new() -> Self { +        Self { +            symbols: Vec::new(), +        } +    } + +    fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { +        let name = name.to_string(); +        let namespace = Vec::new(); +        let source = source.to_owned(); +        self.symbols.push(Symbol { name, namespace, source, role }); +    } + +    pub fn parse(mut self, syntactic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> { +        for token in syntactic { +            match &token.value { +                SyntacticToken::MacroDefinition(definition) => { +                    self.record_symbol( +                        &definition.name, +                        &definition.name.source, +                        Definition(MustPrecedeReference), +                    ); +                    for token in &definition.body { +                        if let SyntacticToken::Symbol(name) = &token.value { +                            self.record_symbol(&name, &token.source, Reference); +                        } +                    } +                } +                SyntacticToken::LabelDefinition(name) => { +                    self.record_symbol(&name, &token.source, Definition(CanFollowReference)); +                } +                SyntacticToken::Symbol(name) => { +                    self.record_symbol(&name, &token.source, Reference); +                } +                _ => (), +            } +        } +        return self.symbols; +    } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..76bda0d --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,26 @@ +mod compiler; +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod bytecode; +mod bytecode_tokens; +pub use compiler::*; +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub 
/// Convert a flat list of syntactic tokens into a semantic `Program`.
///
/// Works in two passes over `syntactic`:
///
/// 1. Every label definition is registered in `definitions` (with a
///    placeholder token index) and every macro name is collected into
///    `macro_names`.
/// 2. Each syntactic token is converted into a `SemanticToken`. Block
///    delimiters are paired up by index, symbol references are recorded on
///    the definition they name, and each macro definition is registered in
///    `definitions` at the point where it appears in the token stream.
///
/// Returns `Ok(Program)` when no errors were collected. Otherwise returns
/// `Err` with one `InvocationBeforeDefinition` error for every symbol that
/// names a macro which had not been registered yet when the symbol was seen.
///
/// # Panics
///
/// Panics via `unreachable!` on duplicate definitions, undefined symbols,
/// unbalanced blocks, and label or macro definitions inside a macro body.
/// The "Uncaught" wording of the messages indicates that these conditions
/// are expected to have been rejected before this function is called.
pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> {
    // Record all label definitions and macro names up front.
    let mut definitions = HashMap::new();
    let mut macro_names = HashSet::new();
    for token in &syntactic {
        match &token.value {
            SyntacticToken::LabelDefinition(name) => {
                // Use a fake index for now.
                let definition = Definition::new(0, DefinitionVariant::LabelDefinition);
                let tracked = Tracked::from(definition, token.source.clone());
                if let Some(_) = definitions.insert(name.clone(), tracked) {
                    unreachable!("Uncaught duplicate label definition '{name}'");
                }
            }
            SyntacticToken::MacroDefinition(definition) => {
                // Only the name is recorded here; the macro itself is added
                // to `definitions` when its token is converted below.
                let name = &definition.name;
                if !macro_names.insert(name.clone()) {
                    unreachable!("Uncaught duplicate macro definition '{name}'")
                }
            }
            _ => (),
        }
    }

    // Convert syntactic tokens to semantic tokens.
    let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new();
    let mut errors = Vec::new();
    // Indices of block-open tokens that have not been closed yet.
    let mut stack = Vec::new();

    for syn_token in syntactic {
        // Index that the token produced by this iteration will occupy in
        // `tokens` (exactly one token is pushed per iteration).
        let i = tokens.len();
        let sem_token = match syn_token.value {
            SyntacticToken::Literal(value) => SemanticToken::Literal(value),
            SyntacticToken::Pad(value) => SemanticToken::Pad(value),
            SyntacticToken::String(bytes) => SemanticToken::String(bytes),
            SyntacticToken::Comment(string) => SemanticToken::Comment(string),
            SyntacticToken::BlockOpen => {
                stack.push(i);
                // Use a fake index for now.
                SemanticToken::BlockOpen(0)
            }
            SyntacticToken::BlockClose => {
                let Some(k) = stack.pop() else {
                    unreachable!("Uncaught unmatched block terminator");
                };
                // Replace fake index with real index.
                tokens[k].value = SemanticToken::BlockOpen(i);
                SemanticToken::BlockClose(k)
            }
            SyntacticToken::Symbol(symbol) => {
                // `definitions` holds every label plus the macros converted
                // so far, so a symbol found only in `macro_names` refers to
                // a macro whose definition has not been reached yet.
                if let Some(definition) = definitions.get_mut(&symbol) {
                    definition.value.references.push(i);
                } else if let Some(definition) = macro_names.get(&symbol) {
                    let error = SemanticError::InvocationBeforeDefinition;
                    let source = syn_token.source.wrap(definition.source.clone());
                    errors.push(Tracked::from(error, source));
                } else {
                    unreachable!("Uncaught undefined symbol '{symbol}'");
                };
                SemanticToken::Symbol(symbol)
            }
            SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
            SyntacticToken::LabelDefinition(name) => {
                // Every label was inserted during the first pass.
                let definition = definitions.get_mut(&name).unwrap();
                // Replace fake index with real index.
                definition.value.definition = i;
                SemanticToken::LabelDefinition(name)
            }
            SyntacticToken::MacroDefinition(definition) => {
                let name = definition.name.clone();
                // The body is converted with its own token list and block
                // stack, so block indices inside a macro are relative to the
                // start of that body.
                let mut body: Vec<Tracked<SemanticToken>> = Vec::new();
                let mut body_stack = Vec::new();
                for syn_token in definition.body {
                    // Index that this token will occupy in `body`.
                    let j = body.len();
                    let sem_token = match syn_token.value {
                        SyntacticToken::Literal(value) => SemanticToken::Literal(value),
                        SyntacticToken::Pad(value) => SemanticToken::Pad(value),
                        SyntacticToken::String(bytes) => SemanticToken::String(bytes),
                        SyntacticToken::Comment(string) => SemanticToken::Comment(string),
                        SyntacticToken::BlockOpen => {
                            body_stack.push(j);
                            // Use a fake index for now.
                            SemanticToken::BlockOpen(0)
                        }
                        SyntacticToken::BlockClose => {
                            let Some(k) = body_stack.pop() else {
                                unreachable!("Uncaught unmatched block terminator in macro '{name}'");
                            };
                            // Replace fake index with real index.
                            body[k].value = SemanticToken::BlockOpen(j);
                            SemanticToken::BlockClose(k)
                        }
                        SyntacticToken::Symbol(symbol) => {
                            // A reference inside a macro body is stored as
                            // (index of the macro definition token, index
                            // within that macro's body).
                            if let Some(definition) = definitions.get_mut(&symbol) {
                                definition.value.deep_references.push((i, j));
                            } else if let Some(definition) = macro_names.get(&symbol) {
                                let error = SemanticError::InvocationBeforeDefinition;
                                let source = syn_token.source.wrap(definition.source.clone());
                                errors.push(Tracked::from(error, source));
                            } else {
                                unreachable!("Uncaught undefined symbol '{symbol}' in macro '{name}'");
                            };
                            SemanticToken::Symbol(symbol)
                        }
                        SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
                        SyntacticToken::LabelDefinition(label) =>
                            unreachable!("Uncaught label definition '{label}' in macro '{name}'"),
                        SyntacticToken::MacroDefinition(definition) =>
                            unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name),
                    };
                    body.push(Tracked::from(sem_token, syn_token.source));
                }

                // Register the macro only after its body has been converted,
                // so symbols seen before this point cannot resolve to it.
                let variant = DefinitionVariant::MacroDefinition(body);
                let source = definition.name.source.clone();
                let tracked = Tracked::from(Definition::new(i, variant), source);
                if let Some(_) = definitions.insert(name.value.clone(), tracked) {
                    unreachable!("Uncaught duplicate definition '{name}'")
                };
                if !body_stack.is_empty() {
                    unreachable!("Uncaught unterminated block in macro '{name}'");
                }
                SemanticToken::MacroDefinition(name)
            }
        };
        tokens.push(Tracked::from(sem_token, syn_token.source));
    }

    if !stack.is_empty() {
        unreachable!("Uncaught unterminated block");
    }
    match errors.is_empty() {
        true => Ok(Program { definitions, tokens }),
        false => Err(errors),
    }
}
+    pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { +    pub fn new(i: usize, variant: DefinitionVariant) -> Self { +        Self { +            variant, +            definition: i, +            references: Vec::new(), +            deep_references: Vec::new(), +        } +    } +} + +pub enum DefinitionVariant { +    LabelDefinition, +    MacroDefinition(Vec<Tracked<SemanticToken>>), +} + +pub enum SemanticToken { +    Literal(Value), +    Pad(Value), +    String(Vec<u8>), +    Comment(String), +    BlockOpen(usize),   // index to matching block-close +    BlockClose(usize),  // index to matching block-open +    Symbol(String), +    Instruction(Instruction), +    LabelDefinition(String), +    MacroDefinition(Tracked<String>), +} + +pub enum SemanticError { +    InvocationBeforeDefinition, +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { +    for error in errors { +        report_semantic_error(error, source_code); +    } +} + + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        SemanticError::InvocationBeforeDefinition => +            "Macro cannot be invoked before it has been defined", +    }; +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &HashMap<String, Tracked<Definition>>) { +    match token { +        SemanticToken::Literal(value) => indent!(i, "Literal({value})"), +        SemanticToken::Pad(value) => indent!(i, "Pad({value})"), +        SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), +        SemanticToken::Comment(_) => indent!(i, "Comment"), +        SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"), +        SemanticToken::BlockClose(pointer) => indent!(i, 
"BlockClose(*{pointer})"), +        SemanticToken::Symbol(name) => indent!(i, "Symbol({name})"), +        SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), +        SemanticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), +        SemanticToken::MacroDefinition(name) => { +            indent!(i, "MacroDefinition({name})"); +            if let Some(definition) = definitions.get(name.as_str()) { +                if let DefinitionVariant::MacroDefinition(body) = &definition.variant { +                    for token in body { +                        print_semantic_token(i+1, token, definitions); +                    } +                } +            } +        } +    } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..59b8b95 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,211 @@ +use crate::*; + +use std::path::PathBuf; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +    parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), "") +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +    t.add_delimiters(&['(',')','[',']','{','}',';']); +    t.add_terminators(&[':']); +    let mut tokens = Vec::new(); +    let mut errors = Vec::new(); +    let mut label_name = label_name.to_string(); + +    macro_rules! err { +        ($error:expr) => {{ +            err!($error, t.get_source()); +        }}; +        ($error:expr, $source:expr) => {{ +            errors.push(Tracked::from($error, $source)); +            continue; +        }}; +    } + +    macro_rules! 
check_name { +        ($name:expr) => {{ +            check_name!($name, t.get_source()); +        }}; +        ($name:expr, $source:expr) => { +            if $name.chars().count() > 63 { +                let error = SyntacticError::InvalidIdentifier($name.clone()); +                errors.push(Tracked::from(error, $source.clone())); +            } +        }; +    } + +    // Eat characters until the end character is found. +    macro_rules! is_end { +        ($end:expr) => { +            |t: &mut Tokeniser| { +                t.eat_char() == Some($end) +            } +        }; +    } + +    loop { +        // Eat leading whitespace. +        while let Some(c) = t.peek_char() { +            match [' ', '\n', '\r', '\t'].contains(&c) { +                true => t.eat_char(), +                false => break, +            }; +        } +        t.mark_start(); +        let Some(c) = t.eat_char() else { break }; +        let token = match c { +            '"' => { +                let source = t.get_source(); +                match t.track_until(is_end!('"')) { +                    Some(string) => { +                        let mut bytes = string.into_bytes(); +                        bytes.push(0x00); +                        SyntacticToken::String(bytes) +                    } +                    None => err!(SyntacticError::UnterminatedNullString, source), +                } +            } +            '\'' => { +                let source = t.get_source(); +                match t.track_until(is_end!('\'')) { +                    Some(string) => SyntacticToken::String(string.into_bytes()), +                    None => err!(SyntacticError::UnterminatedRawString, source), +                } +            } +            '(' => { +                let source = t.get_source(); +                if let Some(string) = t.track_until(is_end!(')')) { +                    // Check if the comment fills the entire line. 
+                    if t.start.position.column == 0 && t.end_of_line() { +                        if let Some(path) = string.strip_prefix(": ") { +                            t.embedded_path = Some(PathBuf::from(path.trim())); +                            t.embedded_first_line = t.start.position.line + 1; +                            continue; +                        } +                    } +                    SyntacticToken::Comment(string) +                } else { +                    err!(SyntacticError::UnterminatedComment, source) +                } +            } +            ')' => err!(SyntacticError::UnmatchedCommentTerminator), +            '%' => { +                let name = t.eat_token(); +                let source = t.get_source(); +                check_name!(name, source); +                t.mark_child(); +                if let Some(_) = t.track_until(is_end!(';')) { +                    let child = t.tokenise_child_span(); +                    match parse_body_from_tokeniser(child, &label_name) { +                        Ok(body) => { +                            let name = Tracked::from(name, source); +                            let definition = SyntacticMacroDefinition { name, body }; +                            SyntacticToken::MacroDefinition(definition) +                        } +                        Err(mut err) => { +                            errors.append(&mut err); +                            continue; +                        } +                    } +                } else { +                    err!(SyntacticError::UnterminatedMacroDefinition, source); +                } +            } +            ';' => err!(SyntacticError::UnmatchedMacroTerminator), +            '{' => SyntacticToken::BlockOpen, +            '}' => SyntacticToken::BlockClose, +            '['|']' => continue, +            '@' => { +                label_name = t.eat_token(); +                check_name!(label_name); +                
SyntacticToken::LabelDefinition(label_name.clone()) +            } +            '&' => { +                let name = format!("{label_name}/{}", t.eat_token()); +                check_name!(name); +                SyntacticToken::LabelDefinition(name) +            } +            '~' => { +                let name = format!("{label_name}/{}", t.eat_token()); +                check_name!(name); +                SyntacticToken::Symbol(name) +            } +            '#' => { +                let token = t.eat_token(); +                match token.parse::<Value>() { +                    Ok(value) => SyntacticToken::Pad(value), +                    Err(_) => err!(SyntacticError::InvalidPadValue), +                } +            }, +            ':' => { +                SyntacticToken::Instruction(Instruction { value: 0x21 }) +            } +            c => { +                let token = format!("{c}{}", t.eat_token()); +                if let Ok(value) = token.parse::<Value>() { +                    SyntacticToken::Literal(value) +                } else if let Ok(instruction) = token.parse::<Instruction>() { +                    SyntacticToken::Instruction(instruction) +                } else { +                    check_name!(token); +                    SyntacticToken::Symbol(token) +                } +            } +        }; + +        t.mark_end(); +        let source = t.get_source(); +        tokens.push(Tracked::from(token, source)); +    } + +    // Check that every block open matches a block close. 
+    let mut stack = Vec::new(); +    for token in &tokens { +        match &token.value { +            SyntacticToken::BlockOpen => stack.push(token.source.clone()), +            SyntacticToken::BlockClose => if let None = stack.pop() { +                let error = SyntacticError::UnmatchedBlockTerminator; +                errors.push(Tracked::from(error, token.source.clone())); +            } +            _ => (), +        } +    } +    for source in stack { +        let error = SyntacticError::UnterminatedBlock; +        errors.push(Tracked::from(error, source)); +    } + +    match errors.is_empty() { +        true => Ok(tokens), +        false => Err(errors), +    } +} + + +fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +    let mut tokens = Vec::new(); +    let mut errors = Vec::new(); + +    for token in parse_syntactic_from_tokeniser(t, label_name)? { +        match token.value { +            SyntacticToken::LabelDefinition(_) => { +                let error = SyntacticError::LabelDefinitionInMacroDefinition; +                errors.push(Tracked::from(error, token.source)); +                continue; +            } +            SyntacticToken::MacroDefinition(_) => { +                let error = SyntacticError::MacroDefinitionInMacroDefinition; +                errors.push(Tracked::from(error, token.source)); +                continue; +            } +            _ => tokens.push(token), +        }; +    } + +    match errors.is_empty() { +        true => Ok(tokens), +        false => Err(errors), +    } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..35afa80 --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,94 @@ +use crate::*; + + +pub enum SyntacticToken { +    Literal(Value), +    Pad(Value), +    String(Vec<u8>), +    Comment(String), +    BlockOpen, +    BlockClose, +    Symbol(String), + 
   Instruction(Instruction), +    LabelDefinition(String), +    MacroDefinition(SyntacticMacroDefinition), +} + +pub struct SyntacticMacroDefinition { +    pub name: Tracked<String>, +    pub body: Vec<Tracked<SyntacticToken>>, +} + +pub enum SyntacticError { +    UnterminatedBlock, +    UnterminatedComment, +    UnterminatedRawString, +    UnterminatedNullString, +    UnterminatedMacroDefinition, +    UnmatchedBlockTerminator, +    UnmatchedCommentTerminator, +    UnmatchedMacroTerminator, +    InvalidPadValue, +    InvalidIdentifier(String), +    MacroDefinitionInMacroDefinition, +    LabelDefinitionInMacroDefinition, +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { +    for error in errors { +        report_syntactic_error(error, source_code); +    } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        SyntacticError::UnterminatedBlock => +            "Block was not terminated, add a '}' character to terminate", +        SyntacticError::UnterminatedComment => +            "Comment was not terminated, add a ')' character to terminate", +        SyntacticError::UnterminatedRawString => +            "String was not terminated, add a ' character to terminate", +        SyntacticError::UnterminatedNullString => +            "String was not terminated, add a '\"' character to terminate", +        SyntacticError::UnterminatedMacroDefinition => +            "Macro definition was not terminated, add a ';' character to terminate", +        SyntacticError::UnmatchedBlockTerminator => +            "Attempted to terminate a block, but no block was in progress", +        SyntacticError::UnmatchedCommentTerminator => +            "Attempted to terminate a comment, but no comment was in progress", +        SyntacticError::UnmatchedMacroTerminator => +            
"Attempted to terminate a macro definition, but no macro definition was in progress", +        SyntacticError::InvalidPadValue => +            "The pad value must be two or four hexadecimal digits", +        SyntacticError::InvalidIdentifier(name) => +            &format!("An identifier cannot exceed 63 characters in length: {name}"), +        SyntacticError::MacroDefinitionInMacroDefinition => +            "A macro cannot be defined inside another macro", +        SyntacticError::LabelDefinitionInMacroDefinition => +            "A label cannot be defined inside a macro", +    }; +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { +    match token { +        SyntacticToken::Literal(value) => indent!(i, "Literal({value})"), +        SyntacticToken::Pad(value) => indent!(i, "Pad({value})"), +        SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), +        SyntacticToken::Comment(_) => indent!(i, "Comment"), +        SyntacticToken::BlockOpen => indent!(i, "BlockOpen"), +        SyntacticToken::BlockClose => indent!(i, "BlockClose"), +        SyntacticToken::Symbol(name) => indent!(i, "Symbol({name})"), +        SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), +        SyntacticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"), +        SyntacticToken::MacroDefinition(definition) => { +            indent!(i, "MacroDefinition({})", definition.name); +            for token in &definition.body { +                print_syntactic_token(i+1, token); +            } +        } +    } +} diff --git a/src/types/instruction.rs b/src/types/instruction.rs new file mode 100644 index 0000000..252fc68 --- /dev/null +++ b/src/types/instruction.rs @@ -0,0 +1,168 @@ +use crate::*; + +use Operation as Op; + + +pub struct Instruction { +    pub value: u8, +} + +impl Instruction { +    pub fn operation(&self) 
-> Operation { +        match self.value & 0x1f { +            0x00=>Op::HLT, 0x01=>Op::PSH, 0x02=>Op::POP, 0x03=>Op::CPY, +            0x04=>Op::DUP, 0x05=>Op::OVR, 0x06=>Op::SWP, 0x07=>Op::ROT, +            0x08=>Op::JMP, 0x09=>Op::JMS, 0x0a=>Op::JCN, 0x0b=>Op::JCS, +            0x0c=>Op::LDA, 0x0d=>Op::STA, 0x0e=>Op::LDD, 0x0f=>Op::STD, +            0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, +            0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, +            0x18=>Op::SHL, 0x19=>Op::SHR, 0x1a=>Op::ROL, 0x1b=>Op::ROR, +            0x1c=>Op::IOR, 0x1d=>Op::XOR, 0x1e=>Op::AND, 0x1f=>Op::NOT, +            _ => unreachable!(), +        } +    } + +    pub fn return_mode(&self) -> bool { +        self.value & RETURN_MODE != 0 +    } + +    pub fn wide_mode(&self) -> bool { +        self.value & WIDE_MODE != 0 +    } + +    pub fn immediate_mode(&self) -> bool { +        self.value & IMMEDIATE_MODE != 0 +    } +} + +impl std::fmt::Display for Instruction { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +       write!(f, "{}", match self.value { +            // Stack operators +            0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2"  ,0x80=>"DB3" ,0xA0=>"DB4"  ,0xC0=>"DB5"  ,0xE0=>"DB6"   , +            0x01=>"PSH",0x21=>"PSH:",0x41=>"PSH*",0x61=>"PSH*:",0x81=>"PSHr",0xA1=>"PSHr:",0xC1=>"PSHr*",0xE1=>"PSHr*:", +            0x02=>"POP",0x22=>"POP:",0x42=>"POP*",0x62=>"POP*:",0x82=>"POPr",0xA2=>"POPr:",0xC2=>"POPr*",0xE2=>"POPr*:", +            0x03=>"CPY",0x23=>"CPY:",0x43=>"CPY*",0x63=>"CPY*:",0x83=>"CPYr",0xA3=>"CPYr:",0xC3=>"CPYr*",0xE3=>"CPYr*:", +            0x04=>"DUP",0x24=>"DUP:",0x44=>"DUP*",0x64=>"DUP*:",0x84=>"DUPr",0xA4=>"DUPr:",0xC4=>"DUPr*",0xE4=>"DUPr*:", +            0x05=>"OVR",0x25=>"OVR:",0x45=>"OVR*",0x65=>"OVR*:",0x85=>"OVRr",0xA5=>"OVRr:",0xC5=>"OVRr*",0xE5=>"OVRr*:", +            
0x06=>"SWP",0x26=>"SWP:",0x46=>"SWP*",0x66=>"SWP*:",0x86=>"SWPr",0xA6=>"SWPr:",0xC6=>"SWPr*",0xE6=>"SWPr*:", +            0x07=>"ROT",0x27=>"ROT:",0x47=>"ROT*",0x67=>"ROT*:",0x87=>"ROTr",0xA7=>"ROTr:",0xC7=>"ROTr*",0xE7=>"ROTr*:", +            // Control operators +            0x08=>"JMP",0x28=>"JMP:",0x48=>"JMP*",0x68=>"JMP*:",0x88=>"JMPr",0xA8=>"JMPr:",0xC8=>"JMPr*",0xE8=>"JMPr*:", +            0x09=>"JMS",0x29=>"JMS:",0x49=>"JMS*",0x69=>"JMS*:",0x89=>"JMSr",0xA9=>"JMSr:",0xC9=>"JMSr*",0xE9=>"JMSr*:", +            0x0A=>"JCN",0x2A=>"JCN:",0x4A=>"JCN*",0x6A=>"JCN*:",0x8A=>"JCNr",0xAA=>"JCNr:",0xCA=>"JCNr*",0xEA=>"JCNr*:", +            0x0B=>"JCS",0x2B=>"JCS:",0x4B=>"JCS*",0x6B=>"JCS*:",0x8B=>"JCSr",0xAB=>"JCSr:",0xCB=>"JCSr*",0xEB=>"JCSr*:", +            0x0C=>"LDA",0x2C=>"LDA:",0x4C=>"LDA*",0x6C=>"LDA*:",0x8C=>"LDAr",0xAC=>"LDAr:",0xCC=>"LDAr*",0xEC=>"LDAr*:", +            0x0D=>"STA",0x2D=>"STA:",0x4D=>"STA*",0x6D=>"STA*:",0x8D=>"STAr",0xAD=>"STAr:",0xCD=>"STAr*",0xED=>"STAr*:", +            0x0E=>"LDD",0x2E=>"LDD:",0x4E=>"LDD*",0x6E=>"LDD*:",0x8E=>"LDDr",0xAE=>"LDDr:",0xCE=>"LDDr*",0xEE=>"LDDr*:", +            0x0F=>"STD",0x2F=>"STD:",0x4F=>"STD*",0x6F=>"STD*:",0x8F=>"STDr",0xAF=>"STDr:",0xCF=>"STDr*",0xEF=>"STDr*:", +            // Numeric operators +            0x10=>"ADD",0x30=>"ADD:",0x50=>"ADD*",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr:",0xD0=>"ADDr*",0xF0=>"ADDr*:", +            0x11=>"SUB",0x31=>"SUB:",0x51=>"SUB*",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr:",0xD1=>"SUBr*",0xF1=>"SUBr*:", +            0x12=>"INC",0x32=>"INC:",0x52=>"INC*",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr:",0xD2=>"INCr*",0xF2=>"INCr*:", +            0x13=>"DEC",0x33=>"DEC:",0x53=>"DEC*",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr:",0xD3=>"DECr*",0xF3=>"DECr*:", +            0x14=>"LTH",0x34=>"LTH:",0x54=>"LTH*",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr:",0xD4=>"LTHr*",0xF4=>"LTHr*:", +            
0x15=>"GTH",0x35=>"GTH:",0x55=>"GTH*",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr:",0xD5=>"GTHr*",0xF5=>"GTHr*:", +            0x16=>"EQU",0x36=>"EQU:",0x56=>"EQU*",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr:",0xD6=>"EQUr*",0xF6=>"EQUr*:", +            0x17=>"NQK",0x37=>"NQK:",0x57=>"NQK*",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr:",0xD7=>"NQKr*",0xF7=>"NQKr*:", +            // Bitwise operators +            0x18=>"SHL",0x38=>"SHL:",0x58=>"SHL*",0x78=>"SHL*:",0x98=>"SHLr",0xB8=>"SHLr:",0xD8=>"SHLr*",0xF8=>"SHLr*:", +            0x19=>"SHR",0x39=>"SHR:",0x59=>"SHR*",0x79=>"SHR*:",0x99=>"SHRr",0xB9=>"SHRr:",0xD9=>"SHRr*",0xF9=>"SHRr*:", +            0x1A=>"ROL",0x3A=>"ROL:",0x5A=>"ROL*",0x7A=>"ROL*:",0x9A=>"ROLr",0xBA=>"ROLr:",0xDA=>"ROLr*",0xFA=>"ROLr*:", +            0x1B=>"ROR",0x3B=>"ROR:",0x5B=>"ROR*",0x7B=>"ROR*:",0x9B=>"RORr",0xBB=>"RORr:",0xDB=>"RORr*",0xFB=>"RORr*:", +            0x1C=>"IOR",0x3C=>"IOR:",0x5C=>"IOR*",0x7C=>"IOR*:",0x9C=>"IORr",0xBC=>"IORr:",0xDC=>"IORr*",0xFC=>"IORr*:", +            0x1D=>"XOR",0x3D=>"XOR:",0x5D=>"XOR*",0x7D=>"XOR*:",0x9D=>"XORr",0xBD=>"XORr:",0xDD=>"XORr*",0xFD=>"XORr*:", +            0x1E=>"AND",0x3E=>"AND:",0x5E=>"AND*",0x7E=>"AND*:",0x9E=>"ANDr",0xBE=>"ANDr:",0xDE=>"ANDr*",0xFE=>"ANDr*:", +            0x1F=>"NOT",0x3F=>"NOT:",0x5F=>"NOT*",0x7F=>"NOT*:",0x9F=>"NOTr",0xBF=>"NOTr:",0xDF=>"NOTr*",0xFF=>"NOTr*:", +        }) +    } +} + +impl std::str::FromStr for Instruction { +    type Err = (); + +    fn from_str(token: &str) -> Result<Self, Self::Err> { +        Ok( Instruction { value: match token { +            // Stack operators +            "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2"  =>0x60,"DB3" =>0x80,"DB4"  =>0xA0,"DB5"  =>0xC0,"DB6"   =>0xE0, +            "PSH"=>0x01,"PSH:"=>0x21,"PSH*"=>0x41,"PSH*:"=>0x61,"PSHr"=>0x81,"PSHr:"=>0xA1,"PSHr*"=>0xC1,"PSHr*:"=>0xE1, +                           ":"=>0x21,                "*:"=>0x61,                "r:"=>0xA1,                 "r*:"=>0xE1, +            
"POP"=>0x02,"POP:"=>0x22,"POP*"=>0x42,"POP*:"=>0x62,"POPr"=>0x82,"POPr:"=>0xA2,"POPr*"=>0xC2,"POPr*:"=>0xE2, +            "CPY"=>0x03,"CPY:"=>0x23,"CPY*"=>0x43,"CPY*:"=>0x63,"CPYr"=>0x83,"CPYr:"=>0xA3,"CPYr*"=>0xC3,"CPYr*:"=>0xE3, +            "DUP"=>0x04,"DUP:"=>0x24,"DUP*"=>0x44,"DUP*:"=>0x64,"DUPr"=>0x84,"DUPr:"=>0xA4,"DUPr*"=>0xC4,"DUPr*:"=>0xE4, +            "OVR"=>0x05,"OVR:"=>0x25,"OVR*"=>0x45,"OVR*:"=>0x65,"OVRr"=>0x85,"OVRr:"=>0xA5,"OVRr*"=>0xC5,"OVRr*:"=>0xE5, +            "SWP"=>0x06,"SWP:"=>0x26,"SWP*"=>0x46,"SWP*:"=>0x66,"SWPr"=>0x86,"SWPr:"=>0xA6,"SWPr*"=>0xC6,"SWPr*:"=>0xE6, +            "ROT"=>0x07,"ROT:"=>0x27,"ROT*"=>0x47,"ROT*:"=>0x67,"ROTr"=>0x87,"ROTr:"=>0xA7,"ROTr*"=>0xC7,"ROTr*:"=>0xE7, +            // Control operators +            "JMP"=>0x08,"JMP:"=>0x28,"JMP*"=>0x48,"JMP*:"=>0x68,"JMPr"=>0x88,"JMPr:"=>0xA8,"JMPr*"=>0xC8,"JMPr*:"=>0xE8, +            "JMS"=>0x09,"JMS:"=>0x29,"JMS*"=>0x49,"JMS*:"=>0x69,"JMSr"=>0x89,"JMSr:"=>0xA9,"JMSr*"=>0xC9,"JMSr*:"=>0xE9, +            "JCN"=>0x0A,"JCN:"=>0x2A,"JCN*"=>0x4A,"JCN*:"=>0x6A,"JCNr"=>0x8A,"JCNr:"=>0xAA,"JCNr*"=>0xCA,"JCNr*:"=>0xEA, +            "JCS"=>0x0B,"JCS:"=>0x2B,"JCS*"=>0x4B,"JCS*:"=>0x6B,"JCSr"=>0x8B,"JCSr:"=>0xAB,"JCSr*"=>0xCB,"JCSr*:"=>0xEB, +            "LDA"=>0x0C,"LDA:"=>0x2C,"LDA*"=>0x4C,"LDA*:"=>0x6C,"LDAr"=>0x8C,"LDAr:"=>0xAC,"LDAr*"=>0xCC,"LDAr*:"=>0xEC, +            "STA"=>0x0D,"STA:"=>0x2D,"STA*"=>0x4D,"STA*:"=>0x6D,"STAr"=>0x8D,"STAr:"=>0xAD,"STAr*"=>0xCD,"STAr*:"=>0xED, +            "LDD"=>0x0E,"LDD:"=>0x2E,"LDD*"=>0x4E,"LDD*:"=>0x6E,"LDDr"=>0x8E,"LDDr:"=>0xAE,"LDDr*"=>0xCE,"LDDr*:"=>0xEE, +            "STD"=>0x0F,"STD:"=>0x2F,"STD*"=>0x4F,"STD*:"=>0x6F,"STDr"=>0x8F,"STDr:"=>0xAF,"STDr*"=>0xCF,"STDr*:"=>0xEF, +            // Numeric operators +            "ADD"=>0x10,"ADD:"=>0x30,"ADD*"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr:"=>0xB0,"ADDr*"=>0xD0,"ADDr*:"=>0xF0, +            
"SUB"=>0x11,"SUB:"=>0x31,"SUB*"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr:"=>0xB1,"SUBr*"=>0xD1,"SUBr*:"=>0xF1, +            "INC"=>0x12,"INC:"=>0x32,"INC*"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr:"=>0xB2,"INCr*"=>0xD2,"INCr*:"=>0xF2, +            "DEC"=>0x13,"DEC:"=>0x33,"DEC*"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr:"=>0xB3,"DECr*"=>0xD3,"DECr*:"=>0xF3, +            "LTH"=>0x14,"LTH:"=>0x34,"LTH*"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr:"=>0xB4,"LTHr*"=>0xD4,"LTHr*:"=>0xF4, +            "GTH"=>0x15,"GTH:"=>0x35,"GTH*"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr:"=>0xB5,"GTHr*"=>0xD5,"GTHr*:"=>0xF5, +            "EQU"=>0x16,"EQU:"=>0x36,"EQU*"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr:"=>0xB6,"EQUr*"=>0xD6,"EQUr*:"=>0xF6, +            "NQK"=>0x17,"NQK:"=>0x37,"NQK*"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr:"=>0xB7,"NQKr*"=>0xD7,"NQKr*:"=>0xF7, +            // Bitwise operators +            "SHL"=>0x18,"SHL:"=>0x38,"SHL*"=>0x58,"SHL*:"=>0x78,"SHLr"=>0x98,"SHLr:"=>0xB8,"SHLr*"=>0xD8,"SHLr*:"=>0xF8, +            "SHR"=>0x19,"SHR:"=>0x39,"SHR*"=>0x59,"SHR*:"=>0x79,"SHRr"=>0x99,"SHRr:"=>0xB9,"SHRr*"=>0xD9,"SHRr*:"=>0xF9, +            "ROL"=>0x1A,"ROL:"=>0x3A,"ROL*"=>0x5A,"ROL*:"=>0x7A,"ROLr"=>0x9A,"ROLr:"=>0xBA,"ROLr*"=>0xDA,"ROLr*:"=>0xFA, +            "ROR"=>0x1B,"ROR:"=>0x3B,"ROR*"=>0x5B,"ROR*:"=>0x7B,"RORr"=>0x9B,"RORr:"=>0xBB,"RORr*"=>0xDB,"RORr*:"=>0xFB, +            "IOR"=>0x1C,"IOR:"=>0x3C,"IOR*"=>0x5C,"IOR*:"=>0x7C,"IORr"=>0x9C,"IORr:"=>0xBC,"IORr*"=>0xDC,"IORr*:"=>0xFC, +            "XOR"=>0x1D,"XOR:"=>0x3D,"XOR*"=>0x5D,"XOR*:"=>0x7D,"XORr"=>0x9D,"XORr:"=>0xBD,"XORr*"=>0xDD,"XORr*:"=>0xFD, +            "AND"=>0x1E,"AND:"=>0x3E,"AND*"=>0x5E,"AND*:"=>0x7E,"ANDr"=>0x9E,"ANDr:"=>0xBE,"ANDr*"=>0xDE,"ANDr*:"=>0xFE, +            "NOT"=>0x1F,"NOT:"=>0x3F,"NOT*"=>0x5F,"NOT*:"=>0x7F,"NOTr"=>0x9F,"NOTr:"=>0xBF,"NOTr*"=>0xDF,"NOTr*:"=>0xFF, +            _ => return Err(()), +        }}) +    } +} + + +pub enum Operation { +    HLT, PSH, POP, CPY, +    DUP, OVR, SWP, ROT, +    JMP, JMS, 
/// A base operation of the instruction set, without any mode flags applied.
///
/// Variants are declared in ascending opcode order, four per row.
pub enum Operation {
    HLT, PSH, POP, CPY,
    DUP, OVR, SWP, ROT,
    JMP, JMS, JCN, JCS,
    LDA, STA, LDD, STD,
    ADD, SUB, INC, DEC,
    LTH, GTH, EQU, NQK,
    SHL, SHR, ROL, ROR,
    IOR, XOR, AND, NOT,
}

impl From<Operation> for u8 {
    /// Returns the base opcode (0x00..=0x1F) for an operation.
    ///
    /// These values must agree with the mnemonic table used when parsing
    /// instruction names, which assigns `SHL`..`ROR` to 0x18..=0x1B and
    /// `IOR`..`NOT` to 0x1C..=0x1F. The two groups were previously swapped
    /// here, so an opcode built from an `Operation` disagreed with the opcode
    /// assembled from the same mnemonic.
    fn from(operation: Operation) -> Self {
        match operation {
            // Stack and control
            Operation::HLT => 0x00, Operation::PSH => 0x01, Operation::POP => 0x02, Operation::CPY => 0x03,
            Operation::DUP => 0x04, Operation::OVR => 0x05, Operation::SWP => 0x06, Operation::ROT => 0x07,
            Operation::JMP => 0x08, Operation::JMS => 0x09, Operation::JCN => 0x0A, Operation::JCS => 0x0B,
            // Memory and device access
            Operation::LDA => 0x0C, Operation::STA => 0x0D, Operation::LDD => 0x0E, Operation::STD => 0x0F,
            // Arithmetic and comparison
            Operation::ADD => 0x10, Operation::SUB => 0x11, Operation::INC => 0x12, Operation::DEC => 0x13,
            Operation::LTH => 0x14, Operation::GTH => 0x15, Operation::EQU => 0x16, Operation::NQK => 0x17,
            // Bitwise operators
            Operation::SHL => 0x18, Operation::SHR => 0x19, Operation::ROL => 0x1A, Operation::ROR => 0x1B,
            Operation::IOR => 0x1C, Operation::XOR => 0x1D, Operation::AND => 0x1E, Operation::NOT => 0x1F,
        }
    }
}

impl std::fmt::Display for Operation {
    /// Writes the three-letter mnemonic of the operation, with no mode suffix.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let mnemonic = match self {
            Self::HLT => "HLT", Self::PSH => "PSH", Self::POP => "POP", Self::CPY => "CPY",
            Self::DUP => "DUP", Self::OVR => "OVR", Self::SWP => "SWP", Self::ROT => "ROT",
            Self::JMP => "JMP", Self::JMS => "JMS", Self::JCN => "JCN", Self::JCS => "JCS",
            Self::LDA => "LDA", Self::STA => "STA", Self::LDD => "LDD", Self::STD => "STD",
            Self::ADD => "ADD", Self::SUB => "SUB", Self::INC => "INC", Self::DEC => "DEC",
            Self::LTH => "LTH", Self::GTH => "GTH", Self::EQU => "EQU", Self::NQK => "NQK",
            Self::SHL => "SHL", Self::SHR => "SHR", Self::ROL => "ROL", Self::ROR => "ROR",
            Self::IOR => "IOR", Self::XOR => "XOR", Self::AND => "AND", Self::NOT => "NOT",
        };
        f.write_str(mnemonic)
    }
}
/// An integer literal that is either one byte or one 16-bit double wide.
#[derive(Clone, Copy)]
pub enum Value {
    /// An 8-bit value.
    Byte(u8),
    /// A 16-bit value.
    Double(u16),
}

impl From<Value> for usize {
    /// Widens the wrapped integer to `usize`, regardless of variant.
    fn from(value: Value) -> Self {
        match value {
            Value::Byte(byte) => byte.into(),
            Value::Double(double) => double.into(),
        }
    }
}

impl From<&Value> for usize {
    /// Same as the by-value conversion; `Value` is `Copy`, so this just
    /// dereferences and delegates.
    fn from(value: &Value) -> Self {
        (*value).into()
    }
}

impl std::fmt::Display for Value {
    /// Formats the value as `0x`-prefixed lowercase hexadecimal, zero-padded
    /// to two digits for a byte and four digits for a double.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::Byte(value) => write!(f, "0x{value:02x}"),
            Self::Double(value) => write!(f, "0x{value:04x}"),
        }
    }
}


impl std::str::FromStr for Value {
    type Err = ();

    /// Parses a bare hexadecimal literal (no `0x` prefix).
    ///
    /// Exactly two hex digits produce a [`Value::Byte`]; exactly four produce
    /// a [`Value::Double`]. Upper- and lowercase digits are both accepted.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` for any other length, or if any character is not an
    /// ASCII hexadecimal digit.
    fn from_str(token: &str) -> Result<Self, Self::Err> {
        // `from_str_radix` accepts a leading '+', so without this check a
        // token such as "+F" or "+1" would be read as the literal 0x0F or
        // 0x01 instead of being rejected.
        if !token.bytes().all(|byte| byte.is_ascii_hexdigit()) {
            return Err(());
        }
        // All bytes are ASCII here, so the byte length is the digit count.
        match token.len() {
            2 => u8::from_str_radix(token, 16).map(Value::Byte).map_err(|_| ()),
            4 => u16::from_str_radix(token, 16).map(Value::Double).map_err(|_| ()),
            _ => Err(()),
        }
    }
}
