summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock174
-rw-r--r--Cargo.toml15
-rw-r--r--LICENSE.md9
-rw-r--r--rust-toolchain.toml2
-rw-r--r--src/addressing.rs44
-rw-r--r--src/assembler.rs278
-rw-r--r--src/bin/br-asm.rs8
-rw-r--r--src/error.rs10
-rw-r--r--src/formats/clang.rs10
-rw-r--r--src/formats/mod.rs23
-rw-r--r--src/lib.rs258
-rw-r--r--src/main.rs43
-rw-r--r--src/semantic_token.rs116
-rw-r--r--src/stages/bytecode.rs150
-rw-r--r--src/stages/bytecode_tokens.rs37
-rw-r--r--src/stages/compiler.rs84
-rw-r--r--src/stages/mod.rs26
-rw-r--r--src/stages/semantic.rs154
-rw-r--r--src/stages/semantic_tokens.rs97
-rw-r--r--src/stages/syntactic.rs211
-rw-r--r--src/stages/syntactic_tokens.rs94
-rw-r--r--src/syntactic_token.rs43
-rw-r--r--src/tokenizer.rs235
-rw-r--r--src/types/instruction.rs168
-rw-r--r--src/types/mod.rs4
-rw-r--r--src/types/value.rs48
26 files changed, 1547 insertions, 794 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 1ccf90d..ea491c1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,7 +1,177 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 3
+version = 4
[[package]]
-name = "bedrock_asm"
+name = "assembler"
+version = "2.3.0"
+source = "git+git://benbridle.com/assembler?tag=v2.3.0#a9640fce1aaa5e80170ce4d2ac700f66cfffbb4b"
+dependencies = [
+ "inked",
+ "log 2.0.0",
+ "vagabond",
+]
+
+[[package]]
+name = "bedrock-asm"
+version = "1.0.2"
+dependencies = [
+ "assembler",
+ "indexmap",
+ "log 2.0.0",
+ "switchboard",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "hashbrown"
+version = "0.15.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
+
+[[package]]
+name = "indexmap"
+version = "2.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "inked"
version = "1.0.0"
+source = "git+git://benbridle.com/inked?tag=v1.0.0#2954d37b638fa2c1dd3d51ff53f08f475aea6ea3"
+dependencies = [
+ "termcolor",
+]
+
+[[package]]
+name = "log"
+version = "1.1.1"
+source = "git+git://benbridle.com/log?tag=v1.1.1#930f3d0e2b82df1243f423c092a38546ea7533c3"
+
+[[package]]
+name = "log"
+version = "2.0.0"
+source = "git+git://benbridle.com/log?tag=v2.0.0#a38d3dd487594f41151db57625410d1b786bebe4"
+dependencies = [
+ "inked",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "switchboard"
+version = "2.1.0"
+source = "git+git://benbridle.com/switchboard?tag=v2.1.0#e6435712ba5b3ca36e99fc8cbe7755940f8b1f3f"
+dependencies = [
+ "log 1.1.1",
+ "paste",
+]
+
+[[package]]
+name = "termcolor"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "vagabond"
+version = "1.1.1"
+source = "git+git://benbridle.com/vagabond?tag=v1.1.1#b190582517e6008ad1deff1859f15988e4efaa26"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
diff --git a/Cargo.toml b/Cargo.toml
index 3ee43f3..747594f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,13 +1,16 @@
[package]
-name = "bedrock_asm"
-version = "1.0.0"
+name = "bedrock-asm"
+version = "1.0.2"
authors = ["Ben Bridle"]
-edition = "2021"
-description = "Assembler program for the Bedrock assembly language"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+edition = "2024"
+description = "Assembler for the Bedrock assembly language"
[dependencies]
+assembler = { git = "git://benbridle.com/assembler", tag = "v2.3.0" }
+log = { git = "git://benbridle.com/log", tag = "v2.0.0" }
+switchboard = { git = "git://benbridle.com/switchboard", tag = "v2.1.0" }
+
+indexmap = "2.7.1"
[profile.release]
lto=true
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..21ed643
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) Ben Bridle
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so.
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 0000000..5d56faf
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,2 @@
+[toolchain]
+channel = "nightly"
diff --git a/src/addressing.rs b/src/addressing.rs
deleted file mode 100644
index dd7638e..0000000
--- a/src/addressing.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-#[derive(Clone,Copy)]
-pub struct CharAddress {
- /// The number of lines that precede this line in the file.
- pub line:usize,
- /// The number of characters that precede this character in the line.
- pub column:usize,
-}
-impl CharAddress {
- pub fn new(line:usize, column:usize) -> Self {
- Self { line, column }
- }
- pub fn zero() -> Self {
- Self::new(0,0)
- }
-}
-
-pub struct SourceLocation {
- /// The slice of the source file from which this token was parsed.
- pub source: String,
- /// The address of the first character of this token.
- pub start: CharAddress,
- /// The address of the final character of this token.
- pub end: CharAddress
-}
-impl SourceLocation {
- pub fn new(source:String, start:CharAddress, end:CharAddress) -> Self {
- Self { source, start, end }
- }
- pub fn zero() -> Self {
- Self { source:String::new(), start:CharAddress::zero(), end:CharAddress::zero() }
- }
-}
-
-pub struct BytecodeLocation {
- /// The number of bytes that precede this byte sequence in the bytecode.
- pub start: u16,
- /// The length of this byte sequence, in bytes.
- pub length: u16,
-}
-impl BytecodeLocation {
- pub fn zero() -> Self {
- Self { start:0, length:0 }
- }
-}
diff --git a/src/assembler.rs b/src/assembler.rs
deleted file mode 100644
index 692eb14..0000000
--- a/src/assembler.rs
+++ /dev/null
@@ -1,278 +0,0 @@
-use std::mem::take;
-use std::collections::hash_map::Entry;
-
-use SyntacticTokenType as Syn;
-use SemanticTokenType as Sem;
-use crate::*;
-
-use std::collections::HashMap;
-
-/// The inner value is the index of the token that defines this symbol.
-pub enum SymbolDefinition {
- Macro(usize),
- Label(usize),
-}
-
-pub struct Assembler {
- /// The contents of the program as a list of syntactic tokens.
- syntactic_tokens: Vec<SyntacticToken>,
- /// The contents of the program as a list of semantic tokens.
- semantic_tokens: Vec<SemanticToken>,
- /// Map the name of each defined symbol to the index of the defining token.
- symbol_definitions: HashMap<String, SymbolDefinition>,
- /// Map each macro definition token index to a list of syntactic body tokens.
- syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>,
- /// Map each macro definition token index to a list of semantic body tokens.
- semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>,
-}
-
-impl Assembler {
- pub fn new() -> Self {
- Self {
- syntactic_tokens: Vec::new(),
- semantic_tokens: Vec::new(),
- symbol_definitions: HashMap::new(),
- syntactic_macro_bodies: HashMap::new(),
- semantic_macro_bodies: HashMap::new(),
- }
- }
-
- pub fn tokenise_source(&mut self, source_code: &str) {
- // The index of the current macro definition token
- let mut macro_definition: Option<usize> = None;
- let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();
-
- for mut token in TokenIterator::from_str(source_code) {
- let next_index = self.syntactic_tokens.len();
- if let Some(index) = macro_definition {
- token.use_in_macro_body();
- if token.is_macro_terminator() {
- // Commit the current macro definition
- macro_definition_body_tokens.push(token);
- self.syntactic_macro_bodies.insert(
- index, take(&mut macro_definition_body_tokens));
- macro_definition = None;
- } else {
- macro_definition_body_tokens.push(token);
- }
- } else {
- if let Syn::MacroDefinition(ref name) = token.r#type {
- macro_definition = Some(next_index);
- match self.symbol_definitions.entry(name.to_string()) {
- Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
- Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));}
- }
- } else if let Syn::LabelDefinition(ref name) = token.r#type {
- match self.symbol_definitions.entry(name.to_string()) {
- Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
- Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));}
- }
- } else if token.is_macro_terminator() {
- token.set_error(Error::OrphanedMacroDefinitionTerminator);
- }
- self.syntactic_tokens.push(token);
- }
- }
- }
-
- pub fn resolve_references(&mut self) {
- let syntactic_tokens = take(&mut self.syntactic_tokens);
- let syntactic_token_count = syntactic_tokens.len();
- let mut parent_label = None;
-
- for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() {
- if let SyntacticTokenType::LabelDefinition(name) = &syntactic_token.r#type {
- parent_label = Some(name.to_owned());
- }
- let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index, parent_label.clone());
- self.semantic_tokens.push(semantic_token);
- }
- assert_eq!(syntactic_token_count, self.semantic_tokens.len());
-
- // Find all cyclic macros
- let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter(
- |i| !self.traverse_macro_definition(*i, 0)).collect();
- // Replace each cyclic macro reference in a macro definition with an error
- for body_tokens in &mut self.semantic_macro_bodies.values_mut() {
- for body_token in body_tokens {
- if let Sem::MacroReference(i) = body_token.r#type {
- if cyclic_macros.contains(&i) {
- let name = body_token.source_location.source.clone();
- body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference);
- }
- }
- }
- }
-
- }
-
- /// Attempt to recursively traverse the body tokens of a macro definition, returning
- /// false if the depth exceeds a preset maximum, and returning true otherwise.
- fn traverse_macro_definition(&self, index: usize, level: usize) -> bool {
- if level == 16 {
- false
- } else {
- self.semantic_macro_bodies[&index].iter().all(
- |token| if let Sem::MacroReference(i) = token.r#type {
- self.traverse_macro_definition(i, level+1)
- } else {
- true
- }
- )
- }
- }
-
- pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) {
- let mut bytecode: Vec<u8> = Vec::new();
- // Map each label definition token index to the bytecode addresses of the references
- let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
- // Map each label and macro definition token to a list of reference token indices
- let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new();
-
- macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
- macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
- macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
-
- let mut semantic_tokens = take(&mut self.semantic_tokens);
-
- // Translate semantic tokens into bytecode
- for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
- let start_addr = bytecode.len() as u16;
- match &mut semantic_token.r#type {
- Sem::LabelReference(i) => {
- reference_tokens.entry(*i).or_default().push(index);
- reference_addresses.entry(*i).or_default().push(start_addr);
- push_u16!(0);
- }
- Sem::MacroReference(i) => {
- reference_tokens.entry(*i).or_default().push(index);
- self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses);
- }
- Sem::LabelDefinition(def) => def.address=start_addr,
- Sem::MacroDefinition(_) => (),
-
- Sem::Padding(p) => pad!(*p),
- Sem::ByteLiteral(b) => push_u8!(*b),
- Sem::ShortLiteral(s) => push_u16!(*s),
- Sem::Instruction(b) => push_u8!(*b),
-
- Sem::MacroDefinitionTerminator => unreachable!(),
- Sem::Comment => (),
- Sem::Error(..) => (),
- };
- let end_addr = bytecode.len() as u16;
- semantic_token.bytecode_location.start = start_addr;
- semantic_token.bytecode_location.length = end_addr - start_addr;
- }
-
- // Fill each label reference with the address of the matching label definition
- for (index, slots) in reference_addresses {
- if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type {
- let [h,l] = definition.address.to_be_bytes();
- for slot in slots {
- bytecode[slot as usize] = h;
- bytecode[slot.wrapping_add(1) as usize] = l;
- }
- } else { unreachable!() }
- }
-
- // Move references and macro body tokens into label and macro definition tokens
- for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
- if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type {
- definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap();
- if let Some(references) = reference_tokens.remove(&index) {
- definition.references = references;
- }
- } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type {
- if let Some(references) = reference_tokens.remove(&index) {
- definition.references = references;
- }
- }
- }
- assert_eq!(reference_tokens.len(), 0);
-
- // Remove trailing null bytes from the bytecode
- if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
- let truncated_length = final_nonnull_byte + 1;
- let removed_byte_count = bytecode.len() - truncated_length;
- if removed_byte_count > 0 {
- bytecode.truncate(truncated_length);
- }
- }
-
- (bytecode, semantic_tokens)
- }
-
- fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize, parent_label: Option<String>) -> SemanticToken {
- SemanticToken {
- r#type: {
- if let Some(err) = syn_token.error {
- Sem::Error(syn_token.r#type, err)
- } else {
- match syn_token.r#type {
- Syn::Reference(ref name) => {
- match self.symbol_definitions.get(name) {
- Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i),
- Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i),
- None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
- }
- }
- Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))},
- Syn::MacroDefinition(name) => {
- let mut sem_body_tokens = Vec::new();
- for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() {
- // Make the source location of the macro definition token span the entire definition
- if syn_body_token.is_macro_terminator() {
- syn_token.source_location.end = syn_body_token.source_location.start;
- }
- let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0, parent_label.clone());
- sem_body_tokens.push(sem_body_token);
- }
- self.semantic_macro_bodies.insert(index, sem_body_tokens);
- Sem::MacroDefinition(MacroDefinition::new(name))
- },
- Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator,
- Syn::Padding(v) => Sem::Padding(v),
- Syn::ByteLiteral(v) => Sem::ByteLiteral(v),
- Syn::ShortLiteral(v) => Sem::ShortLiteral(v),
- Syn::Instruction(v) => Sem::Instruction(v),
- Syn::Comment => Sem::Comment,
- }
- }
- },
- source_location: syn_token.source_location,
- bytecode_location: BytecodeLocation::zero(),
- parent_label,
- }
- }
-
- fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) {
- macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
- macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
- macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
-
- for body_token in self.semantic_macro_bodies.get(&index).unwrap() {
- let start_addr = bytecode.len() as u16;
- match &body_token.r#type {
- Sem::LabelReference(i) => {
- reference_addresses.entry(*i).or_default().push(start_addr);
- push_u16!(0u16);
- },
- Sem::MacroReference(i) => {
- self.expand_macro_reference(*i, bytecode, reference_addresses);
- },
- Sem::LabelDefinition(_) => unreachable!(),
- Sem::MacroDefinition(_) => unreachable!(),
-
- Sem::Padding(p) => pad!(*p),
- Sem::ByteLiteral(b) => push_u8!(*b),
- Sem::ShortLiteral(s) => push_u16!(*s),
- Sem::Instruction(b) => push_u8!(*b),
-
- Sem::MacroDefinitionTerminator => (),
- Sem::Comment => (),
- Sem::Error(..) => (),
- };
- }
- }
-}
diff --git a/src/bin/br-asm.rs b/src/bin/br-asm.rs
new file mode 100644
index 0000000..e7a9230
--- /dev/null
+++ b/src/bin/br-asm.rs
@@ -0,0 +1,8 @@
+use bedrock_asm::*;
+use switchboard::*;
+
+
+fn main() {
+ let args = Switchboard::from_env();
+ assemble(args, "br-asm");
+}
diff --git a/src/error.rs b/src/error.rs
deleted file mode 100644
index 8a6c0d6..0000000
--- a/src/error.rs
+++ /dev/null
@@ -1,10 +0,0 @@
-#[derive(Clone)]
-pub enum Error {
- UnresolvedReference,
- DuplicateDefinition,
- InvalidPaddingValue,
- InvalidTypeInMacroDefinition,
- OrphanedMacroDefinitionTerminator,
- CyclicMacroReference,
-}
-
diff --git a/src/formats/clang.rs b/src/formats/clang.rs
new file mode 100644
index 0000000..524b501
--- /dev/null
+++ b/src/formats/clang.rs
@@ -0,0 +1,10 @@
+pub fn format_clang(bytecode: &[u8]) -> Vec<u8> {
+ let mut output = String::new();
+ for chunk in bytecode.chunks(16) {
+ for byte in chunk {
+ output.push_str(&format!("0x{byte:02X}, "));
+ }
+ output.push('\n');
+ }
+ return output.into_bytes();
+}
diff --git a/src/formats/mod.rs b/src/formats/mod.rs
new file mode 100644
index 0000000..79b1c51
--- /dev/null
+++ b/src/formats/mod.rs
@@ -0,0 +1,23 @@
+mod clang;
+pub use clang::*;
+
+use crate::*;
+
+
+#[derive(Clone, Copy, PartialEq)]
+pub enum Format {
+ Raw,
+ Source,
+ Clang,
+}
+
+impl Format {
+ pub fn from_str(string: &str) -> Self {
+ match string {
+ "raw" => Self::Raw,
+ "source" => Self::Source,
+ "c" => Self::Clang,
+ _ => fatal!("Unknown format '{string}', expected 'raw', 'c', or 'source'"),
+ }
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index a657354..76ec544 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,21 +1,245 @@
-mod addressing;
-mod syntactic_token;
-mod semantic_token;
-mod tokenizer;
-mod error;
-mod assembler;
+#![feature(path_add_extension)]
-pub use addressing::{CharAddress, SourceLocation, BytecodeLocation};
-pub use syntactic_token::{SyntacticToken, SyntacticTokenType};
-pub use semantic_token::{SemanticToken, SemanticTokenType, LabelDefinition, MacroDefinition};
-pub use error::Error;
-pub use tokenizer::TokenIterator;
-pub use assembler::Assembler;
+mod formats;
+mod types;
+mod stages;
+pub use formats::*;
+pub use types::*;
+pub use stages::*;
-pub fn assemble(source_code: &str) -> (Vec<u8>, Vec<SemanticToken>) {
- let mut assembler = Assembler::new();
- assembler.tokenise_source(source_code);
- assembler.resolve_references();
- assembler.generate_bytecode()
+use assembler::*;
+use log::*;
+use switchboard::*;
+
+use std::io::Read;
+use std::io::Write;
+
+
+pub const RETURN_MODE: u8 = 0x80;
+pub const WIDE_MODE: u8 = 0x40;
+pub const IMMEDIATE_MODE: u8 = 0x20;
+
+
+pub fn assemble(mut args: Switchboard, invocation: &str) -> ! {
+ args.named("help").short('h');
+ args.named("version");
+ args.named("verbose").short('v');
+
+ if args.get("help").as_bool() {
+ print_help(invocation);
+ std::process::exit(0);
+ }
+ if args.get("version").as_bool() {
+ let name = env!("CARGO_PKG_NAME");
+ let version = env!("CARGO_PKG_VERSION");
+ eprintln!("{name} v{version}");
+ eprintln!("Written by Ben Bridle.");
+ std::process::exit(0);
+ }
+ if args.get("verbose").as_bool() {
+ log::set_log_level(log::LogLevel::Info);
+ }
+
+ args.positional("source");
+ args.positional("destination");
+ args.named("extension").default("brc");
+
+ args.named("no-libs");
+ args.named("no-project-libs");
+ args.named("no-env-libs");
+ args.named("no-truncate");
+
+ args.named("format").default("raw");
+ args.named("dry-run").short('n');
+ args.named("tree");
+ args.named("with-symbols");
+ args.raise_errors();
+
+ let source_path = args.get("source").as_path_opt().map(
+ |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}")));
+ let destination_path = args.get("destination").as_path_opt();
+ let extension = args.get("extension").as_string();
+ let opt_extension = Some(extension.as_str());
+
+ let no_libs = args.get("no-libs").as_bool();
+ let no_project_libs = args.get("no-project-libs").as_bool();
+ let no_env_libs = args.get("no-env-libs").as_bool();
+ let no_truncate = args.get("no-truncate").as_bool();
+
+ let format = Format::from_str(args.get("format").as_str());
+ let dry_run = args.get("dry-run").as_bool();
+ let print_tree = args.get("tree").as_bool();
+ let export_symbols = args.get("with-symbols").as_bool();
+
+ // -----------------------------------------------------------------------
+
+ let mut compiler = new_compiler();
+
+ if let Some(path) = &source_path {
+ info!("Reading program source from {path:?}");
+ compiler.root_from_path(path).unwrap_or_else(|err| fatal!("{err:?}: {path:?}"));
+ } else {
+ let mut source_code = String::new();
+ info!("Reading program source from standard input");
+ if let Err(err) = std::io::stdin().read_to_string(&mut source_code) {
+ fatal!("Could not read from standard input\n{err:?}");
+ }
+ compiler.root_from_string(source_code, "<standard input>")
+ };
+ if compiler.error().is_some() && !no_libs && !no_project_libs {
+ compiler.include_libs_from_parent(opt_extension);
+ }
+ if compiler.error().is_some() && !no_libs && !no_env_libs {
+ compiler.include_libs_from_path_variable("BEDROCK_LIBS", opt_extension);
+ }
+
+ if print_tree {
+ compiler.hierarchy().report()
+ }
+ if let Some(error) = compiler.error() {
+ error.report();
+ std::process::exit(1);
+ }
+
+ let merged_source = compiler.get_compiled_source().unwrap_or_else(|error| {
+ error.report();
+ std::process::exit(1);
+ });
+
+ if !dry_run && format == Format::Source {
+ write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref());
+ }
+
+ // -----------------------------------------------------------------------
+
+ let path = Some("<merged source>");
+ let syntactic = match parse_syntactic(&merged_source, path) {
+ Ok(tokens) => tokens,
+ Err(errors) => {
+ report_syntactic_errors(&errors, &merged_source);
+ std::process::exit(1);
+ }
+ };
+
+ let semantic = match parse_semantic(syntactic) {
+ Ok(tokens) => tokens,
+ Err(errors) => {
+ report_semantic_errors(&errors, &merged_source);
+ std::process::exit(1);
+ }
+ };
+
+ let program = match generate_bytecode(&semantic) {
+ Ok(program) => program,
+ Err(errors) => {
+ report_bytecode_errors(&errors, &merged_source);
+ std::process::exit(1);
+ }
+ };
+
+ let AssembledProgram { mut bytecode, symbols } = program;
+
+ let length = bytecode.len();
+ let percentage = (length as f32 / 65536.0 * 100.0).round() as u16;
+ info!("Assembled program in {length} bytes ({percentage}% of maximum)");
+
+ if !no_truncate {
+ // Remove null bytes from end of bytecode.
+ while let Some(0) = bytecode.last() {
+ bytecode.pop();
+ }
+ let new_length = bytecode.len();
+ let difference = length - new_length;
+ if difference > 0 {
+ info!("Truncated program to {new_length} bytes (saved {difference} bytes)");
+ }
+ }
+
+ if !dry_run {
+ if export_symbols {
+ if let Some(path) = &destination_path {
+ let mut symbols_path = path.to_path_buf();
+ symbols_path.add_extension("sym");
+ let mut symbols_string = String::new();
+ for symbol in &symbols {
+ let address = &symbol.address;
+ let name = &symbol.name;
+ let location = &symbol.source.location();
+ symbols_string.push_str(&format!(
+ "{address:04x} {name} {location}\n"
+ ));
+ }
+ match std::fs::write(&symbols_path, symbols_string) {
+ Ok(_) => info!("Saved symbols to {symbols_path:?}"),
+ Err(err) => info!("Could not write symbols to {symbols_path:?}\n{err:?}"),
+ }
+ }
+ }
+
+ let bytes = match format {
+ Format::Raw => bytecode,
+ Format::Clang => format_clang(&bytecode),
+ Format::Source => unreachable!("Source output is handled before full assembly"),
+ };
+ write_bytes_and_exit(&bytes, destination_path.as_ref());
+ }
+ std::process::exit(0);
}
+
+fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! {
+ match path {
+ Some(path) => match std::fs::write(path, bytes) {
+ Ok(_) => info!("Wrote output to {:?}", path.as_ref()),
+ Err(err) => fatal!("Could not write to {:?}\n{err:?}", path.as_ref()),
+ }
+ None => match std::io::stdout().write_all(bytes) {
+ Ok(_) => info!("Wrote output to standard output"),
+ Err(err) => fatal!("Could not write to standard output\n{err:?}"),
+ }
+ }
+ std::process::exit(0);
+}
+
+
+fn print_help(invocation: &str) {
+ eprintln!("\
+Usage: {invocation} [source] [destination]
+
+Assembler for the Bedrock computer system.
+
+Usage:
+ To assemble a Bedrock program from a source file and write to an output
+ file, run `br-asm [source] [destination]`, where [source] is the path
+ of the source file and [destination] is the path to write to.
+
+ If [destination] is omitted, the assembled program will be written to
+ standard output. If [source] is omitted, the program source code will
+ be read from standard input.
+
+Environment variables:
+ BEDROCK_LIBS
+ A list of colon-separated paths that will be searched to find Bedrock
+ source code files to use as libraries when assembling a Bedrock program.
+ If a library file resolves an unresolved symbol in the program being
+ assembled, the library file will be merged into the program.
+
+Arguments:
+ [source] Bedrock source code file to assemble.
+ [destination] Destination path for assembler output.
+
+Switches:
+ --dry-run (-n) Assemble and show errors only, don't write any output
+ --extension=<ext> File extension to identify source files (default is 'brc')
+ --format=<fmt> Output format to use for assembled program (default is 'raw')
+ --no-project-libs Don't search for libraries in the source parent folder
+ --no-env-libs Don't search for libraries in the BEDROCK_LIBS path variable
+ --no-libs Combination of --no-project-libs and --no-env-libs
+ --no-truncate Don't remove trailing zero-bytes from the assembled program
+ --tree Show a tree diagram of all included library files
+ --with-symbols Also generate debug symbols file with extension '.sym'
+ --help (-h) Print this help information
+ --verbose, (-v) Print additional information
+ --version Print the program version and exit
+");
+}
diff --git a/src/main.rs b/src/main.rs
deleted file mode 100644
index 11ce42b..0000000
--- a/src/main.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use std::io::{Read, Write};
-use bedrock_asm::*;
-
-fn main() {
- // Read source code from standard input
- let mut source_code = String::new();
- if let Err(err) = std::io::stdin().read_to_string(&mut source_code) {
- eprintln!("Could not read from standard input, quitting.");
- eprintln!("({err:?})");
- std::process::exit(1);
- };
-
- let (bytecode, tokens) = assemble(&source_code);
- let mut is_error = false;
- for token in &tokens {
- if token.print_error(&source_code) { is_error = true };
- }
- if !is_error {
- for token in &tokens {
- if let SemanticTokenType::LabelDefinition(def) = &token.r#type {
- if def.references.is_empty() {
- eprintln!("Unused label definition: {}", def.name);
- }
- }
- }
- eprintln!();
- }
-
- let byte_count = bytecode.len();
- let byte_percentage = (byte_count as f32 / 65536.0 * 100.0).round() as u16;
- eprintln!("Assembled program in {byte_count} bytes ({byte_percentage}% of maximum).");
-
- if is_error {
- std::process::exit(1)
- }
-
- // Write bytecode to standard output
- if let Err(_) = std::io::stdout().write_all(&bytecode) {
- eprintln!("Could not write to standard output, quitting.");
- std::process::exit(1);
- }
-}
-
diff --git a/src/semantic_token.rs b/src/semantic_token.rs
deleted file mode 100644
index 265db91..0000000
--- a/src/semantic_token.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-use crate::*;
-
-pub enum SemanticTokenType {
- LabelReference(usize),
- MacroReference(usize),
-
- LabelDefinition(LabelDefinition),
- MacroDefinition(MacroDefinition),
-
- Padding(u16),
- ByteLiteral(u8),
- ShortLiteral(u16),
- Instruction(u8),
-
- MacroDefinitionTerminator,
- Comment,
- Error(SyntacticTokenType, Error),
-}
-
-pub struct SemanticToken {
- pub r#type: SemanticTokenType,
- pub source_location: SourceLocation,
- pub bytecode_location: BytecodeLocation,
- pub parent_label: Option<String>,
-}
-
-impl SemanticToken {
- /// Returns true if an error was printed.
- pub fn print_error(&self, source_code: &str) -> bool {
- let mut is_error = false;
- macro_rules! red {()=>{eprint!("\x1b[31m")};}
- macro_rules! dim {()=>{eprint!("\x1b[0;2m")};}
- macro_rules! normal {()=>{eprint!("\x1b[0m")};}
-
- if let SemanticTokenType::Error(token, error) = &self.r#type {
- is_error = true;
-
- red!(); eprint!("[ERROR] "); normal!();
- let source = &self.source_location.source;
- match error {
- Error::UnresolvedReference => {
- eprintln!("Unresolved reference, no label or macro has been defined with the name '{source}'") }
- Error::DuplicateDefinition => {
- eprintln!("Duplicate definition, a label or macro has already been defined with the name '{source}'") }
- Error::OrphanedMacroDefinitionTerminator => {
- eprintln!("Unmatched macro definition terminator, no macro definition is in progress") }
- Error::InvalidPaddingValue => {
- eprintln!("Invalid value for padding, the value must be at least one and at most four hexadecimal characters") }
- Error::CyclicMacroReference => {
- eprintln!("Cyclic macro reference, this macro reference contains a reference to the macro being defined") }
- Error::InvalidTypeInMacroDefinition => {
- let name = match token {
- SyntacticTokenType::Reference(_) => "references",
- SyntacticTokenType::LabelDefinition(_) => "label definitions",
- SyntacticTokenType::MacroDefinition(_) => "macro definitions",
- SyntacticTokenType::MacroDefinitionTerminator => "macro definition terminators",
- SyntacticTokenType::Padding(_) => "padding",
- SyntacticTokenType::ByteLiteral(_) => "byte literals",
- SyntacticTokenType::ShortLiteral(_) => "short literals",
- SyntacticTokenType::Instruction(_) => "instructions",
- SyntacticTokenType::Comment => "comments",
- };
- eprintln!("Invalid token in macro definition, macro definitions are not allowed to contain {name}") }
- }
-
- if let Some(label) = &self.parent_label {
- eprint!(" ... "); red!(); eprint!("| "); dim!(); eprintln!("@{label} "); normal!();
- }
-
- let line = source_code.split('\n').nth(self.source_location.start.line).unwrap();
- eprint!("{:>5} ", self.source_location.start.line+1);
- red!(); eprint!("| "); normal!();
- for (i, c) in line.chars().enumerate() {
- if i == self.source_location.start.column { red!() }
- eprint!("{c}");
- if i == self.source_location.end.column { normal!() }
- }
- eprintln!(); red!(); eprint!(" | ");
- for i in 0..=self.source_location.end.column {
- if i < self.source_location.start.column { eprint!(" ") } else { eprint!("^") };
- }
- normal!(); eprintln!();
- }
- else if let SemanticTokenType::MacroDefinition(definition) = &self.r#type {
- for token in &definition.body_tokens {
- if token.print_error(source_code) { is_error = true }
- }
- }
- is_error
- }
-}
-
-pub struct LabelDefinition {
- pub name: String,
- pub address: u16,
- /// A list of pointers to label reference tokens
- pub references: Vec<usize>,
-}
-impl LabelDefinition {
- pub fn new(name: String) -> Self {
- Self { name, address:0, references:Vec::new() }
- }
-}
-
-pub struct MacroDefinition {
- pub name: String,
- pub body_tokens: Vec<SemanticToken>,
- /// A list of pointers to macro reference tokens
- pub references: Vec<usize>,
-}
-impl MacroDefinition {
- pub fn new(name: String) -> Self {
- Self { name, body_tokens:Vec::new(), references:Vec::new() }
- }
-}
-
diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs
new file mode 100644
index 0000000..02cc739
--- /dev/null
+++ b/src/stages/bytecode.rs
@@ -0,0 +1,150 @@
+use crate::*;
+
+use indexmap::IndexMap;
+
+
+/// Doesn't truncate trailing null bytes.
+pub fn generate_bytecode(semantic: &Program) -> Result<AssembledProgram, Vec<Tracked<BytecodeError>>> {
+ let mut generator = BytecodeGenerator::new(&semantic.definitions);
+ generator.parse(&semantic.tokens, false);
+ generator.fill_slots();
+ let mut symbols = Vec::new();
+ for (name, information) in generator.labels {
+ let source = semantic.definitions.get(&name).unwrap().source.clone();
+ let address = information.address;
+ symbols.push(AssembledSymbol { name, address, source });
+ }
+ match generator.errors.is_empty() {
+ true => Ok(AssembledProgram { bytecode: generator.bytecode, symbols }),
+ false => Err(generator.errors),
+ }
+}
+
+
+pub struct BytecodeGenerator<'a> {
+ definitions: &'a IndexMap<String, Tracked<Definition>>,
+ labels: IndexMap<String, LabelInformation>,
+ stack: Vec<usize>,
+ bytecode: Vec<u8>,
+ errors: Vec<Tracked<BytecodeError>>,
+}
+
+struct LabelInformation {
+ address: usize,
+ slots: Vec<usize>,
+}
+
+impl<'a> BytecodeGenerator<'a> {
+ pub fn new(definitions: &'a IndexMap<String, Tracked<Definition>>) -> Self {
+ let mut labels = IndexMap::new();
+ for (name, definition) in definitions {
+ if let DefinitionVariant::LabelDefinition = definition.variant {
+ // Use fake address for now.
+ let information = LabelInformation { address: 0, slots: Vec::new() };
+ labels.insert(name.to_string(), information);
+ }
+ }
+ Self {
+ definitions,
+ labels,
+ stack: Vec::new(),
+ bytecode: Vec::new(),
+ errors: Vec::new(),
+ }
+ }
+
+ pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) {
+ macro_rules! byte {
+ ($byte:expr) => { self.bytecode.push($byte) };
+ }
+ macro_rules! double {
+ ($double:expr) => {{
+ let [high, low] = u16::to_be_bytes($double);
+ self.bytecode.push(high); self.bytecode.push(low);
+ }};
+ }
+
+ for token in tokens {
+ let i = self.bytecode.len();
+ match &token.value {
+ SemanticToken::Literal(value) => match value {
+ Value::Byte(byte) => byte!(*byte),
+ Value::Double(double) => double!(*double),
+ }
+ SemanticToken::Pad(value) => {
+ self.bytecode.resize(i + usize::from(value), 0);
+ },
+ SemanticToken::String(bytes) => {
+ self.bytecode.extend_from_slice(bytes)
+ },
+ SemanticToken::Comment(_) => (),
+ SemanticToken::BlockOpen(_) => {
+ self.stack.push(i);
+ // Use a fake index for now.
+ double!(0);
+ }
+ SemanticToken::BlockClose(_) => {
+ if i > 0xFFFF {
+ let error = BytecodeError::InvalidBlockAddress(i);
+ self.errors.push(Tracked::from(error, token.source.clone()));
+ }
+ let Some(addr) = self.stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator");
+ };
+ let [high, low] = (i as u16).to_be_bytes();
+ self.bytecode[addr] = high;
+ self.bytecode[addr+1] = low;
+ }
+ SemanticToken::Symbol(name) => {
+ if let Some(definition) = self.definitions.get(name) {
+ match &definition.variant {
+ DefinitionVariant::MacroDefinition(body) => {
+ self.parse(body, true);
+ }
+ DefinitionVariant::LabelDefinition => {
+ let information = self.labels.get_mut(name).unwrap();
+ information.slots.push(i);
+ // Use a fake index for now.
+ double!(0);
+ }
+ }
+ } else {
+ unreachable!("Uncaught undefined symbol '{name}'");
+ }
+ }
+ SemanticToken::Instruction(instruction) => {
+ byte!(instruction.value)
+ }
+ SemanticToken::LabelDefinition(name) => if in_macro {
+ unreachable!("Uncaught label definition in macro");
+ } else {
+ if i > 0xFFFF {
+ let error = BytecodeError::InvalidLabelAddress(i);
+ self.errors.push(Tracked::from(error, token.source.clone()));
+ }
+ let information = self.labels.get_mut(name).unwrap();
+ // Replace fake address with real address.
+ information.address = i;
+ }
+ SemanticToken::MacroDefinition{ .. } => if in_macro {
+ unreachable!("Uncaught macro definition in macro");
+ }
+ }
+ }
+
+ if !in_macro && !self.stack.is_empty() {
+ unreachable!("Uncaught unterminated block");
+ }
+ }
+
+ /// Fill each label slot with a real label address.
+ pub fn fill_slots(&mut self) {
+ for information in self.labels.values() {
+ let [high, low] = (information.address as u16).to_be_bytes();
+ for addr in &information.slots {
+ self.bytecode[*addr] = high;
+ self.bytecode[*addr + 1] = low;
+ }
+ }
+ }
+}
diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs
new file mode 100644
index 0000000..902fcd7
--- /dev/null
+++ b/src/stages/bytecode_tokens.rs
@@ -0,0 +1,37 @@
+use crate::*;
+
+
+pub struct AssembledProgram {
+ pub bytecode: Vec<u8>,
+ pub symbols: Vec<AssembledSymbol>,
+}
+
+pub struct AssembledSymbol {
+ pub name: String,
+ pub address: usize,
+ pub source: SourceSpan,
+}
+
+pub enum BytecodeError {
+ InvalidLabelAddress(usize),
+ InvalidBlockAddress(usize),
+}
+
+
+pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) {
+ for error in errors {
+ report_bytecode_error(error, source_code);
+ }
+}
+
+
+fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ BytecodeError::InvalidLabelAddress(address) =>
+ &format!("The label address exceeds 0xFFFF: 0x{address:X}"),
+ BytecodeError::InvalidBlockAddress(address) =>
+ &format!("The block address exceeds 0xFFFF: 0x{address:X}"),
+ };
+ report_source_issue(LogLevel::Error, &context, message);
+}
diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs
new file mode 100644
index 0000000..97bf20c
--- /dev/null
+++ b/src/stages/compiler.rs
@@ -0,0 +1,84 @@
+use crate::*;
+
+use assembler::SymbolRole::*;
+use assembler::DefinitionType::*;
+
+
+pub fn new_compiler() -> Compiler {
+ Compiler::new(parse_symbols, push_code)
+}
+
+
+/// Parse all symbols from a source code string.
+pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> {
+ let syntactic = match parse_syntactic(source_code, path) {
+ Ok(syntactic) => syntactic,
+ Err(_) => return None,
+ };
+ Some(SymbolParser::new().parse(&syntactic))
+}
+
+/// Push source code to a source compilation string.
+pub fn push_code(compilation: &mut String, source_file: &SourceFile) {
+ // Skip blank files.
+ let source_code = &source_file.source_code;
+ if source_code.chars().all(|c| c.is_whitespace()) { return; }
+ // Ensure that the previous section is followed by two newline characters.
+ if !compilation.is_empty() {
+ if !compilation.ends_with('\n') { compilation.push('\n'); }
+ if !compilation.ends_with("\n\n") { compilation.push('\n'); }
+ }
+ // Push a path comment and the source code.
+ let path_str = source_file.path.as_os_str().to_string_lossy();
+ let path_comment = format!("(: {path_str} )\n");
+ compilation.push_str(&path_comment);
+ compilation.push_str(&source_code);
+}
+
+
+// Extract symbol definitions from a list of syntactic tokens.
+pub struct SymbolParser {
+ pub symbols: Vec<Symbol>,
+}
+
+impl SymbolParser {
+ pub fn new() -> Self {
+ Self {
+ symbols: Vec::new(),
+ }
+ }
+
+ fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) {
+ let name = name.to_string();
+ let namespace = Vec::new();
+ let source = source.to_owned();
+ self.symbols.push(Symbol { name, namespace, source, role });
+ }
+
+ pub fn parse(mut self, syntactic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> {
+ for token in syntactic {
+ match &token.value {
+ SyntacticToken::MacroDefinition(definition) => {
+ self.record_symbol(
+ &definition.name,
+ &definition.name.source,
+ Definition(MustPrecedeReference),
+ );
+ for token in &definition.body {
+ if let SyntacticToken::Symbol(name) = &token.value {
+ self.record_symbol(&name, &token.source, Reference);
+ }
+ }
+ }
+ SyntacticToken::LabelDefinition(name) => {
+ self.record_symbol(&name, &token.source, Definition(CanFollowReference));
+ }
+ SyntacticToken::Symbol(name) => {
+ self.record_symbol(&name, &token.source, Reference);
+ }
+ _ => (),
+ }
+ }
+ return self.symbols;
+ }
+}
diff --git a/src/stages/mod.rs b/src/stages/mod.rs
new file mode 100644
index 0000000..76bda0d
--- /dev/null
+++ b/src/stages/mod.rs
@@ -0,0 +1,26 @@
+mod compiler;
+mod syntactic;
+mod syntactic_tokens;
+mod semantic;
+mod semantic_tokens;
+mod bytecode;
+mod bytecode_tokens;
+pub use compiler::*;
+pub use syntactic::*;
+pub use syntactic_tokens::*;
+pub use semantic::*;
+pub use semantic_tokens::*;
+pub use bytecode::*;
+pub use bytecode_tokens::*;
+
+
+#[macro_export]
+macro_rules! indent {
+ (0, $($tokens:tt)*) => {{
+ println!($($tokens)*);
+ }};
+ ($indent:expr, $($tokens:tt)*) => {{
+ for _ in 0..$indent { print!(" "); }
+ println!($($tokens)*);
+ }};
+}
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs
new file mode 100644
index 0000000..dc9709e
--- /dev/null
+++ b/src/stages/semantic.rs
@@ -0,0 +1,154 @@
+use crate::*;
+
+use std::str::FromStr;
+
+use indexmap::{IndexMap, IndexSet};
+
+
+pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> {
+ let mut errors = Vec::new();
+
+ // Record all label definitions and macro names up front.
+ let mut definitions = IndexMap::new();
+ let mut macro_names = IndexSet::new();
+ for token in &syntactic {
+ match &token.value {
+ SyntacticToken::LabelDefinition(name) => {
+ // Check if identifier is reserved.
+ if Instruction::from_str(&name).is_ok() {
+ let error = SemanticError::ReservedIdentifier(name.to_string());
+ errors.push(Tracked::from(error, token.source.clone()));
+ }
+ // Use a fake index for now.
+ let definition = Definition::new(0, DefinitionVariant::LabelDefinition);
+ let tracked = Tracked::from(definition, token.source.clone());
+ if let Some(_) = definitions.insert(name.clone(), tracked) {
+ unreachable!("Uncaught duplicate label definition '{name}'");
+ }
+ }
+ SyntacticToken::MacroDefinition(definition) => {
+ let name = &definition.name;
+ // Check if identifier is reserved.
+ if Instruction::from_str(&name).is_ok() {
+ let error = SemanticError::ReservedIdentifier(name.to_string());
+ errors.push(Tracked::from(error, name.source.clone()));
+ }
+ if !macro_names.insert(name.clone()) {
+ unreachable!("Uncaught duplicate macro definition '{name}'")
+ }
+ }
+ _ => (),
+ }
+ }
+
+ // Convert syntactic tokens to semantic tokens.
+ let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new();
+ let mut stack = Vec::new();
+
+ for syn_token in syntactic {
+ let i = tokens.len();
+ let sem_token = match syn_token.value {
+ SyntacticToken::Literal(value) => SemanticToken::Literal(value),
+ SyntacticToken::Pad(value) => SemanticToken::Pad(value),
+ SyntacticToken::String(bytes) => SemanticToken::String(bytes),
+ SyntacticToken::Comment(string) => SemanticToken::Comment(string),
+ SyntacticToken::BlockOpen => {
+ stack.push(i);
+ // Use a fake index for now.
+ SemanticToken::BlockOpen(0)
+ }
+ SyntacticToken::BlockClose => {
+ let Some(k) = stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator");
+ };
+ // Replace fake index with real index.
+ tokens[k].value = SemanticToken::BlockOpen(i);
+ SemanticToken::BlockClose(k)
+ }
+ SyntacticToken::Symbol(symbol) => {
+ if let Some(definition) = definitions.get_mut(&symbol) {
+ definition.value.references.push(i);
+ } else if let Some(definition) = macro_names.get(&symbol) {
+ let error = SemanticError::InvocationBeforeDefinition;
+ let source = syn_token.source.wrap(definition.source.clone());
+ errors.push(Tracked::from(error, source));
+ } else {
+ unreachable!("Uncaught undefined symbol '{symbol}'");
+ };
+ SemanticToken::Symbol(symbol)
+ }
+ SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
+ SyntacticToken::LabelDefinition(name) => {
+ let definition = definitions.get_mut(&name).unwrap();
+ // Replace fake index with real index.
+ definition.value.definition = i;
+ SemanticToken::LabelDefinition(name)
+ }
+ SyntacticToken::MacroDefinition(definition) => {
+ let name = definition.name.clone();
+ let mut body: Vec<Tracked<SemanticToken>> = Vec::new();
+ let mut body_stack = Vec::new();
+ for syn_token in definition.body {
+ let j = body.len();
+ let sem_token = match syn_token.value {
+ SyntacticToken::Literal(value) => SemanticToken::Literal(value),
+ SyntacticToken::Pad(value) => SemanticToken::Pad(value),
+ SyntacticToken::String(bytes) => SemanticToken::String(bytes),
+ SyntacticToken::Comment(string) => SemanticToken::Comment(string),
+ SyntacticToken::BlockOpen => {
+ body_stack.push(j);
+ // Use a fake index for now.
+ SemanticToken::BlockOpen(0)
+ }
+ SyntacticToken::BlockClose => {
+ let Some(k) = body_stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator in macro '{name}'");
+ };
+ // Replace fake index with real index.
+ body[k].value = SemanticToken::BlockOpen(j);
+ SemanticToken::BlockClose(k)
+ }
+ SyntacticToken::Symbol(symbol) => {
+ if let Some(definition) = definitions.get_mut(&symbol) {
+ definition.value.deep_references.push((i, j));
+ } else if let Some(definition) = macro_names.get(&symbol) {
+ let error = SemanticError::InvocationBeforeDefinition;
+ let source = syn_token.source.wrap(definition.source.clone());
+ errors.push(Tracked::from(error, source));
+ } else {
+ unreachable!("Uncaught undefined symbol '{symbol}' in macro '{name}'");
+ };
+ SemanticToken::Symbol(symbol)
+ }
+ SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
+ SyntacticToken::LabelDefinition(label) =>
+ unreachable!("Uncaught label definition '{label}' in macro '{name}'"),
+ SyntacticToken::MacroDefinition(definition) =>
+ unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name),
+ };
+ body.push(Tracked::from(sem_token, syn_token.source));
+ }
+
+ let variant = DefinitionVariant::MacroDefinition(body);
+ let source = definition.name.source.clone();
+ let tracked = Tracked::from(Definition::new(i, variant), source);
+ if let Some(_) = definitions.insert(name.value.clone(), tracked) {
+ unreachable!("Uncaught duplicate definition '{name}'")
+ };
+ if !body_stack.is_empty() {
+ unreachable!("Uncaught unterminated block in macro '{name}'");
+ }
+ SemanticToken::MacroDefinition(name)
+ }
+ };
+ tokens.push(Tracked::from(sem_token, syn_token.source));
+ }
+
+ if !stack.is_empty() {
+ unreachable!("Uncaught unterminated block");
+ }
+ match errors.is_empty() {
+ true => Ok(Program { definitions, tokens }),
+ false => Err(errors),
+ }
+}
diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs
new file mode 100644
index 0000000..c735828
--- /dev/null
+++ b/src/stages/semantic_tokens.rs
@@ -0,0 +1,97 @@
+use crate::*;
+
+use indexmap::IndexMap;
+
+
+pub struct Program {
+ pub definitions: IndexMap<String, Tracked<Definition>>,
+ pub tokens: Vec<Tracked<SemanticToken>>,
+}
+
+pub struct Definition {
+ pub variant: DefinitionVariant,
+ /// Index of definition token.
+ pub definition: usize,
+ /// Indices of symbols referencing this definition.
+ pub references: Vec<usize>,
+ /// Indices of references inside other definitions.
+ pub deep_references: Vec<(usize, usize)>,
+}
+
+impl Definition {
+ pub fn new(i: usize, variant: DefinitionVariant) -> Self {
+ Self {
+ variant,
+ definition: i,
+ references: Vec::new(),
+ deep_references: Vec::new(),
+ }
+ }
+}
+
+pub enum DefinitionVariant {
+ LabelDefinition,
+ MacroDefinition(Vec<Tracked<SemanticToken>>),
+}
+
+pub enum SemanticToken {
+ Literal(Value),
+ Pad(Value),
+ String(Vec<u8>),
+ Comment(String),
+ BlockOpen(usize), // index to matching block-close
+ BlockClose(usize), // index to matching block-open
+ Symbol(String),
+ Instruction(Instruction),
+ LabelDefinition(String),
+ MacroDefinition(Tracked<String>),
+}
+
+pub enum SemanticError {
+ InvocationBeforeDefinition,
+ ReservedIdentifier(String),
+}
+
+
+pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) {
+ for error in errors {
+ report_semantic_error(error, source_code);
+ }
+}
+
+
+fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SemanticError::InvocationBeforeDefinition =>
+ "Macro cannot be invoked before it has been defined",
+ SemanticError::ReservedIdentifier(name) =>
+ &format!("Identifier '{name}' is reserved for a built-in instruction"),
+ };
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &IndexMap<String, Tracked<Definition>>) {
+ match token {
+ SemanticToken::Literal(value) => indent!(i, "Literal({value})"),
+ SemanticToken::Pad(value) => indent!(i, "Pad({value})"),
+ SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)),
+ SemanticToken::Comment(_) => indent!(i, "Comment"),
+ SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"),
+ SemanticToken::BlockClose(pointer) => indent!(i, "BlockClose(*{pointer})"),
+ SemanticToken::Symbol(name) => indent!(i, "Symbol({name})"),
+ SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"),
+ SemanticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"),
+ SemanticToken::MacroDefinition(name) => {
+ indent!(i, "MacroDefinition({name})");
+ if let Some(definition) = definitions.get(name.as_str()) {
+ if let DefinitionVariant::MacroDefinition(body) = &definition.variant {
+ for token in body {
+ print_semantic_token(i+1, token, definitions);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..59b8b95
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,211 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), "")
+}
+
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ t.add_delimiters(&['(',')','[',']','{','}',';']);
+ t.add_terminators(&[':']);
+ let mut tokens = Vec::new();
+ let mut errors = Vec::new();
+ let mut label_name = label_name.to_string();
+
+ macro_rules! err {
+ ($error:expr) => {{
+ err!($error, t.get_source());
+ }};
+ ($error:expr, $source:expr) => {{
+ errors.push(Tracked::from($error, $source));
+ continue;
+ }};
+ }
+
+ macro_rules! check_name {
+ ($name:expr) => {{
+ check_name!($name, t.get_source());
+ }};
+ ($name:expr, $source:expr) => {
+ if $name.chars().count() > 63 {
+ let error = SyntacticError::InvalidIdentifier($name.clone());
+ errors.push(Tracked::from(error, $source.clone()));
+ }
+ };
+ }
+
+ // Eat characters until the end character is found.
+ macro_rules! is_end {
+ ($end:expr) => {
+ |t: &mut Tokeniser| {
+ t.eat_char() == Some($end)
+ }
+ };
+ }
+
+ loop {
+ // Eat leading whitespace.
+ while let Some(c) = t.peek_char() {
+ match [' ', '\n', '\r', '\t'].contains(&c) {
+ true => t.eat_char(),
+ false => break,
+ };
+ }
+ t.mark_start();
+ let Some(c) = t.eat_char() else { break };
+ let token = match c {
+ '"' => {
+ let source = t.get_source();
+ match t.track_until(is_end!('"')) {
+ Some(string) => {
+ let mut bytes = string.into_bytes();
+ bytes.push(0x00);
+ SyntacticToken::String(bytes)
+ }
+ None => err!(SyntacticError::UnterminatedNullString, source),
+ }
+ }
+ '\'' => {
+ let source = t.get_source();
+ match t.track_until(is_end!('\'')) {
+ Some(string) => SyntacticToken::String(string.into_bytes()),
+ None => err!(SyntacticError::UnterminatedRawString, source),
+ }
+ }
+ '(' => {
+ let source = t.get_source();
+ if let Some(string) = t.track_until(is_end!(')')) {
+ // Check if the comment fills the entire line.
+ if t.start.position.column == 0 && t.end_of_line() {
+ if let Some(path) = string.strip_prefix(": ") {
+ t.embedded_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start.position.line + 1;
+ continue;
+ }
+ }
+ SyntacticToken::Comment(string)
+ } else {
+ err!(SyntacticError::UnterminatedComment, source)
+ }
+ }
+ ')' => err!(SyntacticError::UnmatchedCommentTerminator),
+ '%' => {
+ let name = t.eat_token();
+ let source = t.get_source();
+ check_name!(name, source);
+ t.mark_child();
+ if let Some(_) = t.track_until(is_end!(';')) {
+ let child = t.tokenise_child_span();
+ match parse_body_from_tokeniser(child, &label_name) {
+ Ok(body) => {
+ let name = Tracked::from(name, source);
+ let definition = SyntacticMacroDefinition { name, body };
+ SyntacticToken::MacroDefinition(definition)
+ }
+ Err(mut err) => {
+ errors.append(&mut err);
+ continue;
+ }
+ }
+ } else {
+ err!(SyntacticError::UnterminatedMacroDefinition, source);
+ }
+ }
+ ';' => err!(SyntacticError::UnmatchedMacroTerminator),
+ '{' => SyntacticToken::BlockOpen,
+ '}' => SyntacticToken::BlockClose,
+ '['|']' => continue,
+ '@' => {
+ label_name = t.eat_token();
+ check_name!(label_name);
+ SyntacticToken::LabelDefinition(label_name.clone())
+ }
+ '&' => {
+ let name = format!("{label_name}/{}", t.eat_token());
+ check_name!(name);
+ SyntacticToken::LabelDefinition(name)
+ }
+ '~' => {
+ let name = format!("{label_name}/{}", t.eat_token());
+ check_name!(name);
+ SyntacticToken::Symbol(name)
+ }
+ '#' => {
+ let token = t.eat_token();
+ match token.parse::<Value>() {
+ Ok(value) => SyntacticToken::Pad(value),
+ Err(_) => err!(SyntacticError::InvalidPadValue),
+ }
+ },
+ ':' => {
+ SyntacticToken::Instruction(Instruction { value: 0x21 })
+ }
+ c => {
+ let token = format!("{c}{}", t.eat_token());
+ if let Ok(value) = token.parse::<Value>() {
+ SyntacticToken::Literal(value)
+ } else if let Ok(instruction) = token.parse::<Instruction>() {
+ SyntacticToken::Instruction(instruction)
+ } else {
+ check_name!(token);
+ SyntacticToken::Symbol(token)
+ }
+ }
+ };
+
+ t.mark_end();
+ let source = t.get_source();
+ tokens.push(Tracked::from(token, source));
+ }
+
+ // Check that every block open matches a block close.
+ let mut stack = Vec::new();
+ for token in &tokens {
+ match &token.value {
+ SyntacticToken::BlockOpen => stack.push(token.source.clone()),
+ SyntacticToken::BlockClose => if let None = stack.pop() {
+ let error = SyntacticError::UnmatchedBlockTerminator;
+ errors.push(Tracked::from(error, token.source.clone()));
+ }
+ _ => (),
+ }
+ }
+ for source in stack {
+ let error = SyntacticError::UnterminatedBlock;
+ errors.push(Tracked::from(error, source));
+ }
+
+ match errors.is_empty() {
+ true => Ok(tokens),
+ false => Err(errors),
+ }
+}
+
+
+fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ let mut tokens = Vec::new();
+ let mut errors = Vec::new();
+
+ for token in parse_syntactic_from_tokeniser(t, label_name)? {
+ match token.value {
+ SyntacticToken::LabelDefinition(_) => {
+ let error = SyntacticError::LabelDefinitionInMacroDefinition;
+ errors.push(Tracked::from(error, token.source));
+ continue;
+ }
+ SyntacticToken::MacroDefinition(_) => {
+ let error = SyntacticError::MacroDefinitionInMacroDefinition;
+ errors.push(Tracked::from(error, token.source));
+ continue;
+ }
+ _ => tokens.push(token),
+ };
+ }
+
+ match errors.is_empty() {
+ true => Ok(tokens),
+ false => Err(errors),
+ }
+}
diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs
new file mode 100644
index 0000000..35afa80
--- /dev/null
+++ b/src/stages/syntactic_tokens.rs
@@ -0,0 +1,94 @@
+use crate::*;
+
+
+pub enum SyntacticToken {
+ Literal(Value),
+ Pad(Value),
+ String(Vec<u8>),
+ Comment(String),
+ BlockOpen,
+ BlockClose,
+ Symbol(String),
+ Instruction(Instruction),
+ LabelDefinition(String),
+ MacroDefinition(SyntacticMacroDefinition),
+}
+
+pub struct SyntacticMacroDefinition {
+ pub name: Tracked<String>,
+ pub body: Vec<Tracked<SyntacticToken>>,
+}
+
+pub enum SyntacticError {
+ UnterminatedBlock,
+ UnterminatedComment,
+ UnterminatedRawString,
+ UnterminatedNullString,
+ UnterminatedMacroDefinition,
+ UnmatchedBlockTerminator,
+ UnmatchedCommentTerminator,
+ UnmatchedMacroTerminator,
+ InvalidPadValue,
+ InvalidIdentifier(String),
+ MacroDefinitionInMacroDefinition,
+ LabelDefinitionInMacroDefinition,
+}
+
+
+pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) {
+ for error in errors {
+ report_syntactic_error(error, source_code);
+ }
+}
+
+fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SyntacticError::UnterminatedBlock =>
+ "Block was not terminated, add a '}' character to terminate",
+ SyntacticError::UnterminatedComment =>
+ "Comment was not terminated, add a ')' character to terminate",
+ SyntacticError::UnterminatedRawString =>
+ "String was not terminated, add a ' character to terminate",
+ SyntacticError::UnterminatedNullString =>
+ "String was not terminated, add a '\"' character to terminate",
+ SyntacticError::UnterminatedMacroDefinition =>
+ "Macro definition was not terminated, add a ';' character to terminate",
+ SyntacticError::UnmatchedBlockTerminator =>
+ "Attempted to terminate a block, but no block was in progress",
+ SyntacticError::UnmatchedCommentTerminator =>
+ "Attempted to terminate a comment, but no comment was in progress",
+ SyntacticError::UnmatchedMacroTerminator =>
+ "Attempted to terminate a macro definition, but no macro definition was in progress",
+ SyntacticError::InvalidPadValue =>
+ "The pad value must be two or four hexadecimal digits",
+ SyntacticError::InvalidIdentifier(name) =>
+ &format!("An identifier cannot exceed 63 characters in length: {name}"),
+ SyntacticError::MacroDefinitionInMacroDefinition =>
+ "A macro cannot be defined inside another macro",
+ SyntacticError::LabelDefinitionInMacroDefinition =>
+ "A label cannot be defined inside a macro",
+ };
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+pub fn print_syntactic_token(i: usize, token: &SyntacticToken) {
+ match token {
+ SyntacticToken::Literal(value) => indent!(i, "Literal({value})"),
+ SyntacticToken::Pad(value) => indent!(i, "Pad({value})"),
+ SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)),
+ SyntacticToken::Comment(_) => indent!(i, "Comment"),
+ SyntacticToken::BlockOpen => indent!(i, "BlockOpen"),
+ SyntacticToken::BlockClose => indent!(i, "BlockClose"),
+ SyntacticToken::Symbol(name) => indent!(i, "Symbol({name})"),
+ SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"),
+ SyntacticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"),
+ SyntacticToken::MacroDefinition(definition) => {
+ indent!(i, "MacroDefinition({})", definition.name);
+ for token in &definition.body {
+ print_syntactic_token(i+1, token);
+ }
+ }
+ }
+}
diff --git a/src/syntactic_token.rs b/src/syntactic_token.rs
deleted file mode 100644
index 4a50e8a..0000000
--- a/src/syntactic_token.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use crate::*;
-
-pub enum SyntacticTokenType {
- Reference(String),
-
- LabelDefinition(String),
- MacroDefinition(String),
- MacroDefinitionTerminator,
-
- Padding(u16),
- ByteLiteral(u8),
- ShortLiteral(u16),
- Instruction(u8),
-
- Comment,
-}
-
-
-
-pub struct SyntacticToken {
- pub r#type: SyntacticTokenType,
- pub source_location: SourceLocation,
- pub error: Option<Error>,
-}
-
-impl SyntacticToken {
- // Call when this token is found inside a macro definition.
- pub fn use_in_macro_body(&mut self) {
- match self.r#type {
- SyntacticTokenType::LabelDefinition(..) |
- SyntacticTokenType::MacroDefinition(..) => {
- self.set_error(Error::InvalidTypeInMacroDefinition)
- }
- _ => (),
- };
- }
- pub fn set_error(&mut self, error: Error) {
- self.error = Some(error);
- }
- pub fn is_macro_terminator(&self) -> bool {
- if let SyntacticTokenType::MacroDefinitionTerminator = self.r#type {true} else {false}
- }
-}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
deleted file mode 100644
index 02bf490..0000000
--- a/src/tokenizer.rs
+++ /dev/null
@@ -1,235 +0,0 @@
-use std::mem::take;
-use crate::*;
-
-#[derive(PartialEq)]
-enum StringLiteral {
- None,
- Raw,
- NullTerminated,
-}
-
-pub struct TokenIterator {
-    /// The characters that comprise the program source code.
- chars: Vec<char>,
- /// The index of the next character to read.
- i: usize,
- /// The address of the next character to read.
- addr: CharAddress,
- /// If true, skip over any whitespace characters. If false, stop reading
- /// when a whitespace character is encountered.
- skip_whitespace: bool,
- /// The name of the most recently defined label.
- label: String,
-    /// If not StringLiteral::None, each individual character will be tokenised as a ByteLiteral.
- parse_string_literal: StringLiteral,
-
-
- /// The address of the first character of the current token.
- start: CharAddress,
- /// The address of the final character of the current token.
- end: CharAddress,
- /// The entire current token.
- source: String,
- /// The first character of the current token.
- prefix: char,
- /// The second and remaining characters of the current token.
- suffix: String,
-}
-
-impl TokenIterator {
- /// Create an iterator from a string of program source code.
- pub fn from_str(source_code: &str) -> Self {
- Self {
- chars: source_code.chars().collect(),
- i: 0,
- addr: CharAddress::zero(),
- skip_whitespace: true,
- parse_string_literal: StringLiteral::None,
- label: String::new(),
- start: CharAddress::zero(),
- end: CharAddress::zero(),
- source: String::new(),
- prefix: ' ',
- suffix: String::new(),
- }
- }
- /// Append a character to the current token.
- fn push(&mut self, c:char) {
- self.end = self.addr;
- self.source.push(c);
- self.suffix.push(c);
- self.next(c);
- }
- /// Move forward to the next source character.
- fn next(&mut self, c: char) {
- self.addr.column += 1;
- self.i += 1;
- if c == '\n' {
- self.addr.column = 0;
- self.addr.line += 1;
- }
- }
- /// Mark the current character as being the first character of a new token.
- fn mark_start(&mut self, c:char) {
- if c == '"' {
- self.parse_string_literal = StringLiteral::NullTerminated;
- } else if c == '\'' {
- self.parse_string_literal = StringLiteral::Raw;
- } else {
- self.start=self.addr;
- self.end=self.addr;
- self.prefix=c;
- self.source.push(c);
- self.skip_whitespace=false;
- }
- self.next(c);
- }
-}
-
-impl Iterator for TokenIterator {
- type Item = SyntacticToken;
-
- fn next(&mut self) -> Option<SyntacticToken> {
- // Initialise values before reading the next token
- let mut is_comment = false;
- self.skip_whitespace = true;
-
- // Iterate over source characters until a full token is read
- while let Some(c) = self.chars.get(self.i) {
- let c = *c;
- // Parse individual characters from a string literal
- if self.parse_string_literal != StringLiteral::None {
- if c == '"' && self.parse_string_literal == StringLiteral::NullTerminated {
- self.parse_string_literal = StringLiteral::None;
- let token = SyntacticToken {
- r#type: SyntacticTokenType::ByteLiteral(0),
- source_location: SourceLocation {
- source: c.to_string(), start:self.addr, end:self.addr },
- error: None,
- };
- self.next(c);
- return Some(token);
- } else if c == '\'' && self.parse_string_literal == StringLiteral::Raw {
- self.parse_string_literal = StringLiteral::None;
- self.next(c);
- continue
- } else {
- self.next(c);
- return Some(SyntacticToken {
- r#type: SyntacticTokenType::ByteLiteral(c as u8),
- source_location: SourceLocation {
- source: c.to_string(), start:self.addr, end:self.addr },
- error: None,
- });
- }
- }
- // Intercept comments
- if is_comment {
- self.push(c); if c == ')' { break } else { continue }; }
- else if self.skip_whitespace && c == '(' {
- is_comment = true; self.mark_start(c); continue }
-
- // Allow a semicolon at the end of a token to be handled as a separate token
- if self.source.len() > 0 && c == ';' { break }
- // Handle the current character
- match (is_whitespace(c), self.skip_whitespace) {
- (true, true) => self.next(c), // c is the expected leading whitespace
- (false, true) => self.mark_start(c), // c is the first character of the token
- (false, false) => self.push(c), // c is a character of the token
- (true, false) => break, // c is trailing whitespace
- }
- // Allow literal values to be chained to the end of the previous token
- if self.source.len() > 0 && c == ':' { break }
- }
-
- // If no source characters were grabbed then we have read through the entire source file
- if self.source.len() == 0 { return None; }
- // Allow handling macro terminators and symbols of length 1 in the match expression
- if self.suffix.len() == 0 { self.prefix = '\0'; }
- // Consume the collected characters to be used in the match expression
- let full = take(&mut self.source);
- let suffix = take(&mut self.suffix);
- let mut error = None;
- let mut parse_padding_value = |v| {
- parse_short(v).or_else(|| {
- error = Some(Error::InvalidPaddingValue); Some(0)
- }).unwrap()
- };
-
- let r#type = match self.prefix {
- '(' => { SyntacticTokenType::Comment }
- '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) }
- '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) }
- '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) }
- '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) }
- '%' => if let Some(("", sublabel)) = suffix.split_once("~") {
- SyntacticTokenType::MacroDefinition(format!("{}/{}", self.label, sublabel))
- } else {
- SyntacticTokenType::MacroDefinition(suffix)
- }
- _ => {
- if ";" == &full { SyntacticTokenType::MacroDefinitionTerminator }
- else if let Some(value) = parse_byte_lit(&full) { SyntacticTokenType::ByteLiteral(value) }
- else if let Some(value) = parse_short_lit(&full) { SyntacticTokenType::ShortLiteral(value) }
- else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) }
- else { SyntacticTokenType::Reference(full.clone()) }
- }
- };
- Some(SyntacticToken {
- r#type,
- source_location:SourceLocation::new(full,self.start,self.end),
- error,
- })
- }
-}
-
-
-fn parse_byte_lit(token: &str) -> Option<u8> {
- match token.len() { 2 => u8::from_str_radix(token, 16).ok(), _ => None } }
-fn parse_short_lit(token: &str) -> Option<u16> {
- match token.len() { 4 => u16::from_str_radix(token, 16).ok(), _ => None } }
-fn parse_short(token: &str) -> Option<u16> {
- match token.len() { 1..=4 => u16::from_str_radix(token, 16).ok(), _ => None } }
-fn is_whitespace(c: char) -> bool {
- match c { ' '|'\t'|'\n'|'\r'|'['|']'|'(' =>true, _=>false } }
-fn parse_instruction(token: &str) -> Option<u8> {
- Some(match token {
- // Control operators
- "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0,
- "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1,
- "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2,
- "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3,
- "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4,
- "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5,
- "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6,
- "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7,
- // Stack operators
- "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8,
- "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9,
- "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA,
- "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB,
- "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC,
- "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED,
- "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE,
- "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF,
- // Numeric operators
- "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0,
- "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1,
- "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2,
- "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3,
- "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4,
- "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5,
- "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6,
- "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7,
- // Bitwise operators
- "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8,
- "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9,
- "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA,
- "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB,
- "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC,
- "SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD,
- "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE,
- "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF,
- _ => return None,
- })
-}
diff --git a/src/types/instruction.rs b/src/types/instruction.rs
new file mode 100644
index 0000000..252fc68
--- /dev/null
+++ b/src/types/instruction.rs
@@ -0,0 +1,168 @@
+use crate::*;
+
+use Operation as Op;
+
+
/// A single instruction, encoded as one byte.
///
/// The low five bits select the base [`Operation`] (see
/// `Instruction::operation`); the upper three bits are mode flags
/// decoded by `return_mode` / `wide_mode` / `immediate_mode`.
///
/// Derives added so instructions can be copied, compared, debugged,
/// and used as map keys like any other one-byte value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Instruction {
    /// The raw encoded instruction byte.
    pub value: u8,
}
+
+impl Instruction {
+ pub fn operation(&self) -> Operation {
+ match self.value & 0x1f {
+ 0x00=>Op::HLT, 0x01=>Op::PSH, 0x02=>Op::POP, 0x03=>Op::CPY,
+ 0x04=>Op::DUP, 0x05=>Op::OVR, 0x06=>Op::SWP, 0x07=>Op::ROT,
+ 0x08=>Op::JMP, 0x09=>Op::JMS, 0x0a=>Op::JCN, 0x0b=>Op::JCS,
+ 0x0c=>Op::LDA, 0x0d=>Op::STA, 0x0e=>Op::LDD, 0x0f=>Op::STD,
+ 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC,
+ 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK,
+ 0x18=>Op::SHL, 0x19=>Op::SHR, 0x1a=>Op::ROL, 0x1b=>Op::ROR,
+ 0x1c=>Op::IOR, 0x1d=>Op::XOR, 0x1e=>Op::AND, 0x1f=>Op::NOT,
+ _ => unreachable!(),
+ }
+ }
+
+ pub fn return_mode(&self) -> bool {
+ self.value & RETURN_MODE != 0
+ }
+
+ pub fn wide_mode(&self) -> bool {
+ self.value & WIDE_MODE != 0
+ }
+
+ pub fn immediate_mode(&self) -> bool {
+ self.value & IMMEDIATE_MODE != 0
+ }
+}
+
/// Formats the instruction byte as its assembler mnemonic.
///
/// Names compose as BASE + optional suffixes, in the fixed order shown by
/// the table: the 0x80 bit appends 'r', the 0x40 bit appends '*', and the
/// 0x20 bit appends ':' (presumably the return/wide/immediate mode flags —
/// the mask constants live elsewhere; confirm against RETURN_MODE et al.).
/// The 0x00 column is special-cased: its eight mode combinations are named
/// HLT/NOP/DB1..DB6 rather than suffixed forms.
impl std::fmt::Display for Instruction {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        // Exhaustive 256-entry table: exactly one name per encoded byte.
        // Row = base opcode (low five bits), column = mode-flag combination.
        write!(f, "{}", match self.value {
            // Stack operators (first row holds the special HLT/NOP/DBn names)
            0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" ,
            0x01=>"PSH",0x21=>"PSH:",0x41=>"PSH*",0x61=>"PSH*:",0x81=>"PSHr",0xA1=>"PSHr:",0xC1=>"PSHr*",0xE1=>"PSHr*:",
            0x02=>"POP",0x22=>"POP:",0x42=>"POP*",0x62=>"POP*:",0x82=>"POPr",0xA2=>"POPr:",0xC2=>"POPr*",0xE2=>"POPr*:",
            0x03=>"CPY",0x23=>"CPY:",0x43=>"CPY*",0x63=>"CPY*:",0x83=>"CPYr",0xA3=>"CPYr:",0xC3=>"CPYr*",0xE3=>"CPYr*:",
            0x04=>"DUP",0x24=>"DUP:",0x44=>"DUP*",0x64=>"DUP*:",0x84=>"DUPr",0xA4=>"DUPr:",0xC4=>"DUPr*",0xE4=>"DUPr*:",
            0x05=>"OVR",0x25=>"OVR:",0x45=>"OVR*",0x65=>"OVR*:",0x85=>"OVRr",0xA5=>"OVRr:",0xC5=>"OVRr*",0xE5=>"OVRr*:",
            0x06=>"SWP",0x26=>"SWP:",0x46=>"SWP*",0x66=>"SWP*:",0x86=>"SWPr",0xA6=>"SWPr:",0xC6=>"SWPr*",0xE6=>"SWPr*:",
            0x07=>"ROT",0x27=>"ROT:",0x47=>"ROT*",0x67=>"ROT*:",0x87=>"ROTr",0xA7=>"ROTr:",0xC7=>"ROTr*",0xE7=>"ROTr*:",
            // Control operators
            0x08=>"JMP",0x28=>"JMP:",0x48=>"JMP*",0x68=>"JMP*:",0x88=>"JMPr",0xA8=>"JMPr:",0xC8=>"JMPr*",0xE8=>"JMPr*:",
            0x09=>"JMS",0x29=>"JMS:",0x49=>"JMS*",0x69=>"JMS*:",0x89=>"JMSr",0xA9=>"JMSr:",0xC9=>"JMSr*",0xE9=>"JMSr*:",
            0x0A=>"JCN",0x2A=>"JCN:",0x4A=>"JCN*",0x6A=>"JCN*:",0x8A=>"JCNr",0xAA=>"JCNr:",0xCA=>"JCNr*",0xEA=>"JCNr*:",
            0x0B=>"JCS",0x2B=>"JCS:",0x4B=>"JCS*",0x6B=>"JCS*:",0x8B=>"JCSr",0xAB=>"JCSr:",0xCB=>"JCSr*",0xEB=>"JCSr*:",
            0x0C=>"LDA",0x2C=>"LDA:",0x4C=>"LDA*",0x6C=>"LDA*:",0x8C=>"LDAr",0xAC=>"LDAr:",0xCC=>"LDAr*",0xEC=>"LDAr*:",
            0x0D=>"STA",0x2D=>"STA:",0x4D=>"STA*",0x6D=>"STA*:",0x8D=>"STAr",0xAD=>"STAr:",0xCD=>"STAr*",0xED=>"STAr*:",
            0x0E=>"LDD",0x2E=>"LDD:",0x4E=>"LDD*",0x6E=>"LDD*:",0x8E=>"LDDr",0xAE=>"LDDr:",0xCE=>"LDDr*",0xEE=>"LDDr*:",
            0x0F=>"STD",0x2F=>"STD:",0x4F=>"STD*",0x6F=>"STD*:",0x8F=>"STDr",0xAF=>"STDr:",0xCF=>"STDr*",0xEF=>"STDr*:",
            // Numeric operators
            0x10=>"ADD",0x30=>"ADD:",0x50=>"ADD*",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr:",0xD0=>"ADDr*",0xF0=>"ADDr*:",
            0x11=>"SUB",0x31=>"SUB:",0x51=>"SUB*",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr:",0xD1=>"SUBr*",0xF1=>"SUBr*:",
            0x12=>"INC",0x32=>"INC:",0x52=>"INC*",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr:",0xD2=>"INCr*",0xF2=>"INCr*:",
            0x13=>"DEC",0x33=>"DEC:",0x53=>"DEC*",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr:",0xD3=>"DECr*",0xF3=>"DECr*:",
            0x14=>"LTH",0x34=>"LTH:",0x54=>"LTH*",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr:",0xD4=>"LTHr*",0xF4=>"LTHr*:",
            0x15=>"GTH",0x35=>"GTH:",0x55=>"GTH*",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr:",0xD5=>"GTHr*",0xF5=>"GTHr*:",
            0x16=>"EQU",0x36=>"EQU:",0x56=>"EQU*",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr:",0xD6=>"EQUr*",0xF6=>"EQUr*:",
            0x17=>"NQK",0x37=>"NQK:",0x57=>"NQK*",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr:",0xD7=>"NQKr*",0xF7=>"NQKr*:",
            // Bitwise operators
            0x18=>"SHL",0x38=>"SHL:",0x58=>"SHL*",0x78=>"SHL*:",0x98=>"SHLr",0xB8=>"SHLr:",0xD8=>"SHLr*",0xF8=>"SHLr*:",
            0x19=>"SHR",0x39=>"SHR:",0x59=>"SHR*",0x79=>"SHR*:",0x99=>"SHRr",0xB9=>"SHRr:",0xD9=>"SHRr*",0xF9=>"SHRr*:",
            0x1A=>"ROL",0x3A=>"ROL:",0x5A=>"ROL*",0x7A=>"ROL*:",0x9A=>"ROLr",0xBA=>"ROLr:",0xDA=>"ROLr*",0xFA=>"ROLr*:",
            0x1B=>"ROR",0x3B=>"ROR:",0x5B=>"ROR*",0x7B=>"ROR*:",0x9B=>"RORr",0xBB=>"RORr:",0xDB=>"RORr*",0xFB=>"RORr*:",
            0x1C=>"IOR",0x3C=>"IOR:",0x5C=>"IOR*",0x7C=>"IOR*:",0x9C=>"IORr",0xBC=>"IORr:",0xDC=>"IORr*",0xFC=>"IORr*:",
            0x1D=>"XOR",0x3D=>"XOR:",0x5D=>"XOR*",0x7D=>"XOR*:",0x9D=>"XORr",0xBD=>"XORr:",0xDD=>"XORr*",0xFD=>"XORr*:",
            0x1E=>"AND",0x3E=>"AND:",0x5E=>"AND*",0x7E=>"AND*:",0x9E=>"ANDr",0xBE=>"ANDr:",0xDE=>"ANDr*",0xFE=>"ANDr*:",
            0x1F=>"NOT",0x3F=>"NOT:",0x5F=>"NOT*",0x7F=>"NOT*:",0x9F=>"NOTr",0xBF=>"NOTr:",0xDF=>"NOTr*",0xFF=>"NOTr*:",
        })
    }
}
+
/// Parses an assembler mnemonic into its encoded instruction byte.
///
/// Accepts every name produced by the `Display` impl, plus four bare-mode
/// aliases — ":", "*:", "r:", "r*:" — which parse as the corresponding
/// immediate-mode PSH variants. Returns `Err(())` for any unknown token.
/// The table must stay the exact inverse of the `Display` table.
impl std::str::FromStr for Instruction {
    type Err = ();

    fn from_str(token: &str) -> Result<Self, Self::Err> {
        Ok( Instruction { value: match token {
            // Stack operators (first row holds the special HLT/NOP/DBn names)
            "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0,
            "PSH"=>0x01,"PSH:"=>0x21,"PSH*"=>0x41,"PSH*:"=>0x61,"PSHr"=>0x81,"PSHr:"=>0xA1,"PSHr*"=>0xC1,"PSHr*:"=>0xE1,
            // Bare-mode aliases: shorthand for the immediate-mode PSH forms above.
            ":"=>0x21, "*:"=>0x61, "r:"=>0xA1, "r*:"=>0xE1,
            "POP"=>0x02,"POP:"=>0x22,"POP*"=>0x42,"POP*:"=>0x62,"POPr"=>0x82,"POPr:"=>0xA2,"POPr*"=>0xC2,"POPr*:"=>0xE2,
            "CPY"=>0x03,"CPY:"=>0x23,"CPY*"=>0x43,"CPY*:"=>0x63,"CPYr"=>0x83,"CPYr:"=>0xA3,"CPYr*"=>0xC3,"CPYr*:"=>0xE3,
            "DUP"=>0x04,"DUP:"=>0x24,"DUP*"=>0x44,"DUP*:"=>0x64,"DUPr"=>0x84,"DUPr:"=>0xA4,"DUPr*"=>0xC4,"DUPr*:"=>0xE4,
            "OVR"=>0x05,"OVR:"=>0x25,"OVR*"=>0x45,"OVR*:"=>0x65,"OVRr"=>0x85,"OVRr:"=>0xA5,"OVRr*"=>0xC5,"OVRr*:"=>0xE5,
            "SWP"=>0x06,"SWP:"=>0x26,"SWP*"=>0x46,"SWP*:"=>0x66,"SWPr"=>0x86,"SWPr:"=>0xA6,"SWPr*"=>0xC6,"SWPr*:"=>0xE6,
            "ROT"=>0x07,"ROT:"=>0x27,"ROT*"=>0x47,"ROT*:"=>0x67,"ROTr"=>0x87,"ROTr:"=>0xA7,"ROTr*"=>0xC7,"ROTr*:"=>0xE7,
            // Control operators
            "JMP"=>0x08,"JMP:"=>0x28,"JMP*"=>0x48,"JMP*:"=>0x68,"JMPr"=>0x88,"JMPr:"=>0xA8,"JMPr*"=>0xC8,"JMPr*:"=>0xE8,
            "JMS"=>0x09,"JMS:"=>0x29,"JMS*"=>0x49,"JMS*:"=>0x69,"JMSr"=>0x89,"JMSr:"=>0xA9,"JMSr*"=>0xC9,"JMSr*:"=>0xE9,
            "JCN"=>0x0A,"JCN:"=>0x2A,"JCN*"=>0x4A,"JCN*:"=>0x6A,"JCNr"=>0x8A,"JCNr:"=>0xAA,"JCNr*"=>0xCA,"JCNr*:"=>0xEA,
            "JCS"=>0x0B,"JCS:"=>0x2B,"JCS*"=>0x4B,"JCS*:"=>0x6B,"JCSr"=>0x8B,"JCSr:"=>0xAB,"JCSr*"=>0xCB,"JCSr*:"=>0xEB,
            "LDA"=>0x0C,"LDA:"=>0x2C,"LDA*"=>0x4C,"LDA*:"=>0x6C,"LDAr"=>0x8C,"LDAr:"=>0xAC,"LDAr*"=>0xCC,"LDAr*:"=>0xEC,
            "STA"=>0x0D,"STA:"=>0x2D,"STA*"=>0x4D,"STA*:"=>0x6D,"STAr"=>0x8D,"STAr:"=>0xAD,"STAr*"=>0xCD,"STAr*:"=>0xED,
            "LDD"=>0x0E,"LDD:"=>0x2E,"LDD*"=>0x4E,"LDD*:"=>0x6E,"LDDr"=>0x8E,"LDDr:"=>0xAE,"LDDr*"=>0xCE,"LDDr*:"=>0xEE,
            "STD"=>0x0F,"STD:"=>0x2F,"STD*"=>0x4F,"STD*:"=>0x6F,"STDr"=>0x8F,"STDr:"=>0xAF,"STDr*"=>0xCF,"STDr*:"=>0xEF,
            // Numeric operators
            "ADD"=>0x10,"ADD:"=>0x30,"ADD*"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr:"=>0xB0,"ADDr*"=>0xD0,"ADDr*:"=>0xF0,
            "SUB"=>0x11,"SUB:"=>0x31,"SUB*"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr:"=>0xB1,"SUBr*"=>0xD1,"SUBr*:"=>0xF1,
            "INC"=>0x12,"INC:"=>0x32,"INC*"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr:"=>0xB2,"INCr*"=>0xD2,"INCr*:"=>0xF2,
            "DEC"=>0x13,"DEC:"=>0x33,"DEC*"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr:"=>0xB3,"DECr*"=>0xD3,"DECr*:"=>0xF3,
            "LTH"=>0x14,"LTH:"=>0x34,"LTH*"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr:"=>0xB4,"LTHr*"=>0xD4,"LTHr*:"=>0xF4,
            "GTH"=>0x15,"GTH:"=>0x35,"GTH*"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr:"=>0xB5,"GTHr*"=>0xD5,"GTHr*:"=>0xF5,
            "EQU"=>0x16,"EQU:"=>0x36,"EQU*"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr:"=>0xB6,"EQUr*"=>0xD6,"EQUr*:"=>0xF6,
            "NQK"=>0x17,"NQK:"=>0x37,"NQK*"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr:"=>0xB7,"NQKr*"=>0xD7,"NQKr*:"=>0xF7,
            // Bitwise operators
            "SHL"=>0x18,"SHL:"=>0x38,"SHL*"=>0x58,"SHL*:"=>0x78,"SHLr"=>0x98,"SHLr:"=>0xB8,"SHLr*"=>0xD8,"SHLr*:"=>0xF8,
            "SHR"=>0x19,"SHR:"=>0x39,"SHR*"=>0x59,"SHR*:"=>0x79,"SHRr"=>0x99,"SHRr:"=>0xB9,"SHRr*"=>0xD9,"SHRr*:"=>0xF9,
            "ROL"=>0x1A,"ROL:"=>0x3A,"ROL*"=>0x5A,"ROL*:"=>0x7A,"ROLr"=>0x9A,"ROLr:"=>0xBA,"ROLr*"=>0xDA,"ROLr*:"=>0xFA,
            "ROR"=>0x1B,"ROR:"=>0x3B,"ROR*"=>0x5B,"ROR*:"=>0x7B,"RORr"=>0x9B,"RORr:"=>0xBB,"RORr*"=>0xDB,"RORr*:"=>0xFB,
            "IOR"=>0x1C,"IOR:"=>0x3C,"IOR*"=>0x5C,"IOR*:"=>0x7C,"IORr"=>0x9C,"IORr:"=>0xBC,"IORr*"=>0xDC,"IORr*:"=>0xFC,
            "XOR"=>0x1D,"XOR:"=>0x3D,"XOR*"=>0x5D,"XOR*:"=>0x7D,"XORr"=>0x9D,"XORr:"=>0xBD,"XORr*"=>0xDD,"XORr*:"=>0xFD,
            "AND"=>0x1E,"AND:"=>0x3E,"AND*"=>0x5E,"AND*:"=>0x7E,"ANDr"=>0x9E,"ANDr:"=>0xBE,"ANDr*"=>0xDE,"ANDr*:"=>0xFE,
            "NOT"=>0x1F,"NOT:"=>0x3F,"NOT*"=>0x5F,"NOT*:"=>0x7F,"NOTr"=>0x9F,"NOTr:"=>0xBF,"NOTr*"=>0xDF,"NOTr*:"=>0xFF,
            _ => return Err(()),
        }})
    }
}
+
+
/// The 32 base operations of the instruction set, one per value of the
/// low five bits of an instruction byte (decoded by `Instruction::operation`).
///
/// Derives added so operations can be copied, compared, debugged, and
/// used as map keys; `Copy` is free for a fieldless enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Operation {
    // Stack operators (0x00..=0x07)
    HLT, PSH, POP, CPY,
    DUP, OVR, SWP, ROT,
    // Control operators (0x08..=0x0F)
    JMP, JMS, JCN, JCS,
    LDA, STA, LDD, STD,
    // Numeric operators (0x10..=0x17)
    ADD, SUB, INC, DEC,
    LTH, GTH, EQU, NQK,
    // Bitwise operators (0x18..=0x1F)
    SHL, SHR, ROL, ROR,
    IOR, XOR, AND, NOT,
}
+
+impl From<Operation> for u8 {
+ fn from(operation: Operation) -> Self {
+ match operation {
+ Op::HLT=>0x00, Op::PSH=>0x01, Op::POP=>0x02, Op::CPY=>0x03,
+ Op::DUP=>0x04, Op::OVR=>0x05, Op::SWP=>0x06, Op::ROT=>0x07,
+ Op::JMP=>0x08, Op::JMS=>0x09, Op::JCN=>0x0A, Op::JCS=>0x0B,
+ Op::LDA=>0x0C, Op::STA=>0x0D, Op::LDD=>0x0E, Op::STD=>0x0F,
+ Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13,
+ Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17,
+ Op::SHL=>0x1C, Op::SHR=>0x1D, Op::ROL=>0x1E, Op::ROR=>0x1F,
+ Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1A, Op::NOT=>0x1B,
+ }
+ }
+}
+
+impl std::fmt::Display for Operation {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ write!(f, "{}", match self {
+ Op::HLT=>"HLT", Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY",
+ Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT",
+ Op::JMP=>"JMP", Op::JMS=>"JMS", Op::JCN=>"JCN", Op::JCS=>"JCS",
+ Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD",
+ Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC",
+ Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK",
+ Op::SHL=>"SHL", Op::SHR=>"SHR", Op::ROL=>"ROL", Op::ROR=>"ROR",
+ Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT",
+ })
+ }
+}
diff --git a/src/types/mod.rs b/src/types/mod.rs
new file mode 100644
index 0000000..8094cb1
--- /dev/null
+++ b/src/types/mod.rs
@@ -0,0 +1,4 @@
+mod instruction;
+mod value;
+pub use instruction::*;
+pub use value::*;
diff --git a/src/types/value.rs b/src/types/value.rs
new file mode 100644
index 0000000..fe82710
--- /dev/null
+++ b/src/types/value.rs
@@ -0,0 +1,48 @@
/// An unsigned literal value: either a single byte or a two-byte double.
///
/// `Clone`/`Copy` kept from the original; `Debug`, `PartialEq`, `Eq`, and
/// `Hash` added so values can be asserted on, compared, and used as keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Value {
    /// An 8-bit value.
    Byte(u8),
    /// A 16-bit value.
    Double(u16),
}
+
+impl From<Value> for usize {
+ fn from(value: Value) -> Self {
+ match value {
+ Value::Byte(byte) => byte.into(),
+ Value::Double(double) => double.into(),
+ }
+ }
+}
+
+impl From<&Value> for usize {
+ fn from(value: &Value) -> Self {
+ (*value).into()
+ }
+}
+
+impl std::fmt::Display for Value {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ match self {
+ Self::Byte(value) => write!(f, "0x{value:02x}"),
+ Self::Double(value) => write!(f, "0x{value:04x}"),
+ }
+ }
+}
+
+
+impl std::str::FromStr for Value {
+ type Err = ();
+
+ fn from_str(token: &str) -> Result<Self, Self::Err> {
+ match token.len() {
+ 2 => match u8::from_str_radix(&token, 16) {
+ Ok(value) => Ok(Value::Byte(value)),
+ Err(_) => Err(()),
+ }
+ 4 => match u16::from_str_radix(&token, 16) {
+ Ok(value) => Ok(Value::Double(value)),
+ Err(_) => Err(()),
+ }
+ _ => Err(()),
+ }
+ }
+}