author     Ben Bridle <bridle.benjamin@gmail.com>  2024-10-28 19:52:29 +1300
committer  Ben Bridle <bridle.benjamin@gmail.com>  2024-10-28 19:52:47 +1300
commit     f4027cae775e3c9c237675f9df35a744d54f3f2e (patch)
tree       733fa3af9e1bd44d61dd83983a2da86cb75c53e9
parent     16ee0e9e8dce2c88acc88ba5ffd97e013624fa5e (diff)
download   bedrock-asm-f4027cae775e3c9c237675f9df35a744d54f3f2e.zip
Rewrite assembler
This is a near-complete rewrite of the assembler, with a different parsing strategy and a new symbol resolution mechanism that automatically includes library files. The assembly syntax has also changed slightly: padding tokens are now prefixed with '#' instead of '$', and a block-style anonymous-label syntax has been added using the '{' and '}' characters.
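
For illustration, a pad token previously written '$0100' is now written '#0100'. The brace syntax is sketched below with assumed semantics, since this diff ends before the new syntactic parser:

    #0100 @main        ( padding now uses '#': pad to 0x0100, define 'main' )
    JMP: {             ( assumed: '{' references the address of the matching '}' )
        ...
    }                  ( assumed: '}' defines the anonymous label )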
-rw-r--r--  Cargo.lock                              27
-rw-r--r--  Cargo.toml                               6
-rw-r--r--  src/addressing.rs                       44
-rw-r--r--  src/assembler.rs                       278
-rw-r--r--  src/bin/bedrock-asm.rs                 148
-rw-r--r--  src/error.rs                            10
-rw-r--r--  src/gather_libraries.rs                185
-rw-r--r--  src/lib.rs                              35
-rw-r--r--  src/locators.rs                          5
-rw-r--r--  src/locators/bytecode.rs                39
-rw-r--r--  src/locators/source.rs                  69
-rw-r--r--  src/main.rs                             43
-rw-r--r--  src/print.rs                           237
-rw-r--r--  src/semantic_token.rs                  116
-rw-r--r--  src/symbol_resolver.rs                 230
-rw-r--r--  src/syntactic_token.rs                  43
-rw-r--r--  src/tokenizer.rs                       235
-rw-r--r--  src/tokens.rs                            9
-rw-r--r--  src/tokens/instruction.rs              170
-rw-r--r--  src/tokens/semantic.rs                  90
-rw-r--r--  src/tokens/syntactic.rs                 39
-rw-r--r--  src/tokens/value.rs                     32
-rw-r--r--  src/translators.rs                       9
-rw-r--r--  src/translators/bytecode_generator.rs  131
-rw-r--r--  src/translators/semantic_parser.rs     245
-rw-r--r--  src/translators/symbols_generator.rs    28
-rw-r--r--  src/translators/syntactic_parser.rs    247
27 files changed, 1959 insertions, 791 deletions
diff --git a/Cargo.lock b/Cargo.lock
index edeeaf1..ab31947 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,7 +1,30 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 3
+version = 4
[[package]]
-name = "bedrock_asm"
+name = "bedrock-asm"
version = "3.0.0"
+dependencies = [
+ "vagabond",
+ "xflags",
+]
+
+[[package]]
+name = "vagabond"
+version = "1.0.0"
+
+[[package]]
+name = "xflags"
+version = "0.4.0-pre.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4697c0db52cfb7277cf997ed334c92c739fafc7c5d44a948a906a5bf4b41a63f"
+dependencies = [
+ "xflags-macros",
+]
+
+[[package]]
+name = "xflags-macros"
+version = "0.4.0-pre.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94d18ac1a136311770ed587356f8a828c9b86261f68761f34e6cdc6d5b4c435c"
diff --git a/Cargo.toml b/Cargo.toml
index cec4e4d..bd4b7b4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,13 +1,15 @@
[package]
-name = "bedrock_asm"
+name = "bedrock-asm"
version = "3.0.0"
authors = ["Ben Bridle"]
edition = "2021"
description = "Assembler program for the Bedrock assembly language"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
+vagabond = { path = "/home/ben/Libraries/vagabond" }
+xflags = "0.4.0-pre"
+
[profile.release]
lto=true
diff --git a/src/addressing.rs b/src/addressing.rs
deleted file mode 100644
index dd7638e..0000000
--- a/src/addressing.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-#[derive(Clone,Copy)]
-pub struct CharAddress {
- /// The number of lines that precede this line in the file.
- pub line:usize,
- /// The number of characters that precede this character in the line.
- pub column:usize,
-}
-impl CharAddress {
- pub fn new(line:usize, column:usize) -> Self {
- Self { line, column }
- }
- pub fn zero() -> Self {
- Self::new(0,0)
- }
-}
-
-pub struct SourceLocation {
- /// The slice of the source file from which this token was parsed.
- pub source: String,
- /// The address of the first character of this token.
- pub start: CharAddress,
- /// The address of the final character of this token.
- pub end: CharAddress
-}
-impl SourceLocation {
- pub fn new(source:String, start:CharAddress, end:CharAddress) -> Self {
- Self { source, start, end }
- }
- pub fn zero() -> Self {
- Self { source:String::new(), start:CharAddress::zero(), end:CharAddress::zero() }
- }
-}
-
-pub struct BytecodeLocation {
- /// The number of bytes that precede this byte sequence in the bytecode.
- pub start: u16,
- /// The length of this byte sequence, in bytes.
- pub length: u16,
-}
-impl BytecodeLocation {
- pub fn zero() -> Self {
- Self { start:0, length:0 }
- }
-}
diff --git a/src/assembler.rs b/src/assembler.rs
deleted file mode 100644
index 692eb14..0000000
--- a/src/assembler.rs
+++ /dev/null
@@ -1,278 +0,0 @@
-use std::mem::take;
-use std::collections::hash_map::Entry;
-
-use SyntacticTokenType as Syn;
-use SemanticTokenType as Sem;
-use crate::*;
-
-use std::collections::HashMap;
-
-/// The inner value is the index of the token that defines this symbol.
-pub enum SymbolDefinition {
- Macro(usize),
- Label(usize),
-}
-
-pub struct Assembler {
- /// The contents of the program as a list of syntactic tokens.
- syntactic_tokens: Vec<SyntacticToken>,
- /// The contents of the program as a list of semantic tokens.
- semantic_tokens: Vec<SemanticToken>,
- /// Map the name of each defined symbol to the index of the defining token.
- symbol_definitions: HashMap<String, SymbolDefinition>,
- /// Map each macro definition token index to a list of syntactic body tokens.
- syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>,
- /// Map each macro definition token index to a list of semantic body tokens.
- semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>,
-}
-
-impl Assembler {
- pub fn new() -> Self {
- Self {
- syntactic_tokens: Vec::new(),
- semantic_tokens: Vec::new(),
- symbol_definitions: HashMap::new(),
- syntactic_macro_bodies: HashMap::new(),
- semantic_macro_bodies: HashMap::new(),
- }
- }
-
- pub fn tokenise_source(&mut self, source_code: &str) {
- // The index of the current macro definition token
- let mut macro_definition: Option<usize> = None;
- let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();
-
- for mut token in TokenIterator::from_str(source_code) {
- let next_index = self.syntactic_tokens.len();
- if let Some(index) = macro_definition {
- token.use_in_macro_body();
- if token.is_macro_terminator() {
- // Commit the current macro definition
- macro_definition_body_tokens.push(token);
- self.syntactic_macro_bodies.insert(
- index, take(&mut macro_definition_body_tokens));
- macro_definition = None;
- } else {
- macro_definition_body_tokens.push(token);
- }
- } else {
- if let Syn::MacroDefinition(ref name) = token.r#type {
- macro_definition = Some(next_index);
- match self.symbol_definitions.entry(name.to_string()) {
- Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
- Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));}
- }
- } else if let Syn::LabelDefinition(ref name) = token.r#type {
- match self.symbol_definitions.entry(name.to_string()) {
- Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
- Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));}
- }
- } else if token.is_macro_terminator() {
- token.set_error(Error::OrphanedMacroDefinitionTerminator);
- }
- self.syntactic_tokens.push(token);
- }
- }
- }
-
- pub fn resolve_references(&mut self) {
- let syntactic_tokens = take(&mut self.syntactic_tokens);
- let syntactic_token_count = syntactic_tokens.len();
- let mut parent_label = None;
-
- for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() {
- if let SyntacticTokenType::LabelDefinition(name) = &syntactic_token.r#type {
- parent_label = Some(name.to_owned());
- }
- let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index, parent_label.clone());
- self.semantic_tokens.push(semantic_token);
- }
- assert_eq!(syntactic_token_count, self.semantic_tokens.len());
-
- // Find all cyclic macros
- let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter(
- |i| !self.traverse_macro_definition(*i, 0)).collect();
- // Replace each cyclic macro reference in a macro definition with an error
- for body_tokens in &mut self.semantic_macro_bodies.values_mut() {
- for body_token in body_tokens {
- if let Sem::MacroReference(i) = body_token.r#type {
- if cyclic_macros.contains(&i) {
- let name = body_token.source_location.source.clone();
- body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference);
- }
- }
- }
- }
-
- }
-
- /// Attempt to recursively traverse the body tokens of a macro definition, returning
- /// false if the depth exceeds a preset maximum, and returning true otherwise.
- fn traverse_macro_definition(&self, index: usize, level: usize) -> bool {
- if level == 16 {
- false
- } else {
- self.semantic_macro_bodies[&index].iter().all(
- |token| if let Sem::MacroReference(i) = token.r#type {
- self.traverse_macro_definition(i, level+1)
- } else {
- true
- }
- )
- }
- }
-
- pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) {
- let mut bytecode: Vec<u8> = Vec::new();
- // Map each label definition token index to the bytecode addresses of the references
- let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
- // Map each label and macro definition token to a list of reference token indices
- let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new();
-
- macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
- macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
- macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
-
- let mut semantic_tokens = take(&mut self.semantic_tokens);
-
- // Translate semantic tokens into bytecode
- for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
- let start_addr = bytecode.len() as u16;
- match &mut semantic_token.r#type {
- Sem::LabelReference(i) => {
- reference_tokens.entry(*i).or_default().push(index);
- reference_addresses.entry(*i).or_default().push(start_addr);
- push_u16!(0);
- }
- Sem::MacroReference(i) => {
- reference_tokens.entry(*i).or_default().push(index);
- self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses);
- }
- Sem::LabelDefinition(def) => def.address=start_addr,
- Sem::MacroDefinition(_) => (),
-
- Sem::Padding(p) => pad!(*p),
- Sem::ByteLiteral(b) => push_u8!(*b),
- Sem::ShortLiteral(s) => push_u16!(*s),
- Sem::Instruction(b) => push_u8!(*b),
-
- Sem::MacroDefinitionTerminator => unreachable!(),
- Sem::Comment => (),
- Sem::Error(..) => (),
- };
- let end_addr = bytecode.len() as u16;
- semantic_token.bytecode_location.start = start_addr;
- semantic_token.bytecode_location.length = end_addr - start_addr;
- }
-
- // Fill each label reference with the address of the matching label definition
- for (index, slots) in reference_addresses {
- if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type {
- let [h,l] = definition.address.to_be_bytes();
- for slot in slots {
- bytecode[slot as usize] = h;
- bytecode[slot.wrapping_add(1) as usize] = l;
- }
- } else { unreachable!() }
- }
-
- // Move references and macro body tokens into label and macro definition tokens
- for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
- if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type {
- definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap();
- if let Some(references) = reference_tokens.remove(&index) {
- definition.references = references;
- }
- } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type {
- if let Some(references) = reference_tokens.remove(&index) {
- definition.references = references;
- }
- }
- }
- assert_eq!(reference_tokens.len(), 0);
-
- // Remove trailing null bytes from the bytecode
- if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
- let truncated_length = final_nonnull_byte + 1;
- let removed_byte_count = bytecode.len() - truncated_length;
- if removed_byte_count > 0 {
- bytecode.truncate(truncated_length);
- }
- }
-
- (bytecode, semantic_tokens)
- }
-
- fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize, parent_label: Option<String>) -> SemanticToken {
- SemanticToken {
- r#type: {
- if let Some(err) = syn_token.error {
- Sem::Error(syn_token.r#type, err)
- } else {
- match syn_token.r#type {
- Syn::Reference(ref name) => {
- match self.symbol_definitions.get(name) {
- Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i),
- Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i),
- None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
- }
- }
- Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))},
- Syn::MacroDefinition(name) => {
- let mut sem_body_tokens = Vec::new();
- for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() {
- // Make the source location of the macro definition token span the entire definition
- if syn_body_token.is_macro_terminator() {
- syn_token.source_location.end = syn_body_token.source_location.start;
- }
- let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0, parent_label.clone());
- sem_body_tokens.push(sem_body_token);
- }
- self.semantic_macro_bodies.insert(index, sem_body_tokens);
- Sem::MacroDefinition(MacroDefinition::new(name))
- },
- Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator,
- Syn::Padding(v) => Sem::Padding(v),
- Syn::ByteLiteral(v) => Sem::ByteLiteral(v),
- Syn::ShortLiteral(v) => Sem::ShortLiteral(v),
- Syn::Instruction(v) => Sem::Instruction(v),
- Syn::Comment => Sem::Comment,
- }
- }
- },
- source_location: syn_token.source_location,
- bytecode_location: BytecodeLocation::zero(),
- parent_label,
- }
- }
-
- fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) {
- macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
- macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
- macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
-
- for body_token in self.semantic_macro_bodies.get(&index).unwrap() {
- let start_addr = bytecode.len() as u16;
- match &body_token.r#type {
- Sem::LabelReference(i) => {
- reference_addresses.entry(*i).or_default().push(start_addr);
- push_u16!(0u16);
- },
- Sem::MacroReference(i) => {
- self.expand_macro_reference(*i, bytecode, reference_addresses);
- },
- Sem::LabelDefinition(_) => unreachable!(),
- Sem::MacroDefinition(_) => unreachable!(),
-
- Sem::Padding(p) => pad!(*p),
- Sem::ByteLiteral(b) => push_u8!(*b),
- Sem::ShortLiteral(s) => push_u16!(*s),
- Sem::Instruction(b) => push_u8!(*b),
-
- Sem::MacroDefinitionTerminator => (),
- Sem::Comment => (),
- Sem::Error(..) => (),
- };
- }
- }
-}
diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs
new file mode 100644
index 0000000..2a29ee3
--- /dev/null
+++ b/src/bin/bedrock-asm.rs
@@ -0,0 +1,148 @@
+use bedrock_asm::*;
+
+use std::io::{Read, Write};
+use std::path::{Path, PathBuf};
+
+
+static mut VERBOSE: bool = false;
+
+macro_rules! verbose {
+ ($($tokens:tt)*) => { if unsafe { VERBOSE } {
+ eprint!("[INFO] "); eprintln!($($tokens)*);
+ } };
+}
+macro_rules! error {
+ ($($tokens:tt)*) => {{
+ eprint!("[ERROR] "); eprintln!($($tokens)*); std::process::exit(1);
+ }};
+}
+
+
+fn main() {
+ let args = Arguments::from_env_or_exit();
+
+ // -----------------------------------------------------------------------
+ // RESOLVE syntactic symbols
+ let ext = args.ext.unwrap_or(String::from("brc"));
+ let mut resolver = if let Some(path) = &args.source {
+ match SourceUnit::from_path(&path, &ext) {
+ Ok(source_unit) => SymbolResolver::from_source_unit(source_unit),
+ Err(err) => match err {
+ ParseError::InvalidExtension => error!(
+ "File {path:?} has invalid extension, must be '.{ext}'"),
+ ParseError::NotFound => error!(
+ "File {path:?} was not found"),
+ ParseError::InvalidUtf8 => error!(
+ "File {path:?} does not contain valid UTF-8 text"),
+ ParseError::NotReadable => error!(
+ "File {path:?} is not readable"),
+ ParseError::IsADirectory => error!(
+ "File {path:?} is a directory"),
+ ParseError::Unknown => error!(
+ "Unknown error while attempting to read from {path:?}")
+ }
+ }
+ } else {
+ let mut source_code = String::new();
+ verbose!("Reading program source from standard input");
+ if let Err(err) = std::io::stdin().read_to_string(&mut source_code) {
+ eprintln!("Could not read from standard input, exiting.");
+ eprintln!("({err:?})");
+ std::process::exit(1);
+ }
+ let path = "<standard input>";
+ let source_unit = SourceUnit::from_source_code(source_code, path);
+ SymbolResolver::from_source_unit(source_unit)
+ };
+ // Load project libraries.
+ if let Some(path) = &args.source {
+ if !args.no_libs && !args.no_project_libs {
+ let project_library = gather_project_libraries(path, &ext);
+ resolver.add_library_units(project_library);
+ }
+ }
+ // Load environment libraries.
+ if !args.no_libs && !args.no_env_libs {
+ for env_library in gather_environment_libraries(&ext) {
+ resolver.add_library_units(env_library);
+ }
+ }
+ resolver.resolve();
+
+ // -----------------------------------------------------------------------
+ // PRINT information, generate merged source code
+ if args.tree {
+ print_source_tree(&resolver);
+ }
+ if print_resolver_errors(&resolver) {
+ std::process::exit(1);
+ };
+ let merged_source = resolver.get_merged_source_code();
+ if args.resolve {
+ write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref());
+ }
+
+ // -----------------------------------------------------------------------
+ // PARSE semantic tokens from merged source code
+ let path = Some("<merged source>");
+ let mut semantic_tokens = generate_semantic_tokens(&merged_source, path);
+ if print_semantic_errors(&semantic_tokens, &merged_source) {
+ std::process::exit(1);
+ };
+
+ // -----------------------------------------------------------------------
+ // GENERATE symbols file and bytecode
+ let bytecode = generate_bytecode(&mut semantic_tokens);
+ // let symbols = generate_symbols_file(&semantic_tokens);
+ write_bytes_and_exit(&bytecode, args.output.as_ref());
+}
+
+
+fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! {
+ if let Some(path) = path {
+ if let Err(err) = std::fs::write(path, bytes) {
+ eprintln!("Could not write to path {:?}, exiting.", path.as_ref());
+ eprintln!("({err:?})");
+ std::process::exit(1);
+ }
+ } else {
+ if let Err(err) = std::io::stdout().write_all(bytes) {
+ eprintln!("Could not write to standard output, exiting.");
+ eprintln!("({err:?})");
+ std::process::exit(1);
+ }
+ }
+ std::process::exit(0);
+}
+
+
+xflags::xflags! {
+ cmd arguments {
+ /// Print additional debug information
+ optional --verbose
+ /// Print the assembler version and exit
+ optional --version
+
+
+ /// Bedrock source code file to assemble.
+ optional source: PathBuf
+ /// Destination path for assembler output.
+ optional output: PathBuf
+ /// File extension to identify source files.
+ optional ext: String
+
+ /// Don't include libraries or resolve references.
+ optional --no-libs
+ /// Don't include project libraries
+ optional --no-project-libs
+ /// Don't include environment libraries.
+ optional --no-env-libs
+
+        /// Show the resolved source file hierarchy
+ optional --tree
+ /// Assemble the program without saving any output
+ optional --check
+ /// Only return resolved source code.
+ optional --resolve
+ }
+}
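
The binary above is now a thin driver over the library crate, so the same pipeline can be run directly. A minimal sketch using only the public API exercised by main (error printing omitted; names like "<example>" are placeholders):

    use bedrock_asm::*;

    fn assemble_string(source_code: String) -> Vec<u8> {
        // Seed the resolver with a single in-memory source unit.
        let unit = SourceUnit::from_source_code(source_code, "<example>");
        let mut resolver = SymbolResolver::from_source_unit(unit);
        // Library units could be added here via resolver.add_library_units(..).
        resolver.resolve();
        // Merge all resolved units, then parse and assemble the result.
        let merged = resolver.get_merged_source_code();
        let mut tokens = generate_semantic_tokens(&merged, Some("<merged source>"));
        generate_bytecode(&mut tokens)
    }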
diff --git a/src/error.rs b/src/error.rs
deleted file mode 100644
index 8a6c0d6..0000000
--- a/src/error.rs
+++ /dev/null
@@ -1,10 +0,0 @@
-#[derive(Clone)]
-pub enum Error {
- UnresolvedReference,
- DuplicateDefinition,
- InvalidPaddingValue,
- InvalidTypeInMacroDefinition,
- OrphanedMacroDefinitionTerminator,
- CyclicMacroReference,
-}
-
diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs
new file mode 100644
index 0000000..0b5d2a6
--- /dev/null
+++ b/src/gather_libraries.rs
@@ -0,0 +1,185 @@
+use crate::*;
+
+use vagabond::*;
+
+
+/// Gather all library units from the given path.
+pub fn gather_project_libraries(path: &Path, extension: &str) -> Vec<SourceUnit> {
+ match path.parent() {
+ Some(parent_path) => gather_source_units(parent_path, extension),
+ None => Vec::new(),
+ }
+}
+
+
+/// Gather all library units from the paths specified in an environment variable.
+pub fn gather_environment_libraries(extension: &str) -> Vec<Vec<SourceUnit>> {
+ let mut environment_libraries = Vec::new();
+ if let Ok(lib_var) = std::env::var("BEDROCK_LIBS") {
+ for path_str in lib_var.split(":") {
+ let lib_path = PathBuf::from(path_str);
+ let source_units = gather_source_units(&lib_path, extension);
+ if !source_units.is_empty() {
+ environment_libraries.push(source_units);
+ }
+ }
+ };
+ return environment_libraries;
+}
+
+
+/// Gather all source units at or descended from the given entry.
+fn gather_source_units(path: &Path, extension: &str) -> Vec<SourceUnit> {
+ let mut source_units = Vec::new();
+ if let Ok(entry) = Entry::from_path(path) {
+ match entry.entry_type {
+ EntryType::File => {
+ if let Ok(source) = SourceUnit::from_path(entry.path, extension) {
+ source_units.push(source);
+ }
+ }
+ EntryType::Directory => {
+ if let Ok(entries) = traverse_directory(entry.path) {
+ for entry in entries {
+ if let Ok(source) = SourceUnit::from_path(entry.path, extension) {
+ source_units.push(source);
+ }
+ }
+ }
+ }
+ }
+ };
+ return source_units;
+}
+
+
+pub struct SourceUnit {
+ pub main: SourceFile,
+ pub head: Option<SourceFile>,
+ pub tail: Option<SourceFile>,
+}
+
+
+impl SourceUnit {
+ /// Load from a source file and an associated head and tail file.
+ pub fn from_path<P: Into<PathBuf>>(path: P, extension: &str) -> Result<Self, ParseError> {
+ let main_path = canonicalize_path(path);
+ let main_path_str = main_path.as_os_str().to_string_lossy().to_string();
+ let head_extension = format!("head.{extension}");
+ let tail_extension = format!("tail.{extension}");
+ let is_head = main_path_str.ends_with(&head_extension);
+ let is_tail = main_path_str.ends_with(&tail_extension);
+ let is_not_main = !main_path_str.ends_with(extension);
+ if is_not_main || is_head || is_tail { return Err(ParseError::InvalidExtension); }
+
+ let symbols = parse_symbols_from_file(&main_path)?;
+ let head_path = main_path.with_extension(head_extension);
+ let tail_path = main_path.with_extension(tail_extension);
+
+ let main = SourceFile { path: main_path, symbols };
+ let head = match parse_symbols_from_file(&head_path) {
+ Ok(symbols) => Some(SourceFile { path: head_path, symbols }),
+ Err(_) => None,
+ };
+ let tail = match parse_symbols_from_file(&tail_path) {
+ Ok(symbols) => Some(SourceFile { path: tail_path, symbols }),
+ Err(_) => None,
+ };
+ Ok( SourceUnit { main, head, tail } )
+ }
+
+ /// Load from a string of source code.
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: String, path: P) -> Self {
+ let path = canonicalize_path(path);
+ let symbols = parse_symbols_from_source(source_code, Some(&path));
+ Self {
+ main: SourceFile { path, symbols },
+ head: None,
+ tail: None,
+ }
+ }
+}
+
+
+/// Read and parse all symbols from a source file.
+fn parse_symbols_from_file(path: &Path) -> Result<Symbols, ParseError> {
+ let source = read_source_from_file(path)?;
+ Ok(parse_symbols_from_source(source, Some(path)))
+}
+
+
+/// Parse all symbols from a source code string.
+fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbols {
+ use SyntacticTokenVariant as SynVar;
+
+ let token_iter = SyntacticParser::from_source_code(&source_code, path);
+ let mut definitions = Vec::new();
+ let mut references = Vec::new();
+
+ for token in token_iter {
+ match token.variant {
+ SynVar::LabelDefinition(name) => {
+ definitions.push(Symbol { name, source: token.source });
+ },
+ SynVar::MacroDefinition(name) => {
+ definitions.push(Symbol { name, source: token.source });
+ }
+ SynVar::Symbol(name) => {
+ references.push(Symbol { name, source: token.source });
+ },
+ _ => (),
+ }
+ }
+
+ Symbols {
+ definitions: Some(definitions),
+ references: Some(references),
+ source_code,
+ }
+}
+
+
+/// Attempt to read program source from a file.
+pub fn read_source_from_file(path: &Path) -> Result<String, ParseError> {
+ match std::fs::read(&path) {
+ Ok(bytes) => match String::from_utf8(bytes) {
+ Ok(source) => Ok(source),
+ Err(_) => return Err(ParseError::InvalidUtf8),
+ }
+ Err(err) => return Err( match err.kind() {
+ std::io::ErrorKind::NotFound => ParseError::NotFound,
+ std::io::ErrorKind::PermissionDenied => ParseError::NotReadable,
+ std::io::ErrorKind::IsADirectory => ParseError::IsADirectory,
+ _ => ParseError::Unknown,
+ } )
+ }
+}
+
+
+fn canonicalize_path<P: Into<PathBuf>>(path: P) -> PathBuf {
+ let pathbuf = path.into();
+ match pathbuf.canonicalize() {
+ Ok(canonical) => canonical,
+ Err(_) => pathbuf,
+ }
+}
+
+
+
+pub struct SourceFile {
+ pub path: PathBuf,
+ pub symbols: Symbols,
+}
+
+
+pub struct Symbols {
+ pub definitions: Option<Vec<Symbol>>,
+ pub references: Option<Vec<Symbol>>,
+ pub source_code: String,
+}
+
+
+pub struct Symbol {
+ pub name: String,
+ pub source: SourceSpan,
+}
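
Two conventions established above are worth spelling out: a unit's companion files are found by extension rewriting, and BEDROCK_LIBS holds colon-separated library directories. A sketch (directory paths hypothetical):

    // A unit 'draw.brc' may be accompanied by 'draw.head.brc' and
    // 'draw.tail.brc'; SourceUnit::from_path picks both up automatically.
    // Each directory listed in BEDROCK_LIBS is traversed for source units.
    std::env::set_var("BEDROCK_LIBS", "/home/ben/libs:/usr/local/share/bedrock");
    for library in gather_environment_libraries("brc") {
        resolver.add_library_units(library);
    }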
diff --git a/src/lib.rs b/src/lib.rs
index a657354..ff00605 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,21 +1,20 @@
-mod addressing;
-mod syntactic_token;
-mod semantic_token;
-mod tokenizer;
-mod error;
-mod assembler;
+#![feature(io_error_more)]
+#![feature(map_try_insert)]
-pub use addressing::{CharAddress, SourceLocation, BytecodeLocation};
-pub use syntactic_token::{SyntacticToken, SyntacticTokenType};
-pub use semantic_token::{SemanticToken, SemanticTokenType, LabelDefinition, MacroDefinition};
-pub use error::Error;
-pub use tokenizer::TokenIterator;
-pub use assembler::Assembler;
-pub fn assemble(source_code: &str) -> (Vec<u8>, Vec<SemanticToken>) {
- let mut assembler = Assembler::new();
- assembler.tokenise_source(source_code);
- assembler.resolve_references();
- assembler.generate_bytecode()
-}
+mod gather_libraries;
+mod symbol_resolver;
+pub use gather_libraries::*;
+pub use symbol_resolver::*;
+
+mod locators;
+mod tokens;
+mod translators;
+
+pub use locators::*;
+pub use tokens::*;
+pub use translators::*;
+
+mod print;
+pub use print::*;
diff --git a/src/locators.rs b/src/locators.rs
new file mode 100644
index 0000000..b7db1ee
--- /dev/null
+++ b/src/locators.rs
@@ -0,0 +1,5 @@
+mod bytecode;
+mod source;
+
+pub use bytecode::*;
+pub use source::*;
diff --git a/src/locators/bytecode.rs b/src/locators/bytecode.rs
new file mode 100644
index 0000000..500e9f0
--- /dev/null
+++ b/src/locators/bytecode.rs
@@ -0,0 +1,39 @@
+pub struct BytecodeSpan {
+ /// The location of this span in the assembled bytecode.
+ pub location: BytecodeLocation,
+ /// The bytes which this span represents.
+ pub bytes: Vec<u8>,
+}
+
+
+impl Default for BytecodeSpan {
+ fn default() -> Self {
+ Self {
+ location: BytecodeLocation {
+ address: 0,
+ length: 0,
+ },
+ bytes: Vec::new(),
+ }
+ }
+}
+
+
+#[derive(Clone, Copy)]
+pub struct BytecodeLocation {
+ // Address of the first byte.
+ pub address: usize,
+ // Length as a number of bytes.
+ pub length: usize,
+}
+
+
+impl std::fmt::Display for BytecodeLocation {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ write!(f, "0x{:>04x}", self.address)?;
+ if self.length > 0 {
+ write!(f, "-0x{:>04x}", self.address + self.length)?;
+ }
+ Ok(())
+ }
+}
diff --git a/src/locators/source.rs b/src/locators/source.rs
new file mode 100644
index 0000000..2f10bd9
--- /dev/null
+++ b/src/locators/source.rs
@@ -0,0 +1,69 @@
+use std::path::PathBuf;
+
+
+#[derive(Clone)]
+pub struct SourceSpan {
+ /// The source characters which this span represents.
+ pub string: String,
+ /// The location of this span in the merged source file.
+ pub in_merged: SourceLocation,
+ /// The location of this span in the original source file.
+ pub in_source: Option<SourceLocation>,
+}
+
+
+#[derive(Clone)]
+pub struct SourceLocation {
+ /// File path the source was loaded from.
+ pub path: Option<PathBuf>,
+ /// Position of the first character of the string.
+ pub start: Position,
+ /// Position of the final character of the string.
+ pub end: Position,
+}
+
+impl std::fmt::Display for SourceLocation {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ let y = self.start.line + 1;
+ let x = self.start.column + 1;
+ match &self.path {
+ Some(path) => write!(f, "{}:{y}:{x}", path.as_os_str().to_string_lossy()),
+ None => write!(f, "<unknown>:{y}:{x}"),
+ }
+ }
+}
+
+
+#[derive(Clone, Copy)]
+pub struct Position {
+ /// The number of lines that precede this line in the file.
+ pub line: usize,
+ /// The number of characters that precede this character in the line.
+ pub column: usize,
+}
+
+impl Position {
+ pub fn to_next_char(&mut self) {
+ self.column += 1;
+ }
+
+ pub fn to_next_line(&mut self) {
+ self.line += 1;
+ self.column = 0;
+ }
+
+ pub fn advance(&mut self, c: char) {
+ match c {
+ '\n' => self.to_next_line(),
+ _ => self.to_next_char(),
+ }
+ }
+}
+
+impl std::fmt::Display for Position {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ let y = self.line + 1;
+ let x = self.column + 1;
+ write!(f, "{y}:{x}")
+ }
+}
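
A quick check of the conventions above (fields are zero-based, Display is one-based):

    let mut pos = Position { line: 0, column: 0 };
    for c in "ab\nc".chars() {
        pos.advance(c); // 'a' and 'b' advance the column; '\n' starts line 1
    }
    // The position now sits just after 'c': line 1, column 1.
    assert_eq!(pos.to_string(), "2:2");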
diff --git a/src/main.rs b/src/main.rs
deleted file mode 100644
index 11ce42b..0000000
--- a/src/main.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use std::io::{Read, Write};
-use bedrock_asm::*;
-
-fn main() {
- // Read source code from standard input
- let mut source_code = String::new();
- if let Err(err) = std::io::stdin().read_to_string(&mut source_code) {
- eprintln!("Could not read from standard input, quitting.");
- eprintln!("({err:?})");
- std::process::exit(1);
- };
-
- let (bytecode, tokens) = assemble(&source_code);
- let mut is_error = false;
- for token in &tokens {
- if token.print_error(&source_code) { is_error = true };
- }
- if !is_error {
- for token in &tokens {
- if let SemanticTokenType::LabelDefinition(def) = &token.r#type {
- if def.references.is_empty() {
- eprintln!("Unused label definition: {}", def.name);
- }
- }
- }
- eprintln!();
- }
-
- let byte_count = bytecode.len();
- let byte_percentage = (byte_count as f32 / 65536.0 * 100.0).round() as u16;
- eprintln!("Assembled program in {byte_count} bytes ({byte_percentage}% of maximum).");
-
- if is_error {
- std::process::exit(1)
- }
-
- // Write bytecode to standard output
- if let Err(_) = std::io::stdout().write_all(&bytecode) {
- eprintln!("Could not write to standard output, quitting.");
- std::process::exit(1);
- }
-}
-
diff --git a/src/print.rs b/src/print.rs
new file mode 100644
index 0000000..7f49db2
--- /dev/null
+++ b/src/print.rs
@@ -0,0 +1,237 @@
+use crate::*;
+
+use SemanticTokenVariant as SemVar;
+use SemanticParseError as SemErr;
+use SyntacticParseError as SynErr;
+
+
+const NORMAL: &str = "\x1b[0m";
+const BOLD: &str = "\x1b[1m";
+const DIM: &str = "\x1b[2m";
+const WHITE: &str = "\x1b[37m";
+const RED: &str = "\x1b[31m";
+const YELLOW: &str = "\x1b[33m";
+const BLUE: &str = "\x1b[34m";
+
+
+pub struct Context<'a> {
+ pub source_code: &'a str,
+ pub source: &'a SourceSpan,
+}
+
+
+/// Print all errors found in the semantic tokens, including those inside macro
+/// definitions. Returns true if at least one error was printed.
+pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &str) -> bool {
+ let mut found_error = false;
+ for semantic_token in semantic_tokens {
+ match &semantic_token.variant {
+ SemVar::Error(err) => {
+ let context = Context {
+ source_code: source_code,
+ source: &semantic_token.source,
+ };
+ found_error = true;
+ print_semantic_error(&err, context)
+ }
+ SemVar::MacroDefinition(definition) => {
+ for body_token in &definition.body_tokens {
+ if let SemVar::Error(err) = &body_token.variant {
+ let context = Context {
+ source_code: source_code,
+ source: &body_token.source,
+ };
+ found_error = true;
+ print_semantic_error(err, context)
+ }
+ }
+ }
+ _ => (),
+ }
+ }
+ return found_error;
+}
+
+fn print_semantic_error(error: &SemanticParseError, context: Context) {
+ let message = get_message_for_semantic_error(error);
+ print_error(&message, context);
+}
+
+fn get_message_for_semantic_error(error: &SemanticParseError) -> String {
+ match error {
+ SemErr::LabelDefinitionInMacroDefinition =>
+ format!("Labels cannot be defined inside a macro"),
+ SemErr::MacroDefinitionInMacroDefinition =>
+ format!("Macros cannot be defined inside a macro"),
+ SemErr::StrayMacroTerminator =>
+ format!("Macro definition terminator is missing a macro definition"),
+ SemErr::StrayBlockClose =>
+ format!("Block was not opened, add a '{{' character to open"),
+ SemErr::UnclosedBlock =>
+ format!("Block was not closed, add a '}}' character to close"),
+ SemErr::UndefinedSymbol(name) =>
+ format!("Undefined symbol, no label or macro has been defined with the name '{name}'"),
+ SemErr::RedefinedSymbol((_, source)) => {
+ let location = source.in_source.as_ref().unwrap_or(&source.in_merged);
+ format!("Redefined symbol, first defined at {location}")
+ }
+ SemErr::MacroInvocationBeforeDefinition((_, source)) => {
+ let location = source.in_source.as_ref().unwrap_or(&source.in_merged);
+ format!("Macro used before definition, definition is at {location}")
+ }
+        SemErr::SyntaxError(syntax_error) => match syntax_error {
+ SynErr::UnterminatedComment =>
+ format!("Unclosed comment, add a ')' character to close"),
+ SynErr::UnterminatedRawString =>
+ format!("Unclosed string, add a ' character to close"),
+ SynErr::UnterminatedNullString =>
+ format!("Unclosed string, add a \" character to close"),
+ SynErr::InvalidPaddingValue(_) =>
+ format!("Padding value must be two or four hexidecimal digits"),
+ }
+ }
+}
+
+
+pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool {
+ let mut found_error = false;
+ for reference in &resolver.unresolved {
+ found_error = true;
+ let message = format!(
+ "Undefined symbol, no label or macro has been defined with the name '{}'",
+ &reference.symbol.source.string,
+ );
+ let source_code = resolver.get_source_code_for_tracked_symbol(reference);
+ let source = &reference.symbol.source;
+ print_error(&message, Context { source_code, source } )
+ }
+ for redefinition in &resolver.redefinitions {
+ found_error = true;
+ let definition = resolver.definitions.get(redefinition.1).unwrap();
+ let message = format!(
+ "Redefined symbol, first defined at {}",
+ &definition.symbol.source.in_merged,
+ );
+ let source_code = resolver.get_source_code_for_tracked_symbol(&redefinition.0);
+ let source = &redefinition.0.symbol.source;
+ print_error(&message, Context { source_code, source } )
+ }
+ return found_error;
+}
+
+
+
+pub fn print_error(message: &str, context: Context) {
+ print_source_issue(message, context, SourceIssueVariant::Error);
+}
+
+pub fn print_warning(message: &str, context: Context) {
+ print_source_issue(message, context, SourceIssueVariant::Warning);
+}
+
+fn print_source_issue(message: &str, context: Context, variant: SourceIssueVariant) {
+ let (label, colour) = match variant {
+ SourceIssueVariant::Warning => ("WARNING", YELLOW),
+ SourceIssueVariant::Error => ("ERROR", RED),
+ };
+
+ // Prepare variables.
+ let location = &context.source.in_merged;
+ let digits = location.start.line.to_string().len();
+ let y = location.start.line + 1;
+ let arrow = "-->";
+ let space = " ";
+
+ // Print message and file path.
+ eprintln!("{BOLD}{colour}[{label}]{WHITE}: {message}{NORMAL}");
+ eprintln!("{BLUE}{arrow:>w$}{NORMAL} {location}{NORMAL}", w=digits+3);
+ if let Some(source) = &context.source.in_source {
+ eprintln!("{BLUE}{arrow:>w$}{NORMAL} {source}{NORMAL}", w=digits+3);
+ }
+
+ let start = location.start.column;
+ let end = location.end.column + 1;
+
+ // Print source code line.
+ eprint!("{BLUE} {y} | {NORMAL}");
+ let line = get_line_from_source_code(context.source_code, location.start.line);
+ for (i, c) in line.chars().enumerate() {
+ if i == start { eprint!("{colour}") }
+ if i == end { eprint!("{NORMAL}") }
+ eprint!("{c}");
+ }
+ eprintln!("{NORMAL}");
+
+ // Print source code underline.
+ eprint!("{BLUE} {space:>w$} | {NORMAL}", w=digits);
+ for _ in 0..start { eprint!(" "); }
+ eprint!("{colour}");
+ for _ in start..end { eprint!("^"); }
+ eprintln!("{NORMAL}");
+}
+
+
+fn get_line_from_source_code(source_code: &str, line: usize) -> &str {
+ source_code.split('\n').nth(line).unwrap_or("<error reading line from source>")
+}
+
+
+enum SourceIssueVariant {
+ Warning,
+ Error,
+}
+
+
+/// Print a tree containing the name and path of each source unit.
+pub fn print_source_tree(resolver: &SymbolResolver) {
+ eprintln!(".");
+ let len = resolver.root_unit_ids.len();
+ for (i, id) in resolver.root_unit_ids.iter().enumerate() {
+ let end = i + 1 == len;
+ print_source_tree_leaf(resolver, *id, Vec::new(), end);
+ }
+ eprintln!();
+}
+
+fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec<bool>, end: bool) {
+ // A level entry is true if all entries in that level have been printed.
+ for level in &levels {
+ match level {
+ false => eprint!("│ "),
+ true => eprint!(" "),
+ }
+ }
+ // The end value is true if all siblings of this entry have been printed.
+ match end {
+ false => eprint!("├── "),
+ true => eprint!("└── "),
+ }
+ if let Some(unit) = resolver.source_units.get(id) {
+ let path = &unit.source_unit.main.path;
+ let path_str = path.as_os_str().to_string_lossy();
+ if let Some(name) = path.file_name() {
+ let name_str = name.to_string_lossy();
+ eprint!("{name_str}{BLUE}");
+ if unit.source_unit.head.is_some() { eprint!(" +head") }
+ if unit.source_unit.tail.is_some() { eprint!(" +tail") }
+ let mut unresolved = 0;
+ for symbol in &resolver.unresolved {
+ if symbol.source_id == id { unresolved += 1; }
+ }
+ if unresolved > 0 { eprint!("{RED} ({unresolved})"); }
+ eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}");
+ } else {
+ eprintln!("{path_str}");
+ };
+ levels.push(end);
+ let len = unit.child_ids.len();
+ for (i, id) in unit.child_ids.iter().enumerate() {
+ let end = i + 1 == len;
+ print_source_tree_leaf(resolver, *id, levels.clone(), end);
+ }
+ } else {
+ eprintln!("<error loading source unit details>");
+ }
+}
+
+
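
Illustrative --tree output for a root unit that pulled in two library units (file names and paths hypothetical; an unresolved-symbol count would follow the affected unit in red):

    .
    └── main.brc +head (/home/ben/project/main.brc)
        ├── string.brc (/home/ben/libs/string.brc)
        └── draw.brc +tail (/home/ben/libs/draw.brc)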
diff --git a/src/semantic_token.rs b/src/semantic_token.rs
deleted file mode 100644
index 265db91..0000000
--- a/src/semantic_token.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-use crate::*;
-
-pub enum SemanticTokenType {
- LabelReference(usize),
- MacroReference(usize),
-
- LabelDefinition(LabelDefinition),
- MacroDefinition(MacroDefinition),
-
- Padding(u16),
- ByteLiteral(u8),
- ShortLiteral(u16),
- Instruction(u8),
-
- MacroDefinitionTerminator,
- Comment,
- Error(SyntacticTokenType, Error),
-}
-
-pub struct SemanticToken {
- pub r#type: SemanticTokenType,
- pub source_location: SourceLocation,
- pub bytecode_location: BytecodeLocation,
- pub parent_label: Option<String>,
-}
-
-impl SemanticToken {
- /// Returns true if an error was printed.
- pub fn print_error(&self, source_code: &str) -> bool {
- let mut is_error = false;
- macro_rules! red {()=>{eprint!("\x1b[31m")};}
- macro_rules! dim {()=>{eprint!("\x1b[0;2m")};}
- macro_rules! normal {()=>{eprint!("\x1b[0m")};}
-
- if let SemanticTokenType::Error(token, error) = &self.r#type {
- is_error = true;
-
- red!(); eprint!("[ERROR] "); normal!();
- let source = &self.source_location.source;
- match error {
- Error::UnresolvedReference => {
- eprintln!("Unresolved reference, no label or macro has been defined with the name '{source}'") }
- Error::DuplicateDefinition => {
- eprintln!("Duplicate definition, a label or macro has already been defined with the name '{source}'") }
- Error::OrphanedMacroDefinitionTerminator => {
- eprintln!("Unmatched macro definition terminator, no macro definition is in progress") }
- Error::InvalidPaddingValue => {
- eprintln!("Invalid value for padding, the value must be at least one and at most four hexadecimal characters") }
- Error::CyclicMacroReference => {
- eprintln!("Cyclic macro reference, this macro reference contains a reference to the macro being defined") }
- Error::InvalidTypeInMacroDefinition => {
- let name = match token {
- SyntacticTokenType::Reference(_) => "references",
- SyntacticTokenType::LabelDefinition(_) => "label definitions",
- SyntacticTokenType::MacroDefinition(_) => "macro definitions",
- SyntacticTokenType::MacroDefinitionTerminator => "macro definition terminators",
- SyntacticTokenType::Padding(_) => "padding",
- SyntacticTokenType::ByteLiteral(_) => "byte literals",
- SyntacticTokenType::ShortLiteral(_) => "short literals",
- SyntacticTokenType::Instruction(_) => "instructions",
- SyntacticTokenType::Comment => "comments",
- };
- eprintln!("Invalid token in macro definition, macro definitions are not allowed to contain {name}") }
- }
-
- if let Some(label) = &self.parent_label {
- eprint!(" ... "); red!(); eprint!("| "); dim!(); eprintln!("@{label} "); normal!();
- }
-
- let line = source_code.split('\n').nth(self.source_location.start.line).unwrap();
- eprint!("{:>5} ", self.source_location.start.line+1);
- red!(); eprint!("| "); normal!();
- for (i, c) in line.chars().enumerate() {
- if i == self.source_location.start.column { red!() }
- eprint!("{c}");
- if i == self.source_location.end.column { normal!() }
- }
- eprintln!(); red!(); eprint!(" | ");
- for i in 0..=self.source_location.end.column {
- if i < self.source_location.start.column { eprint!(" ") } else { eprint!("^") };
- }
- normal!(); eprintln!();
- }
- else if let SemanticTokenType::MacroDefinition(definition) = &self.r#type {
- for token in &definition.body_tokens {
- if token.print_error(source_code) { is_error = true }
- }
- }
- is_error
- }
-}
-
-pub struct LabelDefinition {
- pub name: String,
- pub address: u16,
- /// A list of pointers to label reference tokens
- pub references: Vec<usize>,
-}
-impl LabelDefinition {
- pub fn new(name: String) -> Self {
- Self { name, address:0, references:Vec::new() }
- }
-}
-
-pub struct MacroDefinition {
- pub name: String,
- pub body_tokens: Vec<SemanticToken>,
- /// A list of pointers to macro reference tokens
- pub references: Vec<usize>,
-}
-impl MacroDefinition {
- pub fn new(name: String) -> Self {
- Self { name, body_tokens:Vec::new(), references:Vec::new() }
- }
-}
-
diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs
new file mode 100644
index 0000000..cced994
--- /dev/null
+++ b/src/symbol_resolver.rs
@@ -0,0 +1,230 @@
+use crate::*;
+
+use std::mem::take;
+
+
+/// Resolve symbol references across source units.
+pub struct SymbolResolver {
+ pub definitions: Vec<TrackedSymbol>,
+ pub unresolved: Vec<TrackedSymbol>,
+ /// Contains the ID of the owner of the original definition.
+ pub redefinitions: Vec<(TrackedSymbol, usize)>,
+ pub source_units: Vec<HeirarchicalSourceUnit>,
+ pub root_unit_ids: Vec<usize>,
+ pub unused_library_units: Vec<SourceUnit>,
+}
+
+
+impl SymbolResolver {
+ /// Construct a resolver from a root source unit.
+ pub fn from_source_unit(source_unit: SourceUnit) -> Self {
+ let mut new = Self {
+ definitions: Vec::new(),
+ unresolved: Vec::new(),
+ redefinitions: Vec::new(),
+ source_units: Vec::new(),
+ root_unit_ids: Vec::new(),
+ unused_library_units: Vec::new(),
+ };
+ new.add_source_unit(source_unit, None);
+ return new;
+ }
+
+ pub fn add_library_units(&mut self, mut source_units: Vec<SourceUnit>) {
+ self.unused_library_units.append(&mut source_units);
+ }
+
+ pub fn resolve(&mut self) {
+ // Repeatedly test if any unused source unit resolves an unresolved symbol,
+ // breaking the loop when no new resolutions are found.
+ 'outer: loop {
+ for (i, source_unit) in self.unused_library_units.iter().enumerate() {
+ if let Some(id) = self.resolves_reference(&source_unit) {
+ let source_unit = self.unused_library_units.remove(i);
+ self.add_source_unit(source_unit, Some(id));
+ continue 'outer;
+ }
+ }
+ break;
+ }
+ }
+
+ /// Add a source unit to the resolver and link it to a parent unit.
+ pub fn add_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) {
+ let source_id = self.source_units.len();
+
+ // Add all main symbols.
+ if let Some(definitions) = take(&mut source_unit.main.symbols.definitions) {
+ self.add_definitions(definitions, source_id, SourceRole::Main); }
+ if let Some(references) = take(&mut source_unit.main.symbols.references) {
+ self.add_references(references, source_id, SourceRole::Main); }
+
+ // Add all head symbols.
+ if let Some(head) = &mut source_unit.head {
+ if let Some(references) = take(&mut head.symbols.references) {
+ self.add_references(references, source_id, SourceRole::Head); }
+ if let Some(definitions) = take(&mut head.symbols.definitions) {
+ self.add_definitions(definitions, source_id, SourceRole::Head); }
+ }
+
+ // Add all tail symbols.
+ if let Some(tail) = &mut source_unit.tail {
+ if let Some(references) = take(&mut tail.symbols.references) {
+ self.add_references(references, source_id, SourceRole::Tail); }
+ if let Some(definitions) = take(&mut tail.symbols.definitions) {
+ self.add_definitions(definitions, source_id, SourceRole::Tail); }
+ }
+
+ if let Some(parent_id) = parent_id {
+ if let Some(parent_unit) = self.source_units.get_mut(parent_id) {
+ parent_unit.child_ids.push(source_id);
+ }
+ } else {
+ self.root_unit_ids.push(source_id);
+ }
+
+ let source_unit = HeirarchicalSourceUnit { source_unit, child_ids: Vec::new() };
+ self.source_units.push(source_unit);
+ }
+
+ fn add_references(&mut self, references: Vec<Symbol>, source_id: usize, source_role: SourceRole) {
+ for symbol in references {
+ let reference = TrackedSymbol { symbol, source_id, source_role };
+ if !self.definitions.contains(&reference) {
+ self.unresolved.push(reference);
+ }
+ }
+ }
+
+ fn add_definitions(&mut self, definitions: Vec<Symbol>, source_id: usize, source_role: SourceRole) {
+ for symbol in definitions {
+ let predicate = |d: &&TrackedSymbol| { &d.symbol.name == &symbol.name };
+ if let Some(def) = self.definitions.iter().find(predicate) {
+ let definition = TrackedSymbol { symbol, source_id, source_role };
+ let redefinition = (definition, def.source_id);
+ self.redefinitions.push(redefinition);
+ } else {
+ self.unresolved.retain(|s| s.symbol.name != symbol.name);
+ let definition = TrackedSymbol { symbol, source_id, source_role };
+ self.definitions.push(definition);
+ }
+ }
+ }
+
+ /// Returns the ID of the owner of a symbol resolved by this unit.
+ pub fn resolves_reference(&self, source_unit: &SourceUnit) -> Option<usize> {
+ if let Some(definitions) = &source_unit.main.symbols.definitions {
+ if let Some(id) = self.source_id_of_unresolved(&definitions) {
+ return Some(id);
+ }
+ }
+ if let Some(head) = &source_unit.head {
+ if let Some(definitions) = &head.symbols.definitions {
+ if let Some(id) = self.source_id_of_unresolved(&definitions) {
+ return Some(id);
+ }
+ }
+ }
+ if let Some(tail) = &source_unit.tail {
+ if let Some(definitions) = &tail.symbols.definitions {
+ if let Some(id) = self.source_id_of_unresolved(&definitions) {
+ return Some(id);
+ }
+ }
+ }
+ return None;
+ }
+
+ /// Returns the ID of the owner of a reference to one of these symbols.
+ fn source_id_of_unresolved(&self, symbols: &[Symbol]) -> Option<usize> {
+ for symbol in symbols {
+ let opt = self.unresolved.iter().find(|s| s.symbol.name == symbol.name);
+ if let Some(unresolved) = opt {
+ return Some(unresolved.source_id);
+ }
+ }
+ return None;
+ }
+
+ pub fn get_source_code_for_tracked_symbol(&self, symbol: &TrackedSymbol) -> &str {
+ let source_unit = &self.source_units[symbol.source_id].source_unit;
+ match symbol.source_role {
+ SourceRole::Main => source_unit.main.symbols.source_code.as_str(),
+ SourceRole::Head => match &source_unit.head {
+ Some(head) => head.symbols.source_code.as_str(),
+ None => unreachable!("Failed to find source for token"),
+ }
+ SourceRole::Tail => match &source_unit.tail {
+ Some(tail) => tail.symbols.source_code.as_str(),
+ None => unreachable!("Failed to find source for token"),
+ }
+ }
+ }
+
+ /// Create a source file by concatenating all source units.
+ pub fn get_merged_source_code(&self) -> String {
+ // The first source unit is guaranteed to be the root unit, so we can
+ // just push source files in their current order.
+ let mut source_code = String::new();
+
+ // Push head source code.
+ for source_unit in self.source_units.iter().rev() {
+ if let Some(head) = &source_unit.source_unit.head {
+ push_source_code_to_string(&mut source_code, head);
+ }
+ }
+ // Push main source code.
+ for source_unit in self.source_units.iter() {
+ push_source_code_to_string(&mut source_code, &source_unit.source_unit.main);
+ }
+ // Push tail source code.
+ for source_unit in self.source_units.iter().rev() {
+ if let Some(tail) = &source_unit.source_unit.tail {
+ push_source_code_to_string(&mut source_code, tail);
+ }
+ }
+ return source_code;
+ }
+}
+
+
+fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) {
+ // Ensure that sections are separated by two newlines.
+ if !string.is_empty() {
+ if !string.ends_with('\n') { string.push('\n'); }
+ if !string.ends_with("\n\n") { string.push('\n'); }
+ }
+ // Write a path comment to the string.
+ let path_str = source_file.path.as_os_str().to_string_lossy();
+ let path_comment = format!("(: {path_str} )\n");
+ string.push_str(&path_comment);
+ string.push_str(&source_file.symbols.source_code);
+}
+
+
+pub struct HeirarchicalSourceUnit {
+ pub source_unit: SourceUnit,
+ pub child_ids: Vec<usize>,
+}
+
+
+pub struct TrackedSymbol {
+ pub symbol: Symbol,
+ pub source_id: usize,
+ pub source_role: SourceRole,
+}
+
+
+#[derive(Clone, Copy)]
+pub enum SourceRole {
+ Main,
+ Head,
+ Tail,
+}
+
+
+impl PartialEq for TrackedSymbol {
+ fn eq(&self, other: &TrackedSymbol) -> bool {
+ self.symbol.name.eq(&other.symbol.name)
+ }
+}
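
The resolve method above runs to a fixed point: a library unit is linked in only when it defines a symbol that is currently unresolved, and linking it may expose new unresolved symbols for the next pass. A sketch (unit and symbol names hypothetical):

    // main.brc references 'draw'; draw.brc defines 'draw' but references
    // 'copy'; mem.brc defines 'copy'. resolve() links draw.brc on the first
    // pass, mem.brc on the second, then stops when nothing new resolves.
    let mut resolver = SymbolResolver::from_source_unit(main_unit);
    resolver.add_library_units(vec![draw_unit, mem_unit]);
    resolver.resolve();
    assert!(resolver.unresolved.is_empty());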
diff --git a/src/syntactic_token.rs b/src/syntactic_token.rs
deleted file mode 100644
index 4a50e8a..0000000
--- a/src/syntactic_token.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use crate::*;
-
-pub enum SyntacticTokenType {
- Reference(String),
-
- LabelDefinition(String),
- MacroDefinition(String),
- MacroDefinitionTerminator,
-
- Padding(u16),
- ByteLiteral(u8),
- ShortLiteral(u16),
- Instruction(u8),
-
- Comment,
-}
-
-
-
-pub struct SyntacticToken {
- pub r#type: SyntacticTokenType,
- pub source_location: SourceLocation,
- pub error: Option<Error>,
-}
-
-impl SyntacticToken {
- // Call when this token is found inside a macro definition.
- pub fn use_in_macro_body(&mut self) {
- match self.r#type {
- SyntacticTokenType::LabelDefinition(..) |
- SyntacticTokenType::MacroDefinition(..) => {
- self.set_error(Error::InvalidTypeInMacroDefinition)
- }
- _ => (),
- };
- }
- pub fn set_error(&mut self, error: Error) {
- self.error = Some(error);
- }
- pub fn is_macro_terminator(&self) -> bool {
- if let SyntacticTokenType::MacroDefinitionTerminator = self.r#type {true} else {false}
- }
-}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
deleted file mode 100644
index 02bf490..0000000
--- a/src/tokenizer.rs
+++ /dev/null
@@ -1,235 +0,0 @@
-use std::mem::take;
-use crate::*;
-
-#[derive(PartialEq)]
-enum StringLiteral {
- None,
- Raw,
- NullTerminated,
-}
-
-pub struct TokenIterator {
- /// The characters that comprise the program souce code.
- chars: Vec<char>,
- /// The index of the next character to read.
- i: usize,
- /// The address of the next character to read.
- addr: CharAddress,
- /// If true, skip over any whitespace characters. If false, stop reading
- /// when a whitespace character is encountered.
- skip_whitespace: bool,
- /// The name of the most recently defined label.
- label: String,
- /// If not None, each individual character will be tokenised as a ByteLiteral.
- parse_string_literal: StringLiteral,
-
-
- /// The address of the first character of the current token.
- start: CharAddress,
- /// The address of the final character of the current token.
- end: CharAddress,
- /// The entire current token.
- source: String,
- /// The first character of the current token.
- prefix: char,
- /// The second and remaining characters of the current token.
- suffix: String,
-}
-
-impl TokenIterator {
- /// Create an iterator from a string of program source code.
- pub fn from_str(source_code: &str) -> Self {
- Self {
- chars: source_code.chars().collect(),
- i: 0,
- addr: CharAddress::zero(),
- skip_whitespace: true,
- parse_string_literal: StringLiteral::None,
- label: String::new(),
- start: CharAddress::zero(),
- end: CharAddress::zero(),
- source: String::new(),
- prefix: ' ',
- suffix: String::new(),
- }
- }
- /// Append a character to the current token.
- fn push(&mut self, c:char) {
- self.end = self.addr;
- self.source.push(c);
- self.suffix.push(c);
- self.next(c);
- }
- /// Move forward to the next source character.
- fn next(&mut self, c: char) {
- self.addr.column += 1;
- self.i += 1;
- if c == '\n' {
- self.addr.column = 0;
- self.addr.line += 1;
- }
- }
- /// Mark the current character as being the first character of a new token.
- fn mark_start(&mut self, c:char) {
- if c == '"' {
- self.parse_string_literal = StringLiteral::NullTerminated;
- } else if c == '\'' {
- self.parse_string_literal = StringLiteral::Raw;
- } else {
- self.start=self.addr;
- self.end=self.addr;
- self.prefix=c;
- self.source.push(c);
- self.skip_whitespace=false;
- }
- self.next(c);
- }
-}
-
-impl Iterator for TokenIterator {
- type Item = SyntacticToken;
-
- fn next(&mut self) -> Option<SyntacticToken> {
- // Initialise values before reading the next token
- let mut is_comment = false;
- self.skip_whitespace = true;
-
- // Iterate over source characters until a full token is read
- while let Some(c) = self.chars.get(self.i) {
- let c = *c;
- // Parse individual characters from a string literal
- if self.parse_string_literal != StringLiteral::None {
- if c == '"' && self.parse_string_literal == StringLiteral::NullTerminated {
- self.parse_string_literal = StringLiteral::None;
- let token = SyntacticToken {
- r#type: SyntacticTokenType::ByteLiteral(0),
- source_location: SourceLocation {
- source: c.to_string(), start:self.addr, end:self.addr },
- error: None,
- };
- self.next(c);
- return Some(token);
- } else if c == '\'' && self.parse_string_literal == StringLiteral::Raw {
- self.parse_string_literal = StringLiteral::None;
- self.next(c);
- continue
- } else {
- self.next(c);
- return Some(SyntacticToken {
- r#type: SyntacticTokenType::ByteLiteral(c as u8),
- source_location: SourceLocation {
- source: c.to_string(), start:self.addr, end:self.addr },
- error: None,
- });
- }
- }
- // Intercept comments
- if is_comment {
- self.push(c); if c == ')' { break } else { continue }; }
- else if self.skip_whitespace && c == '(' {
- is_comment = true; self.mark_start(c); continue }
-
- // Allow a semicolon at the end of a token to be handled as a separate token
- if self.source.len() > 0 && c == ';' { break }
- // Handle the current character
- match (is_whitespace(c), self.skip_whitespace) {
- (true, true) => self.next(c), // c is the expected leading whitespace
- (false, true) => self.mark_start(c), // c is the first character of the token
- (false, false) => self.push(c), // c is a character of the token
- (true, false) => break, // c is trailing whitespace
- }
- // Allow literal values to be chained to the end of the previous token
- if self.source.len() > 0 && c == ':' { break }
- }
-
- // If no source characters were grabbed then we have read through the entire source file
- if self.source.len() == 0 { return None; }
- // Allow handling macro terminators and symbols of length 1 in the match expression
- if self.suffix.len() == 0 { self.prefix = '\0'; }
- // Consume the collected characters to be used in the match expression
- let full = take(&mut self.source);
- let suffix = take(&mut self.suffix);
- let mut error = None;
- let mut parse_padding_value = |v| {
- parse_short(v).or_else(|| {
- error = Some(Error::InvalidPaddingValue); Some(0)
- }).unwrap()
- };
-
- let r#type = match self.prefix {
- '(' => { SyntacticTokenType::Comment }
- '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) }
- '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) }
- '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) }
- '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) }
- '%' => if let Some(("", sublabel)) = suffix.split_once("~") {
- SyntacticTokenType::MacroDefinition(format!("{}/{}", self.label, sublabel))
- } else {
- SyntacticTokenType::MacroDefinition(suffix)
- }
- _ => {
- if ";" == &full { SyntacticTokenType::MacroDefinitionTerminator }
- else if let Some(value) = parse_byte_lit(&full) { SyntacticTokenType::ByteLiteral(value) }
- else if let Some(value) = parse_short_lit(&full) { SyntacticTokenType::ShortLiteral(value) }
- else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) }
- else { SyntacticTokenType::Reference(full.clone()) }
- }
- };
- Some(SyntacticToken {
- r#type,
- source_location:SourceLocation::new(full,self.start,self.end),
- error,
- })
- }
-}
-
-
-fn parse_byte_lit(token: &str) -> Option<u8> {
- match token.len() { 2 => u8::from_str_radix(token, 16).ok(), _ => None } }
-fn parse_short_lit(token: &str) -> Option<u16> {
- match token.len() { 4 => u16::from_str_radix(token, 16).ok(), _ => None } }
-fn parse_short(token: &str) -> Option<u16> {
- match token.len() { 1..=4 => u16::from_str_radix(token, 16).ok(), _ => None } }
-fn is_whitespace(c: char) -> bool {
- match c { ' '|'\t'|'\n'|'\r'|'['|']'|'(' =>true, _=>false } }
-fn parse_instruction(token: &str) -> Option<u8> {
- Some(match token {
- // Control operators
- "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0,
- "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1,
- "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2,
- "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3,
- "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4,
- "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5,
- "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6,
- "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7,
- // Stack operators
- "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8,
- "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9,
- "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA,
- "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB,
- "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC,
- "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED,
- "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE,
- "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF,
- // Numeric operators
- "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0,
- "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1,
- "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2,
- "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3,
- "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4,
- "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5,
- "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6,
- "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7,
- // Bitwise operators
- "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8,
- "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9,
- "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA,
- "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB,
- "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC,
- "SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD,
- "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE,
- "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF,
- _ => return None,
- })
-}
diff --git a/src/tokens.rs b/src/tokens.rs
new file mode 100644
index 0000000..81bf9d5
--- /dev/null
+++ b/src/tokens.rs
@@ -0,0 +1,9 @@
+mod syntactic;
+mod semantic;
+mod instruction;
+mod value;
+
+pub use syntactic::*;
+pub use semantic::*;
+pub use instruction::*;
+pub use value::*;
diff --git a/src/tokens/instruction.rs b/src/tokens/instruction.rs
new file mode 100644
index 0000000..d5fb3e5
--- /dev/null
+++ b/src/tokens/instruction.rs
@@ -0,0 +1,170 @@
+use Operation as Op;
+
+
+pub struct Instruction {
+ pub value: u8,
+}
+
+
+impl Instruction {
+ pub fn operation(&self) -> Operation {
+ match self.value & 0x1f {
+ 0x00=>Op::HLT, 0x01=>Op::JMP, 0x02=>Op::JCN, 0x03=>Op::JCK,
+ 0x04=>Op::LDA, 0x05=>Op::STA, 0x06=>Op::LDD, 0x07=>Op::STD,
+ 0x08=>Op::PSH, 0x09=>Op::POP, 0x0a=>Op::CPY, 0x0b=>Op::SPL,
+ 0x0c=>Op::DUP, 0x0d=>Op::OVR, 0x0e=>Op::SWP, 0x0f=>Op::ROT,
+ 0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC,
+ 0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK,
+ 0x18=>Op::IOR, 0x19=>Op::XOR, 0x1a=>Op::AND, 0x1b=>Op::NOT,
+ 0x1c=>Op::SHF, 0x1d=>Op::SHC, 0x1e=>Op::TAL, 0x1f=>Op::REV,
+ _ => unreachable!(),
+ }
+ }
+
+ pub fn return_mode(&self) -> bool {
+ self.value & 0x80 != 0
+ }
+
+ pub fn literal_mode(&self) -> bool {
+ self.value & 0x40 != 0
+ }
+
+ pub fn double_mode(&self) -> bool {
+ self.value & 0x20 != 0
+ }
+}
+
+
+impl std::fmt::Display for Instruction {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ write!(f, "{}", match self.value {
+ // Control operators
+ 0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2" ,0x80=>"DB3" ,0xA0=>"DB4" ,0xC0=>"DB5" ,0xE0=>"DB6" ,
+ 0x01=>"JMP",0x21=>"JMS" ,0x41=>"JMP:",0x61=>"JMS:" ,0x81=>"JMPr",0xA1=>"JMSr" ,0xC1=>"JMPr:",0xE1=>"JMSr:" ,
+ 0x02=>"JCN",0x22=>"JCS" ,0x42=>"JCN:",0x62=>"JCS:" ,0x82=>"JCNr",0xA2=>"JCSr" ,0xC2=>"JCNr:",0xE2=>"JCSr:" ,
+ 0x03=>"JCK",0x23=>"JCK*",0x43=>"JCK:",0x63=>"JCK*:",0x83=>"JCKr",0xA3=>"JCKr*",0xC3=>"JCKr:",0xE3=>"JCKr*:",
+ 0x04=>"LDA",0x24=>"LDA*",0x44=>"LDA:",0x64=>"LDA*:",0x84=>"LDAr",0xA4=>"LDAr*",0xC4=>"LDAr:",0xE4=>"LDAr*:",
+ 0x05=>"STA",0x25=>"STA*",0x45=>"STA:",0x65=>"STA*:",0x85=>"STAr",0xA5=>"STAr*",0xC5=>"STAr:",0xE5=>"STAr*:",
+ 0x06=>"LDD",0x26=>"LDD*",0x46=>"LDD:",0x66=>"LDD*:",0x86=>"LDDr",0xA6=>"LDDr*",0xC6=>"LDDr:",0xE6=>"LDDr*:",
+ 0x07=>"STD",0x27=>"STD*",0x47=>"STD:",0x67=>"STD*:",0x87=>"STDr",0xA7=>"STDr*",0xC7=>"STDr:",0xE7=>"STDr*:",
+ // Stack operators
+ 0x08=>"PSH",0x28=>"PSH*",0x48=>"PSH:",0x68=>"PSH*:",0x88=>"PSHr",0xA8=>"PSHr*",0xC8=>"PSHr:",0xE8=>"PSHr*:",
+ 0x09=>"POP",0x29=>"POP*",0x49=>"POP:",0x69=>"POP*:",0x89=>"POPr",0xA9=>"POPr*",0xC9=>"POPr:",0xE9=>"POPr*:",
+ 0x0A=>"CPY",0x2A=>"CPY*",0x4A=>"CPY:",0x6A=>"CPY*:",0x8A=>"CPYr",0xAA=>"CPYr*",0xCA=>"CPYr:",0xEA=>"CPYr*:",
+ 0x0B=>"SPL",0x2B=>"SPL*",0x4B=>"SPL:",0x6B=>"SPL*:",0x8B=>"SPLr",0xAB=>"SPLr*",0xCB=>"SPLr:",0xEB=>"SPLr*:",
+ 0x0C=>"DUP",0x2C=>"DUP*",0x4C=>"DUP:",0x6C=>"DUP*:",0x8C=>"DUPr",0xAC=>"DUPr*",0xCC=>"DUPr:",0xEC=>"DUPr*:",
+ 0x0D=>"OVR",0x2D=>"OVR*",0x4D=>"OVR:",0x6D=>"OVR*:",0x8D=>"OVRr",0xAD=>"OVRr*",0xCD=>"OVRr:",0xED=>"OVRr*:",
+ 0x0E=>"SWP",0x2E=>"SWP*",0x4E=>"SWP:",0x6E=>"SWP*:",0x8E=>"SWPr",0xAE=>"SWPr*",0xCE=>"SWPr:",0xEE=>"SWPr*:",
+ 0x0F=>"ROT",0x2F=>"ROT*",0x4F=>"ROT:",0x6F=>"ROT*:",0x8F=>"ROTr",0xAF=>"ROTr*",0xCF=>"ROTr:",0xEF=>"ROTr*:",
+ // Numeric operators
+ 0x10=>"ADD",0x30=>"ADD*",0x50=>"ADD:",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr*",0xD0=>"ADDr:",0xF0=>"ADDr*:",
+ 0x11=>"SUB",0x31=>"SUB*",0x51=>"SUB:",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr*",0xD1=>"SUBr:",0xF1=>"SUBr*:",
+ 0x12=>"INC",0x32=>"INC*",0x52=>"INC:",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr*",0xD2=>"INCr:",0xF2=>"INCr*:",
+ 0x13=>"DEC",0x33=>"DEC*",0x53=>"DEC:",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr*",0xD3=>"DECr:",0xF3=>"DECr*:",
+ 0x14=>"LTH",0x34=>"LTH*",0x54=>"LTH:",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr*",0xD4=>"LTHr:",0xF4=>"LTHr*:",
+ 0x15=>"GTH",0x35=>"GTH*",0x55=>"GTH:",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr*",0xD5=>"GTHr:",0xF5=>"GTHr*:",
+ 0x16=>"EQU",0x36=>"EQU*",0x56=>"EQU:",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr*",0xD6=>"EQUr:",0xF6=>"EQUr*:",
+ 0x17=>"NQK",0x37=>"NQK*",0x57=>"NQK:",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr*",0xD7=>"NQKr:",0xF7=>"NQKr*:",
+ // Bitwise operators
+ 0x18=>"IOR",0x38=>"IOR*",0x58=>"IOR:",0x78=>"IOR*:",0x98=>"IORr",0xB8=>"IORr*",0xD8=>"IORr:",0xF8=>"IORr*:",
+ 0x19=>"XOR",0x39=>"XOR*",0x59=>"XOR:",0x79=>"XOR*:",0x99=>"XORr",0xB9=>"XORr*",0xD9=>"XORr:",0xF9=>"XORr*:",
+ 0x1A=>"AND",0x3A=>"AND*",0x5A=>"AND:",0x7A=>"AND*:",0x9A=>"ANDr",0xBA=>"ANDr*",0xDA=>"ANDr:",0xFA=>"ANDr*:",
+ 0x1B=>"NOT",0x3B=>"NOT*",0x5B=>"NOT:",0x7B=>"NOT*:",0x9B=>"NOTr",0xBB=>"NOTr*",0xDB=>"NOTr:",0xFB=>"NOTr*:",
+ 0x1C=>"SHF",0x3C=>"SHF*",0x5C=>"SHF:",0x7C=>"SHF*:",0x9C=>"SHFr",0xBC=>"SHFr*",0xDC=>"SHFr:",0xFC=>"SHFr*:",
+ 0x1D=>"SHC",0x3D=>"SHC*",0x5D=>"SHC:",0x7D=>"SHC*:",0x9D=>"SHCr",0xBD=>"SHCr*",0xDD=>"SHCr:",0xFD=>"SHCr*:",
+ 0x1E=>"TAL",0x3E=>"TAL*",0x5E=>"TAL:",0x7E=>"TAL*:",0x9E=>"TALr",0xBE=>"TALr*",0xDE=>"TALr:",0xFE=>"TALr*:",
+ 0x1F=>"REV",0x3F=>"REV*",0x5F=>"REV:",0x7F=>"REV*:",0x9F=>"REVr",0xBF=>"REVr*",0xDF=>"REVr:",0xFF=>"REVr*:",
+ })
+ }
+}
+
+
+impl std::str::FromStr for Instruction {
+ type Err = ();
+
+ fn from_str(token: &str) -> Result<Self, Self::Err> {
+ Ok( Instruction { value: match token {
+ // Control operators
+ "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2" =>0x60,"DB3" =>0x80,"DB4" =>0xA0,"DB5" =>0xC0,"DB6" =>0xE0,
+ "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1,
+ "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2,
+ "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3,
+ "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4,
+ "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5,
+ "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6,
+ "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7,
+ // Stack operators
+ "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8,
+ "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9,
+ "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA,
+ "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB,
+ "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC,
+ "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED,
+ "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE,
+ "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF,
+ // Numeric operators
+ "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0,
+ "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1,
+ "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2,
+ "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3,
+ "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4,
+ "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5,
+ "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6,
+ "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7,
+ // Bitwise operators
+ "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8,
+ "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9,
+ "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA,
+ "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB,
+ "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC,
+ "SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD,
+ "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE,
+ "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF,
+ _ => return Err(()),
+ }})
+ }
+}
+
+
+pub enum Operation {
+ HLT, JMP, JCN, JCK,
+ LDA, STA, LDD, STD,
+ PSH, POP, CPY, SPL,
+ DUP, OVR, SWP, ROT,
+ ADD, SUB, INC, DEC,
+ LTH, GTH, EQU, NQK,
+ IOR, XOR, AND, NOT,
+ SHF, SHC, TAL, REV,
+}
+
+
+impl From<Operation> for u8 {
+ fn from(operation: Operation) -> Self {
+ match operation {
+ Op::HLT=>0x00, Op::JMP=>0x01, Op::JCN=>0x02, Op::JCK=>0x03,
+ Op::LDA=>0x04, Op::STA=>0x05, Op::LDD=>0x06, Op::STD=>0x07,
+ Op::PSH=>0x08, Op::POP=>0x09, Op::CPY=>0x0a, Op::SPL=>0x0b,
+ Op::DUP=>0x0c, Op::OVR=>0x0d, Op::SWP=>0x0e, Op::ROT=>0x0f,
+ Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13,
+ Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17,
+ Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1a, Op::NOT=>0x1b,
+ Op::SHF=>0x1c, Op::SHC=>0x1d, Op::TAL=>0x1e, Op::REV=>0x1f,
+ }
+ }
+}
+
+
+impl std::fmt::Display for Operation {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ write!(f, "{}", match self {
+ Op::HLT=>"HLT", Op::JMP=>"JMP", Op::JCN=>"JCN", Op::JCK=>"JCK",
+ Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD",
+ Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", Op::SPL=>"SPL",
+ Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT",
+ Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC",
+ Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK",
+ Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT",
+ Op::SHF=>"SHF", Op::SHC=>"SHC", Op::TAL=>"TAL", Op::REV=>"REV",
+ })
+ }
+}
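
The low five bits of an opcode byte select the operation, and the three high bits are independent mode flags: 0x80 for return mode (suffix 'r'), 0x40 for literal mode (suffix ':'), and 0x20 for double mode (rendered '*' for most mnemonics). A minimal decode sketch, assuming src/lib.rs re-exports the token types (the `use bedrock_asm::*;` path is an assumption, as lib.rs is not shown in this hunk):

    use bedrock_asm::*; // assumed re-export of src/tokens.rs

    fn main() {
        // 0xE4 is listed in the table above as "LDAr*:".
        let instr = Instruction { value: 0xE4 };
        assert!(matches!(instr.operation(), Operation::LDA)); // low 5 bits = 0x04
        assert!(instr.return_mode());  // 0x80 bit set => 'r'
        assert!(instr.literal_mode()); // 0x40 bit set => ':'
        assert!(instr.double_mode());  // 0x20 bit set => '*'
        assert_eq!(instr.to_string(), "LDAr*:");
    }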
diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs
new file mode 100644
index 0000000..ac5179c
--- /dev/null
+++ b/src/tokens/semantic.rs
@@ -0,0 +1,90 @@
+use crate::*;
+
+use SemanticTokenVariant as SemVar;
+
+
+pub struct SemanticToken {
+ pub source: SourceSpan,
+ pub bytecode: BytecodeSpan,
+ pub variant: SemanticTokenVariant,
+}
+
+
+pub enum SemanticTokenVariant {
+ LabelDefinition(LabelDefinition),
+ MacroDefinition(MacroDefinition),
+
+ /// Pointer to the matching label definition.
+ LabelReference(usize),
+ /// Pointer to the matching macro definition.
+ MacroInvocation(usize),
+
+ Literal(Value),
+ Padding(Value),
+ Instruction(Instruction),
+
+ Comment(String),
+ String(Vec<u8>),
+
+ /// Pointer to the matching block close.
+ BlockOpen(usize),
+ /// Pointer to the matching block open.
+ BlockClose(usize),
+ MarkOpen,
+ MarkClose,
+
+ Error(SemanticParseError),
+}
+
+impl std::fmt::Debug for SemanticToken {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ match &self.variant {
+ SemVar::LabelDefinition(def) => write!(f, "LabelDefinition({})", def.name),
+ SemVar::MacroDefinition(def) => write!(f, "MacroDefinition({})", def.name),
+ SemVar::LabelReference(pointer) => write!(f, "LabelReference(*{pointer})"),
+ SemVar::MacroInvocation(pointer) => write!(f, "MacroInvocation(*{pointer})"),
+ SemVar::Literal(value) => write!(f, "Literal({value})"),
+ SemVar::Padding(value) => write!(f, "Padding({value})"),
+ SemVar::Instruction(instr) => write!(f, "Instruction(0x{:02x})", instr.value),
+ SemVar::Comment(comment) => write!(f, "Comment({comment})"),
+ SemVar::String(string) => write!(f, "String({})", String::from_utf8_lossy(&string)),
+ SemVar::BlockOpen(_) => write!(f, "BlockOpen"),
+ SemVar::BlockClose(_) => write!(f, "BlockClose"),
+ SemVar::MarkOpen => write!(f, "MarkOpen"),
+ SemVar::MarkClose => write!(f, "MarkClose"),
+ SemVar::Error(_) => write!(f, "Error"),
+ }
+ }
+}
+
+
+pub struct LabelDefinition {
+ /// The absolute name of the label or sublabel.
+ pub name: String,
+ /// List of pointers to label reference tokens.
+ pub references: Vec<usize>,
+}
+
+
+pub struct MacroDefinition {
+ pub name: String,
+ pub references: Vec<usize>,
+ pub body_tokens: Vec<SemanticToken>,
+}
+
+
+pub enum SemanticParseError {
+ LabelDefinitionInMacroDefinition,
+ MacroDefinitionInMacroDefinition,
+
+ StrayMacroTerminator,
+ StrayBlockClose,
+ UnclosedBlock,
+
+ UndefinedSymbol(String),
+ RedefinedSymbol((String, SourceSpan)),
+
+ MacroInvocationBeforeDefinition((String, SourceSpan)),
+
+ SyntaxError(SyntacticParseError)
+}
diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs
new file mode 100644
index 0000000..8684ed9
--- /dev/null
+++ b/src/tokens/syntactic.rs
@@ -0,0 +1,39 @@
+use crate::*;
+
+
+pub struct SyntacticToken {
+ /// Location of token in source files.
+ pub source: SourceSpan,
+ pub variant: SyntacticTokenVariant,
+}
+
+
+pub enum SyntacticTokenVariant {
+ LabelDefinition(String),
+ MacroDefinition(String),
+ MacroDefinitionTerminator,
+
+ Literal(Value),
+ Padding(Value),
+ Instruction(Instruction),
+
+ Comment(String),
+ String(Vec<u8>),
+
+ BlockOpen,
+ BlockClose,
+ MarkOpen,
+ MarkClose,
+
+ Symbol(String),
+
+ Error(SyntacticParseError),
+}
+
+
+pub enum SyntacticParseError {
+ UnterminatedComment,
+ UnterminatedRawString,
+ UnterminatedNullString,
+ InvalidPaddingValue(String),
+}
diff --git a/src/tokens/value.rs b/src/tokens/value.rs
new file mode 100644
index 0000000..e421bd5
--- /dev/null
+++ b/src/tokens/value.rs
@@ -0,0 +1,32 @@
+pub enum Value {
+ Byte(u8),
+ Double(u16),
+}
+
+impl std::fmt::Display for Value {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ match self {
+ Self::Byte(value) => write!(f, "0x{value:02x}"),
+ Self::Double(value) => write!(f, "0x{value:04x}"),
+ }
+ }
+}
+
+
+impl std::str::FromStr for Value {
+ type Err = ();
+
+ fn from_str(token: &str) -> Result<Self, Self::Err> {
+ match token.len() {
+ 2 => match u8::from_str_radix(&token, 16) {
+ Ok(value) => Ok(Value::Byte(value)),
+ Err(_) => Err(()),
+ }
+ 4 => match u16::from_str_radix(&token, 16) {
+ Ok(value) => Ok(Value::Double(value)),
+ Err(_) => Err(()),
+ }
+ _ => Err(()),
+ }
+ }
+}
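
The token length alone decides whether a hex literal is a byte or a double, so "ff" and "00ff" denote different token variants even though they share a numeric value. A small sketch (the `bedrock_asm::Value` import path is an assumption):

    use bedrock_asm::Value; // assumed re-export

    fn main() {
        assert!(matches!("ff".parse::<Value>(), Ok(Value::Byte(0xff))));
        assert!(matches!("00ff".parse::<Value>(), Ok(Value::Double(0x00ff))));
        // Lengths other than 2 or 4 are rejected outright.
        assert!("fff".parse::<Value>().is_err());
    }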
diff --git a/src/translators.rs b/src/translators.rs
new file mode 100644
index 0000000..cce5633
--- /dev/null
+++ b/src/translators.rs
@@ -0,0 +1,9 @@
+mod syntactic_parser;
+mod semantic_parser;
+mod bytecode_generator;
+mod symbols_generator;
+
+pub use syntactic_parser::*;
+pub use semantic_parser::*;
+pub use bytecode_generator::*;
+pub use symbols_generator::*;
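
The four translators compose into a pipeline: source text to syntactic tokens to semantic tokens, which are then consumed twice, once for bytecode and once for the symbols file. A sketch of the whole chain under the same assumed re-exports (the source snippet is made up for illustration):

    use bedrock_asm::*;

    fn main() {
        let source = "@start 1234 ADD";
        let mut tokens = generate_semantic_tokens(source, None::<&str>);
        let bytecode = generate_bytecode(&mut tokens); // also fills in each token's bytecode span
        let symbols = generate_symbols_file(&tokens);  // reads those spans, so it must run last
        println!("{} bytes of bytecode\n{symbols}", bytecode.len());
    }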
diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs
new file mode 100644
index 0000000..956aca5
--- /dev/null
+++ b/src/translators/bytecode_generator.rs
@@ -0,0 +1,131 @@
+use crate::*;
+
+use SemanticTokenVariant as SemVar;
+
+
+pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> {
+ let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens);
+ generator.generate()
+}
+
+
+/// Translate semantic tokens into bytecode.
+struct BytecodeGenerator<'a> {
+ semantic_tokens: &'a mut [SemanticToken],
+ block_stack: Vec<usize>,
+ bytecode: Vec<u8>,
+ /// (address in bytecode, label definition token index)
+ label_references: Vec<(usize, usize)>,
+}
+
+impl<'a> BytecodeGenerator<'a> {
+ pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self {
+ Self {
+ semantic_tokens,
+ block_stack: Vec::new(),
+ bytecode: Vec::new(),
+ label_references: Vec::new(),
+ }
+ }
+
+ pub fn generate(mut self) -> Vec<u8> {
+ for i in 0..self.semantic_tokens.len() {
+ let address = self.bytecode.len();
+ self.generate_bytecode_for_token(i, None);
+ self.semantic_tokens[i].bytecode = BytecodeSpan {
+ bytes: self.bytecode[address..].to_vec(),
+ location: BytecodeLocation {
+ address,
+ length: self.bytecode.len().saturating_sub(address),
+ }
+ };
+ }
+
+ // Replace blank label references in bytecode with real label addresses.
+ // The layer of indirection is necessary because the iteration borrows
+ // self immutably.
+ let mut insertions: Vec<(usize, u16)> = Vec::new();
+ for (bytecode_address, token_pointer) in &self.label_references {
+ let label_token = &self.semantic_tokens[*token_pointer];
+            // TODO: Print a warning if the address does not fit in a u16.
+ let address_value = label_token.bytecode.location.address as u16;
+ insertions.push((*bytecode_address, address_value));
+ }
+ for (bytecode_address, address_value) in insertions {
+ self.replace_address_in_bytecode(bytecode_address, address_value);
+ }
+
+ // Strip trailing null bytes from the bytecode.
+ let mut length = self.bytecode.len();
+ for (i, byte) in self.bytecode.iter().enumerate().rev() {
+ match *byte == 0 {
+ true => length = i,
+ false => break,
+ };
+ }
+ self.bytecode.truncate(length);
+
+ return self.bytecode;
+ }
+
+ fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
+ macro_rules! push_byte {
+ ($byte:expr) => { self.bytecode.push($byte) }; }
+ macro_rules! push_double {
+ ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; }
+ macro_rules! pad {
+ ($len:expr) => { for _ in 0..$len { push_byte!(0); } } }
+
+ let semantic_token = if let Some(macro_pointer) = macro_pointer {
+ let macro_definition = &self.semantic_tokens[macro_pointer];
+ if let SemVar::MacroDefinition(def) = &macro_definition.variant {
+ &def.body_tokens[pointer]
+ } else { unreachable!() }
+ } else {
+ &self.semantic_tokens[pointer]
+ };
+ match &semantic_token.variant {
+ SemVar::MacroInvocation(pointer) => {
+ let macro_definition = &self.semantic_tokens[*pointer];
+ if let SemVar::MacroDefinition(def) = &macro_definition.variant {
+ let length = def.body_tokens.len();
+ let macro_pointer = Some(*pointer);
+ for body_pointer in 0..length {
+ // Recurse, generate bytecode for each macro body token.
+ self.generate_bytecode_for_token(body_pointer, macro_pointer);
+ }
+ } else { unreachable!() }
+ }
+ SemVar::Literal(value) => match value {
+ Value::Byte(value) => push_byte!(*value),
+ Value::Double(value) => push_double!(value),
+ }
+ SemVar::Padding(value) => match value {
+ Value::Byte(value) => pad!(*value),
+ Value::Double(value) => pad!(*value),
+ }
+ SemVar::Instruction(instr) => push_byte!(instr.value),
+ SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes),
+ SemVar::LabelReference(pointer) => {
+ self.label_references.push((self.bytecode.len(), *pointer));
+ push_double!(0u16);
+ }
+ SemVar::BlockOpen(_) => {
+ self.block_stack.push(self.bytecode.len());
+ push_double!(0u16);
+ }
+ SemVar::BlockClose(_) => {
+ let bytecode_address = self.block_stack.pop().unwrap();
+                // TODO: Print a warning if the address does not fit in a u16.
+ let address_value = self.bytecode.len() as u16;
+ self.replace_address_in_bytecode(bytecode_address, address_value);
+ }
+ _ => (),
+ };
+ }
+
+ fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) {
+ let range = bytecode_address..bytecode_address+2;
+ self.bytecode[range].clone_from_slice(&address_value.to_be_bytes());
+ }
+}
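
Label references and block opens both emit a two-byte 0x0000 placeholder, which is later back-patched by replace_address_in_bytecode splicing a big-endian address over it. A standalone sketch of just that splice:

    fn main() {
        // "JMP:" opcode, a two-byte placeholder, then an "ADD" opcode.
        let mut bytecode = vec![0x41, 0x00, 0x00, 0x10];
        let target: u16 = 0x0123;
        bytecode[1..3].clone_from_slice(&target.to_be_bytes()); // back-patch
        assert_eq!(bytecode, [0x41, 0x01, 0x23, 0x10]);
    }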
diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs
new file mode 100644
index 0000000..cb6a435
--- /dev/null
+++ b/src/translators/semantic_parser.rs
@@ -0,0 +1,245 @@
+use crate::*;
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use SyntacticTokenVariant as SynVar;
+use SemanticTokenVariant as SemVar;
+use SemanticParseError as SemErr;
+
+
+pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
+ let semantic_parser = SemanticParser::from_source_code(source_code, path);
+ semantic_parser.parse()
+}
+
+
+/// Translate syntactic tokens into semantic tokens.
+struct SemanticParser {
+ labels: HashMap<String, Definition>,
+ macros: HashMap<String, Definition>,
+ syntactic_tokens: Vec<SyntacticToken>,
+ /// Index of the current outer token.
+ current_outer_index: usize,
+}
+
+impl SemanticParser {
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ let mut labels = HashMap::new();
+ let mut macros = HashMap::new();
+ let mut syntactic_tokens = Vec::new();
+
+ let parser = SyntacticParser::from_source_code(source_code, path);
+ for syntactic_token in parser {
+ let definition = Definition::new(syntactic_token.source.clone());
+ match &syntactic_token.variant {
+ SynVar::LabelDefinition(name) => {
+ let _ = labels.try_insert(name.to_owned(), definition);
+ },
+ SynVar::MacroDefinition(name) => {
+ let _ = macros.try_insert(name.to_owned(), definition);
+ },
+ _ => (),
+ }
+ syntactic_tokens.push(syntactic_token);
+ }
+
+ Self {
+ labels,
+ macros,
+ syntactic_tokens,
+ current_outer_index: 0,
+ }
+ }
+
+ /// Parse syntactic tokens as semantic tokens.
+ pub fn parse(mut self) -> Vec<SemanticToken> {
+ let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens);
+ let mut syntactic = syntactic_tokens.into_iter();
+ let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false);
+
+ // Insert real label definition pointers into label reference tokens.
+ for definition in self.labels.values_mut() {
+ if let Some(definition_pointer) = definition.pointer {
+ // Insert definition pointer into reference tokens.
+ for reference_pointer in &definition.references {
+ let reference_token = &mut semantic_tokens[*reference_pointer];
+ reference_token.variant = SemVar::LabelReference(definition_pointer);
+ }
+ // Insert reference pointers into definition token.
+ let definition_token = &mut semantic_tokens[definition_pointer];
+ if let SemVar::LabelDefinition(ref mut def) = definition_token.variant {
+ def.references = std::mem::take(&mut definition.references);
+ } else { unreachable!() }
+ // Insert definition pointer into reference tokens inside macros.
+ for (outer, inner) in &definition.deep_references {
+ let macro_token = &mut semantic_tokens[*outer];
+ if let SemVar::MacroDefinition(ref mut def) = macro_token.variant {
+ let reference_token = &mut def.body_tokens[*inner];
+ reference_token.variant = SemVar::LabelReference(definition_pointer);
+ } else { unreachable!() }
+ }
+ // TODO: Record deep references in macro and label definitions?
+ }
+ }
+
+ return semantic_tokens;
+ }
+
+ fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken>
+ where I: Iterator<Item = SyntacticToken>
+ {
+ let mut semantic_tokens: Vec<SemanticToken> = Vec::new();
+ let mut block_stack: Vec<usize> = Vec::new();
+
+ while let Some(syntactic_token) = parser.next() {
+ let current_index = semantic_tokens.len();
+ if !in_macro {
+ self.current_outer_index = current_index;
+ }
+
+ let semantic_token_variant = match syntactic_token.variant {
+ SynVar::LabelDefinition(name) => {
+ if in_macro {
+ SemVar::Error(SemErr::LabelDefinitionInMacroDefinition)
+ } else if let Some(definition) = self.macros.get(&name) {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else if let Some(definition) = self.labels.get_mut(&name) {
+ if definition.pointer.is_some() {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else {
+ definition.pointer = Some(current_index);
+ let references = Vec::new();
+ SemVar::LabelDefinition(LabelDefinition { name, references })
+ }
+ } else {
+ unreachable!()
+ }
+ }
+ SynVar::MacroDefinition(name) => {
+ if in_macro {
+ SemVar::Error(SemErr::MacroDefinitionInMacroDefinition)
+ } else if let Some(definition) = self.labels.get(&name) {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else if let Some(definition) = self.macros.get_mut(&name) {
+ if definition.pointer.is_some() {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else {
+ definition.pointer = Some(current_index);
+ let references = Vec::new();
+ let body_tokens = self.pull_semantic_tokens(parser, true);
+ SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens })
+ }
+ } else {
+ unreachable!()
+ }
+ }
+ SynVar::MacroDefinitionTerminator => if in_macro {
+ break;
+ } else {
+ SemVar::Error(SemErr::StrayMacroTerminator)
+ }
+ SynVar::Literal(value) => {
+ SemVar::Literal(value)
+ }
+ SynVar::Padding(value) => {
+ SemVar::Padding(value)
+ }
+ SynVar::Instruction(instr) => {
+ SemVar::Instruction(instr)
+ }
+ SynVar::Comment(comment) => {
+ SemVar::Comment(comment)
+ }
+ SynVar::String(bytes) => {
+ SemVar::String(bytes)
+ }
+ SynVar::BlockOpen => {
+ block_stack.push(current_index);
+ SemVar::BlockOpen(0)
+ }
+ SynVar::BlockClose => {
+ if let Some(pointer) = block_stack.pop() {
+ let open = &mut semantic_tokens[pointer];
+ open.variant = SemVar::BlockOpen(current_index);
+ SemVar::BlockClose(pointer)
+ } else {
+ SemVar::Error(SemErr::StrayBlockClose)
+ }
+ }
+ SynVar::MarkOpen => {
+ SemVar::MarkOpen
+ }
+ SynVar::MarkClose => {
+ SemVar::MarkClose
+ }
+ SynVar::Symbol(name) => {
+ if let Some(definition) = self.labels.get_mut(&name) {
+ if in_macro {
+ let pointer = (self.current_outer_index, current_index);
+ definition.deep_references.push(pointer);
+ } else {
+ definition.references.push(current_index);
+ }
+ SemVar::LabelReference(0)
+ } else if let Some(definition) = self.macros.get_mut(&name) {
+ if let Some(pointer) = definition.pointer {
+ if !in_macro { definition.references.push(current_index); }
+ SemVar::MacroInvocation(pointer)
+ } else {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source)))
+ }
+ } else {
+ SemVar::Error(SemErr::UndefinedSymbol(name))
+ }
+ }
+ SynVar::Error(syntax_err) => {
+ SemVar::Error(SemErr::SyntaxError(syntax_err))
+ }
+ };
+
+ let semantic_token = SemanticToken {
+ source: syntactic_token.source,
+ bytecode: BytecodeSpan::default(),
+ variant: semantic_token_variant,
+ };
+ semantic_tokens.push(semantic_token);
+ }
+
+ if in_macro {
+            // TODO: Emit an UnterminatedMacroDefinition error here.
+ }
+
+ // Replace each unclosed BlockOpen token with an error.
+ for block_pointer in block_stack {
+ semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock);
+ }
+
+ return semantic_tokens;
+ }
+}
+
+
+struct Definition {
+ pub source: SourceSpan,
+ pub pointer: Option<usize>,
+ pub references: Vec<usize>,
+ /// (macro index, label reference index)
+ pub deep_references: Vec<(usize, usize)>,
+}
+
+impl Definition {
+ pub fn new(source: SourceSpan) -> Self {
+ Self {
+ source,
+ pointer: None,
+ references: Vec::new(),
+ deep_references: Vec::new(),
+ }
+ }
+}
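
Because from_source_code registers every definition name before parse runs, a reference may appear earlier in the source than the label it names. A small demonstration under the assumed crate re-exports:

    use bedrock_asm::*;

    fn main() {
        // "loop" is referenced before "@loop" defines it.
        let tokens = generate_semantic_tokens("loop @loop", None::<&str>);
        for token in &tokens {
            println!("{token:?}");
        }
        // Expected output, per the Debug impl in src/tokens/semantic.rs:
        //   LabelReference(*1)
        //   LabelDefinition(loop)
    }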
diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs
new file mode 100644
index 0000000..06bbaa8
--- /dev/null
+++ b/src/translators/symbols_generator.rs
@@ -0,0 +1,28 @@
+use crate::*;
+
+use SemanticTokenVariant as SemVar;
+
+
+pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String {
+ let mut symbols = String::new();
+
+ for token in semantic_tokens {
+ if let SemVar::LabelDefinition(definition) = &token.variant {
+ let address = token.bytecode.location.address;
+ if address > 0xffff { break; }
+ let name = &definition.name;
+ let path = match &token.source.in_source {
+ Some(source) => &source.path,
+ None => &token.source.in_merged.path,
+ };
+ if let Some(path) = path {
+ let path = path.as_os_str().to_string_lossy();
+ symbols.push_str(&format!("{address:04x} {name} {path}\n"));
+ } else {
+ symbols.push_str(&format!("{address:04x} {name}\n"));
+ }
+ }
+ }
+
+ return symbols;
+}
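
Each symbols-file line is a four-digit hex address, the absolute label name, and, when known, the path of the source file the label came from. A hypothetical two-label program (names and paths invented for illustration) would produce something like:

    0000 start src/main.brk
    0013 start/loop src/main.brk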
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
new file mode 100644
index 0000000..7279daf
--- /dev/null
+++ b/src/translators/syntactic_parser.rs
@@ -0,0 +1,247 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+/// Translate raw source code characters into syntactic tokens.
+pub struct SyntacticParser {
+ /// Path of file from which the source was read.
+ path: Option<PathBuf>,
+ /// Path of the original source file.
+ source_path: Option<PathBuf>,
+ /// Position of the next character to be read.
+ position: Position,
+ /// Previous value of the position field.
+ prev_position: Position,
+ /// Line where the embedded source file begins.
+ source_line_start: usize,
+ /// Characters waiting to be parsed, in reverse order.
+ chars: Vec<char>,
+ /// The token currently being parsed.
+    /// Source text of the token currently being parsed.
+ /// The name of the most recently parsed label.
+ label: String,
+}
+
+
+impl SyntacticParser {
+ /// Parse source code.
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ Self {
+ path: path.map(|p| p.into()),
+ source_path: None,
+ position: Position { line: 0, column: 0 },
+ prev_position: Position { line: 0, column: 0 },
+ source_line_start: 0,
+ chars: source_code.chars().rev().collect(),
+ token_source_string: String::new(),
+ label: String::new(),
+ }
+ }
+
+ /// Return the next character, keeping it on the queue.
+ fn peek_char(&self) -> Option<char> {
+ self.chars.last().copied()
+ }
+
+ /// Return the next character, removing it from the queue.
+ fn eat_char(&mut self) -> Option<char> {
+ let option = self.chars.pop();
+ if let Some(c) = option {
+ self.prev_position = self.position;
+ self.position.advance(c);
+ self.token_source_string.push(c);
+ }
+ return option;
+ }
+
+ /// Remove the next character from the queue.
+ fn drop_char(&mut self) {
+ if let Some(c) = self.chars.pop() {
+ self.prev_position = self.position;
+ self.position.advance(c);
+ }
+ }
+
+ /// Remove leading whitespace.
+ fn drop_whitespace(&mut self) {
+ while let Some(c) = self.peek_char() {
+ match c.is_whitespace() {
+ true => self.drop_char(),
+ false => break,
+ }
+ }
+ }
+
+ /// Remove a full token from the queue.
+ fn eat_token(&mut self) -> String {
+ const DELIMITERS: [char; 13] =
+ ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
+ let mut token = String::new();
+ while let Some(peek) = self.peek_char() {
+ if peek.is_whitespace() || DELIMITERS.contains(&peek) {
+ break;
+ }
+ let c = self.eat_char().unwrap();
+ token.push(c);
+ if c == ':' {
+ break;
+ }
+ }
+ token
+ }
+
+    /// Return all characters up to the next occurrence of the delimiter,
+    /// removing both the returned characters and the delimiter from the
+    /// queue. Returns None if the source ends before the delimiter is found.
+ fn eat_to_delim(&mut self, delim: char) -> Option<String> {
+ let mut token = String::new();
+        while let Some(c) = self.eat_char() {
+            // eat_char has already recorded c in token_source_string, so
+            // pushing it again here would duplicate every character.
+ match c == delim {
+ true => return Some(token),
+ false => token.push(c),
+ }
+ }
+ return None;
+ }
+
+ fn is_line_empty(&self) -> bool {
+ for c in self.chars.iter().rev() {
+ if *c == '\n' {
+ return true;
+ }
+ if !c.is_whitespace() {
+ return false
+ }
+ }
+ return false;
+ }
+}
+
+
+impl Iterator for SyntacticParser {
+ type Item = SyntacticToken;
+
+ /// Sequentially parse tokens from the source code.
+ fn next(&mut self) -> Option<SyntacticToken> {
+ use SyntacticTokenVariant as SynVar;
+ use SyntacticParseError as SynErr;
+
+ self.drop_whitespace();
+ let start = self.position;
+
+ let variant = match self.eat_char()? {
+ '@' => {
+ self.label = self.eat_token();
+ SynVar::LabelDefinition(self.label.clone())
+ }
+ '&' => {
+ let token = self.eat_token();
+ let sublabel = format!("{}/{token}", self.label);
+ SynVar::LabelDefinition(sublabel)
+ }
+ '%' => SynVar::MacroDefinition(self.eat_token()),
+ ';' => SynVar::MacroDefinitionTerminator,
+ '[' => SynVar::MarkOpen,
+ ']' => SynVar::MarkClose,
+ '{' => SynVar::BlockOpen,
+ '}' => SynVar::BlockClose,
+ '(' => match self.eat_to_delim(')') {
+ Some(string) => SynVar::Comment(string),
+ None => SynVar::Error(SynErr::UnterminatedComment),
+ }
+ '\'' => match self.eat_to_delim('\'') {
+ Some(string) => SynVar::String(string.as_bytes().to_vec()),
+ None => SynVar::Error(SynErr::UnterminatedRawString),
+ }
+ '"' => match self.eat_to_delim('"') {
+ Some(string) => {
+ let mut bytes = string.as_bytes().to_vec();
+ bytes.push(0x00);
+ SynVar::String(bytes)
+ }
+ None => SynVar::Error(SynErr::UnterminatedNullString),
+ }
+ '#' => {
+ let token = self.eat_token();
+ match token.parse::<Value>() {
+ Ok(value) => SynVar::Padding(value),
+ Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
+ }
+ },
+ '~' => {
+ let token = self.eat_token();
+ let symbol = format!("{}/{token}", self.label);
+ SynVar::Symbol(symbol)
+ }
+ ':' => SynVar::Symbol(String::from(':')),
+ c => {
+ let token = format!("{c}{}", self.eat_token());
+ match token.parse::<Value>() {
+ Ok(value) => SynVar::Literal(value),
+ Err(_) => match token.parse::<Instruction>() {
+ Ok(instruction) => SynVar::Instruction(instruction),
+ Err(_) => SynVar::Symbol(token),
+ }
+ }
+ }
+ };
+
+ // Parse source path comments.
+ if let SynVar::Comment(comment) = &variant {
+ // Check that the comment fills the entire line.
+ if start.column == 0 && self.is_line_empty() {
+ if let Some(path) = comment.strip_prefix(": ") {
+ self.source_path = Some(PathBuf::from(path.trim()));
+ self.source_line_start = start.line + 1;
+ }
+ }
+ }
+
+ // Find location in current merged file.
+ let in_merged = SourceLocation {
+ path: self.path.to_owned(),
+ start,
+ end: self.prev_position,
+ };
+
+ // Find location in original source file.
+ let in_source = if start.line >= self.source_line_start {
+ match &self.source_path {
+ Some(path) => {
+ let offset = self.source_line_start;
+ Some( SourceLocation {
+ path: Some(path.to_owned()),
+ start: Position {
+ line: in_merged.start.line.saturating_sub(offset),
+ column: in_merged.start.column,
+ },
+ end: Position {
+ line: in_merged.end.line.saturating_sub(offset),
+ column: in_merged.end.column,
+ }
+ })
+ }
+ None => None,
+ }
+ } else {
+ None
+ };
+
+ let string = std::mem::take(&mut self.token_source_string);
+ let source = SourceSpan { string, in_merged, in_source };
+ Some( SyntacticToken { source, variant } )
+ }
+}
+
+
+#[derive(Debug)]
+pub enum ParseError {
+ InvalidExtension,
+ NotFound,
+ NotReadable,
+ IsADirectory,
+ InvalidUtf8,
+ Unknown,
+}
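
SyntacticParser implements Iterator, so a host program can stream tokens straight off the source text. A usage sketch (the import path and source snippet are assumptions):

    use bedrock_asm::*;

    fn main() {
        let source = "%double DUP ADD ; ( a comment ) @data 1234";
        for token in SyntacticParser::from_source_code(source, None::<&str>) {
            match token.variant {
                SyntacticTokenVariant::MacroDefinition(name) => println!("macro: {name}"),
                SyntacticTokenVariant::Instruction(instr) => println!("instr: {instr}"),
                _ => {}
            }
        }
        // Prints "macro: double", then "instr: DUP" and "instr: ADD".
    }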