summary refs log tree commit diff
diff options
context:
space:
mode:
author Ben Bridle <ben@derelict.engineering> 2025-03-06 20:33:27 +1300
committer Ben Bridle <ben@derelict.engineering> 2025-03-11 16:59:26 +1300
commit 1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch)
tree 472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05
parent f2ed89083f5326a7a6f0a1720033d3388aa431fb (diff)
download torque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip
Rewrite entire assembler
The language is now more general, the code is better structured, error
reporting is more detailed, and many new language features have been
implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
-rw-r--r-- Cargo.lock 8
-rw-r--r-- Cargo.toml 4
-rw-r--r-- src/bin/tq.rs 221
-rw-r--r-- src/compiler.rs 214
-rw-r--r-- src/formats/debug.rs 18
-rw-r--r-- src/formats/inhx.rs 28
-rw-r--r-- src/formats/inhx32.rs 39
-rw-r--r-- src/formats/mod.rs 87
-rw-r--r-- src/formats/raw.rs 29
-rw-r--r-- src/lib.rs 19
-rw-r--r-- src/parsers/assembler.rs 290
-rw-r--r-- src/parsers/bytecode.rs 191
-rw-r--r-- src/parsers/expression.rs 52
-rw-r--r-- src/parsers/mod.rs 15
-rw-r--r-- src/parsers/packed_binary_literal.rs 85
-rw-r--r-- src/parsers/semantic.rs 352
-rw-r--r-- src/parsers/syntactic.rs 172
-rw-r--r-- src/report.rs 235
-rw-r--r-- src/stages/bytecode.rs 182
-rw-r--r-- src/stages/bytecode_tokens.rs 78
-rw-r--r-- src/stages/intermediate.rs 577
-rw-r--r-- src/stages/intermediate_tokens.rs 149
-rw-r--r-- src/stages/mod.rs 31
-rw-r--r-- src/stages/semantic.rs 478
-rw-r--r-- src/stages/semantic_tokens.rs 296
-rw-r--r-- src/stages/syntactic.rs 323
-rw-r--r-- src/stages/syntactic_tokens.rs 160
-rw-r--r-- src/tokens/assembler.rs 162
-rw-r--r-- src/tokens/bytecode.rs 49
-rw-r--r-- src/tokens/expression.rs 78
-rw-r--r-- src/tokens/mod.rs 19
-rw-r--r-- src/tokens/semantic.rs 192
-rw-r--r-- src/tokens/syntactic.rs 84
-rw-r--r-- src/tokens/tracked.rs 47
-rw-r--r-- src/tokens/tracked_integer.rs 14
-rw-r--r-- src/types/expression_stack.rs 89
-rw-r--r-- src/types/mod.rs 7
-rw-r--r-- src/types/operator.rs 87
-rw-r--r-- src/types/word_template.rs (renamed from src/tokens/packed_binary_literal.rs) 33
39 files changed, 2951 insertions, 2243 deletions
diff --git a/Cargo.lock b/Cargo.lock
index e54cc0f..cc41ceb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9,8 +9,8 @@ source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c
[[package]]
name = "assembler"
-version = "2.0.1"
-source = "git+git://benbridle.com/assembler?tag=v2.0.1#00858f0b4bdfa7de838a21d27aef5e76be310828"
+version = "2.1.0"
+source = "git+git://benbridle.com/assembler?tag=v2.1.0#c5f60b7ff45ced7c8b8519bc8fcf681486ad09fa"
dependencies = [
"ansi",
"log 1.1.2",
@@ -60,8 +60,8 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "switchboard"
-version = "1.0.0"
-source = "git+git://benbridle.com/switchboard?tag=v1.0.0#ea70fa89659e5cf1a9d4ca6ea31fb67f7a2cc633"
+version = "2.1.0"
+source = "git+git://benbridle.com/switchboard?tag=v2.1.0#e6435712ba5b3ca36e99fc8cbe7755940f8b1f3f"
dependencies = [
"log 1.1.1",
"paste",
diff --git a/Cargo.toml b/Cargo.toml
index 27b344f..5755774 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,9 +4,9 @@ version = "1.2.0"
edition = "2021"
[dependencies]
-assembler = { git = "git://benbridle.com/assembler", tag = "v2.0.1" }
+assembler = { git = "git://benbridle.com/assembler", tag = "v2.1.0" }
log = { git = "git://benbridle.com/log", tag = "v1.1.2" }
-switchboard = { git = "git://benbridle.com/switchboard", tag = "v1.0.0" }
+switchboard = { git = "git://benbridle.com/switchboard", tag = "v2.1.0" }
paste = "1.0.15"
indexmap = "2.7.1"
diff --git a/src/bin/tq.rs b/src/bin/tq.rs
index f22bd14..d1e51f3 100644
--- a/src/bin/tq.rs
+++ b/src/bin/tq.rs
@@ -1,43 +1,100 @@
use torque_asm::*;
+use assembler::FileError;
use log::{info, fatal};
-use switchboard::{Switchboard, SwitchQuery};
+use switchboard::*;
use std::io::{Read, Write};
-use std::str::FromStr;
+use std::path::Path;
-fn print_version() -> ! {
- let version = env!("CARGO_PKG_VERSION");
- eprintln!("torque assembler, version {version}");
- eprintln!("written by ben bridle");
- std::process::exit(0);
-}
-
fn main() {
let mut args = Switchboard::from_env();
- if args.named("version").as_bool() {
- print_version();
+ args.positional("source");
+ args.positional("destination");
+ args.positional("extension").default("tq");
+ args.named("no-libs");
+ args.named("no-project-libs");
+ args.named("no-env-libs");
+ args.named("format").default("debug");
+ args.named("width");
+ args.named("dry-run").short('n');
+ args.named("tree");
+ args.named("help").short('h');
+ args.named("version");
+ args.named("verbose").short('v');
+ args.raise_errors();
+
+ let source_path = args.get("source").as_path_opt().map(
+ |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}")));
+ let destination = args.get("destination").as_path_opt();
+ let extension = args.get("extension").as_string();
+ let no_libs = args.get("no-libs").as_bool();
+ let no_project_libs = args.get("no-project-libs").as_bool();
+ let no_env_libs = args.get("no-env-libs").as_bool();
+ let format = Format::from_str(&args.get("format").as_string());
+ let width = args.get("width").as_u32_opt();
+ let dry_run = args.get("dry-run").as_bool();
+ let print_tree = args.get("tree").as_bool();
+ let print_help = args.get("help").as_bool();
+ let print_version = args.get("version").as_bool();
+ let verbose = args.get("verbose").as_bool();
+
+ if verbose { log::set_log_level(log::LogLevel::Info) }
+ if print_version {
+ let version = env!("CARGO_PKG_VERSION");
+ eprintln!("torque assembler, version {version}");
+ eprintln!("written by ben bridle");
+ std::process::exit(0);
}
- if args.named("verbose").short('v').as_bool() {
- log::set_log_level(log::LogLevel::Info);
+ if print_help {
+ eprintln!("\
+Usage: tq [source] [destination]
+
+Torque multi-assembler, see http://benbridle.com/torque for documentation.
+
+Arguments:
+ [source] Path to a source file to assemble
+ [destination] Path to which output will be written
+ [extension] File extension to identify library files (default is 'tq')
+
+Switches:
+ --format=<fmt> Format to apply to assembled bytecode (default is 'debug')
+ --width=<width> Force a fixed width for all assembled words
+ --no-project-libs Don't search for libraries in the source parent folder
+ --no-env-libs Don't search for libraries in the TORQUE_LIBS path variable
+ --no-libs Combination of --no-project-libs and --no-env-libs
+ --tree Display a tree visualisation of all included library files
+ --dry-run (-n) Assemble and show errors only, don't write any output
+ --help (-h) Prints help
+ --verbose, (-v) Print additional debug information
+ --version Print the assembler version and exit
+
+Environment variables:
+ TORQUE_LIBS
+ A list of colon-separated paths which will be searched to find
+ Torque source code files to use as libraries when assembling a
+ Torque program. If a library file resolves an unresolved symbol
+ in the program being assembled, the library file will be merged
+ into the program.
+
+Output formats:
+ <debug>
+ Print assembled words as human-readable binary literals.
+ <inhx>
+ Original 8-bit Intel hex format.
+ <inhx32>
+ Modified 16-bit Intel hex format used by Microchip.
+ <raw>
+ Assembled words are converted to big-endian bytestrings and concatenated.
+ Each word is padded to the nearest byte. Words must all be the same width.
+ <source>
+ Print the source file before assembly, with symbols resolved.
+
+Created by Ben Bridle.
+ ");
+ std::process::exit(0);
}
- let source_path = args.positional("source").as_path_opt().map(
- |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}")));
- let destination_path = args.positional("destination").as_path_opt();
- let extension = args.named("ext").default("tq").as_string();
-
- let no_libs = args.named("no-libs").as_bool();
- let no_project_libs = args.named("no-project-libs").as_bool();
- let no_environment_libs = args.named("no-env-libs").as_bool();
-
- let format = args.named("format").default("debug").as_string();
- let print_tree = args.named("tree").as_bool();
- let dry_run = args.named("dry-run").short('n').as_bool();
-
- let Ok(format) = Format::from_str(format.as_str()) else {
- fatal!("Unknown format '{format}', expected 'debug', 'inhx', 'inhx32', 'raw', or 'source'. ");
- };
// -----------------------------------------------------------------------
@@ -68,14 +125,13 @@ fn main() {
if compiler.error().is_some() && !no_libs && !no_project_libs {
compiler.include_libs_from_parent(&extension);
}
- if compiler.error().is_some() && !no_libs && !no_environment_libs {
+ if compiler.error().is_some() && !no_libs && !no_env_libs {
compiler.include_libs_from_path_variable("TORQUE_LIBS", &extension);
}
if print_tree {
compiler.resolver.hierarchy().report()
}
-
if let Some(error) = compiler.error() {
error.report();
std::process::exit(1);
@@ -85,54 +141,58 @@ fn main() {
error.report();
std::process::exit(1);
});
- if format == Format::Source && !dry_run {
- write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref());
+
+ if !dry_run && format == Format::Source {
+ write_bytes_and_exit(merged_source.as_bytes(), destination.as_ref());
}
// -----------------------------------------------------------------------
- // Parse syntactic tokens from merged source code.
let path = Some("<merged source>");
- let syntactic_tokens = SyntacticParser::new(&merged_source, path).parse();
- report_syntactic_errors(&syntactic_tokens, &merged_source);
+ let syntactic = match parse_syntactic(&merged_source, path) {
+ Ok(tokens) => tokens,
+ Err(errors) => {
+ report_syntactic_errors(&errors, &merged_source);
+ std::process::exit(1);
+ }
+ };
- let program = SemanticParser::new(syntactic_tokens).parse();
- report_semantic_errors(&program, &merged_source);
+ let semantic = match parse_semantic(syntactic) {
+ Ok(tokens) => tokens,
+ Err(errors) => {
+ report_semantic_errors(&errors, &merged_source);
+ std::process::exit(1);
+ }
+ };
- // program.print_definitions();
- let assembled_tokens = program.assemble();
- report_assembler_errors(&assembled_tokens, &merged_source);
+ let intermediate = match parse_intermediate(semantic) {
+ Ok(tokens) => tokens,
+ Err(errors) => {
+ report_intermediate_errors(&errors, &merged_source);
+ std::process::exit(1);
+ }
+ };
+
+ let segments = match parse_bytecode(intermediate, width) {
+ Ok(segments) => segments,
+ Err(errors) => {
+ report_bytecode_errors(&errors, &merged_source);
+ std::process::exit(1);
+ }
+ };
- let bytecode = BytecodeGenerator::new(&assembled_tokens).generate();
- report_bytecode_errors(&bytecode, &merged_source);
if !dry_run {
- match format {
- Format::Debug => {
- let mut output = String::new();
- for word in &bytecode.words {
- output.push_str(&word.to_string());
- output.push('\n');
- }
- write_bytes_and_exit(output.as_bytes(), destination_path.as_ref());
- }
- Format::Inhx => {
- let output = format_inhx(&bytecode.words);
- write_bytes_and_exit(output.as_bytes(), destination_path.as_ref());
- }
- Format::Inhx32 => {
- let output = format_inhx32(&bytecode.words);
- write_bytes_and_exit(output.as_bytes(), destination_path.as_ref());
- }
- Format::Raw => {
- let mut output = Vec::new();
- for word in &bytecode.words {
- let value = word.value as u16;
- output.extend(value.to_be_bytes());
- }
- write_bytes_and_exit(&output, destination_path.as_ref());
- }
- Format::Source => unreachable!(),
+ let result = match format {
+ Format::Debug => format_debug(&segments),
+ Format::Inhx => format_inhx(&segments),
+ Format::Inhx32 => format_inhx32(&segments),
+ Format::Raw => format_raw(&segments, width),
+ Format::Source => unreachable!("Source output is handled before merged assembly"),
+ };
+ match result {
+ Ok(bytes) => write_bytes_and_exit(&bytes, destination.as_ref()),
+ Err(error) => report_format_error(&error, format, &merged_source),
}
}
}
@@ -151,26 +211,3 @@ fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! {
}
std::process::exit(0);
}
-
-#[derive(PartialEq)]
-enum Format {
- Debug,
- Inhx,
- Inhx32,
- Raw,
- Source,
-}
-
-impl FromStr for Format {
- type Err = ();
- fn from_str(string: &str) -> Result<Self, ()> {
- match string {
- "debug" => Ok(Self::Debug),
- "inhx" => Ok(Self::Inhx),
- "inhx32" => Ok(Self::Inhx32),
- "raw" => Ok(Self::Raw),
- "source" => Ok(Self::Source),
- _ => Err(()),
- }
- }
-}
diff --git a/src/compiler.rs b/src/compiler.rs
index 10f1433..c0caae0 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1,5 +1,9 @@
use crate::*;
+use assembler::*;
+use assembler::DefinitionType::*;
+use assembler::SymbolRole::*;
+
/// Compiles multiple source code files into one.
pub struct Compiler {
@@ -50,82 +54,184 @@ impl Compiler {
self.resolver.error()
}
- pub fn get_compiled_source(&self) -> Result<String, MergeError> {
+ pub fn get_compiled_source(&mut self) -> Result<String, MergeError> {
+ self.resolver.calculate_hierarchy();
self.resolver.get_merged_source_code(push_source_code)
}
}
/// Parse all symbols from a source code string.
-fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> {
- use SyntacticTokenVariant as SynVar;
- use DefinitionType::*;
- use SymbolRole::*;
- let mut symbols = Vec::new();
- let mut macro_name: Option<String> = None;
- let mut parse_arg_list = false; // true if parsing macro argument list
- let mut after_separator = false; // true if prev token was separator
-
- macro_rules! push {
- ($name:expr, $source:expr, $role:expr) => {
- symbols.push(Symbol {
- name: $name,
- source: $source,
- role: $role,
- namespace: match &macro_name {
- Some(name) => vec![name.to_owned()],
- None => vec![],
- }
- })
+fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> {
+ let syntactic = match parse_syntactic(source_code, path) {
+ Ok(syntactic) => syntactic,
+ Err(_errors) => return None,
+ };
+ let semantic = match parse_semantic(syntactic) {
+ Ok(semantic) => semantic,
+ Err(_errors) => return None,
+ };
+ Some(SymbolParser::new().parse(&semantic))
+}
+
+
+// Extract symbol definitions from a list of semantic tokens.
+pub struct SymbolParser {
+ pub macro_name: Option<String>,
+ pub symbols: Vec<Symbol>,
+}
+
+impl SymbolParser {
+ pub fn new() -> Self {
+ Self {
+ macro_name: None,
+ symbols: Vec::new(),
}
}
- let syntactic_tokens = SyntacticParser::new(&source_code, path).parse();
- for token in syntactic_tokens {
- match token.variant {
- SynVar::MacroDefinition(name) => {
- push!(name.clone(), token.source, Definition(MustPrecedeReference));
- macro_name = Some(name);
- parse_arg_list = true;
+ fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) {
+ let name = name.to_string();
+ let namespace = match &self.macro_name {
+ Some(macro_name) => vec![macro_name.to_owned()],
+ None => vec![],
+ };
+ let source = source.to_owned();
+ self.symbols.push(Symbol { name, namespace, source, role });
+
+ }
+
+ pub fn parse(mut self, semantic: &[Tracked<SemanticToken>]) -> Vec<Symbol> {
+ for token in semantic {
+ let source = &token.source;
+ match &token.value {
+ SemanticToken::MacroDefinition(definition) => {
+ // Record macro definition.
+ self.record_symbol(
+ &definition.name,
+ &definition.name.source,
+ Definition(MustPrecedeReference),
+ );
+ self.macro_name = Some(definition.name.to_string());
+
+ for argument in &definition.arguments {
+ self.record_symbol(
+ &argument.name,
+ &argument.source,
+ Definition(MustPrecedeReference),
+ );
+ }
+ match &definition.body {
+ MacroDefinitionBody::Integer(integer) => {
+ self.parse_integer_token(&integer, &integer.source)
+ }
+ MacroDefinitionBody::Invocation(invocation) => {
+ self.parse_invocation(&invocation, &invocation.source)
+ }
+ MacroDefinitionBody::Block(tokens) => {
+ for token in tokens {
+ self.parse_block_token(&token, &token.source);
+ }
+ }
+ }
+ self.macro_name = None;
+ }
+ SemanticToken::BlockToken(token) => {
+ self.parse_block_token(token, &source);
+ }
}
- SynVar::MacroDefinitionTerminator => {
- macro_name = None;
+ }
+ return self.symbols;
+ }
+
+ fn parse_expression(&mut self, expression: &Expression, _source: &SourceSpan) {
+ for token in &expression.tokens {
+ let source = &token.source;
+ match &token.value {
+ ExpressionToken::IntegerToken(integer) => {
+ self.parse_integer_token(integer, source);
+ }
+ ExpressionToken::Invocation(invocation) => {
+ self.parse_invocation(invocation, source);
+ }
+ ExpressionToken::Operator(_) => (),
}
- SynVar::LabelDefinition(name) => {
- push!(name.clone(), token.source, Definition(CanFollowReference));
+ }
+ }
+
+ fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) {
+ self.record_symbol(
+ &invocation.name,
+ &source,
+ Reference,
+ );
+
+ for argument in &invocation.arguments {
+ let source = &argument.source;
+ match &argument.value {
+ InvocationArgument::IntegerToken(integer) => {
+ self.parse_integer_token(integer, &source);
+ }
+ InvocationArgument::BlockToken(block) => {
+ self.parse_block_token(block, &source);
+ }
+ InvocationArgument::Invocation(invocation) => {
+ self.parse_invocation(invocation, &source);
+ }
+ InvocationArgument::String(_) => (),
}
- SynVar::Symbol(name) => if parse_arg_list && after_separator {
- push!(name, token.source, Definition(MustPrecedeReference));
- } else {
- parse_arg_list = false;
- push!(name, token.source, Reference);
+ }
+ }
+
+ fn parse_block_token(&mut self, token: &BlockToken, source: &SourceSpan) {
+ match token {
+ BlockToken::LabelDefinition(name) => {
+ self.record_symbol(
+ &name,
+ &source,
+ Definition(CanFollowReference),
+ );
}
- SynVar::Separator => {
- after_separator = true;
- continue;
+ BlockToken::PinnedAddress(integer) => {
+ self.parse_integer_token(integer, &integer.source);
}
- SynVar::BlockOpen | SynVar::BlockClose => {
- continue;
+ BlockToken::ConditionalBlock(condition) => {
+ self.parse_integer_token(&condition.predicate, &condition.predicate.source);
+ self.parse_block_token(&condition.body, &condition.body.source);
}
- SynVar::PackedBinaryLiteral(pbl) => {
- for field in pbl.fields {
- push!(field.name.to_string(), field.source, Reference)
+ BlockToken::WordTemplate(word_template) => {
+ for field in &word_template.fields {
+ self.record_symbol(
+ &field.name.to_string(),
+ &field.source,
+ Reference,
+ );
}
}
- SynVar::Expression(expr) => {
- for token in expr.tokens {
- if let ExpressionTokenVariant::Invocation(name) = token.variant {
- push!(name, token.source, Reference);
- }
+ BlockToken::Block(tokens) => {
+ for token in tokens {
+ self.parse_block_token(token, &token.source);
}
}
- _ => ()
- };
- after_separator = false;
+ BlockToken::Invocation(invocation) => {
+ self.parse_invocation(invocation, source);
+ }
+ }
+ }
+
+ fn parse_integer_token(&mut self, token: &IntegerToken, source: &SourceSpan) {
+ match &token {
+ IntegerToken::Expression(expression) => {
+ self.parse_expression(&expression, source)
+ }
+ IntegerToken::Invocation(invocation) => {
+ self.parse_invocation(&invocation, source)
+ }
+ IntegerToken::IntegerLiteral(_) => (),
+ }
}
- return symbols;
}
+
/// Push source code to a source compilation string.
fn push_source_code(compilation: &mut String, source_file: &SourceFile) {
// Skip blank files.
diff --git a/src/formats/debug.rs b/src/formats/debug.rs
new file mode 100644
index 0000000..23fd34f
--- /dev/null
+++ b/src/formats/debug.rs
@@ -0,0 +1,18 @@
+use crate::*;
+
+
+pub fn format_debug(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {
+ let mut output = String::new();
+ for segment in segments {
+ // Find maximum width of all words in the segment.
+ let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0);
+ let address = &segment.address;
+ output.push_str(&format!("SEGMENT: 0x{address:>04x}\n"));
+ for word in &segment.words {
+ let string = word.to_string();
+ let w = width as usize;
+ output.push_str(&format!(" {string:>w$}\n"));
+ }
+ }
+ return Ok(output.as_bytes().to_vec());
+}
diff --git a/src/formats/inhx.rs b/src/formats/inhx.rs
index e83e870..fc4791b 100644
--- a/src/formats/inhx.rs
+++ b/src/formats/inhx.rs
@@ -1,10 +1,15 @@
use crate::*;
-pub fn format_inhx(words: &[Word]) -> String {
+pub fn format_inhx(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {
let mut records = Vec::new();
- for (i, chunk) in words.chunks(16).enumerate() {
- records.push(data_record(chunk, (i * 16) as u16));
+ let mut address;
+ for segment in segments {
+ address = segment.address;
+ for chunk in segment.words.chunks(16) {
+ records.push(data_record(chunk, address)?);
+ address += 16;
+ }
}
records.push(terminating_record());
@@ -12,21 +17,24 @@ pub fn format_inhx(words: &[Word]) -> String {
for record in records {
output.push_str(&record.to_string());
}
- return output;
+ return Ok(output.as_bytes().to_vec());
}
-fn data_record(words: &[Word], address: u16) -> InhxRecord {
+fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> {
+ let Ok(address) = u16::try_from(address) else {
+ return Err(FormatError::AddressTooLarge(u16::MAX as usize, address));
+ };
let mut record = InhxRecord::new();
record.byte((words.len()) as u8);
record.be_double(address);
record.byte(0x00);
for word in words {
- match word.bits <= 8 {
- true => record.byte(word.value as u8),
- false => panic!("Word '{word}' has more than 8 bits."),
- };
+ if word.value.width > 8 {
+ return Err(FormatError::WordTooWide(8, word.width, word.source.clone()));
+ }
+ record.byte(word.value.value as u8);
}
- return record;
+ return Ok(record);
}
fn terminating_record() -> InhxRecord {
diff --git a/src/formats/inhx32.rs b/src/formats/inhx32.rs
index fd7fd7b..8febeae 100644
--- a/src/formats/inhx32.rs
+++ b/src/formats/inhx32.rs
@@ -1,11 +1,19 @@
use crate::*;
-pub fn format_inhx32(words: &[Word]) -> String {
+pub fn format_inhx32(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {
let mut records = Vec::new();
- records.push(extended_linear_address(0x0000));
- for (i, chunk) in words.chunks(8).enumerate() {
- records.push(data_record(chunk, (i * 8) as u16));
+ let mut address = 0;
+ records.push(extended_linear_address(0));
+ for segment in segments {
+ if (segment.address >> 16) != (address >> 16) {
+ records.push(extended_linear_address(segment.address));
+ }
+ address = segment.address;
+ for chunk in segment.words.chunks(8) {
+ records.push(data_record(chunk, address)?);
+ address += 8;
+ }
}
records.push(terminating_record());
@@ -13,24 +21,29 @@ pub fn format_inhx32(words: &[Word]) -> String {
for record in records {
output.push_str(&record.to_string());
}
- return output;
+ return Ok(output.as_bytes().to_vec());
}
-fn data_record(words: &[Word], address: u16) -> InhxRecord {
+fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> {
+ let Ok(address) = u32::try_from(address * 2) else {
+ return Err(FormatError::AddressTooLarge(u32::MAX as usize / 2, address));
+ };
+ let address = address as u16;
let mut record = InhxRecord::new();
record.byte((words.len() * 2) as u8);
- record.be_double(address * 2);
+ record.be_double(address);
record.byte(0x00);
for word in words {
- match word.bits <= 16 {
- true => record.le_double(word.value as u16),
- false => panic!("Word '{word}' has more than 16 bits."),
- };
+ if word.value.width > 16 {
+ return Err(FormatError::WordTooWide(16, word.width, word.source.clone()));
+ }
+ record.le_double(word.value.value as u16);
}
- return record;
+ return Ok(record);
}
-fn extended_linear_address(address: u16) -> InhxRecord {
+fn extended_linear_address(address: usize) -> InhxRecord {
+ let address = (address >> 16) as u16;
let mut record = InhxRecord::new();
record.byte(0x02);
record.be_double(0x0000);
diff --git a/src/formats/mod.rs b/src/formats/mod.rs
index 82f19f1..132001a 100644
--- a/src/formats/mod.rs
+++ b/src/formats/mod.rs
@@ -1,8 +1,78 @@
mod inhx;
mod inhx32;
+mod raw;
+mod debug;
pub use inhx::*;
pub use inhx32::*;
+pub use raw::*;
+pub use debug::*;
+
+use crate::*;
+
+use log::*;
+
+
+#[derive(Clone, Copy, PartialEq)]
+pub enum Format {
+ Debug,
+ Inhx,
+ Inhx32,
+ Raw,
+ Source,
+}
+
+impl Format {
+ pub fn from_str(string: &str) -> Self {
+ match string {
+ "debug" => Self::Debug,
+ "inhx" => Self::Inhx,
+ "inhx32" => Self::Inhx32,
+ "raw" => Self::Raw,
+ "source" => Self::Source,
+ _ => fatal!("Unknown format '{string}', expected 'debug', 'inhx', 'inhx32', 'raw', or 'source'. "),
+ }
+ }
+}
+
+impl std::fmt::Display for Format {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ let string = match self {
+ Self::Debug => "debug",
+ Self::Inhx => "inhx",
+ Self::Inhx32 => "inhx32",
+ Self::Raw => "raw",
+ Self::Source => "source",
+ };
+ write!(f, "{string}")
+ }
+}
+
+
+pub enum FormatError {
+ /// (expected, received)
+ AddressTooLarge(usize, usize),
+ /// (expected, received)
+ WordTooWide(u32, u32, SourceSpan),
+ ///
+ ExpectedFixedWidth,
+}
+
+pub fn report_format_error(error: &FormatError, format: Format, source_code: &str) {
+ match error {
+ FormatError::AddressTooLarge(expected, received) =>
+ error!("The {format} format requires that addresses do not exceed {expected}, but the address {received} was reached"),
+ FormatError::WordTooWide(expected, received, source) => {
+ let message = format!("The {format} format requires that words are no wider than {expected} bits, but a {received} bit word was found");
+ let context = Context { source_code, source };
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ FormatError::ExpectedFixedWidth =>
+ error!("The {format} format requires all words to be the same width"),
+ }
+ std::process::exit(1);
+}
+
pub struct InhxRecord {
@@ -43,3 +113,20 @@ impl InhxRecord {
format!(":{output}{checksum:0>2X}\n")
}
}
+
+
+pub fn calculate_fixed_width(segments: &[Segment]) -> Option<u32> {
+ let mut width = None;
+ for segment in segments {
+ for word in &segment.words {
+ let word_width = word.value.width;
+ match width {
+ Some(width) => if word_width != width {
+ return None;
+ }
+ None => width = Some(word_width),
+ }
+ }
+ }
+ return width.or(Some(0));
+}
diff --git a/src/formats/raw.rs b/src/formats/raw.rs
new file mode 100644
index 0000000..ecc6473
--- /dev/null
+++ b/src/formats/raw.rs
@@ -0,0 +1,29 @@
+use crate::*;
+
+
+pub fn format_raw(segments: &[Segment], width: Option<u32>) -> Result<Vec<u8>, FormatError> {
+ let Some(width) = width.or_else(|| calculate_fixed_width(&segments)) else {
+ return Err(FormatError::ExpectedFixedWidth);
+ };
+
+ let mut address = 0;
+ let bytes_per_word = ((width + 7) / 8) as usize;
+ let mut bytes = Vec::new();
+
+ for segment in segments {
+ // Pad to the segment start address.
+ let padding = segment.address.saturating_sub(address);
+ bytes.resize(bytes.len() + (padding * bytes_per_word), 0);
+ for word in &segment.words {
+ // Decompose word value into bytes.
+ let value = word.value.value;
+ for i in (0..bytes_per_word).rev() {
+ let byte = (value >> (i*8) & 0xff) as u8;
+ bytes.push(byte);
+ }
+ address += 1;
+ }
+ }
+
+ return Ok(bytes);
+}
diff --git a/src/lib.rs b/src/lib.rs
index d572185..b429646 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,13 +1,14 @@
-mod compiler;
-mod parsers;
-mod report;
-mod tokens;
+mod stages;
+mod types;
mod formats;
+mod compiler;
-pub use compiler::*;
-pub use parsers::*;
-pub use report::*;
-pub use tokens::*;
+pub use stages::*;
+pub use types::*;
pub use formats::*;
+pub use compiler::*;
+
+use assembler::{Context, Tracked, SourceSpan, report_source_issue};
+use log::LogLevel;
-pub use assembler::*;
+use std::path::{PathBuf};
diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs
deleted file mode 100644
index 61e1a84..0000000
--- a/src/parsers/assembler.rs
+++ /dev/null
@@ -1,290 +0,0 @@
-use crate::*;
-use AssemblerErrorVariant as ErrVar;
-
-use indexmap::IndexMap;
-
-
-static mut ID: usize = 0;
-macro_rules! new_id {
- () => { unsafe {
- let id = ID;
- ID += 1;
- id
- }};
-}
-
-
-impl SemanticProgram {
- pub fn assemble(&self) -> Vec<AssembledToken> {
- let environment = Environment {
- macro_definitions: &self.macro_definitions,
- label_definitions: &self.label_definitions,
- arguments: &IndexMap::new(),
- id: new_id!(),
- };
- let mut assembled_tokens = Vec::new();
- for token in &self.body {
- let tokens = environment.reify_semantic_token(token);
- assembled_tokens.extend(tokens);
- }
- return assembled_tokens;
- }
-}
-
-
-pub struct Environment<'a> {
- pub macro_definitions: &'a IndexMap<String, MacroDefinition>,
- pub label_definitions: &'a IndexMap<String, LabelDefinition>,
- pub arguments: &'a IndexMap<String, Argument>,
- pub id: usize,
-}
-
-impl<'a> Environment<'a> {
- // This is only ever called for the highest level body tokens, never for invocations.
- fn reify_semantic_token(&self, token: &SemanticToken) -> Vec<AssembledToken> {
- let mut assembled_tokens = Vec::new();
- match token {
- SemanticToken::Word(pbl) => {
- let word = self.reify_packed_binary_literal(pbl);
- assembled_tokens.push(AssembledToken::Word(word));
- }
- SemanticToken::Invocation(invocation) => {
- match self.reify_invocation(invocation) {
- Ok(argument) => match argument {
- Argument::Block(block) => assembled_tokens.extend(block),
- Argument::Integer(_) => {
- let variant = AssemblerErrorVariant::NotABlock;
- let source = invocation.source.clone();
- let error = AssemblerError { source, variant };
- assembled_tokens.push(AssembledToken::Error(error))
- }
- }
- Err(error) => assembled_tokens.push(AssembledToken::Error(error)),
- }
- }
- SemanticToken::LabelDefinition(definition) => {
- assembled_tokens.push(AssembledToken::LabelDefinition(definition.clone()));
- }
- SemanticToken::PinnedAddress(address) => {
- assembled_tokens.push(AssembledToken::PinnedAddress(address.clone()));
- }
- SemanticToken::Error(_) => (),
- }
- return assembled_tokens;
- }
-
- fn reify_packed_binary_literal(&self, pbl: &PackedBinaryLiteral) -> AssembledWord {
- let mut assembled_fields = Vec::new();
- let mut errors = Vec::new();
- for field in &pbl.fields {
- let name = field.name.to_string();
- match self.reify_integer_reference(&name, &field.source) {
- Ok(value) => assembled_fields.push(
- AssembledField {
- source: field.source.clone(),
- value,
- bits: field.bits,
- shift: field.shift,
- }
- ),
- Err(error) => errors.push(error),
- };
- }
- let source = pbl.source.clone();
- let value = pbl.value;
- let bits = pbl.bits;
- AssembledWord { source, bits, fields: assembled_fields, value, errors }
- }
-
- fn reify_integer_reference(&self, name: &str, source: &SourceSpan) -> Result<IntegerArgument, AssemblerError> {
- match self.reify_reference(name, source)? {
- Argument::Integer(integer) => Ok(integer),
- Argument::Block(_) => Err(
- AssemblerError {
- source: source.clone(),
- variant: ErrVar::NotAnInteger,
- }
- ),
- }
- }
-
- fn reify_reference(&self, name: &str, source: &SourceSpan) -> Result<Argument, AssemblerError> {
- let source = source.clone();
- if let Some(argument) = self.arguments.get(name) {
- Ok(argument.clone())
- } else if let Some(definition) = self.macro_definitions.get(name) {
- self.reify_value(&definition.value)
- } else if let Some(label) = self.label_definitions.get(name) {
- let name = Tracked::from(self.tag_label_name(&label.name), source);
- Ok(Argument::Integer(IntegerArgument::LabelReference(name)))
- } else {
- let variant = ErrVar::DefinitionNotFound(name.to_string());
- Err(AssemblerError { source, variant })
- }
- }
-
- fn tag_label_name(&self, name: &str) -> String {
- match name.contains(':') {
- true => format!("{name}:{}", self.id),
- false => name.to_string(),
- }
- }
-
- fn reify_value(&self, value: &Value) -> Result<Argument, AssemblerError> {
- match value {
- Value::Integer(integer) => {
- let value = match &integer {
- Integer::Literal(integer) => {
- IntegerArgument::Integer(integer.clone())
- }
- Integer::Expression(expr) => {
- let expr = self.reify_constant_expression(expr)?;
- IntegerArgument::Expression(expr)
- }
- Integer::LabelReference(name) => {
- let name = Tracked::from(self.tag_label_name(name), name.source.clone());
- IntegerArgument::LabelReference(name)
- }
- Integer::String(string) => {
- IntegerArgument::String(string.clone())
- }
- };
- Ok(Argument::Integer(value))
- }
- Value::Block(block) => {
- let mut assembled_tokens = Vec::new();
- for token in block {
- match &token {
- SemanticToken::Word(pbl) => {
- let word = self.reify_packed_binary_literal(pbl);
- assembled_tokens.push(AssembledToken::Word(word));
- }
- SemanticToken::Invocation(invocation) => {
- match self.reify_invocation(invocation)? {
- Argument::Block(block) => assembled_tokens.extend(block),
- Argument::Integer(_) => {
- let source = invocation.source.clone();
- let variant = AssemblerErrorVariant::IntegerInBlock;
- return Err(AssemblerError { source, variant});
- }
- }
- }
- SemanticToken::LabelDefinition(definition) => {
- let mut definition = definition.clone();
- definition.name.push_str(&format!(":{}", self.id));
- let token = AssembledToken::LabelDefinition(definition);
- assembled_tokens.push(token);
- }
- SemanticToken::PinnedAddress(address) => {
- let token = AssembledToken::PinnedAddress(address.to_owned());
- assembled_tokens.push(token);
- }
- SemanticToken::Error(_) => (),
- }
- }
- Ok(Argument::Block(assembled_tokens))
- }
- Value::Invocation(invocation) => {
- self.reify_invocation(invocation)
- }
- }
- }
-
- fn reify_invocation(&self, invocation: &Invocation) -> Result<Argument, AssemblerError> {
- macro_rules! err {
- ($variant:expr) => { Err(AssemblerError {
- source: invocation.source.clone(), variant: $variant
- }) };
- }
- if let Some(argument) = self.arguments.get(&invocation.name) {
- let expected = 0;
- let received = invocation.arguments.len();
- if received != expected {
- return err!(ErrVar::IncorrectArgumentCount(expected, received));
- }
- Ok(argument.clone())
- } else if let Some(definition) = self.macro_definitions.get(&invocation.name) {
- // Check that the correct number of arguments were provided.
- let received = invocation.arguments.len();
- let expected = definition.arguments.len();
- if received != expected {
- return err!(ErrVar::IncorrectArgumentCount(expected, received));
- }
- let mut arguments = IndexMap::new();
- for (i, argument) in invocation.arguments.iter().enumerate() {
- // Check that the correct types of arguments were provided.
- let arg_invocation = self.reify_value(&argument.value)?;
- let arg_invocation_type = match &arg_invocation {
- Argument::Integer(_) => ArgumentVariant::Integer,
- Argument::Block(_) => ArgumentVariant::Block,
- };
- let arg_definition_type = definition.arguments[i].variant;
- if arg_invocation_type != arg_definition_type {
- let variant = ErrVar::IncorrectArgumentType(
- arg_definition_type, arg_invocation_type
- );
- return Err(AssemblerError { source: argument.source.clone(), variant });
- }
- let name = definition.arguments[i].name.clone();
- arguments.insert(name, arg_invocation);
- }
- let environment = Environment {
- macro_definitions: &self.macro_definitions,
- label_definitions: &self.label_definitions,
- arguments: &arguments,
- id: new_id!(),
- };
- environment.reify_value(&definition.value)
- } else if let Some(label) = self.label_definitions.get(&invocation.name) {
- let expected = 0;
- let received = invocation.arguments.len();
- if received != expected {
- return err!(ErrVar::IncorrectArgumentCount(expected, received));
- }
- let name = Tracked::from(self.tag_label_name(&label.name), label.source.clone());
- Ok(Argument::Integer(IntegerArgument::LabelReference(name)))
- } else {
- err!(ErrVar::DefinitionNotFound(invocation.name.to_string()))
- }
- }
-
- fn reify_constant_expression(&self, expr: &Expression) -> Result<AssembledExpression, AssemblerError> {
- use ExpressionTokenVariant as ExprVar;
-
- let mut assembled_tokens = Vec::new();
- for token in &expr.tokens {
- let assembled_token = match &token.variant {
- ExprVar::Literal(value) => {
- let source = token.source.clone();
- let integer = TrackedInteger { source, value: *value };
- AssembledExpressionToken::Integer(integer)
- }
- ExprVar::Operator(operator) => {
- AssembledExpressionToken::Operator(*operator)
- }
- ExprVar::Invocation(name) => {
- match self.reify_integer_reference(&name, &token.source)? {
- IntegerArgument::LabelReference(name) => {
- AssembledExpressionToken::LabelReference(name)
- }
- IntegerArgument::Integer(integer) => {
- AssembledExpressionToken::Integer(integer)
- }
- IntegerArgument::Expression(expr) => {
- AssembledExpressionToken::Expression(Box::new(expr))
- },
- IntegerArgument::String(string) => {
- let source = string.source.clone();
- let variant = AssemblerErrorVariant::StringInExpression;
- return Err(AssemblerError { source, variant })
- }
- }
- }
- ExprVar::Error(_) => continue,
- };
- assembled_tokens.push(assembled_token);
- }
- Ok(AssembledExpression { source: expr.source.clone(), tokens: assembled_tokens })
- }
-}
-
diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs
deleted file mode 100644
index ed16e22..0000000
--- a/src/parsers/bytecode.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-use crate::*;
-
-use std::collections::HashMap;
-
-
-pub struct BytecodeGenerator<'a> {
- tokens: &'a [AssembledToken],
- addresses: HashMap<String, Tracked<usize>>,
- words: Vec<Word>,
- errors: Vec<BytecodeError>,
-}
-
-impl<'a> BytecodeGenerator<'a> {
- pub fn new(tokens: &'a [AssembledToken]) -> Self {
- Self {
- tokens,
- addresses: HashMap::new(),
- words: Vec::new(),
- errors: Vec::new(),
- }
- }
-
- pub fn generate(mut self) -> Bytecode {
- self.calculate_addresses();
- for token in self.tokens {
- match token {
- AssembledToken::Word(assembled_word) => {
- self.assemble_word(assembled_word);
- }
- AssembledToken::PinnedAddress(pinned) => {
- if self.words.len() > pinned.address {
- let variant = BytecodeErrorVariant::PinnedAddressBacktrack(
- pinned.address, self.words.len());
- let source = pinned.source.clone();
- self.errors.push(BytecodeError { source, variant });
- } else {
- self.words.resize(pinned.address, Word { bits: 0, value: 0});
- }
- }
- AssembledToken::LabelDefinition(_) => (),
- AssembledToken::Error(_) => (),
- }
- }
-
- return Bytecode {
- words: self.words,
- errors: self.errors,
- }
- }
-
- fn calculate_addresses(&mut self) {
- let mut i = 0;
- for token in self.tokens {
- match token {
- AssembledToken::LabelDefinition(definition) => {
- let address = Tracked::from(i, definition.source.clone());
- if let Some(_) = self.addresses.insert(definition.name.clone(), address) {
- let name = definition.name.clone();
- let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name);
- let source = definition.source.clone();
- self.errors.push(BytecodeError { source, variant });
- }
- }
- AssembledToken::Word(word) => {
- i += word.count();
- }
- AssembledToken::PinnedAddress(pinned) => {
- i = pinned.address;
- }
- AssembledToken::Error(_) => (),
- }
- }
- }
-
- fn resolve_expression(&mut self, expr: &AssembledExpression) -> isize {
- let mut stack = Vec::new();
- macro_rules! push {
- ($value:expr) => { stack.push($value) };
- }
- macro_rules! pop {
- ($name:ident) => { let $name = match stack.pop() {
- Some(value) => value,
- None => {
- let variant = BytecodeErrorVariant::StackUnderflow;
- self.errors.push(BytecodeError { source: expr.source.clone(), variant });
- return 0;
- },
- }; };
- }
- macro_rules! truth {
- ($bool:expr) => { match $bool { true => 1, false => 0 } };
- }
-
- for token in &expr.tokens {
- match &token {
- AssembledExpressionToken::Integer(value) => {
- push!(value.value)
- }
- AssembledExpressionToken::LabelReference(name) => {
- push!(self.resolve_label_reference(name))
- }
- AssembledExpressionToken::Expression(expr) => {
- push!(self.resolve_expression(expr))
- }
- AssembledExpressionToken::Operator(operator) => match operator {
- Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) },
- Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) },
- Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) },
- Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) },
- Operator::LessThanEqual => { pop!(b); pop!(a); push!(truth!(a <= b)) },
- Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) },
- Operator::Add => { pop!(b); pop!(a); push!(a + b) },
- Operator::Subtract => { pop!(b); pop!(a); push!(a - b) },
- Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) },
- Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) },
- Operator::And => { pop!(b); pop!(a); push!(a & b) },
- Operator::Or => { pop!(b); pop!(a); push!(a | b) },
- Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) },
- Operator::Not => { pop!(a); push!(!a) },
- }
- }
- }
-
- let variant = match stack.len() {
- 0 => BytecodeErrorVariant::NoReturnValue,
- 1 => return stack[0],
- _ => BytecodeErrorVariant::MultipleReturnValues,
- };
- self.errors.push(BytecodeError { source: expr.source.clone(), variant});
- 0
- }
-
- fn resolve_label_reference(&mut self, name: &Tracked<String>) -> isize {
- if let Some(address) = self.addresses.get(&name.value) {
- address.value as isize
- } else {
- let variant = BytecodeErrorVariant::DefinitionNotFound(name.value.clone());
- self.errors.push(BytecodeError { source: name.source.clone(), variant });
- 0
- }
- }
-
- fn assemble_word(&mut self, assembled_word: &AssembledWord) {
- let mut field_values = Vec::new();
- for field in &assembled_word.fields {
- match &field.value {
- IntegerArgument::Expression(expr) => {
- let source = expr.source.clone();
- let value = self.resolve_expression(expr);
- field_values.push(vec![Tracked::from(value, source)])
- }
- IntegerArgument::LabelReference(name) => {
- let source = name.source.clone();
- let value = self.resolve_label_reference(name);
- field_values.push(vec![Tracked::from(value, source)])
- }
- IntegerArgument::Integer(integer) => {
- let source = integer.source.clone();
- let value = integer.value;
- field_values.push(vec![Tracked::from(value, source)])
- }
- IntegerArgument::String(string) => {
- let values = string.chars.iter()
- .map(|c| Tracked::from(c.value as isize, c.source.clone()))
- .collect();
- field_values.push(values);
- }
- };
- }
- for i in 0..assembled_word.count() {
- let mut value = assembled_word.value;
- for (f, field) in assembled_word.fields.iter().enumerate() {
- let (field_value, source) = match field_values[f].get(i) {
- Some(tracked) => (tracked.value, Some(tracked.source.clone())),
- None => (0, None),
- };
- let bitcount = match field_value {
- 0 => 0,
- _ => (field_value.ilog2() + 1) as usize,
- };
- if field.bits < bitcount {
- let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount);
- self.errors.push(BytecodeError { source: source.unwrap(), variant });
- } else {
- value |= (field_value << field.shift) as usize;
- }
- }
- self.words.push(Word { bits: assembled_word.bits, value });
- }
- }
-}
diff --git a/src/parsers/expression.rs b/src/parsers/expression.rs
deleted file mode 100644
index e938881..0000000
--- a/src/parsers/expression.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-use crate::*;
-
-
-pub fn parse_constant_expression(mut t: Tokeniser, source: SourceSpan) -> Expression {
- use ExpressionTokenVariant as TokenVar;
- use ExpressionParseError as ParseError;
-
- let mut tokens = Vec::new();
-
- loop {
- t.eat_whitespace();
- t.mark_start();
- let token = t.eat_token();
- if token.is_empty() {
- break;
- }
-
- let variant = match token.as_str() {
- "=" => TokenVar::Operator(Operator::Equal),
- "!=" => TokenVar::Operator(Operator::NotEqual),
- "<" => TokenVar::Operator(Operator::LessThan),
- ">" => TokenVar::Operator(Operator::GreaterThan),
- "<=" => TokenVar::Operator(Operator::LessThanEqual),
- ">=" => TokenVar::Operator(Operator::GreaterThanEqual),
- "+" => TokenVar::Operator(Operator::Add),
- "-" => TokenVar::Operator(Operator::Subtract),
- "<<" => TokenVar::Operator(Operator::LeftShift),
- ">>" => TokenVar::Operator(Operator::RightShift),
- "&" => TokenVar::Operator(Operator::And),
- "|" => TokenVar::Operator(Operator::Or),
- "^" => TokenVar::Operator(Operator::Xor),
- "~" => TokenVar::Operator(Operator::Not),
- _ => if let Some(stripped) = token.strip_prefix("0x") {
- match usize::from_str_radix(stripped, 16) {
- Ok(value) => TokenVar::Literal(value as isize),
- Err(_) => TokenVar::Error(
- ParseError::InvalidHexadecimalLiteral(stripped.to_string())),
- }
- } else {
- match usize::from_str_radix(&token, 10) {
- Ok(value) => TokenVar::Literal(value as isize),
- Err(_) => TokenVar::Invocation(token.to_string()),
- }
- }
- };
-
- let source = t.get_source();
- tokens.push(ExpressionToken { source, variant });
- }
-
- return Expression { source, tokens };
-}
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs
deleted file mode 100644
index da2c23a..0000000
--- a/src/parsers/mod.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-mod expression;
-mod packed_binary_literal;
-
-pub use expression::*;
-pub use packed_binary_literal::*;
-
-mod syntactic;
-mod semantic;
-mod assembler;
-mod bytecode;
-
-pub use syntactic::*;
-pub use semantic::*;
-pub use assembler::*;
-pub use bytecode::*;
diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs
deleted file mode 100644
index 18f8da7..0000000
--- a/src/parsers/packed_binary_literal.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-use crate::*;
-
-
-/// t is a Tokeniser over the characters of the PBL, excluding the leading hash.
-pub fn parse_packed_binary_literal(mut t: Tokeniser, source: SourceSpan) -> PackedBinaryLiteral {
- use PackedBinaryLiteralParseError as ParseError;
- use PackedBinaryLiteralParseErrorVariant as ParseErrorVar;
-
- let mut value = 0;
- let mut bits = 0;
- let mut field_bits = 0;
- let mut name = '\0';
- let mut fields: Vec<BitField> = Vec::new();
- let mut errors: Vec<ParseError> = Vec::new();
-
- macro_rules! push_field {
- () => {
- if fields.iter().any(|f| f.name == name) {
- let variant = ParseErrorVar::DuplicateFieldName(name);
- errors.push(ParseError { source: t.get_source(), variant });
- } else {
- fields.push(BitField { name, source: t.get_source(), bits: field_bits, shift: 0 });
- }
- };
- }
-
- while let Some(c) = t.eat_char() {
- // Ignore underscores.
- if c == '_' {
- t.mark.undo();
- continue;
- }
-
- // Add a bit to the value;
- value <<= 1;
- bits += 1;
- for field in &mut fields {
- field.shift += 1;
- }
-
- // Extend the current field.
- if c == name {
- field_bits += 1;
- continue;
- }
-
- // Commit the current field.
- if field_bits > 0 {
- t.mark_end_prev();
- push_field!();
- field_bits = 0;
- name = '\0';
- }
-
- // Parse bit literals.
- if c == '0' {
- continue;
- }
- if c == '1' {
- value |= 1;
- continue;
- }
-
- t.mark_start_prev();
- if c.is_alphabetic() {
- name = c;
- field_bits = 1;
- continue;
- } else {
- let source = t.get_source();
- let variant = ParseErrorVar::InvalidCharacter(c);
- errors.push(ParseError { source, variant });
- }
- }
-
- // Commit the final field.
- for field in &mut fields {
- field.shift += 1;
- }
- if field_bits > 0 {
- push_field!();
- }
-
- PackedBinaryLiteral { source, bits, value, fields, errors }
-}
diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs
deleted file mode 100644
index 00cfc80..0000000
--- a/src/parsers/semantic.rs
+++ /dev/null
@@ -1,352 +0,0 @@
-use crate::*;
-use SyntacticTokenVariant as SynVar;
-
-use std::collections::VecDeque;
-
-use indexmap::IndexMap;
-
-
-macro_rules! fn_is_syn_variant {
- ($name:ident, $variant:ty) => { paste::paste! {
- fn [< is_ $name >](token: &SyntacticToken) -> bool {
- match token.variant { $variant => true, _ => false, }
- } } }; }
-fn_is_syn_variant!(block_open, SyntacticTokenVariant::BlockOpen);
-fn_is_syn_variant!(block_close, SyntacticTokenVariant::BlockClose);
-fn_is_syn_variant!(separator, SyntacticTokenVariant::Separator);
-fn_is_syn_variant!(terminator, SyntacticTokenVariant::MacroDefinitionTerminator);
-
-
-pub struct SemanticParser {
- tokens: Tokens,
- macro_definitions: IndexMap<String, MacroDefinition>,
- label_definitions: IndexMap<String, LabelDefinition>,
- body: Vec<SemanticToken>,
-}
-
-impl SemanticParser {
- pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self {
- // Gather all labels ahead of time.
- let mut label_definitions = IndexMap::new();
- for token in &syntactic_tokens {
- if let SyntacticTokenVariant::LabelDefinition(name) = &token.variant {
- let definition = LabelDefinition {
- source: token.source.clone(),
- name: name.clone(),
- };
- let None = label_definitions.insert(name.to_string(), definition) else {
- unreachable!("Duplicate definition for label {name:?}");
- };
- }
- }
- Self {
- tokens: Tokens::new(syntactic_tokens),
- macro_definitions: IndexMap::new(),
- label_definitions,
- body: Vec::new(),
- }
- }
-
- pub fn parse(mut self) -> SemanticProgram {
- while let Some(syn) = self.tokens.pop() {
- match syn.variant {
- SynVar::MacroDefinition(name) => {
- let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else {
- let variant = SemanticParseErrorVariant::UnterminatedMacroDefinition(name);
- let error = SemanticParseError { source: syn.source, variant };
- self.body.push(SemanticToken::Error(error));
- break;
- };
- let definition = MacroDefinitionParser::new(syn.source, definition_tokens).parse();
- let None = self.macro_definitions.insert(name.clone(), definition) else {
- unreachable!("Duplicate definition for macro {name}");
- };
- }
- SynVar::LabelDefinition(name) => {
- let label_definition = LabelDefinition { source: syn.source, name };
- self.body.push(SemanticToken::LabelDefinition(label_definition));
- }
- SynVar::PinnedAddress(address) => {
- let pinned_address = PinnedAddress { source: syn.source, address };
- self.body.push(SemanticToken::PinnedAddress(pinned_address));
- }
- SynVar::Symbol(name) => {
- let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse();
- self.body.push(SemanticToken::Invocation(invocation));
- }
- SynVar::PackedBinaryLiteral(pbl) => {
- self.body.push(SemanticToken::Word(pbl));
- }
- _ => {
- let variant = SemanticParseErrorVariant::InvalidToken;
- let error = SemanticParseError { source: syn.source, variant };
- self.body.push(SemanticToken::Error(error));
- }
- }
- }
-
- SemanticProgram {
- macro_definitions: self.macro_definitions,
- label_definitions: self.label_definitions,
- body: self.body,
- }
- }
-}
-
-
-pub struct MacroDefinitionParser {
- source: SourceSpan,
- tokens: Tokens,
- arguments: Vec<ArgumentDefinition>,
- errors: Vec<SemanticParseError>,
-}
-
-impl MacroDefinitionParser {
- pub fn new(source: SourceSpan, tokens: Tokens) -> Self {
- Self {
- tokens,
- source,
- arguments: Vec::new(),
- errors: Vec::new(),
- }
- }
-
- pub fn parse(mut self) -> MacroDefinition {
- while let Some(definition) = self.parse_argument_definition() {
- self.arguments.push(definition)
- }
- MacroDefinition {
- value: self.parse_body(),
- source: self.source,
- arguments: self.arguments,
- errors: self.errors,
- }
- }
-
- fn parse_argument_definition(&mut self) -> Option<ArgumentDefinition> {
- // Only continue if the first token is a separator.
- self.tokens.pop_if(is_separator)?;
-
- // Pop argument tokens.
- let is_block = match self.tokens.pop_if(is_block_open) {
- Some(_) => true,
- None => false,
- };
- let token = self.tokens.pop();
- if is_block {
- self.tokens.pop_if(is_block_close);
- }
- // Parse argument token.
- let token = token?;
- let source = token.source;
- if let SynVar::Symbol(name) = token.variant {
- let variant = match is_block {
- true => ArgumentVariant::Block,
- false => ArgumentVariant::Integer,
- };
- Some(ArgumentDefinition { name, source, variant })
- } else {
- let variant = SemanticParseErrorVariant::InvalidToken;
- self.errors.push(SemanticParseError { source, variant});
- None
- }
- }
-
- fn parse_body(&mut self) -> Value {
- // Attempt to parse an Integer.
- if self.tokens.len() == 1 {
- let token = self.tokens.pop().unwrap();
- match token.variant {
- SynVar::IntegerLiteral(value) => {
- let integer = TrackedInteger { source: token.source, value };
- return Value::Integer(Integer::Literal(integer));
- }
- SynVar::Expression(expr) => {
- return Value::Integer(Integer::Expression(expr));
- }
- _ => (),
- }
- self.tokens.unpop(token);
- }
- // Parse a Block.
- let mut block = BlockParser::new(self.tokens.take()).parse();
- // If the block contains a single invocation, unwrap it.
- if block.len() == 1 {
- match block.pop() {
- Some(SemanticToken::Invocation(invocation)) => return Value::Invocation(invocation),
- Some(other) => block.push(other),
- None => (),
- };
- }
- return Value::Block(block);
- }
-}
-
-
-/// Parse an entire block, excluding delimiters.
-pub struct BlockParser {
- tokens: Tokens,
- semantic_tokens: Vec<SemanticToken>,
-}
-
-impl BlockParser {
- pub fn new(tokens: Tokens) -> Self {
- Self { tokens, semantic_tokens: Vec::new() }
- }
-
- pub fn parse(mut self) -> Vec<SemanticToken> {
- while let Some(token) = self.tokens.pop() {
- let source = token.source;
- match token.variant {
- SynVar::Symbol(name) => {
- let invocation = InvocationParser::new(name, source, &mut self.tokens).parse();
- self.semantic_tokens.push(SemanticToken::Invocation(invocation));
- }
- SynVar::PackedBinaryLiteral(pbl) => {
- self.semantic_tokens.push(SemanticToken::Word(pbl));
- }
- SynVar::LabelDefinition(name) => {
- let label_definition = LabelDefinition { source, name };
- self.semantic_tokens.push(SemanticToken::LabelDefinition(label_definition));
- }
- _ => {
- let variant = SemanticParseErrorVariant::InvalidToken;
- let error = SemanticParseError { source, variant };
- self.semantic_tokens.push(SemanticToken::Error(error));
- }
- }
- }
- return self.semantic_tokens;
- }
-}
-
-
-struct InvocationParser<'a> {
- name: String,
- source: SourceSpan,
- tokens: &'a mut Tokens,
- arguments: Vec<ArgumentInvocation>,
- errors: Vec<SemanticParseError>,
-}
-
-impl<'a> InvocationParser<'a> {
- pub fn new(name: String, source: SourceSpan, tokens: &'a mut Tokens) -> Self {
- Self { name, source, tokens, arguments: Vec::new(), errors: Vec::new() }
- }
-
- pub fn parse(mut self) -> Invocation {
- while let Some(argument) = self.parse_invocation_argument() {
- self.arguments.push(argument);
- }
- Invocation {
- name: self.name,
- source: self.source,
- arguments: self.arguments,
- errors: self.errors,
- }
- }
-
- fn parse_invocation_argument(&mut self) -> Option<ArgumentInvocation> {
- // Only continue if the first token is a separator.
- self.tokens.pop_if(is_separator)?;
-
- if let Some(block_open) = self.tokens.pop_if(is_block_open) {
- let source = block_open.source;
- let mut depth = 1;
- let is_matching_block_close = |token: &SyntacticToken| {
- match token.variant {
- SyntacticTokenVariant::BlockOpen => {
- depth += 1; false }
- SyntacticTokenVariant::BlockClose => {
- depth -= 1; depth == 0 }
- _ => false,
- }
- };
- if let Ok(block_tokens) = self.tokens.pull_until(is_matching_block_close) {
- let block = BlockParser::new(block_tokens).parse();
- Some(ArgumentInvocation { source, value: Value::Block(block) })
- } else {
- let variant = SemanticParseErrorVariant::UnterminatedBlock;
- self.errors.push(SemanticParseError { source, variant });
- None
- }
- } else {
- let token = self.tokens.pop()?;
- let source = token.source;
- match token.variant {
- SynVar::Symbol(name) => {
- let arguments = Vec::new();
- let errors = Vec::new();
- let invocation = Invocation { source: source.clone(), name, arguments, errors };
- let value = Value::Invocation(invocation);
- Some(ArgumentInvocation { source, value })
- }
- SynVar::IntegerLiteral(value) => {
- let integer = TrackedInteger { source: source.clone(), value };
- let value = Value::Integer(Integer::Literal(integer));
- Some(ArgumentInvocation { source, value })
- }
- SynVar::String(string) => {
- let value = Value::Integer(Integer::String(string));
- Some(ArgumentInvocation { source, value })
- }
- SynVar::Expression(expr) => {
- let value = Value::Integer(Integer::Expression(expr));
- Some(ArgumentInvocation { source, value })
- }
- _ => {
- let variant = SemanticParseErrorVariant::InvalidToken;
- self.errors.push(SemanticParseError { source, variant });
- None
- }
- }
- }
- }
-}
-
-
-pub struct Tokens {
- tokens: VecDeque<SyntacticToken>,
-}
-
-impl Tokens {
- pub fn new<T: Into<VecDeque<SyntacticToken>>>(tokens: T) -> Self {
- Self { tokens: tokens.into() }
- }
-
- pub fn pop(&mut self) -> Option<SyntacticToken> {
- self.tokens.pop_front()
- }
-
- pub fn pop_if(&mut self, predicate: fn(&SyntacticToken) -> bool) -> Option<SyntacticToken> {
- match predicate(self.tokens.front()?) {
- true => self.tokens.pop_front(),
- false => None,
- }
- }
-
- pub fn unpop(&mut self, token: SyntacticToken) {
- self.tokens.push_front(token);
- }
-
- /// Pull tokens until the predicate returns true, otherwise return Err.
- pub fn pull_until(&mut self, mut predicate: impl FnMut(&SyntacticToken) -> bool) -> Result<Self, ()> {
- let mut output = VecDeque::new();
- while let Some(token) = self.tokens.pop_front() {
- match predicate(&token) {
- true => return Ok(Self::new(output)),
- false => output.push_back(token),
- };
- }
- return Err(());
- }
-
- pub fn take(&mut self) -> Self {
- Self { tokens: std::mem::take(&mut self.tokens) }
- }
-
- pub fn len(&self) -> usize {
- self.tokens.len()
- }
-}
-
diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs
deleted file mode 100644
index f3fcec1..0000000
--- a/src/parsers/syntactic.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-use crate::*;
-
-
-pub struct SyntacticParser {
- tokeniser: Tokeniser,
- tokens: Vec<SyntacticToken>,
- /// The name of the macro being parsed.
- macro_name: Option<String>,
- /// The name of the most recent label.
- label_name: String,
-}
-
-impl SyntacticParser {
- pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
- let mut tokeniser = Tokeniser::new(source_code, path);
- tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']);
- Self {
- tokeniser,
- tokens: Vec::new(),
- macro_name: None,
- label_name: String::new(),
- }
- }
-
- pub fn parse(mut self) -> Vec<SyntacticToken> {
- use SyntacticTokenVariant as SynVar;
- use SyntacticParseError as SynErr;
- let t = &mut self.tokeniser;
-
- loop {
- t.eat_whitespace();
- t.mark_start();
- let Some(c) = t.eat_char() else { break };
- let variant = match c {
- ':' => SynVar::Separator,
- '{' => SynVar::BlockOpen,
- '}' => SynVar::BlockClose,
- '@' => match &self.macro_name {
- Some(_) => {
- t.eat_token();
- SynVar::Error(SynErr::LabelInMacroDefinition)
- }
- None => {
- self.label_name = t.eat_token();
- SynVar::LabelDefinition(self.label_name.clone())
- }
- }
- '&' => match &self.macro_name {
- Some(macro_name) => {
- let label_name = format!("{macro_name}:{}", t.eat_token());
- SynVar::LabelDefinition(label_name)
- }
- None => {
- let label_name = &self.label_name;
- let sublabel_name = format!("{label_name}/{}", t.eat_token());
- SynVar::LabelDefinition(sublabel_name)
- }
- }
- '%' => {
- let macro_name = t.eat_token();
- self.macro_name = Some(macro_name.clone());
- SynVar::MacroDefinition(macro_name)
- }
- ';' => {
- self.macro_name = None;
- SynVar::MacroDefinitionTerminator
- }
- '[' => {
- t.mark_child();
- match t.eat_to_delimiter(']') {
- Some(_) => {
- let child = t.subtokenise();
- t.mark_end();
- let expr = parse_constant_expression(child, t.get_source());
- SynVar::Expression(expr)
- }
- None => SynVar::Error(SynErr::UnterminatedExpression),
- }
- }
- '"' => {
- t.mark_child();
- match t.eat_to_delimiter('"') {
- Some(string) => {
- let child = t.subtokenise();
- t.mark_end();
- let chars = parse_tracked_chars(child);
- let tracked_string = TrackedString {
- source: t.get_source(), string, chars,
- };
- SynVar::String(tracked_string)
- }
- None => SynVar::Error(SynErr::UnterminatedString),
- }
- }
- '(' => match t.eat_to_delimiter(')') {
- Some(string) => {
- // Check if the comment fills the entire line.
- if t.start.position.column == 0 && t.end_of_line() {
- if let Some(path) = string.strip_prefix(": ") {
- t.embedded_path = Some(PathBuf::from(path.trim()));
- t.embedded_first_line = t.start.position.line + 1;
- }
- }
- continue;
- },
- None => SynVar::Error(SynErr::UnterminatedComment),
- }
- '|' => {
- let token = t.eat_token();
- if let Some(hex_string) = token.strip_prefix("0x") {
- match usize::from_str_radix(hex_string, 16) {
- Ok(addr) => SynVar::PinnedAddress(addr),
- Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
- }
- } else {
- match usize::from_str_radix(&token, 10) {
- Ok(addr) => SynVar::PinnedAddress(addr),
- Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)),
- }
- }
- }
- '#' => {
- t.mark_child();
- t.eat_token();
- let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source());
- SynVar::PackedBinaryLiteral(pbl)
- },
- '~' => match &self.macro_name {
- Some(macro_name) => {
- let symbol_name = format!("{macro_name}:{}", t.eat_token());
- SynVar::Symbol(symbol_name)
- }
- None => {
- let label_name = &self.label_name;
- let symbol_name = format!("{label_name}/{}", t.eat_token());
- SynVar::Symbol(symbol_name)
- }
- }
- c => {
- let token = format!("{c}{}", t.eat_token());
- if let Some(hex_string) = token.strip_prefix("0x") {
- match usize::from_str_radix(hex_string, 16) {
- Ok(value) => SynVar::IntegerLiteral(value as isize),
- Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
- }
- } else {
- match usize::from_str_radix(&token, 10) {
- Ok(value) => SynVar::IntegerLiteral(value as isize),
- Err(_) => SynVar::Symbol(token),
- }
- }
- }
- };
-
- t.mark_end();
- let source = t.get_source();
- self.tokens.push(SyntacticToken { source, variant });
- }
-
- return self.tokens;
- }
-}
-
-
-fn parse_tracked_chars(mut t: Tokeniser) -> Vec<Tracked<char>> {
- let mut output = Vec::new();
- while let Some(c) = t.eat_char() {
- output.push(Tracked::from(c, t.get_source()));
- t.mark_start();
- }
- return output;
-}
diff --git a/src/report.rs b/src/report.rs
deleted file mode 100644
index a88de4f..0000000
--- a/src/report.rs
+++ /dev/null
@@ -1,235 +0,0 @@
-use crate::*;
-
-
-static mut ERROR_REPORTED: bool = false;
-
-macro_rules! report_source_error {
- ($context:expr, $message:expr) => {
- report_source_issue(LogLevel::Error, $context, $message);
- unsafe { ERROR_REPORTED = true; }
- };
-}
-
-macro_rules! exit_if_error_reported {
- () => {
- if unsafe { ERROR_REPORTED } {
- std::process::exit(1);
- }
- };
-}
-
-pub fn report_syntactic_errors(syntactic_tokens: &[SyntacticToken], source_code: &str) {
- use SyntacticTokenVariant as SynVar;
- for token in syntactic_tokens {
- let context = Context { source_code: &source_code, source: &token.source };
- match &token.variant {
- SynVar::Expression(expr) => for t in &expr.tokens {
- let context = Context { source_code: &source_code, source: &t.source };
- if let ExpressionTokenVariant::Error(err) = &t.variant {
- let ExpressionParseError::InvalidHexadecimalLiteral(hex) = err;
- let message = format!("Invalid hexadecimal literal {hex:?} in constant expression");
- report_source_error!(&context, &message);
- }
- }
- SynVar::PackedBinaryLiteral(pbl) => for e in &pbl.errors {
- let context = Context { source_code: &source_code, source: &e.source };
- match &e.variant {
- PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => {
- let message = format!("Duplicate field name {name:?} in packed binary literal");
- report_source_error!(&context, &message);
- }
- PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => {
- let message = format!("Invalid character {c:?} in packed binary literal");
- report_source_error!(&context, &message);
- }
- }
- }
- SynVar::Error(err) => match err {
- SyntacticParseError::InvalidHexadecimalLiteral(hex) => {
- let message = format!("Invalid hexadecimal literal {hex:?}");
- report_source_error!(&context, &message);
- }
- SyntacticParseError::InvalidDecimalLiteral(dec) => {
- let message = format!("Invalid decimal literal {dec:?}");
- report_source_error!(&context, &message);
- }
- SyntacticParseError::InvalidSymbolIdentifier(name) => {
- let message = format!("Invalid identifier {name:?}");
- report_source_error!(&context, &message);
- }
- SyntacticParseError::UnterminatedComment => {
- let message = format!("Unterminated comment");
- report_source_error!(&context, &message);
- }
- SyntacticParseError::UnterminatedString => {
- let message = format!("Unterminated string");
- report_source_error!(&context, &message);
- }
- SyntacticParseError::UnterminatedExpression => {
- let message = format!("Unterminated assembler expression");
- report_source_error!(&context, &message);
- }
- SyntacticParseError::LabelInMacroDefinition => {
- let message = format!("Only sublabels can be used in macro definitions");
- report_source_error!(&context, &message);
- }
- }
- _ => (),
- }
- }
- exit_if_error_reported!();
-}
-
-
-pub fn report_semantic_errors(program: &SemanticProgram, source_code: &str) {
- for (_, definition) in &program.macro_definitions {
- report_value_errors(&definition.value, source_code);
- }
- for token in &program.body {
- report_semantic_token_errors(token, source_code);
- }
- exit_if_error_reported!();
-}
-
-fn report_value_errors(definition: &Value, source_code: &str) {
- match definition {
- Value::Integer(integer) => match integer {
- Integer::Expression(expr) => for token in &expr.tokens {
- if let ExpressionTokenVariant::Error(error) = &token.variant {
- let message = match error {
- ExpressionParseError::InvalidHexadecimalLiteral(hex) =>
- format!("Invalid hexadecimal literal '{hex}' in constant expression"),
- };
- let context = Context { source: &token.source, source_code};
- report_source_error!(&context, &message);
- }
- }
- _ => (),
- }
- Value::Block(block) => {
- for token in block {
- report_semantic_token_errors(token, source_code);
- }
- }
- Value::Invocation(invocation) => report_invocation_errors(invocation, source_code),
- }
-}
-
-fn report_semantic_token_errors(token: &SemanticToken, source_code: &str) {
- match &token {
- SemanticToken::Word(pbl) => for error in &pbl.errors {
- let message = match &error.variant {
- PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) =>
- format!("Duplicate field name '{name}' in packed binary literal"),
- PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) =>
- format!("Invalid character '{c}' in packed binary literal"),
- };
- let context = Context { source: &error.source, source_code };
- report_source_error!(&context, &message);
- }
- SemanticToken::Invocation(invocation) => {
- report_invocation_errors(invocation, source_code)
- }
- SemanticToken::Error(error) => {
- report_semantic_error(error, source_code)
- }
- SemanticToken::LabelDefinition(_) => (),
- SemanticToken::PinnedAddress(_) => (),
- }
-}
-
-fn report_invocation_errors(invocation: &Invocation, source_code: &str) {
- for error in &invocation.errors {
- report_semantic_error(&error, source_code);
- }
- for argument in &invocation.arguments {
- report_value_errors(&argument.value, source_code);
- }
-}
-
-fn report_semantic_error(error: &SemanticParseError, source_code: &str) {
- let message = match &error.variant {
- SemanticParseErrorVariant::UnterminatedMacroDefinition(name) =>
- format!("The macro definition '{name}' is missing a terminating ';' character"),
- SemanticParseErrorVariant::UnterminatedBlock =>
- format!("Block literal is missing a terminating '}}' character"),
- SemanticParseErrorVariant::InvalidToken =>
- format!("Invalid token"),
- };
- let context = Context { source: &error.source, source_code};
- report_source_error!(&context, &message);
-}
-
-
-pub fn report_assembler_errors(tokens: &[AssembledToken], source_code: &str) {
- for token in tokens {
- match token {
- AssembledToken::Word(word) => {
- for error in &word.errors {
- report_assembler_error(&error, source_code);
- }
- }
- AssembledToken::Error(error) => {
- report_assembler_error(error, source_code);
- },
- _ => (),
- }
- }
- exit_if_error_reported!();
-}
-
-fn report_assembler_error(error: &AssemblerError, source_code: &str) {
- let message = match &error.variant {
- AssemblerErrorVariant::DefinitionNotFound(name) =>
- format!("Definition not found for name '{name}'"),
- AssemblerErrorVariant::NotABlock =>
- format!("Value of type block was expected here"),
- AssemblerErrorVariant::NotAnInteger =>
- format!("Value of type integer was expected here"),
- AssemblerErrorVariant::IntegerInBlock =>
- format!("Integer in block"),
- AssemblerErrorVariant::StringInExpression =>
- format!("Expressions cannot contain strings"),
- AssemblerErrorVariant::IncorrectArgumentCount(expected, received) =>
- format!("Expected {expected} arguments, but received {received} instead"),
- AssemblerErrorVariant::IncorrectArgumentType(expected, received) =>
- format!("Expected {expected} argument but received {received} instead"),
- };
- let context = Context {
- source_code: &source_code,
- source: &error.source,
- };
- report_source_error!(&context, &message);
-}
-
-
-pub fn report_bytecode_errors(bytecode: &Bytecode, source_code: &str) {
- for error in &bytecode.errors {
- report_bytecode_error(error, source_code);
- }
- exit_if_error_reported!();
-}
-
-pub fn report_bytecode_error(error: &BytecodeError, source_code: &str) {
- let message = match &error.variant {
- BytecodeErrorVariant::DefinitionNotFound(name) =>
- format!("Could not find definition for label reference '{name}'"),
- BytecodeErrorVariant::DuplicateLabelDefinition(name) =>
- format!("Duplicate definition for label '{name}'"),
- BytecodeErrorVariant::PinnedAddressBacktrack(expected, received) =>
- format!("Cannot pin back to address {expected} when already at address {received}"),
- BytecodeErrorVariant::ValueTooLarge(expected, received) =>
- format!("Expected {expected}-bit value, but received {received}-bit value instead"),
- BytecodeErrorVariant::StackUnderflow =>
- format!("Stack underflow when evaluating expression"),
- BytecodeErrorVariant::NoReturnValue =>
- format!("No value left on stack when evaluating expression"),
- BytecodeErrorVariant::MultipleReturnValues =>
- format!("More than one value left on stack when evaluating expression"),
- };
- let context = Context {
- source_code: &source_code,
- source: &error.source,
- };
- report_source_error!(&context, &message);
-}
diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs
new file mode 100644
index 0000000..3618b26
--- /dev/null
+++ b/src/stages/bytecode.rs
@@ -0,0 +1,182 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+ /// Convert a stream of intermediate tokens into addressed bytecode segments.
+ ///
+ /// `width`, when `Some`, is the fixed bit width that every word is checked
+ /// against. Returns the finished segments, or every error that was collected
+ /// if at least one was raised.
+ pub fn parse_bytecode(tokens: Vec<Tracked<IntermediateToken>>, width: Option<u32>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> {
+ BytecodeParser::new(width).parse(tokens)
+ }
+
+
+ /// State carried while turning intermediate tokens into segments.
+ pub struct BytecodeParser {
+ /// Fixed word width to check every word against, if any.
+ width: Option<u32>,
+ /// Address of every label, keyed by label name.
+ addresses: HashMap<String, Tracked<usize>>,
+ /// Address of the next word to be emitted.
+ address: usize,
+ /// Start address of the segment currently being built.
+ segment_address: usize,
+ /// Source of the pinned address that opened the current segment, if any.
+ segment_source: Option<SourceSpan>,
+ /// Segments that have been completed so far.
+ segments: Vec<Segment>,
+ /// Words collected for the segment currently being built.
+ words: Vec<Tracked<Word>>,
+ /// Errors collected so far.
+ errors: Vec<Tracked<BytecodeError>>,
+ }
+
+ impl BytecodeParser {
+     /// Create an empty parser that checks every word against `width`, if given.
+     pub fn new(width: Option<u32>) -> Self {
+         Self {
+             width,
+             addresses: HashMap::new(),
+             address: 0,
+             segment_address: 0,
+             segment_source: None,
+             segments: Vec::new(),
+             words: Vec::new(),
+             errors: Vec::new(),
+         }
+     }
+
+     /// Resolve label addresses, evaluate every word, and group the words into
+     /// segments that are split at each pinned address.
+     ///
+     /// Returns the segments, or all collected errors if there were any.
+     pub fn parse(mut self, tokens: Vec<Tracked<IntermediateToken>>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> {
+         // Calculate all label addresses ahead of time, so that words can
+         // reference labels that are defined further down.
+         let mut address = 0;
+         for token in &tokens {
+             let source = &token.source;
+             match &token.value {
+                 IntermediateToken::LabelDefinition(name) => {
+                     let tracked = Tracked::from(address, source.clone());
+                     if self.addresses.insert(name.clone(), tracked).is_some() {
+                         unreachable!("Uncaught duplicate label definition '{name}'");
+                     }
+                 }
+                 IntermediateToken::Word(_) => {
+                     address += 1;
+                 }
+                 IntermediateToken::PinnedAddress(pinned) => {
+                     address = pinned.value;
+                 }
+             }
+         }
+         // Evaluate the words and split them into segments.
+         for token in &tokens {
+             let source = &token.source;
+             match &token.value {
+                 IntermediateToken::Word(word) => {
+                     let word = self.evaluate_word(word, source);
+                     // Check that the word width fits the provided width.
+                     if let Some(width) = self.width {
+                         if word.width != width {
+                             let error = BytecodeError::IncorrectWidth(width, word.width);
+                             self.errors.push(Tracked::from(error, source.clone()));
+                         }
+                     }
+                     self.words.push(word);
+                     self.address += 1;
+                 }
+                 IntermediateToken::PinnedAddress(address) => {
+                     let current = self.address;
+                     let pinned = address.value;
+                     if current > pinned {
+                         let error = BytecodeError::PinnedAddressBacktrack(pinned, current);
+                         self.errors.push(Tracked::from(error, address.source.clone()));
+                     } else {
+                         // Close the running segment and start a new one at
+                         // the pinned address.
+                         self.finish_segment();
+                         self.segment_source = Some(address.source.clone());
+                         self.address = pinned;
+                         self.segment_address = pinned;
+                     }
+                 }
+                 IntermediateToken::LabelDefinition(_) => (),
+             }
+         }
+         // Finish final segment.
+         self.finish_segment();
+
+         match self.errors.is_empty() {
+             true => Ok(self.segments),
+             false => Err(self.errors),
+         }
+     }
+
+     /// Move the collected words into a new segment. Does nothing if no words
+     /// have been collected since the last segment was finished.
+     fn finish_segment(&mut self) {
+         if self.words.is_empty() {
+             return;
+         }
+         let segment = Segment {
+             address: self.segment_address,
+             source: self.segment_source.take(),
+             words: std::mem::take(&mut self.words),
+         };
+         self.segments.push(segment);
+     }
+
+     /// Evaluate an expression to a single integer.
+     ///
+     /// Stack errors are recorded in `self.errors`; if the stack does not
+     /// yield a result, the expression evaluates to zero.
+     fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize {
+         let mut stack = ExpressionStack::new();
+         for token in &expression.tokens {
+             let token_source = &token.source;
+             match &token.value {
+                 IntermediateExpressionToken::Integer(integer) => match integer {
+                     IntermediateInteger::Integer(value) => {
+                         stack.push(*value);
+                     }
+                     IntermediateInteger::Expression(expression) => {
+                         stack.push(self.evaluate_expression(expression, token_source));
+                     }
+                     IntermediateInteger::LabelReference(name) => {
+                         stack.push(self.evaluate_label_reference(name));
+                     }
+                 }
+                 IntermediateExpressionToken::Operator(operator) => {
+                     if let Err(err) = stack.apply(*operator, token_source) {
+                         let error = BytecodeError::StackError(err);
+                         self.errors.push(Tracked::from(error, token_source.clone()))
+                     }
+                 }
+             }
+         }
+         match stack.pull_result() {
+             Ok(value) => value,
+             Err(err) => {
+                 let error = BytecodeError::StackError(Tracked::from(err, source.clone()));
+                 self.errors.push(Tracked::from(error, source.clone()));
+                 0
+             }
+         }
+     }
+
+     /// Look up the address of a label.
+     ///
+     /// Panics if the label has no recorded address; unresolved references
+     /// are expected to have been rejected before this stage.
+     fn evaluate_label_reference(&mut self, name: &Tracked<String>) -> isize {
+         match self.addresses.get(&name.to_string()) {
+             Some(address) => address.value as isize,
+             None => unreachable!("Uncaught unresolved label reference '{name}'"),
+         }
+     }
+
+     /// Evaluate every field of a word template and pack the results into the
+     /// word value. A field whose value is too wide is reported and left unset.
+     fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked<Word> {
+         let mut word_value = word.value;
+         for field in &word.fields {
+             let field_source = &field.value.value.source;
+             let field_value = match &field.value.value.value {
+                 IntermediateInteger::Expression(expression) => {
+                     self.evaluate_expression(expression, source)
+                 }
+                 IntermediateInteger::LabelReference(name) => {
+                     self.evaluate_label_reference(name)
+                 }
+                 IntermediateInteger::Integer(value) => {
+                     *value
+                 }
+             };
+             // Number of bits needed for the magnitude of the value. Using
+             // `unsigned_abs` avoids the overflow of `-isize::MIN`.
+             // NOTE(review): negative values are measured by magnitude, so -3
+             // counts as 2 bits and is stored as 0b01 in a 2-bit field.
+             // Confirm that this is the intended treatment.
+             let value_width = match field_value {
+                 0 => 0,
+                 value => value.unsigned_abs().ilog2() + 1,
+             };
+             if field.width < value_width {
+                 let error = BytecodeError::ValueTooWide(field.width, value_width);
+                 self.errors.push(Tracked::from(error, field_source.clone()));
+             } else {
+                 // A field as wide as `usize` keeps every bit; shifting by
+                 // the full width would overflow.
+                 let mask = 1_usize.checked_shl(field.width).map_or(usize::MAX, |bit| bit - 1);
+                 let clamped_value = (field_value as usize) & mask;
+                 word_value |= clamped_value << field.shift;
+             }
+         }
+         let word = Word { width: word.width, value: word_value };
+         return Tracked::from(word, source.clone());
+     }
+ }
diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs
new file mode 100644
index 0000000..b54cb0e
--- /dev/null
+++ b/src/stages/bytecode_tokens.rs
@@ -0,0 +1,78 @@
+use crate::*;
+
+
+ /// A run of consecutive words, starting at `address`.
+ pub struct Segment {
+ /// Address of the first word in the segment.
+ pub address: usize,
+ /// Source of the address value.
+ pub source: Option<SourceSpan>,
+ /// The words of the segment, in address order.
+ pub words: Vec<Tracked<Word>>,
+ }
+
+ /// A fully evaluated word.
+ pub struct Word {
+     /// The bits of the word.
+     pub value: usize,
+     /// The number of bits in the word.
+     pub width: u32,
+ }
+
+ /// Formats the word as binary digits, most significant bit first, with an
+ /// underscore between each group of four bits counted from the low end.
+ /// A zero-width word is formatted as "0".
+ impl std::fmt::Display for Word {
+     fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+         if self.width == 0 {
+             return f.write_str("0");
+         }
+         for bit_index in (0..self.width).rev() {
+             let bits_remaining = bit_index + 1;
+             // Never put a separator in front of the leading digit.
+             let needs_separator = bits_remaining != self.width && bits_remaining % 4 == 0;
+             if needs_separator {
+                 f.write_str("_")?;
+             }
+             let digit = if (self.value >> bit_index) & 1 == 0 { "0" } else { "1" };
+             f.write_str(digit)?;
+         }
+         Ok(())
+     }
+ }
+
+ /// Errors that can be raised while converting intermediate tokens to bytecode.
+ pub enum BytecodeError {
+ /// A word does not have the required fixed width.
+ /// expected, received
+ IncorrectWidth(u32, u32),
+ /// A pinned address lies before the current address.
+ /// pinned, real
+ PinnedAddressBacktrack(usize, usize),
+ /// A value needs more bits than the field it is packed into.
+ /// expected, received
+ ValueTooWide(u32, u32),
+ /// An error raised by the expression stack while evaluating an expression.
+ StackError(Tracked<StackError>),
+ }
+
+
+ /// Report every error in `errors`, in order, against `source_code`.
+ pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) {
+     errors.iter().for_each(|error| report_bytecode_error(error, source_code));
+ }
+
+ /// Report a single bytecode error at its source location.
+ ///
+ /// Stack errors are handed to `report_stack_error`; every other variant is
+ /// turned into a message and reported at error level.
+ fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) {
+     let message = match &error.value {
+         BytecodeError::StackError(stack_error) => {
+             report_stack_error(stack_error, source_code);
+             return;
+         }
+         BytecodeError::IncorrectWidth(expected, received) =>
+             format!("Word is {received} bits wide, but was expected to have a fixed width of {expected} bits"),
+         BytecodeError::PinnedAddressBacktrack(pinned, real) =>
+             format!("Cannot pin to address {pinned} when address is already {real}"),
+         BytecodeError::ValueTooWide(expected, received) =>
+             format!("Field is {expected} bits wide, but received a value that is {received} bits wide"),
+     };
+
+     let context = Context { source_code: &source_code, source: &error.source };
+     report_source_issue(LogLevel::Error, &context, &message);
+ }
+
+
+ /// Print the address of a segment, followed by each of its words on its own
+ /// line, right-aligned to the widest word in the segment.
+ pub fn print_segment(segment: &Segment) {
+     println!("SEGMENT: 0x{:>04x}", segment.address);
+     // Format every word once, then reuse the strings for measuring and printing.
+     let strings: Vec<String> = segment.words.iter().map(|w| w.to_string()).collect();
+     // Find maximum width of all words in the segment.
+     let width = strings.iter().map(|s| s.chars().count()).max().unwrap_or(0);
+     for string in &strings {
+         println!(" {string:>width$}");
+     }
+ }
diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs
new file mode 100644
index 0000000..6853f62
--- /dev/null
+++ b/src/stages/intermediate.rs
@@ -0,0 +1,577 @@
+use crate::*;
+
+use assembler::{DefinitionType, SourceLocation, SourcePosition, SymbolRole};
+
+use indexmap::{IndexSet, IndexMap};
+
+
+ /// Counter that backs `next_id!`. An atomic is used so that no `unsafe` or
+ /// mutable static is needed to hand out identifiers.
+ static ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
+ /// Return the current value of the counter and advance it by one.
+ macro_rules! next_id { () => { ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed) }; }
+
+ /// Convert semantic tokens into intermediate tokens.
+ ///
+ /// Returns the intermediate tokens, or every error that was collected if at
+ /// least one was raised.
+ pub fn parse_intermediate(semantic: Vec<Tracked<SemanticToken>>) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> {
+ IntermediateParser::new(semantic).parse()
+ }
+
+
+ /// State carried while converting semantic tokens to intermediate tokens.
+ struct IntermediateParser {
+ /// The semantic tokens to be converted.
+ semantic: Vec<Tracked<SemanticToken>>,
+ /// Names of all label definitions found in the semantic tokens.
+ label_names: IndexSet<Tracked<String>>,
+ /// Names of all macro definitions found in the semantic tokens.
+ macro_names: IndexSet<Tracked<String>>,
+ /// Macro definitions processed so far, keyed by name.
+ macro_definitions: IndexMap<String, MacroDefinition>,
+ /// Intermediate tokens generated so far.
+ intermediate: Vec<Tracked<IntermediateToken>>,
+ /// Errors collected so far.
+ errors: Vec<Tracked<IntermediateError>>,
+ }
+
+ impl IntermediateParser {
+ /// Create a parser for `semantic`, collecting the names of every label
+ /// and macro definition up front using `SymbolParser`.
+ ///
+ /// Panics on a duplicate label or macro name.
+ pub fn new(semantic: Vec<Tracked<SemanticToken>>) -> Self {
+ let mut label_names = IndexSet::new();
+ let mut macro_names = IndexSet::new();
+ for symbol in SymbolParser::new().parse(&semantic) {
+ match symbol.role {
+ SymbolRole::Definition(DefinitionType::MustPrecedeReference) => {
+ // Only consider macro definitions, not macro argument definitions.
+ if symbol.namespace.is_empty() {
+ if !macro_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) {
+ unreachable!("Uncaught duplicate macro definition '{}'", symbol.name);
+ }
+ }
+ }
+ SymbolRole::Definition(DefinitionType::CanFollowReference) => {
+ if !label_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) {
+ unreachable!("Uncaught duplicate label definition '{}'", symbol.name);
+ }
+ }
+ SymbolRole::Reference => (),
+ }
+ }
+
+ Self {
+ semantic,
+ label_names,
+ macro_names,
+ macro_definitions: IndexMap::new(),
+ intermediate: Vec::new(),
+ errors: Vec::new(),
+ }
+ }
+
+ /// Process every semantic token in order.
+ ///
+ /// The body of each macro definition is test-invoked with dummy arguments
+ /// before the definition is stored, and each block token is converted to
+ /// intermediate tokens. Returns the intermediate tokens, or all collected
+ /// errors if there were any.
+ pub fn parse(mut self) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> {
+ for token in self.semantic {
+ let source = &token.source;
+ match token.value {
+ SemanticToken::MacroDefinition(definition) => {
+ // Invoke the body to see if it contains undefined macros.
+ let error_count = self.errors.len();
+ let mut arguments = IndexMap::new();
+ // Prepare dummy argument values.
+ // The dummy values are tracked with an empty source span.
+ let null = SourceSpan {
+ string: String::new(),
+ in_merged: SourceLocation {
+ path: None,
+ start: SourcePosition::ZERO,
+ end: SourcePosition::ZERO,
+ },
+ in_source: None,
+ child: None,
+ };
+ for argument in &definition.arguments {
+ // Integer arguments stand in as zero, block arguments as an empty block.
+ let value = match argument.variant {
+ ArgumentType::Integer => {
+ let integer = IntermediateInteger::Integer(0);
+ let tracked = Tracked::from(integer, null.clone());
+ IntermediateValue::Integer(tracked)
+ }
+ ArgumentType::Block => {
+ IntermediateValue::Block(Vec::new())
+ }
+ };
+ let tracked = Tracked::from(value, null.clone());
+ arguments.insert(argument.name.clone(), tracked);
+ }
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments,
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ // The returned value is discarded, only the errors are of interest.
+ env.parse_macro_definition_body(&definition.body, source);
+ // Stop processing all remaining tokens if the body raised an error.
+ if self.errors.len() != error_count {
+ break;
+ }
+
+ let name = definition.name.to_string();
+ if self.macro_definitions.insert(name.clone(), definition).is_some() {
+ unreachable!("Uncaught duplicate macro definition '{}'", name);
+ }
+ }
+ SemanticToken::BlockToken(block_token) => {
+ // Block tokens outside of a macro definition have no arguments in scope.
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments: IndexMap::new(),
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ let mut tokens = env.parse_block_token(&block_token, source);
+ self.intermediate.append(&mut tokens);
+ }
+ }
+ }
+ match self.errors.is_empty() {
+ true => Ok(self.intermediate),
+ false => Err(self.errors),
+ }
+ }
+ }
+
+
+ /// The names, definitions and argument values that are visible while a
+ /// single macro body or block token is being parsed.
+ struct Environment<'a> {
+ /// Names of all label definitions in the program.
+ label_names: &'a IndexSet<Tracked<String>>,
+ /// Names of all macro definitions in the program.
+ macro_names: &'a IndexSet<Tracked<String>>,
+ /// Macro definitions that can be invoked, keyed by name.
+ macro_definitions: &'a IndexMap<String, MacroDefinition>,
+ /// Argument values that can be invoked by name.
+ arguments: IndexMap<String, Tracked<IntermediateValue>>,
+ /// Destination for any errors that are raised.
+ errors: &'a mut Vec<Tracked<IntermediateError>>,
+ /// Identifier that is appended to every name containing a ':' character.
+ id: usize,
+ }
+
+impl<'a> Environment<'a> {
+ // Attach the invocation ID to every macro label name
+ fn tag_name(&self, name: &str) -> String {
+ match name.contains(':') {
+ true => format!("{name}:{}", self.id),
+ false => name.to_string(),
+ }
+ }
+
+ fn parse_macro_definition_body(&mut self, body: &MacroDefinitionBody, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> {
+ match &body {
+ MacroDefinitionBody::Integer(integer) => {
+ let token = self.parse_integer_token(&integer, &source)?;
+ let integer = IntermediateValue::Integer(token);
+ Some(Tracked::from(integer, source.clone()))
+ }
+ MacroDefinitionBody::Invocation(invocation) => {
+ self.parse_invocation(&invocation, &invocation.source)
+ }
+ MacroDefinitionBody::Block(blocks) => {
+ let mut tokens = Vec::new();
+ for block in blocks {
+ tokens.append(&mut self.parse_block_token(block, &block.source));
+ }
+ let value = IntermediateValue::Block(tokens);
+ Some(Tracked::from(value, source.clone()))
+ }
+ }
+ }
+
+ fn parse_block_token(&mut self, block: &BlockToken, source: &SourceSpan) -> Vec<Tracked<IntermediateToken>> {
+ let mut intermediate = Vec::new();
+ match block {
+ BlockToken::LabelDefinition(name) => {
+ let token = IntermediateToken::LabelDefinition(self.tag_name(name));
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ BlockToken::PinnedAddress(address) => {
+ if let Some(integer) = self.parse_integer_token(address, &address.source) {
+ if let Some(source) = integer_contains_label_reference(&integer) {
+ let error = IntermediateError::LabelReferenceInPinnedAddress;
+ let new_source = address.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ } else {
+ match evaluate_integer(&integer, source) {
+ Ok(value) => {
+ let value = usize::try_from(value).unwrap_or(0);
+ let tracked = Tracked::from(value, address.source.clone());
+ let token = IntermediateToken::PinnedAddress(tracked);
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ Err(error) => self.errors.push(error),
+ }
+ }
+ }
+ }
+ BlockToken::ConditionalBlock(cond) => {
+ let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source);
+ let mut body = self.parse_block_token(&cond.body, &cond.body.source);
+ if let Some(predicate) = predicate {
+ let mut found_error = false;
+ if let Some(source) = integer_contains_label_reference(&predicate) {
+ let error = IntermediateError::LabelReferenceInConditionPredicate;
+ let new_source = cond.predicate.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ found_error = true;
+ };
+ if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) {
+ let error = IntermediateError::LabelDefinitionInConditionBody;
+ let new_source = cond.body.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ found_error = true;
+ }
+ if !found_error {
+ match evaluate_integer(&predicate, &cond.predicate.source) {
+ Ok(value) => if value != 0 { intermediate.append(&mut body) },
+ Err(error) => self.errors.push(error),
+ }
+ }
+ }
+ }
+ BlockToken::WordTemplate(word_template) => {
+ let mut fields = Vec::new();
+ for bit_field in &word_template.fields {
+ let name = bit_field.name.to_string();
+ let source = &bit_field.source;
+ let invocation = Invocation { name, arguments: Vec::new() };
+ if let Some(value) = self.parse_integer_invocation(&invocation, source) {
+ let field = IntermediateField {
+ width: bit_field.width,
+ shift: bit_field.shift,
+ value,
+ };
+ fields.push(Tracked::from(field, bit_field.source.clone()));
+ }
+ }
+ let word = IntermediateWord {
+ value: word_template.value,
+ width: word_template.width,
+ fields,
+ };
+ let token = IntermediateToken::Word(word);
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ BlockToken::Block(blocks) => {
+ for block in blocks {
+ let mut tokens = self.parse_block_token(block, &block.source);
+ intermediate.append(&mut tokens);
+ }
+ }
+ BlockToken::Invocation(invocation) => {
+ if let Some(mut tokens) = self.parse_block_invocation(invocation, source) {
+ intermediate.append(&mut tokens);
+ }
+ }
+ }
+
+ return intermediate;
+ }
+
+ fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+ match integer {
+ IntegerToken::IntegerLiteral(value) => {
+ let integer = IntermediateInteger::Integer(*value);
+ Some(Tracked::from(integer, source.clone()))
+ }
+ IntegerToken::Expression(expression) => {
+ self.parse_expression(expression, source)
+ }
+ IntegerToken::Invocation(invocation) => {
+ self.parse_integer_invocation(invocation, source)
+ }
+ }
+ }
+
+ fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+ match self.parse_invocation(invocation, source)?.value {
+ IntermediateValue::Integer(integer) => Some(integer),
+ IntermediateValue::Block(_) => {
+ let error = IntermediateError::ExpectedInteger;
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ }
+ }
+ }
+
+ fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> {
+ match self.parse_invocation(invocation, source)?.value {
+ IntermediateValue::Block(tokens) => Some(tokens),
+ IntermediateValue::Integer(_) => {
+ let error = IntermediateError::ExpectedBlock;
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ }
+ }
+ }
+
+ /// Resolve an invocation to an intermediate value.
+ ///
+ /// The name is looked up, in order, among the arguments of this environment,
+ /// the label names, and the macro definitions. Arguments and labels must be
+ /// invoked without arguments of their own. A macro body is evaluated once per
+ /// repetition, where the repetition count is the largest argument length: a
+ /// string counts its characters and every other argument counts as one. When
+ /// the number of results is not exactly one, every result must be a block and
+ /// the blocks are concatenated.
+ ///
+ /// Returns `None` if the invocation could not be resolved.
+ fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> {
+ let received_count = invocation.arguments.len();
+ if let Some(argument) = self.arguments.get(&invocation.name) {
+ // The name refers to an argument of the macro currently being expanded.
+ if received_count != 0 {
+ let error = IntermediateError::IncorrectArgumentCount(0, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ Some(argument.clone())
+ }
+ } else if let Some(label_name) = self.label_names.get(&invocation.name) {
+ // The name refers to a label, so the value is a label reference.
+ if received_count != 0 {
+ let error = IntermediateError::IncorrectArgumentCount(0, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ let name = self.tag_name(label_name);
+ let tracked = Tracked::from(name, label_name.source.clone());
+ let integer = IntermediateInteger::LabelReference(tracked);
+ let tracked = Tracked::from(integer, source.clone());
+ let value = IntermediateValue::Integer(tracked);
+ Some(Tracked::from(value, source.clone()))
+ }
+ } else if let Some(definition) = self.macro_definitions.get(&invocation.name) {
+ // Check that the correct number of arguments were provided.
+ let expected_count = definition.arguments.len();
+ if received_count != expected_count {
+ let error = IntermediateError::IncorrectArgumentCount(expected_count, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ // Gather and type-check the provided arguments.
+ let mut arguments = Vec::new();
+ for (i, argument) in invocation.arguments.iter().enumerate() {
+ let received_type = match &argument.value {
+ InvocationArgument::String(string) => {
+ // A string becomes a list with one integer value per character,
+ // and is type-checked as an integer argument.
+ let mut values = Vec::new();
+ for c in &string.chars {
+ let integer = IntermediateInteger::Integer(**c);
+ let tracked = Tracked::from(integer, c.source.clone());
+ values.push(IntermediateValue::Integer(tracked));
+ }
+ arguments.push(RepeatedArgument::List(values));
+ ArgumentType::Integer
+ }
+ InvocationArgument::IntegerToken(integer) => {
+ let tracked = self.parse_integer_token(&integer, &argument.source)?;
+ let value = IntermediateValue::Integer(tracked);
+ arguments.push(RepeatedArgument::Loop(value));
+ ArgumentType::Integer
+ }
+ InvocationArgument::BlockToken(block) => {
+ let tokens = self.parse_block_token(&block, &argument.source);
+ let value = IntermediateValue::Block(tokens);
+ arguments.push(RepeatedArgument::Loop(value));
+ ArgumentType::Block
+ }
+ InvocationArgument::Invocation(invocation) => {
+ // The type of a nested invocation is only known once it is resolved.
+ let value = self.parse_invocation(&invocation, &argument.source)?;
+ let received_type = match &value.value {
+ IntermediateValue::Integer(_) => ArgumentType::Integer,
+ IntermediateValue::Block(_) => ArgumentType::Block,
+ };
+ arguments.push(RepeatedArgument::Loop(value.value));
+ received_type
+ }
+ };
+ // There are only two argument types, so whenever the types differ
+ // the expected type is the opposite of the received type. This
+ // value is only used when reporting a mismatch.
+ let expected_type = match received_type {
+ ArgumentType::Integer => ArgumentType::Block,
+ ArgumentType::Block => ArgumentType::Integer,
+ };
+ if definition.arguments[i].variant != received_type {
+ let error = IntermediateError::IncorrectArgumentType(expected_type, received_type);
+ self.errors.push(Tracked::from(error, argument.source.clone()));
+ return None;
+ }
+ }
+ // Invoke the invocation multiple times.
+ // The longest argument decides the count; with no arguments it is 1.
+ let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1);
+ let mut values = Vec::new();
+ for i in 0..repetitions {
+ // Construct an argument map for this invocation.
+ let mut argument_map = IndexMap::new();
+ for (a, argument) in arguments.iter().enumerate() {
+ let name = definition.arguments[a].name.clone();
+ let source = invocation.arguments[a].source.clone();
+ let value = match argument {
+ // A looped value is reused unchanged for every repetition.
+ RepeatedArgument::Loop(value) => {
+ Tracked::from(value.clone(), source)
+ }
+ // A list supplies its i-th element, and running out is an error.
+ RepeatedArgument::List(list) => match list.get(i) {
+ Some(value) => {
+ Tracked::from(value.clone(), source)
+ }
+ None => {
+ let error = IntermediateError::ListExhausted;
+ let source = invocation.arguments[a].source.clone();
+ self.errors.push(Tracked::from(error, source));
+ return None;
+ }
+ }
+ };
+ if argument_map.insert(name.clone(), value).is_some() {
+ unreachable!("Uncaught duplicate macro argument name '{name}'");
+ };
+ }
+ // Expand the macro body in a fresh environment that shares the name
+ // tables and the error list, but has its own arguments and id.
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments: argument_map,
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ // Here `source` is the function parameter again, not the loop-local copy.
+ values.push(env.parse_macro_definition_body(&definition.body, source)?);
+ }
+ if values.len() == 1 {
+ // A single result is returned as-is, whether integer or block.
+ values.pop()
+ } else {
+ // Flatten all values into a list of block tokens.
+ let mut block = Vec::new();
+ for value in values {
+ match value.value {
+ IntermediateValue::Integer(_) => {
+ let error = IntermediateError::ExpectedBlock;
+ self.errors.push(Tracked::from(error, value.source));
+ return None;
+ }
+ IntermediateValue::Block(mut tokens) => {
+ block.append(&mut tokens);
+ }
+ }
+ }
+ Some(Tracked::from(IntermediateValue::Block(block), source.clone()))
+ }
+ }
+ } else if let Some(macro_name) = self.macro_names.get(&invocation.name) {
+ // The macro name is known but it has no entry in the definitions yet.
+ let error = IntermediateError::InvocationBeforeDefinition;
+ let source = source.clone().wrap(macro_name.source.clone());
+ self.errors.push(Tracked::from(error, source));
+ None
+ } else {
+ unreachable!("Uncaught unresolved reference '{}'", invocation.name);
+ }
+ }
+
+ /// Convert a semantic expression into an intermediate expression.
+ ///
+ /// Every token is processed even after a failure, so that all errors in the
+ /// expression are recorded. Returns `None` if any operand failed to resolve.
+ fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+     let mut tokens = Vec::new();
+     let mut failed = false;
+
+     for token in &expression.tokens {
+         // Operators pass straight through; operands must resolve to an integer.
+         let operand = match &token.value {
+             ExpressionToken::Operator(operator) => {
+                 let operator = IntermediateExpressionToken::Operator(*operator);
+                 tokens.push(Tracked::from(operator, token.source.clone()));
+                 continue;
+             }
+             ExpressionToken::IntegerToken(integer) => {
+                 self.parse_integer_token(integer, &token.source)
+             }
+             ExpressionToken::Invocation(invocation) => {
+                 self.parse_integer_invocation(invocation, &token.source)
+             }
+         };
+         match operand {
+             Some(integer) => {
+                 let operand = IntermediateExpressionToken::Integer(integer.value);
+                 tokens.push(Tracked::from(operand, integer.source));
+             }
+             None => failed = true,
+         }
+     }
+
+     if failed {
+         return None;
+     }
+     let integer = IntermediateInteger::Expression(IntermediateExpression { tokens });
+     Some(Tracked::from(integer, source.clone()))
+ }
+}
+
+
+// Return from the enclosing function with the given option if it is `Some`.
+// The expression appears twice in the expansion, so it is evaluated a second
+// time when the first evaluation is `Some`.
+macro_rules! return_some {
+ ($option:expr) => {
+ if $option.is_some() { return $option; }
+ };
+}
+
+/// Search an integer for a label reference.
+///
+/// Returns the source span of the reference if one is found, looking inside
+/// expressions as well. A plain integer never contains a reference.
+fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option<SourceSpan> {
+    if let IntermediateInteger::LabelReference(label) = integer {
+        return Some(label.source.clone());
+    }
+    if let IntermediateInteger::Expression(nested) = integer {
+        return expression_contains_label_reference(nested);
+    }
+    None
+}
+
+/// Search the operands of an expression for a label reference.
+///
+/// Returns the span of the first operand that contains one, wrapped around
+/// the span of the reference itself. Operators are ignored.
+fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option<SourceSpan> {
+    expression.tokens.iter().find_map(|token| match &token.value {
+        IntermediateExpressionToken::Integer(integer) => {
+            let inner = integer_contains_label_reference(integer)?;
+            Some(token.source.clone().wrap(inner))
+        }
+        IntermediateExpressionToken::Operator(_) => None,
+    })
+}
+
+/// Search a block token for a label definition.
+///
+/// A label definition reports `source`; invocations and nested blocks are
+/// searched recursively. All other kinds of block token report nothing.
+fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option<SourceSpan> {
+    match block {
+        BlockToken::LabelDefinition(_) => Some(source.clone()),
+        BlockToken::Invocation(invocation) => invocation_contains_label_definition(invocation),
+        BlockToken::Block(children) => children
+            .iter()
+            .find_map(|child| block_contains_label_definition(child, &child.source)),
+        _ => None,
+    }
+}
+
+/// Search the arguments of an invocation for a label definition.
+///
+/// Only block arguments and nested invocations are searched; the span of the
+/// first definition found is returned.
+fn invocation_contains_label_definition(invocation: &Invocation) -> Option<SourceSpan> {
+    invocation.arguments.iter().find_map(|argument| match &argument.value {
+        InvocationArgument::BlockToken(block) => {
+            block_contains_label_definition(block, &argument.source)
+        }
+        InvocationArgument::Invocation(nested) => {
+            invocation_contains_label_definition(nested)
+        }
+        _ => None,
+    })
+}
+
+/// Evaluate an intermediate integer to a concrete value.
+///
+/// A plain integer is returned directly and an expression is evaluated with
+/// `evaluate_expression`. Panics on a label reference, which the panic message
+/// describes as something that should already have been caught.
+fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> {
+ match integer {
+ IntermediateInteger::Integer(value) => Ok(*value),
+ IntermediateInteger::LabelReference(name) =>
+ unreachable!("Uncaught label reference '{name}' in condition predicate or pinned address value"),
+ IntermediateInteger::Expression(expr) => evaluate_expression(expr, source),
+ }
+}
+
+/// Evaluate an intermediate expression to a concrete value.
+///
+/// Tokens are processed in order against an expression stack: operands are
+/// pushed and operators are applied to the stack. A stack error raised by an
+/// operator is reported at that operator, and a failure to pull the final
+/// result is reported at `source`. Panics on a label reference.
+fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> {
+    let mut stack = ExpressionStack::new();
+    for token in &expression.tokens {
+        let operand = match &token.value {
+            IntermediateExpressionToken::Operator(operator) => {
+                if let Err(stack_error) = stack.apply(*operator, &token.source) {
+                    let error = IntermediateError::StackError(stack_error);
+                    return Err(Tracked::from(error, token.source.clone()));
+                }
+                continue;
+            }
+            IntermediateExpressionToken::Integer(integer) => integer,
+        };
+        let value = match operand {
+            IntermediateInteger::Integer(value) => *value,
+            IntermediateInteger::Expression(nested) => evaluate_expression(nested, &token.source)?,
+            IntermediateInteger::LabelReference(name) =>
+                unreachable!("Uncaught label reference '{name}' in condition predicate"),
+        };
+        stack.push(value);
+    }
+    stack.pull_result().map_err(|err| {
+        let stack_error = Tracked::from(err, source.clone());
+        Tracked::from(IntermediateError::StackError(stack_error), source.clone())
+    })
+}
diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs
new file mode 100644
index 0000000..a09581e
--- /dev/null
+++ b/src/stages/intermediate_tokens.rs
@@ -0,0 +1,149 @@
+use crate::*;
+
+
+/// A token produced by the intermediate stage.
+#[derive(Clone)]
+pub enum IntermediateToken {
+ /// A word together with the fields it contains.
+ Word(IntermediateWord),
+ /// An address value, tracked with its source location.
+ PinnedAddress(Tracked<usize>),
+ /// The name of a label being defined.
+ LabelDefinition(String),
+}
+
+/// A word with a fixed bit width and a list of fields.
+#[derive(Clone)]
+pub struct IntermediateWord {
+ // NOTE(review): presumably the constant bits of the word before any field
+ // values are merged in; confirm against the code that builds words.
+ pub value: usize,
+ /// Width of the word in bits.
+ pub width: u32,
+ /// Fields of the word, each tracked with its source location.
+ pub fields: Vec<Tracked<IntermediateField>>,
+}
+
+/// A field of a word: an integer value with a bit width and a position.
+#[derive(Clone)]
+pub struct IntermediateField {
+ /// The integer held by the field, which may still be an expression or a
+ /// label reference.
+ pub value: Tracked<IntermediateInteger>,
+ /// Width of the field in bits.
+ pub width: u32,
+ /// Number of bits to the right of the field in the word.
+ pub shift: u32,
+}
+
+/// An integer in the intermediate stage, which may not yet be a concrete value.
+#[derive(Clone)]
+pub enum IntermediateInteger {
+ /// A concrete integer value.
+ Integer(isize),
+ /// An expression that evaluates to an integer.
+ Expression(IntermediateExpression),
+ /// A reference to a label by name.
+ LabelReference(Tracked<String>),
+}
+
+/// An expression as an ordered list of operands and operators.
+#[derive(Clone)]
+pub struct IntermediateExpression {
+ /// The tokens of the expression, in order.
+ pub tokens: Vec<Tracked<IntermediateExpressionToken>>,
+}
+
+/// A single token of an intermediate expression.
+#[derive(Clone)]
+pub enum IntermediateExpressionToken {
+ /// An operand.
+ Integer(IntermediateInteger),
+ /// An operator.
+ Operator(Operator),
+}
+
+/// A value in the intermediate stage: either an integer or a block of tokens.
+#[derive(Clone)]
+pub enum IntermediateValue {
+ /// An integer value.
+ Integer(Tracked<IntermediateInteger>),
+ /// A block of intermediate tokens.
+ Block(Vec<Tracked<IntermediateToken>>),
+}
+
+/// An argument value that is either a single value or a list of values.
+pub enum RepeatedArgument {
+ /// A single value.
+ Loop(IntermediateValue),
+ /// A list of values.
+ List(Vec<IntermediateValue>),
+}
+
+impl RepeatedArgument {
+ /// Number of values held: always 1 for `Loop`, and the list length for `List`.
+ pub fn len(&self) -> usize {
+ match self {
+ Self::Loop(_) => 1,
+ Self::List(list) => list.len(),
+ }
+ }
+}
+
+/// Errors reported by the intermediate stage.
+pub enum IntermediateError {
+ /// An integer value was expected.
+ ExpectedInteger,
+ /// A block value was expected.
+ ExpectedBlock,
+ /// A string is shorter than another string passed to the same invocation.
+ ListExhausted,
+ /// The predicate of a conditional block contains a label reference.
+ LabelReferenceInConditionPredicate,
+ /// The body of a conditional block contains a label definition.
+ LabelDefinitionInConditionBody,
+ /// The value of a pinned address contains a label reference.
+ LabelReferenceInPinnedAddress,
+ /// An error from the expression stack.
+ StackError(Tracked<StackError>),
+ /// A macro was invoked before it was defined.
+ InvocationBeforeDefinition,
+ /// expected, received
+ IncorrectArgumentCount(usize, usize),
+ /// expected, received
+ IncorrectArgumentType(ArgumentType, ArgumentType),
+}
+
+pub fn report_intermediate_errors(errors: &[Tracked<IntermediateError>], source_code: &str) {
+ for error in errors {
+ report_intermediate_error(error, source_code);
+ }
+}
+
+fn report_intermediate_error(error: &Tracked<IntermediateError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ IntermediateError::ExpectedInteger =>
+ "An integer value was expected here",
+ IntermediateError::ExpectedBlock =>
+ "A block value was expected here",
+ IntermediateError::ListExhausted =>
+ "This string is shorter than another string passed to the same invocation",
+ IntermediateError::LabelReferenceInConditionPredicate =>
+ "The predicate of a conditional block cannot contain a label reference",
+ IntermediateError::LabelDefinitionInConditionBody =>
+ "The body of a conditional block cannot contain a label definition",
+ IntermediateError::LabelReferenceInPinnedAddress =>
+ "The value of a pinned address cannot contain a label reference",
+ IntermediateError::StackError(stack_error) => {
+ report_stack_error(stack_error, source_code); return; },
+ IntermediateError::InvocationBeforeDefinition =>
+ &format!("Macro cannot be invoked before it has been defined"),
+ IntermediateError::IncorrectArgumentCount(expected, received) =>
+ &format!("Expected {expected} arguments, but received {received} instead"),
+ IntermediateError::IncorrectArgumentType(expected, received) =>
+ &format!("Expected {expected} value but received {received} value instead"),
+ };
+
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Print an intermediate token at indentation level `i`.
+///
+/// A word is printed in binary, zero-padded to its width, followed by the
+/// value of each of its fields one level deeper.
+pub fn print_intermediate_token(i: usize, token: &IntermediateToken) {
+ match token {
+ IntermediateToken::Word(word) => {
+ indent!(i, "Word({:>0w$b})", word.value, w = word.width as usize);
+ for field in &word.fields {
+ print_intermediate_integer(i+1, &field.value.value);
+ }
+ }
+ IntermediateToken::PinnedAddress(address) =>
+ indent!(i, "PinnedAddress({address})"),
+ IntermediateToken::LabelDefinition(name) =>
+ indent!(i, "LabelDefinition({name})"),
+ }
+}
+
+/// Print an intermediate integer at indentation level `i`.
+///
+/// The tokens of an expression are printed one level deeper, with nested
+/// operands printed recursively.
+fn print_intermediate_integer(i: usize, integer: &IntermediateInteger) {
+ match integer {
+ IntermediateInteger::Integer(value) =>
+ indent!(i, "Integer({value})"),
+ IntermediateInteger::LabelReference(name) =>
+ indent!(i, "LabelReference({name})"),
+ IntermediateInteger::Expression(expression) => {
+ indent!(i, "Expression");
+ for token in &expression.tokens {
+ match &token.value {
+ IntermediateExpressionToken::Integer(integer) =>
+ print_intermediate_integer(i+1, integer),
+ IntermediateExpressionToken::Operator(operator) =>
+ indent!(i+1, "Operator({operator})"),
+ }
+ }
+ }
+ }
+}
diff --git a/src/stages/mod.rs b/src/stages/mod.rs
new file mode 100644
index 0000000..e735f05
--- /dev/null
+++ b/src/stages/mod.rs
@@ -0,0 +1,31 @@
+mod syntactic;
+mod syntactic_tokens;
+mod semantic;
+mod semantic_tokens;
+mod intermediate;
+mod intermediate_tokens;
+mod bytecode;
+mod bytecode_tokens;
+
+pub use syntactic::*;
+pub use syntactic_tokens::*;
+pub use semantic::*;
+pub use semantic_tokens::*;
+pub use intermediate::*;
+pub use intermediate_tokens::*;
+pub use bytecode::*;
+pub use bytecode_tokens::*;
+
+
+/// Print a formatted line to standard output, preceded by indentation.
+///
+/// The first argument is the indentation level and the remaining arguments
+/// are passed to `println!` unchanged. The indentation string is printed once
+/// per level before the line.
+#[macro_export]
+macro_rules! indent {
+ // A literal `0` needs no indentation loop.
+ (0, $($tokens:tt)*) => {{
+ println!($($tokens)*);
+ }};
+ ($indent:expr, $($tokens:tt)*) => {{
+ for _ in 0..$indent { print!(" "); }
+ println!($($tokens)*);
+ }};
+}
+
+
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs
new file mode 100644
index 0000000..e225608
--- /dev/null
+++ b/src/stages/semantic.rs
@@ -0,0 +1,478 @@
+use crate::*;
+
+use std::collections::VecDeque;
+
+
+/// Parse a list of syntactic tokens into semantic tokens.
+///
+/// Parsing starts with no namespace. Returns the semantic tokens if no errors
+/// were recorded, otherwise returns every recorded error.
+pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> {
+ SemanticParser::from(syntactic, Namespace::None).parse()
+}
+
+/// The namespace used to resolve local symbols into full names.
+#[derive(Clone)]
+enum Namespace {
+ /// Inside the definition of the named macro.
+ Macro(String),
+ /// Following the definition of the named global label.
+ Label(String),
+ /// No namespace, so local symbols cannot be resolved.
+ None,
+}
+
+
+/// Parser that converts a stream of syntactic tokens into semantic tokens.
+struct SemanticParser {
+ /// Namespace currently used to resolve local symbols.
+ namespace: Namespace,
+ /// Syntactic tokens that have not been consumed yet.
+ syntactic: SyntacticTokenStream,
+ /// Semantic tokens produced so far by `parse`.
+ semantic: Vec<Tracked<SemanticToken>>,
+ /// Errors recorded so far.
+ errors: Vec<Tracked<SemanticError>>,
+}
+
+impl SemanticParser {
+ /// Create a parser over the given tokens, starting in the given namespace
+ /// with no output tokens and no errors.
+ pub fn from(syntactic: Vec<Tracked<SyntacticToken>>, namespace: Namespace) -> Self {
+ Self {
+ namespace,
+ syntactic: SyntacticTokenStream::from(syntactic),
+ semantic: Vec::new(),
+ errors: Vec::new(),
+ }
+ }
+
+ /// Absorb the results of a finished child parser.
+ ///
+ /// The child's errors are appended to this parser's errors. The child's
+ /// namespace is adopted as well, unless it is a macro namespace.
+ fn pull_from(&mut self, mut other: SemanticParser) {
+     self.errors.append(&mut other.errors);
+     match other.namespace {
+         Namespace::Macro(_) => {}
+         namespace => self.namespace = namespace,
+     }
+ }
+
+ /// Resolve the symbol of a label definition into a full label name.
+ ///
+ /// A global label becomes the current label namespace, but is an error
+ /// inside a macro definition. A local label is prefixed with the current
+ /// namespace, using `:` after a macro name and `/` after a label name, and
+ /// is an error when there is no namespace. Errors are recorded at `source`
+ /// and `None` is returned.
+ fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> {
+ match symbol {
+ ScopedSymbol::Global(name) => match &self.namespace {
+ Namespace::Macro(_) => {
+ let error = SemanticError::LabelInMacroDefinition;
+ self.errors.push(Tracked::from(error, source.to_owned()));
+ None
+ }
+ Namespace::Label(_) | Namespace::None => {
+ // Later local symbols resolve relative to this label.
+ self.namespace = Namespace::Label(name.clone());
+ Some(name)
+ }
+ }
+ ScopedSymbol::Local(name) => match &self.namespace {
+ Namespace::Macro(macro_ns) => {
+ Some(format!("{macro_ns}:{name}"))
+ }
+ Namespace::Label(label_ns) => {
+ Some(format!("{label_ns}/{name}"))
+ }
+ Namespace::None => {
+ let error = SemanticError::SublabelWithoutNamespace;
+ self.errors.push(Tracked::from(error, source.to_owned()));
+ None
+ }
+ }
+ }
+ }
+
+ /// Resolve a referenced symbol into a full name.
+ ///
+ /// A global symbol is returned unchanged and never alters the namespace.
+ /// A local symbol is prefixed with the current namespace, using `:` after
+ /// a macro name and `/` after a label name. A local symbol without any
+ /// namespace is recorded as an error at `source` and `None` is returned.
+ fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> {
+ match symbol {
+ ScopedSymbol::Global(name) => {
+ Some(name)
+ }
+ ScopedSymbol::Local(name) => match &self.namespace {
+ Namespace::Macro(macro_ns) => {
+ Some(format!("{macro_ns}:{name}"))
+ }
+ Namespace::Label(label_ns) => {
+ Some(format!("{label_ns}/{name}"))
+ }
+ Namespace::None => {
+ let error = SemanticError::LocalSymbolWithoutNamespace;
+ self.errors.push(Tracked::from(error, source.to_owned()));
+ None
+ }
+ }
+ }
+ }
+
+ /// Parse the remaining syntactic tokens as a full program.
+ ///
+ /// A macro definition is parsed by a child parser in the namespace of the
+ /// macro: leading argument definitions are pulled first and the remaining
+ /// tokens form the body. Every other token must parse as a block token.
+ /// Returns the semantic tokens if no errors were recorded, otherwise
+ /// returns all of the errors.
+ pub fn parse(mut self) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> {
+ while let Some(token) = self.syntactic.pop() {
+ if let SyntacticToken::MacroDefinition(definition) = token.value {
+ let namespace = Namespace::Macro(definition.name.to_string());
+ let mut parser = SemanticParser::from(definition.tokens, namespace);
+ let mut arguments = Vec::new();
+ while let Some(argument) = parser.pull_argument_definition() {
+ arguments.push(argument);
+ }
+ let body = parser.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody);
+ self.pull_from(parser);
+ let definition = MacroDefinition { name: definition.name, arguments, body };
+ let semantic = SemanticToken::MacroDefinition(definition);
+ self.semantic.push(Tracked::from(semantic, token.source));
+ } else {
+ // Put the token back so that it can be pulled as a block token.
+ self.syntactic.unpop(token);
+ if let Some(token) = self.pull_block_token(SemanticLocation::Program) {
+ let semantic = SemanticToken::BlockToken(token.value);
+ self.semantic.push(Tracked::from(semantic, token.source));
+ }
+ }
+ }
+ match self.errors.is_empty() {
+ true => Ok(self.semantic),
+ false => Err(self.errors),
+ }
+ }
+
+ /// Parse the remaining syntactic tokens as a macro definition body.
+ ///
+ /// An empty body becomes an empty block, and a body of exactly one token is
+ /// returned unchanged. A body of several tokens must be a block: block
+ /// tokens are concatenated, invocations are converted to block invocations,
+ /// and each integer is reported as an `ExpectedBlock` error and dropped.
+ fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody {
+     let mut tokens = Vec::new();
+     while !self.syntactic.is_empty() {
+         if let Some(token) = self.pull_macro_definition_body_token() {
+             tokens.push(token);
+         }
+     }
+     if tokens.is_empty() {
+         MacroDefinitionBody::Block(Vec::new())
+     } else if tokens.len() == 1 {
+         // A lone token keeps its own type (integer, block, or invocation).
+         tokens.pop().unwrap()
+     } else {
+         let mut block_tokens = Vec::new();
+         for token in tokens {
+             match token {
+                 MacroDefinitionBody::Integer(integer) => {
+                     // An integer was found where a block token is required,
+                     // so the error is the same one that `pull_block_token`
+                     // reports when it finds an integer.
+                     let error = SemanticError::ExpectedBlock(location);
+                     let tracked = Tracked::from(error, integer.source);
+                     self.errors.push(tracked);
+                 }
+                 MacroDefinitionBody::Block(mut tokens) => {
+                     block_tokens.append(&mut tokens);
+                 }
+                 MacroDefinitionBody::Invocation(invocation) => {
+                     // Convert invocation to a block invocation.
+                     let token = BlockToken::Invocation(invocation.value);
+                     block_tokens.push(Tracked::from(token, invocation.source));
+                 }
+             }
+         }
+         MacroDefinitionBody::Block(block_tokens)
+     }
+ }
+
+ /// Attempt to pull a MacroDefinitionBody token from the token stream.
+ /// Invalid values are noted and dropped, and a None is returned.
+ /// Each token is wrapped in its own MacroDefinitionBody: block tokens in a
+ /// one-element Block, integers in Integer, and symbols (with any arguments
+ /// that follow) in Invocation. Returns None without recording an error if
+ /// the stream is empty.
+ fn pull_macro_definition_body_token(&mut self) -> Option<MacroDefinitionBody> {
+ let token = self.syntactic.pop()?;
+ let source = token.source;
+ match token.value {
+ SyntacticToken::LabelDefinition(symbol) => {
+ let name = self.resolve_label_name(symbol, &source)?;
+ let token = BlockToken::LabelDefinition(name);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::MacroDefinition(_) => {
+ let error = SemanticError::MisplacedMacroDefinition;
+ self.errors.push(Tracked::from(error, source));
+ None
+ }
+ SyntacticToken::IntegerLiteral(value) => {
+ let token = IntegerToken::IntegerLiteral(value);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Integer(tracked))
+ }
+ SyntacticToken::StringLiteral(_) => {
+ let error = SemanticError::MisplacedStringLiteral;
+ self.errors.push(Tracked::from(error, source));
+ None
+ }
+ SyntacticToken::WordTemplate(word_template) => {
+ let token = BlockToken::WordTemplate(word_template);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::BlockLiteral(tokens) => {
+ // The contents are parsed by a child parser that starts in the
+ // current namespace; its errors and namespace are pulled back.
+ let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+ let tokens = parser.parse_block();
+ self.pull_from(parser);
+ let token = BlockToken::Block(tokens);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::Expression(tokens) => {
+ let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+ let expression = parser.parse_expression();
+ self.pull_from(parser);
+ let token = IntegerToken::Expression(expression);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Integer(tracked))
+ }
+ SyntacticToken::Symbol(symbol) => {
+ let name = self.resolve_symbol_name(symbol, &source)?;
+ let arguments = self.pull_all_invocation_arguments();
+ // Extend invocation source span to cover all arguments.
+ let mut source = source;
+ if let Some(last) = arguments.last() {
+ source.in_merged.end = last.source.in_merged.end;
+ if let Some(last_in_source) = &last.source.in_source {
+ if let Some(in_source) = &mut source.in_source {
+ in_source.end = last_in_source.end.clone();
+ }
+ }
+ }
+ let invocation = Invocation { name, arguments };
+ let tracked = Tracked::from(invocation, source);
+ Some(MacroDefinitionBody::Invocation(tracked))
+ }
+ SyntacticToken::Separator => {
+ let error = SemanticError::MisplacedSeparator;
+ self.errors.push(Tracked::from(error, source));
+ None
+ }
+ SyntacticToken::Condition => {
+ // The predicate and body are pulled from the tokens that follow.
+ let conditional = self.pull_conditional_block()?;
+ let token = BlockToken::ConditionalBlock(Box::new(conditional));
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ SyntacticToken::Pin => {
+ // The address is the integer token that follows the pin.
+ let integer = self.pull_integer_token(SemanticLocation::PinAddress)?;
+ let token = BlockToken::PinnedAddress(integer);
+ let tracked = Tracked::from(token, source);
+ Some(MacroDefinitionBody::Block(vec![tracked]))
+ }
+ }
+ }
+
+ /// Attempt to pull an integer token from the token stream.
+ /// Invalid values are noted and dropped, and a None is returned.
+ /// An invocation is accepted and treated as an integer invocation. A block
+ /// token is reported as an ExpectedInteger error for the given location.
+ fn pull_integer_token(&mut self, location: SemanticLocation) -> Option<Tracked<IntegerToken>> {
+ match self.pull_macro_definition_body_token()? {
+ MacroDefinitionBody::Integer(integer) => {
+ Some(integer)
+ }
+ MacroDefinitionBody::Invocation(invocation) => {
+ // Convert invocation to an integer invocation.
+ let token = IntegerToken::Invocation(invocation.value);
+ Some(Tracked::from(token, invocation.source))
+ }
+ MacroDefinitionBody::Block(mut tokens) => {
+ // A pulled body token always wraps exactly one block token.
+ assert_eq!(tokens.len(), 1);
+ let token = tokens.pop().unwrap();
+ let error = SemanticError::ExpectedInteger(location);
+ self.errors.push(Tracked::from(error, token.source));
+ None
+ }
+ }
+ }
+
+ /// Attempt to pull a BlockToken from the token stream.
+ /// Invalid values are noted and dropped, and a None is returned.
+ /// An invocation is accepted and treated as a block invocation. An integer
+ /// is reported as an ExpectedBlock error for the given location.
+ fn pull_block_token(&mut self, location: SemanticLocation) -> Option<Tracked<BlockToken>> {
+ match self.pull_macro_definition_body_token()? {
+ MacroDefinitionBody::Block(mut tokens) => {
+ // A pulled body token always wraps exactly one block token.
+ assert_eq!(tokens.len(), 1);
+ tokens.pop()
+ }
+ MacroDefinitionBody::Invocation(invocation) => {
+ // Convert invocation to a block invocation.
+ let token = BlockToken::Invocation(invocation.value);
+ Some(Tracked::from(token, invocation.source))
+ }
+ MacroDefinitionBody::Integer(integer) => {
+ let error = SemanticError::ExpectedBlock(location);
+ self.errors.push(Tracked::from(error, integer.source));
+ None
+ }
+ }
+ }
+
+ /// Consume every remaining syntactic token as the contents of a block.
+ /// Tokens that fail to parse are skipped; their errors are already recorded.
+ fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> {
+     let mut block = Vec::new();
+     loop {
+         if self.syntactic.is_empty() {
+             return block;
+         }
+         // Extending with an `Option` pushes the token only if there is one.
+         block.extend(self.pull_block_token(SemanticLocation::BlockLiteral));
+     }
+ }
+
+ /// Consume every remaining syntactic token as a list of integer tokens.
+ /// Tokens that fail to parse are skipped; their errors are already recorded.
+ fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> {
+     let mut integers = Vec::new();
+     while !self.syntactic.is_empty() {
+         match self.pull_integer_token(location) {
+             Some(integer) => integers.push(integer),
+             None => continue,
+         }
+     }
+     integers
+ }
+
+ /// Consume every remaining syntactic token as the contents of an expression.
+ ///
+ /// An invocation without arguments whose name is a recognised operator
+ /// becomes an operator token. Everything else becomes an integer token.
+ fn parse_expression(&mut self) -> Expression {
+     let tokens = self
+         .parse_integer_list(SemanticLocation::Expression)
+         .into_iter()
+         .map(|integer| {
+             let token = match integer.value {
+                 IntegerToken::Invocation(invocation) if invocation.arguments.is_empty() => {
+                     match Operator::from_str(&invocation.name) {
+                         Some(operator) => ExpressionToken::Operator(operator),
+                         // Not an operator name, so it stays an invocation.
+                         None => ExpressionToken::IntegerToken(
+                             Box::new(IntegerToken::Invocation(invocation)),
+                         ),
+                     }
+                 }
+                 other => ExpressionToken::IntegerToken(Box::new(other)),
+             };
+             Tracked::from(token, integer.source)
+         })
+         .collect();
+     Expression { tokens }
+ }
+
+ /// Attempt to pull a conditional block from the token stream.
+ /// Invalid values are noted and dropped, and a None is returned.
+ /// The predicate is pulled first as an integer token, then the body as a
+ /// block token. If the predicate fails, the body is not pulled.
+ fn pull_conditional_block(&mut self) -> Option<ConditionalBlock> {
+ let predicate = self.pull_integer_token(SemanticLocation::ConditionPredicate)?;
+ let body = self.pull_block_token(SemanticLocation::ConditionBody)?;
+ Some(ConditionalBlock { predicate, body })
+ }
+
+ /// Attempt to pull an invocation argument from the token stream.
+ /// An argument is a separator followed by a single token. If the next token
+ /// is not a separator, nothing is consumed and None is returned, meaning
+ /// that no arguments remain. Otherwise the separator and the token after it
+ /// are both consumed; a token that is not a valid argument is recorded as
+ /// an error and None is returned.
+ fn pull_invocation_argument(&mut self) -> Option<Tracked<InvocationArgument>> {
+ self.syntactic.pop_if(is_separator)?;
+ let token = self.syntactic.pop()?;
+ let source = token.source;
+ match token.value {
+ SyntacticToken::StringLiteral(string_literal) => {
+ let argument = InvocationArgument::String(string_literal);
+ Some(Tracked::from(argument, source))
+ }
+ SyntacticToken::IntegerLiteral(value) => {
+ let integer = IntegerToken::IntegerLiteral(value);
+ let argument = InvocationArgument::IntegerToken(integer);
+ Some(Tracked::from(argument, source))
+ }
+ SyntacticToken::Expression(tokens) => {
+ let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+ let expression = parser.parse_expression();
+ self.pull_from(parser);
+ let integer = IntegerToken::Expression(expression);
+ let argument = InvocationArgument::IntegerToken(integer);
+ Some(Tracked::from(argument, source))
+ }
+ SyntacticToken::BlockLiteral(tokens) => {
+ let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+ let tokens = parser.parse_block();
+ self.pull_from(parser);
+ let block = BlockToken::Block(tokens);
+ let argument = InvocationArgument::BlockToken(block);
+ Some(Tracked::from(argument, source))
+ }
+ SyntacticToken::Symbol(symbol) => {
+ // A symbol used as an argument is an invocation with no arguments.
+ let name = self.resolve_symbol_name(symbol, &source)?;
+ let invocation = Invocation { name, arguments: Vec::new() };
+ let argument = InvocationArgument::Invocation(invocation);
+ Some(Tracked::from(argument, source))
+ }
+ SyntacticToken::WordTemplate(word_template) => {
+ let block = BlockToken::WordTemplate(word_template);
+ let argument = InvocationArgument::BlockToken(block);
+ Some(Tracked::from(argument, source))
+ }
+ _ => {
+ let error = SemanticError::InvalidInvocationArgument;
+ self.errors.push(Tracked::from(error, source));
+ return None;
+ }
+ }
+ }
+
+ /// Pull invocation arguments from the token stream until one cannot be
+ /// pulled, returning them in order.
+ fn pull_all_invocation_arguments(&mut self) -> Vec<Tracked<InvocationArgument>> {
+     std::iter::from_fn(|| self.pull_invocation_argument()).collect()
+ }
+
+ /// Attempt to pull an argument definition from the token stream.
+ /// A definition is a separator followed by a single token. If the next token
+ /// is not a separator, nothing is consumed and None is returned, meaning
+ /// that no arguments remain. Otherwise the separator and the token after it
+ /// are both consumed. A global symbol defines an integer argument, and a
+ /// block literal holding exactly one global symbol defines a block argument.
+ /// Anything else is recorded as an error and None is returned.
+ fn pull_argument_definition(&mut self) -> Option<Tracked<ArgumentDefinition>> {
+ self.syntactic.pop_if(is_separator)?;
+ let token = self.syntactic.pop()?;
+ let source = token.source;
+ match token.value {
+ SyntacticToken::Symbol(ScopedSymbol::Global(name)) => {
+ let variant = ArgumentType::Integer;
+ let definition = ArgumentDefinition { name, variant };
+ return Some(Tracked::from(definition, source));
+ }
+ SyntacticToken::BlockLiteral(mut tokens) => {
+ if tokens.len() == 1 {
+ let token = tokens.pop().unwrap();
+ if let SyntacticToken::Symbol(ScopedSymbol::Global(name)) = token.value {
+ let variant = ArgumentType::Block;
+ let definition = ArgumentDefinition { name, variant };
+ return Some(Tracked::from(definition, source));
+ }
+ }
+ }
+ _ => (),
+ };
+ // Every valid form has returned by now.
+ let error = SemanticError::InvalidArgumentDefinition;
+ self.errors.push(Tracked::from(error, source));
+ return None;
+ }
+}
+
+
+
+/// A queue of syntactic tokens that is consumed from the front.
+struct SyntacticTokenStream {
+    tokens: VecDeque<Tracked<SyntacticToken>>,
+}
+
+impl SyntacticTokenStream {
+    /// Build a stream from anything that converts into a queue of tokens.
+    pub fn from<T: Into<VecDeque<Tracked<SyntacticToken>>>>(tokens: T) -> Self {
+        let tokens = tokens.into();
+        Self { tokens }
+    }
+
+    /// Remove and return the next token, if there is one.
+    pub fn pop(&mut self) -> Option<Tracked<SyntacticToken>> {
+        self.tokens.pop_front()
+    }
+
+    /// Remove and return the next token only if it satisfies `predicate`.
+    /// Returns `None` without consuming anything otherwise.
+    pub fn pop_if(&mut self, predicate: fn(&Tracked<SyntacticToken>) -> bool) -> Option<Tracked<SyntacticToken>> {
+        let next = self.tokens.front()?;
+        if predicate(next) {
+            self.tokens.pop_front()
+        } else {
+            None
+        }
+    }
+
+    /// Put a token back at the front, so that it is the next one popped.
+    pub fn unpop(&mut self, token: Tracked<SyntacticToken>) {
+        self.tokens.push_front(token);
+    }
+
+    /// Whether every token has been consumed.
+    pub fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
+    }
+}
+
+
+/// Whether the token is a separator.
+fn is_separator(token: &Tracked<SyntacticToken>) -> bool {
+    matches!(token.value, SyntacticToken::Separator)
+}
diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs
new file mode 100644
index 0000000..dfbea1a
--- /dev/null
+++ b/src/stages/semantic_tokens.rs
@@ -0,0 +1,296 @@
+use crate::*;
+
+
+/// A semantic token: either a macro definition or a block token.
+pub enum SemanticToken {
+ MacroDefinition(MacroDefinition),
+ BlockToken(BlockToken),
+}
+
+/// A macro definition: a tracked name, the argument definitions, and a body.
+pub struct MacroDefinition {
+ pub name: Tracked<String>,
+ pub arguments: Vec<Tracked<ArgumentDefinition>>,
+ pub body: MacroDefinitionBody,
+}
+
+/// The name and type of a single argument of a macro definition.
+pub struct ArgumentDefinition {
+ pub name: String,
+ pub variant: ArgumentType,
+}
+
+/// The type of a macro argument.
+#[derive(PartialEq)]
+pub enum ArgumentType {
+    Integer,
+    Block,
+}
+
+impl std::fmt::Display for ArgumentType {
+    /// Writes the type as a noun phrase with its article.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let phrase = match self {
+            Self::Integer => "an integer",
+            Self::Block => "a block",
+        };
+        f.write_str(phrase)
+    }
+}
+
+/// The body of a macro definition: a single integer, a list of block
+/// tokens, or a single invocation.
+pub enum MacroDefinitionBody {
+ Integer(Tracked<IntegerToken>),
+ Block(Vec<Tracked<BlockToken>>),
+ Invocation(Tracked<Invocation>),
+}
+
+/// A conditional block: an integer predicate paired with a block body.
+pub struct ConditionalBlock {
+ pub predicate: Tracked<IntegerToken>,
+ pub body: Tracked<BlockToken>,
+}
+
+/// An integer value: a literal, an expression, or an invocation.
+pub enum IntegerToken {
+ IntegerLiteral(isize),
+ Expression(Expression),
+ Invocation(Invocation),
+}
+
+/// An expression, stored as a flat list of tracked expression tokens.
+pub struct Expression {
+ pub tokens: Vec<Tracked<ExpressionToken>>,
+}
+
+/// A single element of an expression: an integer token, an invocation, or
+/// an operator.
+pub enum ExpressionToken {
+ // Boxed because IntegerToken can itself contain an Expression.
+ IntegerToken(Box<IntegerToken>),
+ Invocation(Invocation),
+ Operator(Operator),
+}
+
+/// A token that can appear inside a block.
+pub enum BlockToken {
+ LabelDefinition(String),
+ PinnedAddress(Tracked<IntegerToken>),
+ // Boxed because a ConditionalBlock contains a BlockToken as its body.
+ ConditionalBlock(Box<ConditionalBlock>),
+ WordTemplate(WordTemplate),
+ Block(Vec<Tracked<BlockToken>>),
+ Invocation(Invocation),
+}
+
+/// An invocation: the invoked name and the tracked arguments passed to it.
+pub struct Invocation {
+ pub name: String,
+ pub arguments: Vec<Tracked<InvocationArgument>>,
+}
+
+/// A value passed as an argument to an invocation: a string literal, an
+/// integer token, a block token, or another invocation.
+pub enum InvocationArgument {
+ String(StringLiteral),
+ IntegerToken(IntegerToken),
+ BlockToken(BlockToken),
+ Invocation(Invocation),
+}
+
+/// Semantic errors. The user-facing message for each variant is produced
+/// by `report_semantic_error`.
+pub enum SemanticError {
+ // A token was used somewhere it is not allowed.
+ MisplacedStringLiteral,
+ MisplacedListLiteral,
+ MisplacedSeparator,
+ MisplacedMacroDefinition,
+
+ // A value of the wrong type was found at the given location.
+ ExpectedInteger(SemanticLocation),
+ ExpectedBlock(SemanticLocation),
+
+ // A malformed argument definition or invocation argument.
+ InvalidArgumentDefinition,
+ InvalidInvocationArgument,
+
+ // Problems with labels, sublabels and local symbols.
+ LabelInMacroDefinition,
+ SublabelWithoutNamespace,
+ LocalSymbolWithoutNamespace,
+}
+
+/// A place in the program where a value was expected; carried by the
+/// `ExpectedInteger` and `ExpectedBlock` errors.
+#[derive(Clone, Copy)]
+pub enum SemanticLocation {
+    MacroDefinitionBody,
+    Expression,
+    ConditionPredicate,
+    ConditionBody,
+    Program,
+    BlockLiteral,
+    PinAddress,
+}
+
+impl std::fmt::Display for SemanticLocation {
+    /// Writes the location as a phrase that completes a sentence such as
+    /// "An integer value was expected ...".
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let phrase = match self {
+            Self::MacroDefinitionBody => "inside the body of this macro definition",
+            Self::Expression => "inside this expression",
+            Self::ConditionPredicate => "as the predicate of this conditional block",
+            Self::ConditionBody => "as the body of this conditional block",
+            Self::Program => "at the outermost level of the program",
+            Self::BlockLiteral => "inside this block literal",
+            Self::PinAddress => "as the address of this pin",
+        };
+        f.write_str(phrase)
+    }
+}
+
+
+/// Reports every semantic error in `errors`, in order, against `source_code`.
+pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) {
+    errors.iter().for_each(|error| report_semantic_error(error, source_code));
+}
+
+/// Reports a single semantic error as an error-level source issue, using
+/// the span the error was tracked at as context.
+fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) {
+    let context = Context { source_code: &source_code, source: &error.source };
+    // Holds the message for the variants that interpolate a value.
+    let formatted: String;
+    // The fixed messages are plain string literals, not format strings, so
+    // braces in them must be written unescaped.
+    let message: &str = match &error.value {
+        SemanticError::MisplacedStringLiteral =>
+            "A string literal can only be used as an invocation argument",
+        SemanticError::MisplacedListLiteral =>
+            "A list literal can only be used as an invocation argument",
+        SemanticError::MisplacedSeparator =>
+            "A separator can only be used to construct an argument list",
+        SemanticError::MisplacedMacroDefinition =>
+            "A macro definition must be used at the outermost level of the program",
+
+        SemanticError::ExpectedInteger(location) => {
+            formatted = format!("An integer value was expected {location}");
+            &formatted
+        }
+        SemanticError::ExpectedBlock(location) => {
+            formatted = format!("A block value was expected {location}");
+            &formatted
+        }
+
+        SemanticError::InvalidArgumentDefinition =>
+            "Argument definitions must be in the form 'name' or '{name}'",
+        SemanticError::InvalidInvocationArgument =>
+            "This token cannot be used in an invocation argument",
+
+        SemanticError::LabelInMacroDefinition =>
+            "Only sublabels can be defined inside macro definitions",
+        SemanticError::SublabelWithoutNamespace =>
+            "Sublabel was not defined inside a macro definition or after a label",
+        SemanticError::LocalSymbolWithoutNamespace =>
+            "Local symbol was not defined inside a macro definition or after a label",
+    };
+
+    report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Prints a semantic token as an indented tree, starting at indentation
+/// level `i`.
+///
+/// A macro definition prints its name, then its arguments and body one
+/// level deeper. A block token is printed at level `i`.
+pub fn print_semantic_token(i: usize, token: &SemanticToken) {
+    match token {
+        SemanticToken::MacroDefinition(definition) => {
+            indent!(i, "MacroDefinition({})", definition.name);
+            for argument in &definition.arguments {
+                print_argument_definition(i+1, argument);
+            }
+            match &definition.body {
+                MacroDefinitionBody::Integer(integer) => {
+                    print_integer_token(i+1, integer)
+                }
+                MacroDefinitionBody::Block(tokens) => {
+                    print_block(i+1, tokens);
+                }
+                MacroDefinitionBody::Invocation(invocation) => {
+                    print_invocation(i+1, invocation);
+                }
+            }
+        }
+        // Honour the requested indentation rather than always printing at level 0.
+        SemanticToken::BlockToken(block) => print_block_token(i, block),
+    }
+}
+
+/// Prints an argument definition with its name and type at indentation level `i`.
+fn print_argument_definition(i: usize, argument: &ArgumentDefinition) {
+    let kind = match argument.variant {
+        ArgumentType::Integer => "integer",
+        ArgumentType::Block => "block",
+    };
+    indent!(i, "Argument({}, {kind})", argument.name)
+}
+
+/// Prints a block token at indentation level `i`, with any nested tokens
+/// printed at deeper levels.
+fn print_block_token(i: usize, block: &BlockToken) {
+    match block {
+        BlockToken::LabelDefinition(name) => {
+            indent!(i, "LabelDefinition({name})")
+        }
+        BlockToken::PinnedAddress(address) => {
+            indent!(i, "PinnedAddress");
+            print_integer_token(i+1, address);
+        }
+        BlockToken::ConditionalBlock(conditional) => {
+            // Predicate and body each get their own heading one level down.
+            indent!(i, "ConditionalBlock");
+            indent!(i+1, "Predicate");
+            print_integer_token(i+2, &conditional.predicate);
+            indent!(i+1, "Body");
+            print_block_token(i+2, &conditional.body);
+        }
+        BlockToken::WordTemplate(word_template) => {
+            indent!(i, "WordTemplate({word_template})")
+        }
+        BlockToken::Block(tokens) => {
+            print_block(i, tokens);
+        }
+        BlockToken::Invocation(invocation) => {
+            print_invocation(i, invocation)
+        }
+    }
+}
+
+/// Prints a "Block" heading at level `i`, then every token one level deeper.
+fn print_block(i: usize, tokens: &[Tracked<BlockToken>]) {
+    indent!(i, "Block");
+    tokens.iter().for_each(|token| print_block_token(i+1, token));
+}
+
+/// Prints an invocation's name at level `i`, then each argument one level deeper.
+fn print_invocation(i: usize, invocation: &Invocation) {
+    indent!(i, "Invocation({})", invocation.name);
+    invocation.arguments.iter().for_each(|argument| print_invocation_argument(i+1, argument));
+}
+
+/// Prints an invocation argument at indentation level `i`, delegating to
+/// the printer for the kind of value it holds.
+fn print_invocation_argument(i: usize, argument: &InvocationArgument) {
+    match argument {
+        InvocationArgument::Invocation(invocation) => print_invocation(i, invocation),
+        InvocationArgument::BlockToken(block) => print_block_token(i, block),
+        InvocationArgument::IntegerToken(integer) => print_integer_token(i, integer),
+        InvocationArgument::String(string_literal) => {
+            indent!(i, "String({string_literal})")
+        }
+    }
+}
+
+/// Prints an integer token at indentation level `i`.
+fn print_integer_token(i: usize, integer: &IntegerToken) {
+    match integer {
+        IntegerToken::Invocation(invocation) => print_invocation(i, invocation),
+        IntegerToken::Expression(expression) => print_expression(i, expression),
+        IntegerToken::IntegerLiteral(value) => {
+            indent!(i, "IntegerValue({value})")
+        }
+    }
+}
+
+/// Prints an "Expression" heading at level `i`, then each of the
+/// expression's tokens one level deeper, in order.
+fn print_expression(i: usize, expression: &Expression) {
+    indent!(i, "Expression");
+    let inner = i+1;
+    for token in expression.tokens.iter().map(|tracked| &tracked.value) {
+        match token {
+            ExpressionToken::Operator(operator) => {
+                indent!(inner, "Operator({operator})")
+            }
+            ExpressionToken::Invocation(invocation) => {
+                print_invocation(inner, invocation);
+            }
+            ExpressionToken::IntegerToken(integer) => {
+                print_integer_token(inner, integer)
+            }
+        }
+    }
+}
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..2e7f959
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,323 @@
+use crate::*;
+
+use assembler::Tokeniser;
+
+
+/// Parses `source_code` into tracked syntactic tokens, or returns every
+/// syntactic error that was found.
+///
+/// `path` is handed to the tokeniser together with the source code.
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let tokeniser = Tokeniser::new(source_code, path);
+    parse_syntactic_from_tokeniser(tokeniser)
+}
+
+/// Consumes a tokeniser and returns the tracked syntactic tokens it yields.
+///
+/// Parsing does not stop at the first problem: each error is recorded and the
+/// loop moves on to the next token. If any error was recorded the whole list
+/// of errors is returned instead of the tokens. Blocks, expressions and macro
+/// definition bodies are parsed by calling this function recursively on a
+/// child tokeniser.
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']);
+ let mut tokens = Vec::new();
+ let mut errors = Vec::new();
+
+ // Records an error and skips straight to the next iteration of the main
+ // loop. The one-argument form uses the tokeniser's current source span.
+ macro_rules! push_err {
+ ($error:expr) => {{
+ push_err!($error, t.get_source());
+ }};
+ ($error:expr, $source:expr) => {{
+ errors.push(Tracked::from($error, $source));
+ continue;
+ }};
+ }
+
+ loop {
+ t.eat_whitespace();
+ t.mark_start();
+ let Some(c) = t.eat_char() else { break };
+ // The first character decides which kind of token is parsed.
+ let token = match c {
+ // String literal: everything up to the next '"'.
+ '"' => {
+ let source = t.get_source();
+ t.mark_child();
+ let is_any_close = |t: &mut Tokeniser| {
+ t.eat_char() == Some('"')
+ };
+ if let Some(_) = t.track_until(is_any_close) {
+ let child = t.tokenise_child_span();
+ SyntacticToken::StringLiteral(parse_string_literal(child))
+ } else {
+ push_err!(SyntacticError::UnterminatedStringLiteral, source);
+ }
+ }
+ // Character literal: must hold exactly one character, which becomes
+ // an integer literal.
+ '\'' => {
+ let source = t.get_source();
+ let is_any_close = |t: &mut Tokeniser| {
+ t.eat_char() == Some('\'')
+ };
+ if let Some(string) = t.track_until(is_any_close) {
+ let mut chars: Vec<char> = string.chars().collect();
+ if chars.len() == 1 {
+ let value = parse_char(chars.pop().unwrap());
+ SyntacticToken::IntegerLiteral(value)
+ } else {
+ t.mark_end();
+ push_err!(SyntacticError::ExpectedSingleCharacter, t.get_source());
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedCharacterLiteral, source);
+ }
+ }
+
+ // Block literal: find the matching '}' while counting nested braces,
+ // then parse the enclosed span recursively.
+ '{' => {
+ let source = t.get_source();
+ t.mark_child();
+ let mut depth = 1;
+ let is_matching_close = |t: &mut Tokeniser| {
+ match t.eat_char() {
+ Some('{') => { depth += 1; false }
+ Some('}') => { depth -= 1; depth == 0 }
+ _ => false,
+ }
+ };
+ if let Some(_) = t.track_until(is_matching_close) {
+ let child = t.tokenise_child_span();
+ match parse_syntactic_from_tokeniser(child) {
+ Ok(tokens) => SyntacticToken::BlockLiteral(tokens),
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedBlock, source);
+ }
+ }
+ // Expression: same approach as a block literal, delimited by '[' and ']'.
+ '[' => {
+ let source = t.get_source();
+ t.mark_child();
+ let mut depth = 1;
+ let is_matching_close = |t: &mut Tokeniser| {
+ match t.eat_char() {
+ Some('[') => { depth += 1; false }
+ Some(']') => { depth -= 1; depth == 0 }
+ _ => false,
+ }
+ };
+ if let Some(_) = t.track_until(is_matching_close) {
+ let child = t.tokenise_child_span();
+ match parse_syntactic_from_tokeniser(child) {
+ Ok(tokens) => SyntacticToken::Expression(tokens),
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedExpression, source);
+ }
+ }
+ // Comment: parentheses nest, and a terminated comment produces no token.
+ '(' => {
+ let source = t.get_source();
+ let mut depth = 1;
+ let is_matching_close = |t: &mut Tokeniser| {
+ match t.eat_char() {
+ Some('(') => { depth += 1; false }
+ Some(')') => { depth -= 1; depth == 0 }
+ _ => false,
+ }
+ };
+ if let Some(string) = t.track_until(is_matching_close) {
+ // Check if the comment fills the entire line.
+ if t.start.position.column == 0 && t.end_of_line() {
+ // A whole-line comment whose text starts with ": " sets the
+ // tokeniser's embedded path, with the embedded first line
+ // being the line after the comment.
+ if let Some(path) = string.strip_prefix(": ") {
+ t.embedded_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start.position.line + 1;
+ }
+ }
+ continue;
+ } else {
+ push_err!(SyntacticError::UnterminatedComment, source);
+ }
+ }
+ // Macro definition: a name, then a body running up to the next ';'
+ // which is parsed recursively.
+ '%' => {
+ let name = t.eat_token();
+ let source = t.get_source();
+ t.mark_child();
+ let is_any_close = |t: &mut Tokeniser| t.eat_char() == Some(';');
+ if let Some(_) = t.track_until(is_any_close) {
+ let child = t.tokenise_child_span();
+ match parse_syntactic_from_tokeniser(child) {
+ Ok(tokens) => {
+ let name = Tracked::from(name, source);
+ let def = SyntacticMacroDefinition { name, tokens };
+ SyntacticToken::MacroDefinition(def)
+ }
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedMacroDefinition(name), source);
+ }
+ }
+
+ // A terminator seen here has no matching opener, because matched
+ // terminators are consumed by the arms above.
+ '}' => push_err!(SyntacticError::UnmatchedBlockTerminator),
+ ']' => push_err!(SyntacticError::UnmatchedExpressionTerminator),
+ ')' => push_err!(SyntacticError::UnmatchedCommentTerminator),
+ ';' => push_err!(SyntacticError::UnmatchedMacroTerminator),
+
+ '@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())),
+ '&' => SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())),
+ '~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())),
+ ':' => SyntacticToken::Separator,
+ '|' => SyntacticToken::Pin,
+ // NOTE(review): '?' is matched here but is not among the delimiters
+ // registered at the top of this function - confirm that is intended.
+ '?' => SyntacticToken::Condition,
+
+ // Word template: the rest of the token is parsed by parse_word_template.
+ '#' => {
+ t.mark_child();
+ t.eat_token();
+ let child = t.tokenise_child_span();
+ match parse_word_template(child) {
+ Ok(word_template) => SyntacticToken::WordTemplate(word_template),
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ },
+
+ // Anything else is an integer literal (0x hexadecimal, 0b binary, or
+ // decimal) or, failing that, a global symbol.
+ c => {
+ let token = format!("{c}{}", t.eat_token());
+ if let Some(hex_string) = token.strip_prefix("0x") {
+ match parse_integer_literal(hex_string, 16) {
+ Ok(value) => SyntacticToken::IntegerLiteral(value),
+ Err(_) => push_err!(SyntacticError::InvalidHexadecimalLiteral(token)),
+ }
+ } else if let Some(binary_string) = token.strip_prefix("0b") {
+ match parse_integer_literal(binary_string, 2) {
+ Ok(value) => SyntacticToken::IntegerLiteral(value),
+ Err(_) => push_err!(SyntacticError::InvalidBinaryLiteral(token)),
+ }
+ } else {
+ // Err(true) is reported as an invalid decimal literal, while
+ // Err(false) means the token is treated as a symbol name.
+ match parse_integer_literal(&token, 10) {
+ Ok(value) => SyntacticToken::IntegerLiteral(value),
+ Err(true) => push_err!(SyntacticError::InvalidDecimalLiteral(token)),
+ Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)),
+ }
+ }
+ }
+ };
+
+ // Only reached when a token was produced; error paths `continue` above.
+ t.mark_end();
+ tokens.push(Tracked::from(token, t.get_source()))
+ }
+ match errors.is_empty() {
+ true => Ok(tokens),
+ false => Err(errors),
+ }
+}
+
+
+/// Parses an unsigned integer literal in the given radix, ignoring underscores.
+///
+/// Returns `Ok(value)` when the digits form a number that fits in an `isize`.
+/// Returns `Err(true)` when the token is a number that is too large to
+/// represent, and `Err(false)` when the token is not a number at all.
+fn parse_integer_literal(token: &str, radix: u32) -> Result<isize, bool> {
+    let digits = token.replace('_', "");
+    match usize::from_str_radix(&digits, radix) {
+        // Fits in a usize, but may still be too large for an isize.
+        Ok(value) => isize::try_from(value).map_err(|_| true),
+        // Overflowing usize still means the token was numeric, so report it
+        // as too large rather than as a non-number.
+        Err(error) => Err(matches!(error.kind(), std::num::IntErrorKind::PosOverflow)),
+    }
+}
+
+
+/// Reads every remaining character from the tokeniser into a string literal,
+/// keeping both the text and a tracked integer value for each character.
+fn parse_string_literal(mut t: Tokeniser) -> StringLiteral {
+    let mut literal = StringLiteral { string: String::new(), chars: Vec::new() };
+    loop {
+        let Some(c) = t.eat_char() else { break };
+        literal.string.push(c);
+        let value = parse_char(c);
+        literal.chars.push(Tracked::from(value, t.get_source()));
+        // Restart the mark after taking each character's source.
+        t.mark_start();
+    }
+    literal
+}
+
+/// Converts a character to the integer value of its Unicode scalar value.
+fn parse_char(c: char) -> isize {
+    let scalar = u32::from(c);
+    scalar as isize
+}
+
+
+/// Parses the characters of a word template into a constant value, a bit
+/// width, and a list of named bit fields.
+///
+/// Underscores are ignored. Every other character contributes one bit: '0'
+/// and '1' set that bit of the constant value, and a run of the same
+/// alphabetic character forms a bit field named after that character. Any
+/// other character, and a field name used for a second field, is recorded as
+/// an error; if any error was recorded the errors are returned instead of
+/// the template.
+fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> {
+ let mut value = 0; // Value of the whole word template.
+ let mut value_width = 0; // Bit width of the whole word template.
+ let mut field_width = 0; // Width of the current bit field.
+ let mut field_name = '\0'; // Name of the current bit field.
+ let mut fields: Vec<Tracked<BitField>> = Vec::new();
+ let mut errors: Vec<Tracked<SyntacticError>> = Vec::new();
+
+ // Commits the current field with a shift of zero, or records an error if
+ // a field with the same name has already been committed.
+ macro_rules! push_field {
+ () => {
+ if fields.iter().any(|f| f.name == field_name) {
+ let error = SyntacticError::DuplicateFieldNameInWord(field_name);
+ errors.push(Tracked::from(error, t.get_source()));
+ } else {
+ let field = BitField { name: field_name, width: field_width, shift: 0};
+ fields.push(Tracked::from(field, t.get_source()));
+ }
+ };
+ }
+
+ while let Some(c) = t.eat_char() {
+ // Ignore underscores.
+ if c == '_' {
+ t.mark.undo();
+ continue;
+ }
+
+ // Add a bit to the value;
+ value <<= 1;
+ value_width += 1;
+ // Every field committed so far moves one bit further left.
+ for field in &mut fields {
+ field.shift += 1;
+ }
+
+ // Extend the current field.
+ if c == field_name {
+ field_width += 1;
+ continue;
+ }
+
+ // Commit the current field.
+ // The field is pushed with shift 0 even though the bit for `c` already
+ // sits to its right; the increment after the loop makes up for this.
+ if field_width > 0 {
+ t.mark_end_prev();
+ push_field!();
+ field_width = 0;
+ field_name = '\0';
+ }
+
+ // Parse bit literals.
+ if c == '0' {
+ continue;
+ }
+ if c == '1' {
+ value |= 1;
+ continue;
+ }
+
+ t.mark_start_prev();
+ if c.is_alphabetic() {
+ // Start a new field named after this character.
+ field_name = c;
+ field_width = 1;
+ continue;
+ } else {
+ t.mark_end();
+ let error = SyntacticError::InvalidCharacterInWord(c);
+ errors.push(Tracked::from(error, t.get_source()));
+ }
+ }
+
+ // Commit the final field.
+ // First add the one bit that each field committed inside the loop is
+ // still missing from its shift; the final field itself ends at bit zero.
+ for field in &mut fields {
+ field.shift += 1;
+ }
+ if field_width > 0 {
+ t.mark_end();
+ push_field!();
+ }
+
+ match errors.is_empty() {
+ true => Ok(WordTemplate { value, width: value_width, fields }),
+ false => Err(errors),
+ }
+}
diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs
new file mode 100644
index 0000000..eabf34b
--- /dev/null
+++ b/src/stages/syntactic_tokens.rs
@@ -0,0 +1,160 @@
+use crate::*;
+
+/// A token produced by `parse_syntactic`.
+pub enum SyntacticToken {
+ LabelDefinition(ScopedSymbol),
+ MacroDefinition(SyntacticMacroDefinition),
+
+ IntegerLiteral(isize),
+ StringLiteral(StringLiteral),
+ WordTemplate(WordTemplate),
+
+ // Both hold the tokens parsed from between their delimiters.
+ BlockLiteral(Vec<Tracked<SyntacticToken>>),
+ Expression(Vec<Tracked<SyntacticToken>>),
+
+ Symbol(ScopedSymbol),
+
+ Separator,
+ Condition,
+ Pin,
+}
+
+/// A macro definition at the syntactic level: a tracked name and the
+/// tokens that make up the definition.
+pub struct SyntacticMacroDefinition {
+ pub name: Tracked<String>,
+ pub tokens: Vec<Tracked<SyntacticToken>>,
+}
+
+/// A string literal, stored both as text and as one tracked integer value
+/// per character.
+pub struct StringLiteral {
+ pub string: String,
+ pub chars: Vec<Tracked<isize>>,
+}
+
+impl std::fmt::Display for StringLiteral {
+ /// Formats the literal by delegating to the `fmt` method of the inner string.
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ // NOTE(review): which trait `fmt` resolves to depends on the traits in
+ // scope, which are not visible here - presumably Display; confirm.
+ self.string.fmt(f)
+ }
+}
+
+/// A symbol name that is either local or global.
+pub enum ScopedSymbol {
+    Local(String),
+    Global(String),
+}
+
+impl std::fmt::Display for ScopedSymbol {
+    /// Writes the symbol name, prefixing local symbols with '~'.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let (prefix, name) = match self {
+            Self::Local(name) => ("~", name),
+            Self::Global(name) => ("", name),
+        };
+        write!(f, "{prefix}{name}")
+    }
+}
+
+
+/// Syntactic errors. The user-facing message for each variant is produced
+/// by `report_syntactic_error`.
+pub enum SyntacticError {
+ // An opening delimiter was never closed.
+ UnterminatedBlock,
+ UnterminatedExpression,
+ UnterminatedComment,
+ UnterminatedCharacterLiteral,
+ UnterminatedStringLiteral,
+ // Carries the name of the macro definition.
+ UnterminatedMacroDefinition(String),
+
+ // A closing delimiter was found with nothing to close.
+ UnmatchedBlockTerminator,
+ UnmatchedExpressionTerminator,
+ UnmatchedCommentTerminator,
+ UnmatchedMacroTerminator,
+
+ ExpectedSingleCharacter,
+
+ // Carry the offending character of a word template.
+ DuplicateFieldNameInWord(char),
+ InvalidCharacterInWord(char),
+
+ // Carry the offending token text.
+ InvalidDecimalLiteral(String),
+ InvalidHexadecimalLiteral(String),
+ InvalidBinaryLiteral(String),
+}
+
+
+/// Reports every syntactic error in `errors`, in order, against `source_code`.
+pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) {
+    errors.iter().for_each(|error| report_syntactic_error(error, source_code));
+}
+
+/// Reports a single syntactic error as an error-level source issue, using
+/// the span the error was tracked at as context.
+fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) {
+    let context = Context { source_code: &source_code, source: &error.source };
+    // Holds the message for the variants that interpolate a value.
+    let formatted: String;
+    // The fixed messages are plain string literals, not format strings, so
+    // braces in them must be written unescaped.
+    let message: &str = match &error.value {
+        SyntacticError::UnterminatedBlock =>
+            "Block was not terminated, add a '}' character to terminate",
+        SyntacticError::UnterminatedExpression =>
+            "Expression was not terminated, add a ']' character to terminate",
+        SyntacticError::UnterminatedComment =>
+            "Comment was not terminated, add a ')' character to terminate",
+        SyntacticError::UnterminatedCharacterLiteral =>
+            "Character was not terminated, add a ' character to terminate",
+        SyntacticError::UnterminatedStringLiteral =>
+            "String was not terminated, add a '\"' character to terminate",
+        SyntacticError::UnterminatedMacroDefinition(name) => {
+            formatted = format!("The '{name}' macro definition was not terminated, add a ';' character to terminate");
+            &formatted
+        }
+
+        SyntacticError::UnmatchedBlockTerminator =>
+            "Attempted to terminate a block, but no block was in progress",
+        SyntacticError::UnmatchedExpressionTerminator =>
+            "Attempted to terminate an expression, but no expression was in progress",
+        SyntacticError::UnmatchedCommentTerminator =>
+            "Attempted to terminate a comment, but no comment was in progress",
+        SyntacticError::UnmatchedMacroTerminator =>
+            "Attempted to terminate a macro definition, but no macro definition was in progress",
+
+        SyntacticError::ExpectedSingleCharacter =>
+            "A character literal must contain exactly one character",
+
+        SyntacticError::DuplicateFieldNameInWord(name) => {
+            formatted = format!("The field '{name}' has already been used in this word");
+            &formatted
+        }
+        SyntacticError::InvalidCharacterInWord(c) => {
+            formatted = format!("The character '{c}' cannot be used in a word");
+            &formatted
+        }
+
+        SyntacticError::InvalidDecimalLiteral(string) => {
+            formatted = format!("The string '{string}' is not a valid decimal literal");
+            &formatted
+        }
+        SyntacticError::InvalidHexadecimalLiteral(string) => {
+            formatted = format!("The string '{string}' is not a valid hexadecimal literal");
+            &formatted
+        }
+        SyntacticError::InvalidBinaryLiteral(string) => {
+            formatted = format!("The string '{string}' is not a valid binary literal");
+            &formatted
+        }
+    };
+
+    report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Prints a syntactic token as an indented tree, starting at indentation
+/// level `i`. Tokens nested inside macro definitions, block literals and
+/// expressions are printed one level deeper than their parent.
+pub fn print_syntactic_token(i: usize, token: &SyntacticToken) {
+    // Prints each child token one level below the current token.
+    let print_children = |children: &[Tracked<SyntacticToken>]| {
+        for child in children {
+            print_syntactic_token(i+1, child);
+        }
+    };
+
+    match token {
+        SyntacticToken::Separator => indent!(i, "Separator"),
+        SyntacticToken::Condition => indent!(i, "Condition"),
+        SyntacticToken::Pin => indent!(i, "Pin"),
+
+        SyntacticToken::Symbol(symbol) => indent!(i, "Symbol({symbol})"),
+        SyntacticToken::LabelDefinition(symbol) => indent!(i, "LabelDefinition({symbol})"),
+
+        SyntacticToken::IntegerLiteral(value) => indent!(i, "IntegerLiteral({value})"),
+        SyntacticToken::StringLiteral(literal) => indent!(i, "StringLiteral({literal})"),
+        SyntacticToken::WordTemplate(template) => indent!(i, "WordTemplate({template})"),
+
+        SyntacticToken::MacroDefinition(definition) => {
+            indent!(i, "MacroDefinition({})", definition.name);
+            print_children(&definition.tokens);
+        }
+        SyntacticToken::BlockLiteral(tokens) => {
+            indent!(i, "BlockLiteral");
+            print_children(tokens);
+        }
+        SyntacticToken::Expression(tokens) => {
+            indent!(i, "Expression");
+            print_children(tokens);
+        }
+    }
+}
diff --git a/src/tokens/assembler.rs b/src/tokens/assembler.rs
deleted file mode 100644
index 048062b..0000000
--- a/src/tokens/assembler.rs
+++ /dev/null
@@ -1,162 +0,0 @@
-use crate::*;
-
-
-#[derive(Clone)]
-pub enum AssembledToken {
- Word(AssembledWord),
- LabelDefinition(LabelDefinition),
- PinnedAddress(PinnedAddress),
- Error(AssemblerError),
-}
-
-#[derive(Clone)]
-pub struct AssembledWord {
- pub source: SourceSpan,
- pub value: usize,
- pub bits: usize,
- pub fields: Vec<AssembledField>,
- pub errors: Vec<AssemblerError>,
-}
-
-impl AssembledWord {
- pub fn count(&self) -> usize {
- // If there is at least one field, and all fields have empty string
- // values, then count will be zero. Else count will be at least one.
- let mut count = 0;
- let mut all_strings = !self.fields.is_empty();
- for field in &self.fields {
- if let IntegerArgument::String(string) = &field.value {
- count = std::cmp::max(count, string.chars.len());
- } else {
- all_strings = false;
- }
- }
- if !all_strings {
- count = std::cmp::max(count, 1);
- }
- return count;
- }
-}
-
-#[derive(Clone)]
-pub struct AssembledField {
- pub source: SourceSpan,
- pub value: IntegerArgument,
- /// Length of field in bits
- pub bits: usize,
- /// Distance to left-shift field in value
- pub shift: usize,
-}
-
-#[derive(Clone)]
-pub struct AssembledExpression {
- pub source: SourceSpan,
- pub tokens: Vec<AssembledExpressionToken>,
-}
-
-#[derive(Clone)]
-pub enum AssembledExpressionToken {
- Integer(TrackedInteger),
- LabelReference(Tracked<String>),
- Operator(Operator),
- Expression(Box<AssembledExpression>),
-}
-
-#[derive(Clone)]
-pub enum Argument {
- Integer(IntegerArgument),
- Block(Vec<AssembledToken>),
-}
-
-#[derive(Clone)]
-pub enum IntegerArgument {
- LabelReference(Tracked<String>),
- Integer(TrackedInteger),
- Expression(AssembledExpression),
- String(TrackedString),
-}
-
-#[derive(Clone)]
-pub struct AssemblerError {
- pub source: SourceSpan,
- pub variant: AssemblerErrorVariant,
-}
-
-#[derive(Clone, Debug)]
-pub enum AssemblerErrorVariant {
- DefinitionNotFound(String),
- NotAnInteger,
- NotABlock,
- IntegerInBlock,
- StringInExpression,
- /// expected, received
- IncorrectArgumentCount(usize, usize),
- /// expected, received, index
- IncorrectArgumentType(ArgumentVariant, ArgumentVariant),
-}
-
-// ------------------------------------------------------------------------ //
-
-macro_rules! indent {
- ($indent:expr => $($tokens:tt)*) => {{
- for _ in 0..$indent { print!(" "); }
- println!($($tokens)*);
- }};
-}
-
-pub fn print_assembled_tokens(tokens: &[AssembledToken]) {
- for token in tokens {
- match token {
- AssembledToken::LabelDefinition(definition) => {
- println!("LABEL {}", definition.name)
- }
- AssembledToken::PinnedAddress(address) => {
- println!("PINNED {}", address.address)
- }
- AssembledToken::Word(word) => {
- println!("WORD {:b}", word.value);
- for field in &word.fields {
- print!(" FIELD ({} << {}) ", field.bits, field.shift);
- match &field.value {
- IntegerArgument::LabelReference(name) => {
- println!("LABEL '{name}'");
- }
- IntegerArgument::Integer(integer) => {
- println!("INTEGER '{}'", integer.value);
- }
- IntegerArgument::String(string) => {
- println!("STRING {string}");
- }
- IntegerArgument::Expression(expr) => {
- println!("EXPRESSION");
- print_assembled_expression(2, expr);
- }
- }
- }
- }
- AssembledToken::Error(error) => {
- println!("ERROR {:?}", error.variant)
- }
- }
- }
-}
-
-fn print_assembled_expression(indent: usize, expr: &AssembledExpression) {
- for token in &expr.tokens {
- match token {
- AssembledExpressionToken::Integer(integer) => {
- indent!(indent => "INTEGER {}", integer.value)
- }
- AssembledExpressionToken::LabelReference(name) => {
- indent!(indent => "LABEL '{name}'")
- }
- AssembledExpressionToken::Operator(operator) => {
- indent!(indent => "OPERATOR {operator:?}")
- }
- AssembledExpressionToken::Expression(expr) => {
- indent!(indent => "EXPRESSION");
- print_assembled_expression(indent+1, expr);
- }
- }
- }
-}
diff --git a/src/tokens/bytecode.rs b/src/tokens/bytecode.rs
deleted file mode 100644
index 9ac340e..0000000
--- a/src/tokens/bytecode.rs
+++ /dev/null
@@ -1,49 +0,0 @@
-use crate::*;
-
-
-pub struct Bytecode {
- pub words: Vec<Word>,
- pub errors: Vec<BytecodeError>,
-}
-
-#[derive(Clone, Copy)]
-pub struct Word {
- pub bits: usize,
- pub value: usize,
-}
-
-impl std::fmt::Display for Word {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- for i in (0..self.bits).rev() {
- let is_first_bit = i+1 == self.bits;
- if !is_first_bit && (i+1) % 4 == 0 {
- write!(f, "_")?;
- }
- match (self.value >> i) & 1 {
- 0 => write!(f, "0")?,
- _ => write!(f, "1")?,
- }
- }
- if self.bits == 0 {
- write!(f, "0")?;
- }
- return Ok(());
- }
-}
-
-pub struct BytecodeError {
- pub source: SourceSpan,
- pub variant: BytecodeErrorVariant,
-}
-
-pub enum BytecodeErrorVariant {
- DefinitionNotFound(String),
- DuplicateLabelDefinition(String),
- /// pin, real
- PinnedAddressBacktrack(usize, usize),
- /// expected, received
- ValueTooLarge(usize, usize),
- StackUnderflow,
- MultipleReturnValues,
- NoReturnValue,
-}
diff --git a/src/tokens/expression.rs b/src/tokens/expression.rs
deleted file mode 100644
index 1d8a336..0000000
--- a/src/tokens/expression.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-use crate::*;
-
-
-#[derive(Clone)]
-pub struct Expression {
- pub source: SourceSpan,
- pub tokens: Vec<ExpressionToken>,
-}
-
-#[derive(Clone)]
-pub struct ExpressionToken {
- pub source: SourceSpan,
- pub variant: ExpressionTokenVariant,
-}
-
-#[derive(Clone)]
-pub enum ExpressionTokenVariant {
- Invocation(String),
- Literal(isize),
- Operator(Operator),
- Error(ExpressionParseError),
-}
-
-#[derive(Clone, Copy, Debug)]
-pub enum Operator {
- Equal,
- NotEqual,
- LessThan,
- GreaterThan,
- LessThanEqual,
- GreaterThanEqual,
- Add,
- Subtract,
- LeftShift,
- RightShift,
- And,
- Or,
- Xor,
- Not,
-}
-
-#[derive(Clone)]
-pub enum ExpressionParseError {
- InvalidHexadecimalLiteral(String),
-}
-
-impl std::fmt::Debug for Expression {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- for (i, token) in self.tokens.iter().enumerate() {
- let string = match &token.variant {
- ExpressionTokenVariant::Invocation(name) => name,
- ExpressionTokenVariant::Literal(value) => &value.to_string(),
- ExpressionTokenVariant::Operator(operator) => match operator {
- Operator::Equal => "=",
- Operator::NotEqual => "!=",
- Operator::LessThan => "<",
- Operator::GreaterThan => ">",
- Operator::LessThanEqual => "<=",
- Operator::GreaterThanEqual => ">=",
- Operator::Add => "+",
- Operator::Subtract => "-",
- Operator::LeftShift => "<<",
- Operator::RightShift => ">>",
- Operator::And => "&",
- Operator::Or => "|",
- Operator::Xor => "^",
- Operator::Not => "~",
- }
- ExpressionTokenVariant::Error(_) => "<error>",
- };
- match i {
- 0 => write!(f, "{string}")?,
- _ => write!(f, " {string}")?,
- }
- }
- return Ok(());
- }
-}
diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs
deleted file mode 100644
index 53ccc6e..0000000
--- a/src/tokens/mod.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-mod expression;
-mod packed_binary_literal;
-mod tracked_integer;
-mod tracked;
-
-pub use expression::*;
-pub use packed_binary_literal::*;
-pub use tracked_integer::*;
-pub use tracked::*;
-
-mod syntactic;
-mod semantic;
-mod assembler;
-mod bytecode;
-
-pub use syntactic::*;
-pub use semantic::*;
-pub use assembler::*;
-pub use bytecode::*;
diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs
deleted file mode 100644
index 225cd6b..0000000
--- a/src/tokens/semantic.rs
+++ /dev/null
@@ -1,192 +0,0 @@
-use crate::*;
-
-use indexmap::IndexMap;
-
-
-/// The entire semantic program, ready to generate bytecode.
-pub struct SemanticProgram {
- pub macro_definitions: IndexMap<String, MacroDefinition>,
- pub label_definitions: IndexMap<String, LabelDefinition>,
- pub body: Vec<SemanticToken>,
-}
-
-/// A symbol definition.
-pub struct MacroDefinition {
- pub source: SourceSpan,
- pub arguments: Vec<ArgumentDefinition>,
- pub value: Value,
- pub errors: Vec<SemanticParseError>,
-}
-
-pub struct ArgumentDefinition {
- pub name: String,
- pub source: SourceSpan,
- pub variant: ArgumentVariant,
-}
-
-#[derive(PartialEq, Clone, Copy, Debug)]
-pub enum ArgumentVariant {
- Integer,
- Block,
-}
-
-pub struct ArgumentInvocation {
- pub source: SourceSpan,
- pub value: Value,
-}
-
-pub enum Value {
- Integer(Integer),
- Block(Vec<SemanticToken>),
- Invocation(Invocation),
-}
-
-pub enum Integer {
- Literal(TrackedInteger),
- String(TrackedString),
- Expression(Expression),
- LabelReference(Tracked<String>),
-}
-
-pub enum SemanticToken {
- Word(PackedBinaryLiteral),
- Invocation(Invocation),
- LabelDefinition(LabelDefinition),
- PinnedAddress(PinnedAddress),
- Error(SemanticParseError),
-}
-
-pub struct Invocation {
- pub name: String,
- pub source: SourceSpan,
- pub arguments: Vec<ArgumentInvocation>,
- pub errors: Vec<SemanticParseError>,
-}
-
-#[derive(Clone)]
-pub struct LabelDefinition {
- pub source: SourceSpan,
- pub name: String,
-}
-
-#[derive(Clone)]
-pub struct PinnedAddress {
- pub source: SourceSpan,
- pub address: usize,
-}
-
-pub struct SemanticParseError {
- pub source: SourceSpan,
- pub variant: SemanticParseErrorVariant,
-}
-
-pub enum SemanticParseErrorVariant {
- UnterminatedMacroDefinition(String),
- UnterminatedBlock,
- InvalidToken,
-}
-
-
-impl std::fmt::Display for ArgumentVariant {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- match self {
- ArgumentVariant::Integer => write!(f, "integer"),
- ArgumentVariant::Block => write!(f, "block"),
- }
- }
-}
-
-// ------------------------------------------------------------------------ //
-
-macro_rules! indent {
- ($indent:expr => $($tokens:tt)*) => {{
- for _ in 0..$indent { print!(" "); }
- println!($($tokens)*);
- }};
-}
-
-impl SemanticProgram {
- pub fn print_definitions(&self) {
- for (name, definition) in &self.macro_definitions {
- let variant = match &definition.value {
- Value::Integer(_) => "INTEGER",
- Value::Block(_) => "BLOCK",
- Value::Invocation(_) => "INVOCATION",
- };
- println!("DEFINE {variant} '{name}'");
- for argument in &definition.arguments {
- self.print_argument_definition(argument);
- }
- match &definition.value {
- Value::Integer(integer) =>
- self.print_integer(1, integer),
- Value::Block(block) =>
- self.print_block(1, block),
- Value::Invocation(invocation) =>
- indent!(1 => "INVOCATION '{}'", invocation.name),
- };
- println!();
- }
-
- println!("LABELS");
- for (name, _) in &self.label_definitions {
- println!(" @{name}");
- }
- println!();
-
- self.print_block(0, &self.body);
- }
-
- fn print_argument_definition(&self, argument: &ArgumentDefinition) {
- let variant = match argument.variant {
- ArgumentVariant::Integer => "INTEGER",
- ArgumentVariant::Block => "BLOCK",
- };
- println!(" ARGUMENT {variant} '{}'", argument.name);
- }
-
- fn print_integer(&self, indent: usize, integer: &Integer) {
- match &integer {
- Integer::Literal(value) =>
- indent!(indent => "LITERAL {value}"),
- Integer::Expression(expr) =>
- indent!(indent => "EXPRESSION [{expr:?}]"),
- Integer::String(string) =>
- indent!(indent => "STRING '{string}'"),
- Integer::LabelReference(name) =>
- indent!(indent => "LABEL REFERENCE '{name}'"),
- }
- }
-
- fn print_block(&self, indent: usize, block: &[SemanticToken]) {
- indent!(indent => "BLOCK");
- for semantic_token in block {
- match &semantic_token {
- SemanticToken::Word(word) =>
- indent!(indent+1 => "WORD #{word}"),
- SemanticToken::Invocation(invocation) =>
- self.print_invocation(indent+1, invocation),
- SemanticToken::LabelDefinition(definition) =>
- indent!(indent+1 => "LABEL DEFINITION @{}", definition.name),
- SemanticToken::PinnedAddress(addr) =>
- indent!(indent+1 => "PINNED ADDRESS {}", addr.address),
- SemanticToken::Error(_) =>
- indent!(indent+1 => "ERROR"),
- }
- }
- }
-
- fn print_invocation(&self, indent: usize, invocation: &Invocation) {
- indent!(indent => "INVOCATION '{}'", invocation.name);
- for argument in &invocation.arguments {
- match &argument.value {
- Value::Integer(integer) =>
- self.print_integer(indent+1, integer),
- Value::Block(block) =>
- self.print_block(indent+1, block),
- Value::Invocation(invocation) =>
- self.print_invocation(indent+1, invocation),
- };
- }
- }
-}
diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs
deleted file mode 100644
index 780c950..0000000
--- a/src/tokens/syntactic.rs
+++ /dev/null
@@ -1,84 +0,0 @@
-use crate::*;
-
-
-pub struct SyntacticToken {
- pub source: SourceSpan,
- pub variant: SyntacticTokenVariant,
-}
-
-pub enum SyntacticTokenVariant {
- LabelDefinition(String),
- MacroDefinition(String),
- MacroDefinitionTerminator,
-
- IntegerLiteral(isize),
- PackedBinaryLiteral(PackedBinaryLiteral),
- PinnedAddress(usize),
-
- Expression(Expression),
-
- String(TrackedString),
-
- BlockOpen,
- BlockClose,
- Separator,
-
- Symbol(String),
-
- Error(SyntacticParseError),
-}
-
-#[derive(Clone)]
-pub struct TrackedString {
- pub source: SourceSpan,
- pub string: String,
- pub chars: Vec<Tracked<char>>,
-}
-
-impl std::fmt::Display for TrackedString {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- self.string.fmt(f)
- }
-}
-
-#[derive(Debug)]
-pub enum SyntacticParseError {
- InvalidHexadecimalLiteral(String),
- InvalidDecimalLiteral(String),
- InvalidSymbolIdentifier(String),
- UnterminatedComment,
- UnterminatedString,
- UnterminatedExpression,
- LabelInMacroDefinition,
-}
-
-
-impl std::fmt::Debug for SyntacticToken {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- use SyntacticTokenVariant::*;
- let start = &self.source.in_merged;
- let name = match &self.variant {
- LabelDefinition(name) => format!("LabelDefinition({name})"),
- MacroDefinition(name) => format!("MacroDefinition({name})"),
- MacroDefinitionTerminator => format!("MacroDefinitionTerminator"),
-
- IntegerLiteral(value) => format!("IntegerLiteral({value})"),
- PackedBinaryLiteral(pbl) => format!("PackedBinaryLiteral({pbl})"),
- PinnedAddress(value) => format!("PinnedAddress({value})"),
-
- Expression(expr) => format!("Expression({expr:?})"),
-
- String(string) => format!("String('{string}')"),
-
- BlockOpen => format!("BlockOpen"),
- BlockClose => format!("BlockClose"),
- Separator => format!("Separator"),
-
- Symbol(name) => format!("Symbol({name})"),
-
- Error(error) => format!("Error({error:?})"),
- };
-
- write!(f, "{start} {name}")
- }
-}
diff --git a/src/tokens/tracked.rs b/src/tokens/tracked.rs
deleted file mode 100644
index ea37047..0000000
--- a/src/tokens/tracked.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-use crate::*;
-
-
-#[derive(Clone)]
-pub struct Tracked<T> {
- pub source: SourceSpan,
- pub value: T,
-}
-
-impl<T> Tracked<T> {
- pub fn from(value: T, source: SourceSpan) -> Self {
- Self { source, value }
- }
-}
-
-impl<T> std::ops::Deref for Tracked<T> {
- type Target = T;
- fn deref(&self) -> &T {
- &self.value
- }
-}
-
-impl<T> std::ops::DerefMut for Tracked<T> {
- fn deref_mut(&mut self) -> &mut T {
- &mut self.value
- }
-}
-
-impl<T: std::fmt::Display> std::fmt::Display for Tracked<T> {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- write!(f, "{}", self.value)
- }
-}
-
-impl<T: std::fmt::Debug> std::fmt::Debug for Tracked<T> {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- write!(f, "{:?}", self.value)
- }
-}
-
-impl<T: PartialEq> PartialEq for Tracked<T> {
- fn eq(&self, other: &Tracked<T>) -> bool {
- self.value.eq(&other.value)
- }
-}
-
-impl<T: Eq> Eq for Tracked<T> {}
diff --git a/src/tokens/tracked_integer.rs b/src/tokens/tracked_integer.rs
deleted file mode 100644
index fa55f09..0000000
--- a/src/tokens/tracked_integer.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-use crate::*;
-
-
-#[derive(Clone)]
-pub struct TrackedInteger {
- pub source: SourceSpan,
- pub value: isize,
-}
-
-impl std::fmt::Display for TrackedInteger {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
- write!(f, "{}", self.value)
- }
-}
diff --git a/src/types/expression_stack.rs b/src/types/expression_stack.rs
new file mode 100644
index 0000000..4d26eb2
--- /dev/null
+++ b/src/types/expression_stack.rs
@@ -0,0 +1,89 @@
+use crate::*;
+
+
+pub struct ExpressionStack {
+ stack: Vec<isize>,
+}
+
+impl ExpressionStack {
+ pub fn new() -> Self {
+ Self {
+ stack: Vec::new(),
+ }
+ }
+
+ pub fn pull_result(mut self) -> Result<isize, StackError> {
+ match self.stack.len() {
+ 0 => Err(StackError::NoReturnValue),
+ 1 => Ok(self.stack.pop().unwrap()),
+ _ => Err(StackError::MultipleReturnValues),
+ }
+ }
+
+ pub fn push(&mut self, value: isize) {
+ self.stack.push(value);
+ }
+
+ pub fn apply(&mut self, operator: Operator, source: &SourceSpan) -> Result<(), Tracked<StackError>> {
+ macro_rules! push {
+ ($val:expr) => { self.stack.push($val) }
+ }
+ macro_rules! pop {
+ ($name:ident) => {
+ let $name = match self.stack.pop() {
+ Some(value) => value,
+ None => return Err(Tracked::from(StackError::Underflow, source.clone())),
+ };
+ }
+ }
+ macro_rules! truth {
+ ($bool:expr) => { match $bool { true => 1, false => 0 } };
+ }
+ match operator {
+ Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) },
+ Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) },
+ Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) },
+ Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) },
+ Operator::LessThanEqual => { pop!(b); pop!(a); push!(truth!(a <= b)) },
+ Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) },
+ Operator::Add => { pop!(b); pop!(a); push!(a + b) },
+ Operator::Subtract => { pop!(b); pop!(a); push!(a - b) },
+ Operator::Multiply => { pop!(b); pop!(a); push!(a * b) },
+ Operator::Divide => { pop!(b); pop!(a); push!(a / b) },
+ Operator::Modulo => { pop!(b); pop!(a); push!(a % b) },
+ Operator::Exponent => { pop!(b); pop!(a); push!(
+ if let Ok(b) = u32::try_from(b) { a.saturating_pow(b) } else { 0 } ) },
+ Operator::LeftShift => { pop!(b); pop!(a); push!(
+ if b < 0 { a >> -b } else { a << b } ) },
+ Operator::RightShift => { pop!(b); pop!(a); push!(
+ if b < 0 { a << -b } else { a >> b } ) },
+ Operator::BitAnd => { pop!(b); pop!(a); push!(a & b) },
+ Operator::BitOr => { pop!(b); pop!(a); push!(a | b) },
+ Operator::BitXor => { pop!(b); pop!(a); push!(a ^ b) },
+ Operator::BitNot => { pop!(a); push!(!a) },
+ }
+ return Ok(());
+ }
+}
+
+
+pub enum StackError {
+ Underflow,
+ MultipleReturnValues,
+ NoReturnValue,
+}
+
+
+pub fn report_stack_error(error: &Tracked<StackError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ StackError::Underflow =>
+ "A stack underflow occurred while evaluating this operator",
+ StackError::MultipleReturnValues =>
+ "More than one value was left on the stack after this expression was evaluated",
+ StackError::NoReturnValue =>
+ "No value was left on the stack after this expression was evaluated",
+ };
+
+ report_source_issue(LogLevel::Error, &context, message);
+}
diff --git a/src/types/mod.rs b/src/types/mod.rs
new file mode 100644
index 0000000..623d525
--- /dev/null
+++ b/src/types/mod.rs
@@ -0,0 +1,7 @@
+mod expression_stack;
+mod operator;
+mod word_template;
+
+pub use expression_stack::*;
+pub use operator::*;
+pub use word_template::*;
diff --git a/src/types/operator.rs b/src/types/operator.rs
new file mode 100644
index 0000000..a7e7b9b
--- /dev/null
+++ b/src/types/operator.rs
@@ -0,0 +1,87 @@
+#[derive(Clone, Copy)]
+pub enum Operator {
+ Equal,
+ NotEqual,
+ LessThan,
+ GreaterThan,
+ LessThanEqual,
+ GreaterThanEqual,
+ Add,
+ Subtract,
+ Multiply,
+ Divide,
+ Modulo,
+ Exponent,
+ LeftShift,
+ RightShift,
+ BitAnd,
+ BitOr,
+ BitXor,
+ BitNot,
+}
+
+impl Operator {
+ pub fn from_str(string: &str) -> Option<Self> {
+ match string {
+ "=" => Some(Operator::Equal),
+ "==" => Some(Operator::Equal),
+ "<eq>" => Some(Operator::Equal),
+ "!=" => Some(Operator::NotEqual),
+ "<neq>" => Some(Operator::NotEqual),
+ "<" => Some(Operator::LessThan),
+ "<lth>" => Some(Operator::LessThan),
+ ">" => Some(Operator::GreaterThan),
+ "<gth>" => Some(Operator::GreaterThan),
+ "<=" => Some(Operator::LessThanEqual),
+ "<leq>" => Some(Operator::LessThanEqual),
+ ">=" => Some(Operator::GreaterThanEqual),
+ "<geq>" => Some(Operator::GreaterThanEqual),
+ "+" => Some(Operator::Add),
+ "<add>" => Some(Operator::Add),
+ "-" => Some(Operator::Subtract),
+ "<sub>" => Some(Operator::Subtract),
+ "*" => Some(Operator::Multiply),
+ "<mul>" => Some(Operator::Multiply),
+ "/" => Some(Operator::Divide),
+ "<div>" => Some(Operator::Divide),
+ "<mod>" => Some(Operator::Modulo),
+ "**" => Some(Operator::Exponent),
+ "<exp>" => Some(Operator::Exponent),
+ "<<" => Some(Operator::LeftShift),
+ "<shl>" => Some(Operator::LeftShift),
+ ">>" => Some(Operator::RightShift),
+ "<shr>" => Some(Operator::RightShift),
+ "<and>" => Some(Operator::BitAnd),
+ "<or>" => Some(Operator::BitOr),
+ "<xor>" => Some(Operator::BitXor),
+ "<not>" => Some(Operator::BitNot),
+ _ => None,
+ }
+ }
+}
+
+impl std::fmt::Display for Operator {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ let string = match self {
+ Operator::Equal => "<eq>",
+ Operator::NotEqual => "<neq>",
+ Operator::LessThan => "<lth>",
+ Operator::GreaterThan => "<gth>",
+ Operator::LessThanEqual => "<leq>",
+ Operator::GreaterThanEqual => "<geq>",
+ Operator::Add => "<add>",
+ Operator::Subtract => "<sub>",
+ Operator::Multiply => "<mul>",
+ Operator::Divide => "<div>",
+ Operator::Modulo => "<mod>",
+ Operator::Exponent => "<exp>",
+ Operator::LeftShift => "<shl>",
+ Operator::RightShift => "<shr>",
+ Operator::BitAnd => "<and>",
+ Operator::BitOr => "<or>",
+ Operator::BitXor => "<xor>",
+ Operator::BitNot => "<not>",
+ };
+ write!(f, "{string}")
+ }
+}
diff --git a/src/tokens/packed_binary_literal.rs b/src/types/word_template.rs
index a2720b7..33d5933 100644
--- a/src/tokens/packed_binary_literal.rs
+++ b/src/types/word_template.rs
@@ -1,35 +1,23 @@
use crate::*;
-pub struct PackedBinaryLiteral {
- pub source: SourceSpan,
+pub struct WordTemplate {
pub value: usize,
- pub bits: usize,
- pub fields: Vec<BitField>,
- pub errors: Vec<PackedBinaryLiteralParseError>,
+ /// Width of the word in bits.
+ pub width: u32,
+ pub fields: Vec<Tracked<BitField>>,
}
pub struct BitField {
pub name: char,
- pub source: SourceSpan,
- /// Length of field in bits
- pub bits: usize,
- /// Distance to left-shift field in value
- pub shift: usize,
-}
-
-pub struct PackedBinaryLiteralParseError {
- pub source: SourceSpan,
- pub variant: PackedBinaryLiteralParseErrorVariant,
-}
-
-pub enum PackedBinaryLiteralParseErrorVariant {
- DuplicateFieldName(char),
- InvalidCharacter(char),
+ /// Width of the field in bits.
+ pub width: u32,
+ /// Number of bits to the right of the field in the word.
+ pub shift: u32,
}
-impl std::fmt::Display for PackedBinaryLiteral {
+impl std::fmt::Display for WordTemplate {
fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
if self.value == 0 {
write!(f, "0")?;
@@ -41,7 +29,8 @@ impl std::fmt::Display for PackedBinaryLiteral {
write!(f, "_")?;
}
for field in &self.fields {
- if i <= field.bits + field.shift - 1 && i >= field.shift {
+ let i = i as u32;
+ if i <= field.width + field.shift - 1 && i >= field.shift {
write!(f, "{}", field.name)?;
continue 'bit;
}