summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Bridle <bridle.benjamin@gmail.com>2025-02-11 14:00:20 +1300
committerBen Bridle <bridle.benjamin@gmail.com>2025-02-11 14:00:20 +1300
commit2b4e522b12a7eb87e91cd1cdc56064ee429a5212 (patch)
tree9f302e18b7e664502dad32d8d33d44e24a01c677
downloadtorque-asm-2b4e522b12a7eb87e91cd1cdc56064ee429a5212.zip
Initial commit
-rw-r--r--.gitignore1
-rw-r--r--Cargo.lock59
-rw-r--r--Cargo.toml10
-rw-r--r--src/compiler.rs144
-rw-r--r--src/environment.rs63
-rw-r--r--src/main.rs178
-rw-r--r--src/parsers/constant_expression.rs52
-rw-r--r--src/parsers/mod.rs11
-rw-r--r--src/parsers/packed_binary_literal.rs80
-rw-r--r--src/parsers/semantic.rs21
-rw-r--r--src/parsers/syntactic.rs108
-rw-r--r--src/tokens/constant_expression.rs134
-rw-r--r--src/tokens/mod.rs11
-rw-r--r--src/tokens/packed_binary_literal.rs60
-rw-r--r--src/tokens/semantic.rs71
-rw-r--r--src/tokens/syntactic.rs66
16 files changed, 1069 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..d9ebcc1
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,59 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "ansi"
+version = "1.0.0"
+source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c582410ad2a"
+
+[[package]]
+name = "assembler"
+version = "1.1.0"
+dependencies = [
+ "ansi",
+ "log 1.1.2",
+ "vagabond",
+]
+
+[[package]]
+name = "log"
+version = "1.1.1"
+source = "git+git://benbridle.com/log?tag=v1.1.1#930f3d0e2b82df1243f423c092a38546ea7533c3"
+
+[[package]]
+name = "log"
+version = "1.1.2"
+source = "git+git://benbridle.com/log?tag=v1.1.2#3d5d1f7a19436151ba1dd52a2b50664969d90db6"
+dependencies = [
+ "ansi",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "switchboard"
+version = "1.0.0"
+source = "git+git://benbridle.com/switchboard?tag=v1.0.0#ea70fa89659e5cf1a9d4ca6ea31fb67f7a2cc633"
+dependencies = [
+ "log 1.1.1",
+ "paste",
+]
+
+[[package]]
+name = "torque-assembler"
+version = "0.1.0"
+dependencies = [
+ "assembler",
+ "log 1.1.2",
+ "switchboard",
+]
+
+[[package]]
+name = "vagabond"
+version = "1.0.1"
+source = "git+git://benbridle.com/vagabond?tag=v1.0.1#08f3153fea62ea81a42438347eeee058f5bec199"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..44aa1a3
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "torque-assembler"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+assembler = { path = "/home/ben/Libraries/assembler" }
+# assembler = { git = "git://benbridle.com/assembler", tag = "v1.0.0" }
+log = { git = "git://benbridle.com/log", tag = "v1.1.2" }
+switchboard = { git = "git://benbridle.com/switchboard", tag = "v1.0.0" }
diff --git a/src/compiler.rs b/src/compiler.rs
new file mode 100644
index 0000000..068c6d5
--- /dev/null
+++ b/src/compiler.rs
@@ -0,0 +1,144 @@
+use crate::*;
+
+
+/// Compiles multiple source code files into one.
+///
+/// Wraps a `Resolver` holding the main source unit; library source
+/// units can be added afterwards from disk paths or a PATH-style
+/// environment variable to satisfy unresolved symbols.
+pub struct Compiler {
+ // Path of the main source file (or a placeholder for stdin input).
+ pub source_path: PathBuf,
+ pub resolver: Resolver,
+}
+
+impl Compiler {
+ /// Create a compiler from an in-memory source string.
+ /// `path` is recorded for diagnostics and parent-directory lookups.
+ pub fn from_string<P: AsRef<Path>>(source_code: String, path: P) -> Self {
+ let source_unit = SourceUnit::from_string(source_code, &path, parse_symbols);
+ Self {
+ source_path: path.as_ref().to_path_buf(),
+ resolver: Resolver::new(source_unit)
+ }
+ }
+
+ /// Create a compiler by reading the source file at `path`.
+ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, FileError> {
+ let source_unit = SourceUnit::from_path(&path, None, parse_symbols)?;
+ Ok(Self {
+ source_path: path.as_ref().to_path_buf(),
+ resolver: Resolver::new(source_unit)
+ })
+ }
+
+ /// Find library files descending from the parent directory.
+ pub fn include_libs_from_parent(&mut self, ext: &str) {
+ if let Some(parent_path) = self.source_path.parent() {
+ // Owned copy avoids borrowing `self` across the mutable call below.
+ let parent_path = parent_path.to_owned();
+ self.include_libs_from_path(&parent_path, ext);
+ }
+ }
+
+ /// Find library files at or descending from a path.
+ /// Re-runs symbol resolution after adding the discovered units.
+ pub fn include_libs_from_path(&mut self, path: &Path, ext: &str) {
+ let libraries = gather_from_path(path, Some(ext), parse_symbols);
+ self.resolver.add_library_source_units(libraries);
+ self.resolver.resolve();
+ }
+
+ /// Find library files from a PATH-style environment variable.
+ pub fn include_libs_from_path_variable(&mut self, name: &str, ext: &str) {
+ let libraries = gather_from_path_variable(name, Some(ext), parse_symbols);
+ self.resolver.add_library_source_units(libraries);
+ self.resolver.resolve();
+ }
+
+ /// The resolver's current error, if any (e.g. unresolved symbols).
+ pub fn error(&self) -> Option<ResolverError> {
+ self.resolver.error()
+ }
+
+ /// Merge all included source units into a single source string,
+ /// using `push_source_code` to concatenate each file's contents.
+ pub fn get_compiled_source(&self) -> Result<String, MergeError> {
+ self.resolver.get_merged_source_code(push_source_code)
+ }
+}
+
+
+/// Parse all symbols from a source code string.
+///
+/// Produces every definition and reference found by the syntactic
+/// parser, so the resolver can match references to definitions across
+/// source units. Symbols inside a macro body are namespaced under the
+/// macro's name.
+fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> {
+ use SyntacticTokenVariant as SynVar;
+ use DefinitionType::*;
+ use SymbolRole::*;
+ let mut symbols = Vec::new();
+ let mut macro_name: Option<String> = None;
+ let mut parse_arg_list = false; // true if parsing macro argument list
+ let mut after_separator = false; // true if prev token was separator
+
+ // Push a symbol, namespaced under the current macro (if any).
+ macro_rules! push {
+ ($name:expr, $source:expr, $role:expr) => {
+ symbols.push(Symbol {
+ name: $name,
+ source: $source,
+ role: $role,
+ namespace: match &macro_name {
+ Some(name) => vec![name.to_owned()],
+ None => vec![],
+ }
+ })
+ }
+ }
+
+ for token in SyntacticParser::from_source_code(&source_code, path) {
+ match token.variant {
+ // A macro definition opens a new namespace and may be
+ // followed by an argument list.
+ SynVar::MacroDefinition(name) => {
+ push!(name.clone(), token.source, Definition(MustPrecedeReference));
+ macro_name = Some(name);
+ parse_arg_list = true;
+ }
+ SynVar::MacroDefinitionTerminator => {
+ macro_name = None;
+ }
+ SynVar::LabelDefinition(name) => {
+ push!(name.clone(), token.source, Definition(CanFollowReference));
+ }
+ // A symbol directly after a separator in an argument list is
+ // an argument definition; any other symbol ends the argument
+ // list and counts as a reference.
+ SynVar::Symbol(name) => if parse_arg_list && after_separator {
+ push!(name, token.source, Definition(MustPrecedeReference));
+ } else {
+ parse_arg_list = false;
+ push!(name, token.source, Reference);
+ }
+ // `continue` skips the `after_separator = false` reset below,
+ // so the flag stays set for the next token.
+ SynVar::Separator => {
+ after_separator = true;
+ continue;
+ }
+ // Blocks are transparent to argument-list parsing.
+ SynVar::BlockOpen | SynVar::BlockClose => {
+ continue;
+ }
+ // Named bit-fields in a packed binary literal reference
+ // macro arguments or other integer symbols.
+ SynVar::PackedBinaryLiteral(pbl) => {
+ for field in pbl.fields {
+ push!(field.name.to_string(), field.source, Reference)
+ }
+ }
+ SynVar::ConstantExpression(expr) => {
+ use ConstantExpressionTokenVariant as TokenVar;
+ for token in expr.tokens {
+ if let TokenVar::SymbolReference(name) = token.variant {
+ push!(name, token.source, Reference);
+ }
+ }
+ }
+ _ => ()
+ };
+ after_separator = false;
+ }
+ return symbols;
+}
+
+/// Push source code to a source compilation string.
+///
+/// Appends one source file to the merged output, preceded by a
+/// `(: path )` comment so errors in the merged source can be traced
+/// back to their original file (see the path-comment handling in the
+/// syntactic parser).
+fn push_source_code(compilation: &mut String, source_file: &SourceFile) {
+ // Skip blank files.
+ let source_code = &source_file.source_code;
+ if source_code.chars().all(|c| c.is_whitespace()) { return; }
+ // Ensure that the previous section is followed by two newline characters.
+ if !compilation.is_empty() {
+ if !compilation.ends_with('\n') { compilation.push('\n'); }
+ if !compilation.ends_with("\n\n") { compilation.push('\n'); }
+ }
+ // Push a path comment and the source code.
+ let path_str = source_file.path.as_os_str().to_string_lossy();
+ let path_comment = format!("(: {path_str} )\n");
+ compilation.push_str(&path_comment);
+ compilation.push_str(&source_code);
+}
diff --git a/src/environment.rs b/src/environment.rs
new file mode 100644
index 0000000..006b45b
--- /dev/null
+++ b/src/environment.rs
@@ -0,0 +1,63 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+/// A stack of lexical scopes used when evaluating constant expressions.
+pub struct Environment {
+ // Innermost scope is last; lookups search from innermost outwards.
+ pub scopes: Vec<Scope>,
+}
+
+impl Environment {
+ /// Look up an integer definition by name, innermost scope first.
+ /// Returns `Err(())` if no scope defines `name` as an integer.
+ pub fn get_integer(&self, name: &str) -> Result<usize, ()> {
+ for scope in self.scopes.iter().rev() {
+ if let Ok(value) = scope.get_integer(name, &self) {
+ return Ok(value);
+ }
+ }
+ return Err(());
+ }
+
+ /// Look up a block definition by name, innermost scope first.
+ /// NOTE(review): `Scope::get_block` is still `todo!()`, so this
+ /// currently panics on the first scope it queries.
+ pub fn get_block(&self, name: &str) -> Result<usize, ()> {
+ for scope in self.scopes.iter().rev() {
+ if let Ok(value) = scope.get_block(name, &self) {
+ return Ok(value);
+ }
+ }
+ return Err(());
+ }
+}
+
+/// A single scope of name → definition bindings.
+pub struct Scope {
+ pub definitions: HashMap<String, Definition>,
+}
+
+impl Scope {
+ /// Resolve `name` to an integer within this scope only.
+ /// Literal values are returned directly; constant-expression
+ /// definitions are evaluated against `environment` (unfinished:
+ /// both evaluation outcomes currently hit `todo!()`).
+ pub fn get_integer(&self, name: &str, environment: &Environment) -> Result<usize, ()> {
+ use IntegerDefinition as IntDef;
+ if let Some(definition) = self.definitions.get(name) {
+ if let Definition::Integer(integer) = definition {
+ match integer {
+ IntDef::Literal(value) => return Ok(*value),
+ IntDef::ConstantExpression(expr) => match expr.evaluate(environment) {
+ Ok(_) | Err(_) => todo!(),
+ },
+ };
+ }
+ }
+ return Err(());
+ }
+
+ /// Resolve `name` to a block within this scope only. Unimplemented.
+ pub fn get_block(&self, _name: &str, _environment: &Environment) -> Result<usize, ()> {
+ todo!()
+ }
+}
+
+/// A named definition stored in a scope.
+pub enum Definition {
+ Integer(IntegerDefinition),
+ Block(BlockLiteral),
+}
+
+/// An integer definition: either a resolved literal or an expression
+/// that must be evaluated against the environment.
+pub enum IntegerDefinition {
+ Literal(usize),
+ ConstantExpression(ConstantExpression),
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..0086496
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,178 @@
+mod compiler;
+mod environment;
+mod parsers;
+mod tokens;
+
+pub use compiler::*;
+pub use environment::*;
+pub use parsers::*;
+pub use tokens::*;
+
+pub use assembler::*;
+use log::{info, fatal};
+use switchboard::{Switchboard, SwitchQuery};
+
+use std::io::{Read, Write};
+
+
+/// Print the program name and version to standard error, then exit
+/// successfully. Never returns.
+fn print_version() -> ! {
+ let version = env!("CARGO_PKG_VERSION");
+ eprintln!("torque assembler, version {version}");
+ eprintln!("written by ben bridle");
+ std::process::exit(0);
+}
+
+/// Entry point: parse command-line switches, read the program source
+/// (from a file or standard input), pull in library files until all
+/// symbols resolve, merge everything into one source string, and run
+/// the syntactic parser over the result.
+fn main() {
+ let mut args = Switchboard::from_env();
+ if args.named("version").as_bool() {
+ print_version();
+ }
+ if args.named("verbose").short('v').as_bool() {
+ log::set_log_level(log::LogLevel::Info);
+ }
+ // Canonicalise the source path up front so later diagnostics show
+ // the true location; a missing path means "read from stdin".
+ let source_path = args.positional("source").as_path_opt().map(
+ |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}")));
+ let destination_path = args.positional("destination").as_path_opt();
+ let extension = args.named("ext").default("tq").as_string();
+
+ // Switches that suppress automatic library inclusion.
+ let no_libs = args.named("no-libs").as_bool();
+ let no_project_libs = args.named("no-project-libs").as_bool();
+ let no_environment_libs = args.named("no-env-libs").as_bool();
+
+ let print_tree = args.named("tree").as_bool();
+ let dry_run = args.named("dry-run").short('n').as_bool();
+ let only_resolve = args.named("resolve").as_bool();
+ // Accepted but not yet acted upon.
+ let _export_symbols = args.named("symbols").as_bool();
+
+ // -----------------------------------------------------------------------
+
+ let mut compiler = if let Some(path) = &source_path {
+ info!("Reading program source from {path:?}");
+ Compiler::from_path(path).unwrap_or_else(|err| match err {
+ FileError::InvalidExtension => fatal!(
+ "File {path:?} has invalid extension, must be '.{extension}'"),
+ FileError::NotFound => fatal!(
+ "File {path:?} was not found"),
+ FileError::InvalidUtf8 => fatal!(
+ "File {path:?} does not contain valid UTF-8 text"),
+ FileError::NotReadable => fatal!(
+ "File {path:?} is not readable"),
+ FileError::IsADirectory => fatal!(
+ "File {path:?} is a directory"),
+ FileError::Unknown => fatal!(
+ "Unknown error while attempting to read from {path:?}")
+ })
+ } else {
+ let mut source_code = String::new();
+ info!("Reading program source from standard input");
+ if let Err(err) = std::io::stdin().read_to_string(&mut source_code) {
+ fatal!("Could not read from standard input\n{err:?}");
+ }
+ Compiler::from_string(source_code, "<standard input>")
+ };
+ // Only search for libraries while unresolved symbols remain:
+ // first near the source file, then via the TORQUE_LIBS variable.
+ if compiler.error().is_some() && !no_libs && !no_project_libs {
+ compiler.include_libs_from_parent(&extension);
+ }
+ if compiler.error().is_some() && !no_libs && !no_environment_libs {
+ compiler.include_libs_from_path_variable("TORQUE_LIBS", &extension);
+ }
+
+ if print_tree {
+ compiler.resolver.hierarchy().report()
+ }
+
+ // Unresolved symbols (or other resolver errors) are fatal.
+ if let Some(error) = compiler.error() {
+ error.report();
+ std::process::exit(1);
+ }
+ let merged_source = compiler.get_compiled_source().unwrap_or_else(
+ |error| { error.report(); std::process::exit(1); }
+ );
+ // With --resolve, emit the merged source and stop before parsing.
+ if only_resolve && !dry_run {
+ write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref());
+ }
+
+ // -----------------------------------------------------------------------
+
+
+ // // TODO: Remove this block
+ // let code = &compiler.resolver.source_units[0].source_unit.main.source_code;
+ // let parser = SyntacticParser::from_source_code(code, Some("<main>"));
+ // println!();
+ // for t in parser {
+ // println!("{t:?}");
+ // }
+
+ // Parse syntactic tokens from merged source code.
+ let path = Some("<merged source>");
+ let parser = SyntacticParser::from_source_code(&merged_source, path);
+ let syntactic_tokens: Vec<_> = parser.collect();
+ report_syntactic_errors(&syntactic_tokens, &merged_source);
+
+ // Semantic parsing is not wired up yet.
+ // let mut semantic_parser = SemanticParser::new(syntactic_tokens);
+ // semantic_parser.parse();
+}
+
+
+/// Write `bytes` to the given path, or to standard output when no
+/// path was provided, then exit. Exits with an error via `fatal!` if
+/// the write fails; never returns.
+fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! {
+ match path {
+ Some(path) => match std::fs::write(path, bytes) {
+ Ok(_) => info!("Wrote output to path {:?}", path.as_ref()),
+ Err(err) => fatal!("Could not write to path {:?}\n{err:?}", path.as_ref()),
+ }
+ None => match std::io::stdout().write_all(bytes) {
+ Ok(_) => info!("Wrote output to standard output"),
+ Err(err) => fatal!("Could not write to standard output\n{err:?}"),
+ }
+ }
+ std::process::exit(0);
+}
+
+
+/// Report every error embedded in the syntactic token stream.
+///
+/// Errors are carried inline as token variants (or inside constant
+/// expressions and packed binary literals), so this walks the tokens
+/// and prints a sourced message for each one. Does not exit; callers
+/// decide how to proceed.
+fn report_syntactic_errors(syntactic_tokens: &[SyntacticToken], source_code: &str) {
+ for token in syntactic_tokens {
+ let context = Context { source_code: &source_code, source: &token.source };
+ match &token.variant {
+ // Errors nested inside a constant expression use the inner
+ // token's narrower source span.
+ SyntacticTokenVariant::ConstantExpression(expr) => for t in &expr.tokens {
+ let context = Context { source_code: &source_code, source: &t.source };
+ if let ConstantExpressionTokenVariant::Error(err) = &t.variant {
+ // Irrefutable: InvalidHexadecimalLiteral is currently
+ // the only ConstantExpressionParseError variant.
+ let ConstantExpressionParseError::InvalidHexadecimalLiteral(hex) = err;
+ let message = format!("Invalid hexadecimal literal {hex:?} in constant expression");
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ }
+ // Packed binary literals collect their parse errors in a
+ // side list rather than as token variants.
+ SyntacticTokenVariant::PackedBinaryLiteral(pbl) => for e in &pbl.errors {
+ let context = Context { source_code: &source_code, source: &e.source };
+ match e.variant {
+ PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => {
+ let message = format!("Duplicate field name {name:?} in packed binary literal");
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => {
+ let message = format!("Invalid character {c:?} in packed binary literal");
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ }
+ }
+ SyntacticTokenVariant::Error(err) => match err {
+ SyntacticParseError::InvalidHexadecimalLiteral(hex) => {
+ let message = format!("Invalid hexadecimal literal {hex:?}");
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ SyntacticParseError::InvalidSymbolIdentifier(name) => {
+ let message = format!("Invalid identifier {name:?}");
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ SyntacticParseError::UnterminatedComment => {
+ let message = format!("Unterminated comment");
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ SyntacticParseError::UnterminatedConstantExpression => {
+ let message = format!("Unterminated constant expression");
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ }
+ _ => (),
+ }
+ }
+}
diff --git a/src/parsers/constant_expression.rs b/src/parsers/constant_expression.rs
new file mode 100644
index 0000000..78dc697
--- /dev/null
+++ b/src/parsers/constant_expression.rs
@@ -0,0 +1,52 @@
+use crate::*;
+
+
+/// Tokenise the body of a `[ ... ]` constant expression.
+///
+/// `string` is the text between the delimiters (the opening delimiter
+/// is skipped below); `parent` supplies position information so each
+/// token carries an accurate source span. Tokens are kept in source
+/// order and evaluated as a postfix (stack-based) expression by
+/// `ConstantExpression::evaluate`.
+pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantExpression {
+ use ConstantExpressionTokenVariant as TokenVar;
+ use ConstantExpressionParseError as ParseError;
+
+ let mut tokens = Vec::new();
+ let mut t = Tokeniser::new_child(string, parent);
+ t.position.to_next_char(); // skip opening delimiter
+
+ loop {
+ t.drop_whitespace();
+ t.mark_start_position();
+ let token = t.eat_token();
+ if token.is_empty() {
+ break;
+ }
+
+ // Fixed operator spellings first; everything else is a
+ // hexadecimal literal (0x prefix), a decimal literal, or a
+ // symbol reference as a fallback.
+ let variant = match token.as_str() {
+ "=" => TokenVar::Operator(Operator::Equal),
+ "!" => TokenVar::Operator(Operator::NotEqual),
+ "<" => TokenVar::Operator(Operator::LessThan),
+ ">" => TokenVar::Operator(Operator::GreaterThan),
+ "+" => TokenVar::Operator(Operator::Add),
+ "-" => TokenVar::Operator(Operator::Subtract),
+ "<<" => TokenVar::Operator(Operator::LeftShift),
+ ">>" => TokenVar::Operator(Operator::RightShift),
+ "&" => TokenVar::Operator(Operator::And),
+ "|" => TokenVar::Operator(Operator::Or),
+ "^" => TokenVar::Operator(Operator::Xor),
+ "~" => TokenVar::Operator(Operator::Not),
+ _ => if let Some(stripped) = token.strip_prefix("0x") {
+ // A malformed hex literal is an error, not a symbol.
+ match usize::from_str_radix(stripped, 16) {
+ Ok(value) => TokenVar::IntegerLiteral(value),
+ Err(_) => TokenVar::Error(
+ ParseError::InvalidHexadecimalLiteral(stripped.to_string())),
+ }
+ } else {
+ match usize::from_str_radix(&token, 10) {
+ Ok(value) => TokenVar::IntegerLiteral(value),
+ Err(_) => TokenVar::SymbolReference(token.to_string()),
+ }
+ }
+ };
+
+ let source = t.mark_end_position();
+ tokens.push(ConstantExpressionToken { source, variant });
+ }
+
+ return ConstantExpression { tokens };
+}
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs
new file mode 100644
index 0000000..91765a9
--- /dev/null
+++ b/src/parsers/mod.rs
@@ -0,0 +1,11 @@
+mod constant_expression;
+pub use constant_expression::*;
+
+mod packed_binary_literal;
+pub use packed_binary_literal::*;
+
+mod syntactic;
+pub use syntactic::*;
+
+mod semantic;
+pub use semantic::*;
diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs
new file mode 100644
index 0000000..9704fc4
--- /dev/null
+++ b/src/parsers/packed_binary_literal.rs
@@ -0,0 +1,80 @@
+use crate::*;
+
+
+/// Parse a `#...` packed binary literal.
+///
+/// Each character contributes one bit: '0' and '1' are literal bits,
+/// an alphabetic character names a bit-field (consecutive identical
+/// letters widen the field), and underscores are ignored as visual
+/// separators. Produces the literal's fixed bits in `value` plus a
+/// `BitField` (name, width, shift) per named field; malformed input
+/// is collected into `errors` rather than aborting the parse.
+pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBinaryLiteral {
+ use PackedBinaryLiteralParseError as ParseError;
+ use PackedBinaryLiteralParseErrorVariant as ParseErrorVar;
+
+ let mut value = 0;
+ let mut bits = 0; // width of the field currently being scanned
+ let mut name = '\0'; // name of that field ('\0' = no field open)
+ let mut fields: Vec<BitField> = Vec::new();
+ let mut errors: Vec<ParseError> = Vec::new();
+
+ // Commit the in-progress field, rejecting duplicate field names.
+ macro_rules! push_field {
+ ($source:expr) => {
+ if fields.iter().any(|f| f.name == name) {
+ let variant = ParseErrorVar::DuplicateFieldName(name);
+ errors.push(ParseError { source: $source, variant });
+ } else {
+ fields.push(BitField { name, source: $source, bits, shift: 0 });
+ }
+ };
+ }
+
+ let mut t = Tokeniser::new_child(string, parent);
+ t.position.to_next_char(); // skip opening hash character
+
+ while let Some(c) = t.eat_char() {
+ // Ignore underscores.
+ if c == '_' {
+ // Rewind so the underscore doesn't widen any source span.
+ t.prev_position = t.prev_prev_position;
+ continue;
+ }
+
+ // Add a bit to the value; every earlier field moves one bit
+ // further from the low end, so bump each recorded shift.
+ value <<= 1;
+ for field in &mut fields {
+ field.shift += 1;
+ }
+
+ // Extend the current field.
+ if c == name {
+ bits += 1;
+ continue;
+ }
+
+ // Commit the current field.
+ if bits > 0 {
+ push_field!(t.mark_prev_end_position());
+ bits = 0;
+ name = '\0';
+ }
+
+ // Parse bit literals.
+ if c == '0' {
+ continue;
+ }
+ if c == '1' {
+ value |= 1;
+ continue;
+ }
+
+ // Any other alphabetic character starts a new named field;
+ // anything else is recorded as an invalid character.
+ t.mark_prev_start_position();
+ if c.is_alphabetic() {
+ name = c;
+ bits = 1;
+ continue;
+ } else {
+ let source = t.mark_end_position();
+ errors.push(ParseError { source, variant: ParseErrorVar::InvalidCharacter(c) });
+ }
+ }
+
+ // Commit the final field.
+ if bits > 0 {
+ push_field!(t.mark_end_position());
+ }
+
+ PackedBinaryLiteral { value, fields, errors }
+}
diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs
new file mode 100644
index 0000000..94ed70c
--- /dev/null
+++ b/src/parsers/semantic.rs
@@ -0,0 +1,21 @@
+use crate::*;
+
+
+/// Second-stage parser: turns the flat syntactic token stream into
+/// semantic tokens (macro definitions and invocations).
+/// The parsing pass itself is not implemented yet.
+pub struct SemanticParser {
+ pub syntactic_tokens: Vec<SyntacticToken>,
+ pub semantic_tokens: Vec<SemanticToken>,
+}
+
+
+impl SemanticParser {
+ /// Take ownership of the syntactic tokens to be parsed.
+ pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self {
+ Self {
+ syntactic_tokens,
+ semantic_tokens: Vec::new(),
+ }
+ }
+
+ /// Populate `semantic_tokens` from `syntactic_tokens`. Unimplemented.
+ pub fn parse(&mut self) {
+ todo!()
+ }
+}
diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs
new file mode 100644
index 0000000..443e47e
--- /dev/null
+++ b/src/parsers/syntactic.rs
@@ -0,0 +1,108 @@
+use crate::*;
+
+
+/// First-stage parser: an iterator that yields `SyntacticToken`s
+/// directly from source text (see the `Iterator` impl below).
+pub struct SyntacticParser {
+ tokeniser: Tokeniser,
+ /// The name of the most recently parsed label.
+ label_name: String,
+ /// The name of the macro being parsed.
+ macro_name: Option<String>,
+}
+
+impl SyntacticParser {
+ /// Build a parser over `source_code`; `path` is attached to source
+ /// spans for diagnostics. The listed characters are registered as
+ /// single-character token delimiters with the tokeniser.
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ let mut tokeniser = Tokeniser::new(source_code, path);
+ tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']);
+ Self {
+ tokeniser,
+ label_name: String::new(),
+ macro_name: None,
+ }
+ }
+}
+
+
+impl Iterator for SyntacticParser {
+ type Item = SyntacticToken;
+
+ /// Sequentially parse tokens from the source code.
+ ///
+ /// Leading sigils select the token kind: `@` label definition,
+ /// `&` sub-label definition (scoped under the last `@` label),
+ /// `%` macro definition, `;` macro terminator, `[...]` constant
+ /// expression, `{`/`}` block open/close, `(...)` comment,
+ /// `#` packed binary literal, `~` sub-label reference, `:`
+ /// separator. Anything else is a hexadecimal literal (`0x`),
+ /// a decimal literal, or a plain symbol. Returns `None` at
+ /// end of input.
+ fn next(&mut self) -> Option<SyntacticToken> {
+ use SyntacticTokenVariant as SynVar;
+ use SyntacticParseError as SynErr;
+ let t = &mut self.tokeniser;
+
+ t.drop_whitespace();
+ t.mark_start_position();
+
+ let variant = match t.eat_char()? {
+ '@' => {
+ // Remember the label so `&` and `~` can scope under it.
+ self.label_name = t.eat_token();
+ SynVar::LabelDefinition(self.label_name.clone())
+ }
+ '&' => {
+ let token = t.eat_token();
+ SynVar::LabelDefinition(format!("{}/{token}", self.label_name))
+ }
+ '%' => {
+ let macro_name = t.eat_token();
+ self.macro_name = Some(macro_name.clone());
+ SynVar::MacroDefinition(macro_name)
+ }
+ ';' => {
+ self.macro_name = None;
+ SynVar::MacroDefinitionTerminator
+ }
+ '[' => match t.eat_to_delimiter(']') {
+ Some(string) => {
+ let constant = ConstantExpression::from_str(&string, t);
+ SynVar::ConstantExpression(constant)
+ }
+ None => SynVar::Error(SynErr::UnterminatedConstantExpression),
+ }
+ '{' => SynVar::BlockOpen,
+ '}' => SynVar::BlockClose,
+ '(' => match t.eat_to_delimiter(')') {
+ Some(string) => SynVar::Comment(string),
+ None => SynVar::Error(SynErr::UnterminatedComment),
+ }
+ '#' => {
+ let token = t.eat_token();
+ let pbl = PackedBinaryLiteral::from_str(&token, t);
+ SynVar::PackedBinaryLiteral(pbl)
+ },
+ '~' => {
+ // Reference to a sub-label of the current label scope.
+ let token = t.eat_token();
+ SynVar::Symbol(format!("{}/{token}", self.label_name))
+ }
+ ':' => SynVar::Separator,
+ c => {
+ // Non-sigil token: numeric literal or bare symbol.
+ let token = format!("{c}{}", t.eat_token());
+ if let Some(hex_string) = token.strip_prefix("0x") {
+ match usize::from_str_radix(hex_string, 16) {
+ Ok(hex) => SynVar::HexadecimalLiteral(hex),
+ Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
+ }
+ } else {
+ match usize::from_str_radix(&token, 10) {
+ Ok(value) => SynVar::DecimalLiteral(value),
+ Err(_) => SynVar::Symbol(token),
+ }
+ }
+ }
+ };
+
+ // Parse source path comments.
+ // A full-line `(: path )` comment (written by push_source_code
+ // when merging files) retargets subsequent spans to the original
+ // file and line.
+ if let SynVar::Comment(comment) = &variant {
+ // Check if the comment fills the entire line.
+ if t.start_position.column == 0 && t.end_of_line() {
+ if let Some(path) = comment.strip_prefix(": ") {
+ t.source_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start_position.line + 1;
+ }
+ }
+ }
+
+ let source = t.mark_end_position();
+ Some( SyntacticToken { source, variant } )
+ }
+}
diff --git a/src/tokens/constant_expression.rs b/src/tokens/constant_expression.rs
new file mode 100644
index 0000000..e4aa099
--- /dev/null
+++ b/src/tokens/constant_expression.rs
@@ -0,0 +1,134 @@
+use crate::*;
+
+
+/// A tokenised `[ ... ]` constant expression, evaluated as postfix
+/// (see `ConstantExpression::evaluate`).
+pub struct ConstantExpression {
+ pub tokens: Vec<ConstantExpressionToken>,
+}
+
+impl ConstantExpression {
+ /// Tokenise the text between the expression delimiters.
+ pub fn from_str(string: &str, tokeniser: &Tokeniser) -> Self {
+ parse_constant_expression(string, tokeniser)
+ }
+}
+
+/// One token of a constant expression, with its source span.
+pub struct ConstantExpressionToken {
+ pub source: SourceSpan,
+ pub variant: ConstantExpressionTokenVariant,
+}
+
+pub enum ConstantExpressionTokenVariant {
+ SymbolReference(String),
+ IntegerLiteral(usize),
+ Operator(Operator),
+ // Parse errors are carried inline so reporting can happen later.
+ Error(ConstantExpressionParseError),
+}
+
+/// Operators recognised in constant expressions; spellings are
+/// defined in `parse_constant_expression` (`=`, `!`, `<`, `>`, `+`,
+/// `-`, `<<`, `>>`, `&`, `|`, `^`, `~`).
+pub enum Operator {
+ Equal,
+ NotEqual,
+ LessThan,
+ GreaterThan,
+ Add,
+ Subtract,
+ LeftShift,
+ RightShift,
+ And,
+ Or,
+ Xor,
+ Not,
+}
+
+pub enum ConstantExpressionParseError {
+ InvalidHexadecimalLiteral(String),
+}
+
+
+impl ConstantExpression {
+ /// Evaluate the expression as postfix notation on a value stack.
+ ///
+ /// Integer literals and resolved symbols push a value; operators
+ /// pop their operands (binary operators pop b then a) and push the
+ /// result. Comparison operators push 1 for true and 0 for false.
+ /// Succeeds only if exactly one value remains on the stack.
+ ///
+ /// NOTE(review): `a - b` and the shift operators use unchecked
+ /// arithmetic and will panic on underflow/shift overflow in debug
+ /// builds — consider checked/saturating forms. Inline `Error`
+ /// tokens are silently skipped here.
+ pub fn evaluate(&self, environment: &Environment) -> Result<usize, ConstantExpressionEvaluationError> {
+ use ConstantExpressionTokenVariant as Token;
+ use ConstantExpressionEvaluationError as EvalErr;
+
+ let mut stack = Vec::new();
+ macro_rules! push {
+ ($value:expr) => { stack.push($value) };
+ }
+ // Pop into a named binding, or fail with StackUnderflow.
+ macro_rules! pop {
+ ($name:ident) => { let $name = match stack.pop() {
+ Some(value) => value,
+ None => return Err(EvalErr::StackUnderflow),
+ }; };
+ }
+ // Convert a boolean to the integer truth values 1/0.
+ macro_rules! truth {
+ ($bool:expr) => { match $bool { true => 1, false => 0 } };
+ }
+
+ for token in &self.tokens {
+ match &token.variant {
+ Token::IntegerLiteral(value) => push!(*value),
+ Token::SymbolReference(name) => match environment.get_integer(name) {
+ Ok(value) => push!(value),
+ Err(_) => todo!(),
+ }
+ Token::Operator(operator) => match operator {
+ Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) },
+ Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) },
+ Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) },
+ Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) },
+ Operator::Add => { pop!(b); pop!(a); push!(a + b) },
+ Operator::Subtract => { pop!(b); pop!(a); push!(a - b) },
+ Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) },
+ Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) },
+ Operator::And => { pop!(b); pop!(a); push!(a & b) },
+ Operator::Or => { pop!(b); pop!(a); push!(a | b) },
+ Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) },
+ Operator::Not => { pop!(a); push!(!a) },
+ }
+ Token::Error(_) => (),
+ }
+ }
+ match stack.len() {
+ 0 => Err(EvalErr::NoReturnValue),
+ 1 => Ok(stack[0]),
+ _ => Err(EvalErr::MultipleReturnValues),
+ }
+ }
+}
+
+/// Ways a structurally-valid expression can fail to evaluate.
+pub enum ConstantExpressionEvaluationError {
+ StackUnderflow,
+ MultipleReturnValues,
+ NoReturnValue,
+}
+
+
+// Debug-prints the expression as space-separated postfix tokens,
+// mirroring the operator spellings used by the parser.
+impl std::fmt::Debug for ConstantExpression {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ use ConstantExpressionTokenVariant as TokenVar;
+ for (i, token) in self.tokens.iter().enumerate() {
+ let string = match &token.variant {
+ TokenVar::SymbolReference(name) => name,
+ TokenVar::IntegerLiteral(value) => &value.to_string(),
+ TokenVar::Operator(operator) => match operator {
+ Operator::Equal => "=",
+ Operator::NotEqual => "!",
+ Operator::LessThan => "<",
+ Operator::GreaterThan => ">",
+ Operator::Add => "+",
+ Operator::Subtract => "-",
+ Operator::LeftShift => "<<",
+ Operator::RightShift => ">>",
+ Operator::And => "&",
+ Operator::Or => "|",
+ Operator::Xor => "^",
+ Operator::Not => "~",
+ }
+ TokenVar::Error(_) => "<error>",
+ };
+ // Space-separate all tokens after the first.
+ match i {
+ 0 => write!(f, "{string}")?,
+ _ => write!(f, " {string}")?,
+ }
+ }
+ return Ok(());
+ }
+}
diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs
new file mode 100644
index 0000000..65f361c
--- /dev/null
+++ b/src/tokens/mod.rs
@@ -0,0 +1,11 @@
+mod syntactic;
+pub use syntactic::*;
+
+mod semantic;
+pub use semantic::*;
+
+mod constant_expression;
+pub use constant_expression::*;
+
+mod packed_binary_literal;
+pub use packed_binary_literal::*;
diff --git a/src/tokens/packed_binary_literal.rs b/src/tokens/packed_binary_literal.rs
new file mode 100644
index 0000000..1252398
--- /dev/null
+++ b/src/tokens/packed_binary_literal.rs
@@ -0,0 +1,60 @@
+use crate::*;
+
+
+/// A `#...` packed binary literal: fixed bits in `value` plus named
+/// bit-fields to be filled in later (see `parse_packed_binary_literal`).
+pub struct PackedBinaryLiteral {
+ pub value: usize,
+ pub fields: Vec<BitField>,
+ // Parse errors are collected rather than aborting the parse.
+ pub errors: Vec<PackedBinaryLiteralParseError>,
+}
+
+impl PackedBinaryLiteral {
+ /// Parse a literal from the token text following the `#` sigil.
+ pub fn from_str(string: &str, parent: &Tokeniser) -> Self {
+ parse_packed_binary_literal(string, parent)
+ }
+}
+
+/// A named run of bits within a packed binary literal.
+pub struct BitField {
+ pub name: char,
+ pub source: SourceSpan,
+ /// Length of field in bits
+ pub bits: usize,
+ /// Distance to left-shift field in value
+ pub shift: usize,
+}
+
+pub struct PackedBinaryLiteralParseError {
+ pub source: SourceSpan,
+ pub variant: PackedBinaryLiteralParseErrorVariant,
+}
+
+pub enum PackedBinaryLiteralParseErrorVariant {
+ DuplicateFieldName(char),
+ InvalidCharacter(char),
+}
+
+
+// Renders the literal back in its source-like form: field letters for
+// field positions, 0/1 for fixed bits, '_' before each group of four.
+impl std::fmt::Display for PackedBinaryLiteral {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ if self.value == 0 {
+ write!(f, "0")?;
+ } else {
+ // Number of significant bits in the fixed value.
+ let bitcount = (self.value.ilog2() + 1) as usize;
+ 'bit: for i in (0..bitcount).rev() {
+ // Underscore-separate each nibble boundary.
+ if (i+1) % 4 == 0 {
+ write!(f, "_")?;
+ }
+ // A bit covered by a field prints the field's name.
+ for field in &self.fields {
+ if i <= field.bits + field.shift - 1 && i >= field.shift {
+ write!(f, "{}", field.name)?;
+ continue 'bit;
+ }
+ }
+ match (self.value >> i) & 1 {
+ 0 => write!(f, "0")?,
+ _ => write!(f, "1")?,
+ }
+ }
+ }
+ return Ok(());
+ }
+}
diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs
new file mode 100644
index 0000000..ed53685
--- /dev/null
+++ b/src/tokens/semantic.rs
@@ -0,0 +1,71 @@
+use crate::*;
+
+
+/// Output of the (unfinished) semantic parsing stage.
+/// NOTE(review): variants carry no payload yet, unlike the
+/// `MacroDefinition`/`Invocation` structs below — presumably these
+/// will be wired together when `SemanticParser::parse` is written.
+pub enum SemanticToken {
+ MacroDefinition,
+ Invocation,
+}
+
+/// A use of a macro (or symbol), with the arguments applied to it.
+pub struct Invocation {
+ pub name: String,
+ pub bytecode: BytecodeSpan,
+ pub arguments: Vec<InvocationArgument>,
+}
+
+/// A `{ ... }` block: a sequence of words and nested invocations.
+pub struct BlockLiteral {
+ pub tokens: Vec<BlockToken>,
+}
+
+pub struct BlockToken {
+ pub source: SourceSpan,
+ pub bytecode: BytecodeSpan,
+ pub variant: BlockTokenVariant,
+}
+
+pub enum BlockTokenVariant {
+ Invocation(Invocation),
+ Word(PackedBinaryLiteral),
+}
+
+/// A `%name arg ... { body } ;` macro definition.
+pub struct MacroDefinition {
+ pub name: String,
+ pub arguments: Vec<DefinitionArgument>,
+ pub body: BlockLiteral,
+}
+
+// -------------------------------------------------------------------------- //
+
+pub struct SemanticParseError {
+ pub source: SourceSpan,
+ pub variant: SemanticParseErrorVariant,
+}
+
+// Error variants to be filled in alongside SemanticParser::parse.
+pub enum SemanticParseErrorVariant {
+
+}
+
+// -------------------------------------------------------------------------- //
+
+/// A formal parameter in a macro definition.
+pub struct DefinitionArgument {
+ pub name: String,
+ pub source: SourceSpan,
+ pub variant: DefinitionArgumentVariant,
+}
+
+/// Whether a macro parameter binds an integer or a block.
+pub enum DefinitionArgumentVariant {
+ Integer,
+ Block,
+}
+
+/// An actual argument supplied at an invocation site.
+pub struct InvocationArgument {
+ pub source: SourceSpan,
+ pub variant: InvocationArgumentVariant,
+}
+
+pub enum InvocationArgumentVariant {
+ BlockLiteral(BlockLiteral),
+ IntegerLiteral(usize),
+ ConstantExpression(ConstantExpression),
+ Invocation(Invocation),
+}
+
+
diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs
new file mode 100644
index 0000000..000d178
--- /dev/null
+++ b/src/tokens/syntactic.rs
@@ -0,0 +1,66 @@
+use crate::*;
+
+
+/// One token produced by the syntactic parser, with its source span.
+pub struct SyntacticToken {
+ pub source: SourceSpan,
+ pub variant: SyntacticTokenVariant,
+}
+
+/// All token kinds produced by `SyntacticParser`; the sigil that
+/// introduces each kind is documented on the parser's `next` method.
+pub enum SyntacticTokenVariant {
+ LabelDefinition(String),
+ MacroDefinition(String),
+ MacroDefinitionTerminator,
+
+ DecimalLiteral(usize),
+ HexadecimalLiteral(usize),
+ PackedBinaryLiteral(PackedBinaryLiteral),
+
+ Comment(String),
+ ConstantExpression(ConstantExpression),
+
+ BlockOpen,
+ BlockClose,
+ Separator,
+
+ Symbol(String),
+
+ // Parse errors are carried inline in the token stream.
+ Error(SyntacticParseError),
+}
+
+#[derive(Debug)]
+pub enum SyntacticParseError {
+ InvalidHexadecimalLiteral(String),
+ // NOTE(review): not constructed anywhere in this commit — presumably
+ // reserved for future identifier validation.
+ InvalidSymbolIdentifier(String),
+ UnterminatedComment,
+ UnterminatedConstantExpression,
+}
+
+
+// Debug-prints a token as "<position> <VariantName(payload)>" for
+// parser tracing; comment contents are elided.
+impl std::fmt::Debug for SyntacticToken {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ use SyntacticTokenVariant::*;
+ // Position within the merged source, not the original file.
+ let start = &self.source.in_merged;
+ let name = match &self.variant {
+ LabelDefinition(name) => format!("LabelDefinition({name})"),
+ MacroDefinition(name) => format!("MacroDefinition({name})"),
+ MacroDefinitionTerminator => format!("MacroDefinitionTerminator"),
+
+ DecimalLiteral(value) => format!("DecimalLiteral({value})"),
+ HexadecimalLiteral(value) => format!("HexadecimalLiteral(0x{value:x})"),
+ PackedBinaryLiteral(pbl) => format!("PackedBinaryLiteral({pbl})"),
+
+ Comment(_) => format!("Comment"),
+ ConstantExpression(expr) => format!("ConstantExpression({expr:?})"),
+
+ BlockOpen => format!("BlockOpen"),
+ BlockClose => format!("BlockClose"),
+ Separator => format!("Separator"),
+
+ Symbol(name) => format!("Symbol({name})"),
+
+ Error(error) => format!("Error({error:?})"),
+ };
+
+ write!(f, "{start} {name}")
+ }
+}