diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-02-27 14:53:21 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-02-27 14:53:31 +1300 |
commit | 67470aea034fd46f4bbcfe815c51ad3451043188 (patch) | |
tree | 83d78d3d28e094d6a3af347d2ff2ff16472e5421 | |
parent | 4e8fae09f0f7d6f3a4ddbe285aeb01ef0622b761 (diff) | |
download | torque-asm-67470aea034fd46f4bbcfe815c51ad3451043188.zip |
Finish first working version of Torque
This is a huge and messy commit, worked on piecemeal while traveling
and while the language was still being designed.
-rw-r--r-- | Cargo.lock | 26 | ||||
-rw-r--r-- | Cargo.toml | 11 | ||||
-rw-r--r-- | src/compiler.rs | 24 | ||||
-rw-r--r-- | src/environment.rs | 56 | ||||
-rw-r--r-- | src/expression_evaluator.rs | 0 | ||||
-rw-r--r-- | src/formats/inhx32.rs | 87 | ||||
-rw-r--r-- | src/formats/mod.rs | 2 | ||||
-rw-r--r-- | src/main.rs | 201 | ||||
-rw-r--r-- | src/parsers/assembler.rs | 282 | ||||
-rw-r--r-- | src/parsers/bytecode.rs | 161 | ||||
-rw-r--r-- | src/parsers/expression.rs (renamed from src/parsers/constant_expression.rs) | 26 | ||||
-rw-r--r-- | src/parsers/mod.rs | 14 | ||||
-rw-r--r-- | src/parsers/packed_binary_literal.rs | 43 | ||||
-rw-r--r-- | src/parsers/semantic.rs | 339 | ||||
-rw-r--r-- | src/parsers/syntactic.rs | 197 | ||||
-rw-r--r-- | src/report.rs | 229 | ||||
-rw-r--r-- | src/tokens/assembler.rs | 140 | ||||
-rw-r--r-- | src/tokens/bytecode.rs | 49 | ||||
-rw-r--r-- | src/tokens/constant_expression.rs | 134 | ||||
-rw-r--r-- | src/tokens/expression.rs | 74 | ||||
-rw-r--r-- | src/tokens/mod.rs | 22 | ||||
-rw-r--r-- | src/tokens/packed_binary_literal.rs | 11 | ||||
-rw-r--r-- | src/tokens/semantic.rs | 184 | ||||
-rw-r--r-- | src/tokens/syntactic.rs | 32 | ||||
-rw-r--r-- | src/tokens/tracked.rs | 47 | ||||
-rw-r--r-- | src/tokens/tracked_integer.rs | 14 |
26 files changed, 1673 insertions, 732 deletions
@@ -9,7 +9,8 @@ source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c [[package]] name = "assembler" -version = "1.1.0" +version = "2.0.0" +source = "git+git://benbridle.com/assembler?tag=v2.0.0#0b4a82bf73339079490e9643cb115dbf82f3e92b" dependencies = [ "ansi", "log 1.1.2", @@ -17,6 +18,28 @@ dependencies = [ ] [[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "indexmap" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] name = "log" version = "1.1.1" source = "git+git://benbridle.com/log?tag=v1.1.1#930f3d0e2b82df1243f423c092a38546ea7533c3" @@ -49,6 +72,7 @@ name = "torque-asm" version = "0.1.0" dependencies = [ "assembler", + "indexmap", "log 1.1.2", "paste", "switchboard", @@ -4,9 +4,16 @@ version = "0.1.0" edition = "2021" [dependencies] -assembler = { path = "/home/ben/Libraries/assembler" } -# assembler = { git = "git://benbridle.com/assembler", tag = "v1.0.0" } +assembler = { git = "git://benbridle.com/assembler", tag = "v2.0.0" } log = { git = "git://benbridle.com/log", tag = "v1.1.2" } switchboard = { git = "git://benbridle.com/switchboard", tag = "v1.0.0" } paste = "1.0.15" +indexmap = "2.7.1" + +[profile.release] +lto=true +opt-level="s" +debug=false +strip=true +codegen-units=1 diff --git a/src/compiler.rs b/src/compiler.rs index 2ad9145..10f1433 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -58,7 +58,7 @@ impl Compiler { /// Parse all symbols from a source code string. fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> { - use syntactic::*; + use SyntacticTokenVariant as SynVar; use DefinitionType::*; use SymbolRole::*; let mut symbols = Vec::new(); @@ -80,41 +80,41 @@ fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> { } } - for token in SyntacticParser::from_source_code(&source_code, path) { + let syntactic_tokens = SyntacticParser::new(&source_code, path).parse(); + for token in syntactic_tokens { match token.variant { - TokenVariant::MacroDefinition(name) => { + SynVar::MacroDefinition(name) => { push!(name.clone(), token.source, Definition(MustPrecedeReference)); macro_name = Some(name); parse_arg_list = true; } - TokenVariant::MacroDefinitionTerminator => { + SynVar::MacroDefinitionTerminator => { macro_name = None; } - TokenVariant::LabelDefinition(name) => { + SynVar::LabelDefinition(name) => { push!(name.clone(), token.source, Definition(CanFollowReference)); } - TokenVariant::Symbol(name) => if parse_arg_list && after_separator { + SynVar::Symbol(name) => if parse_arg_list && after_separator { push!(name, token.source, Definition(MustPrecedeReference)); } else { parse_arg_list = false; push!(name, token.source, Reference); } - TokenVariant::Separator => { + SynVar::Separator => { after_separator = true; continue; } - TokenVariant::BlockOpen | TokenVariant::BlockClose => { + SynVar::BlockOpen | SynVar::BlockClose => { continue; } - TokenVariant::PackedBinaryLiteral(pbl) => { + SynVar::PackedBinaryLiteral(pbl) => { for field in pbl.fields { push!(field.name.to_string(), field.source, Reference) } } - TokenVariant::ConstantExpression(expr) => { - use ConstantExpressionTokenVariant as TokenVar; + SynVar::Expression(expr) => { for token in expr.tokens { - if let TokenVar::SymbolReference(name) = token.variant { + if let ExpressionTokenVariant::Invocation(name) = token.variant { push!(name, token.source, Reference); } } diff --git a/src/environment.rs b/src/environment.rs deleted file mode 100644 index 2bb3f5b..0000000 --- a/src/environment.rs +++ /dev/null @@ -1,56 +0,0 @@ -use crate::*; -use semantic::*; - -use std::collections::HashMap; - - -pub struct Environment { - pub scopes: Vec<Scope>, -} - -impl Environment { - pub fn get_integer(&self, name: &str) -> Result<usize, ()> { - for scope in self.scopes.iter().rev() { - if let Ok(value) = scope.get_integer(name, &self) { - return Ok(value); - } - } - return Err(()); - } - - pub fn get_block(&self, name: &str) -> Result<usize, ()> { - for scope in self.scopes.iter().rev() { - if let Ok(value) = scope.get_block(name, &self) { - return Ok(value); - } - } - return Err(()); - } -} - -pub struct Scope { - pub definitions: HashMap<String, Definition>, -} - -impl Scope { - pub fn get_integer(&self, _name: &str, _environment: &Environment) -> Result<usize, ()> { - todo!() - // use semantic::IntegerDefinition as IntDef; - - // if let Some(Definition { variant, ..}) = self.definitions.get(name) { - // if let IntegerDefinition::Integer(integer) = definition { - // match integer { - // IntDef::Literal(value) => return Ok(*value), - // IntDef::ConstantExpression(expr) => match expr.evaluate(environment) { - // Ok(_) | Err(_) => todo!(), - // }, - // }; - // } - // } - // return Err(()); - } - - pub fn get_block(&self, _name: &str, _environment: &Environment) -> Result<usize, ()> { - todo!() - } -} diff --git a/src/expression_evaluator.rs b/src/expression_evaluator.rs new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/expression_evaluator.rs diff --git a/src/formats/inhx32.rs b/src/formats/inhx32.rs new file mode 100644 index 0000000..d9c31d3 --- /dev/null +++ b/src/formats/inhx32.rs @@ -0,0 +1,87 @@ +use crate::*; + + +pub fn format_inhx32(words: &[Word]) -> String { + let mut records = Vec::new(); + records.push(extended_linear_address(0x0000)); + for (i, chunk) in words.chunks(8).enumerate() { + records.push(data_record(chunk, (i * 8) as u16)); + } + records.push(terminating_record()); + + let mut output = String::new(); + for record in records { + output.push_str(&record.to_string()); + } + return output; +} + +struct Record { + bytes: Vec<u8>, +} + +impl Record { + pub fn new() -> Self { + Self { bytes: Vec::new() } + } + + pub fn byte(&mut self, byte: u8) { + self.bytes.push(byte); + } + + pub fn be_double(&mut self, double: u16) { + let [high, low] = double.to_be_bytes(); + self.byte(high); + self.byte(low); + } + + pub fn le_double(&mut self, double: u16) { + let [high, low] = double.to_be_bytes(); + self.byte(low); + self.byte(high); + } + + pub fn to_string(self) -> String { + let mut sum: u8 = 0; + for byte in &self.bytes { + sum = sum.wrapping_add(*byte); + } + let checksum = sum.wrapping_neg(); + let mut output = String::new(); + for byte in &self.bytes { + output.push_str(&format!("{byte:0>2X}")); + } + format!(":{output}{checksum:0>2X}\n") + } +} + +fn data_record(words: &[Word], address: u16) -> Record { + let mut record = Record::new(); + record.byte((words.len() * 2) as u8); + record.be_double(address * 2); + record.byte(0x00); + for word in words { + match word.bits <= 16 { + true => record.le_double(word.value as u16), + false => panic!("Word '{word}' has more than 16 bits."), + }; + } + return record; +} + +fn extended_linear_address(address: u16) -> Record { + let mut record = Record::new(); + record.byte(0x02); + record.be_double(0x0000); + record.byte(0x04); + record.be_double(address); + return record; +} + +fn terminating_record() -> Record { + let mut record = Record::new(); + record.byte(0x00); + record.be_double(0x0000); + record.byte(0x01); + return record; +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs new file mode 100644 index 0000000..42d198c --- /dev/null +++ b/src/formats/mod.rs @@ -0,0 +1,2 @@ +mod inhx32; +pub use inhx32::*; diff --git a/src/main.rs b/src/main.rs index e7e52d9..9fb404c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,18 +1,21 @@ mod compiler; -mod environment; mod parsers; +mod report; mod tokens; +mod formats; pub use compiler::*; -pub use environment::*; pub use parsers::*; +pub use report::*; pub use tokens::*; +pub use formats::*; pub use assembler::*; use log::{info, fatal}; use switchboard::{Switchboard, SwitchQuery}; use std::io::{Read, Write}; +use std::str::FromStr; fn print_version() -> ! { @@ -39,10 +42,13 @@ fn main() { let no_project_libs = args.named("no-project-libs").as_bool(); let no_environment_libs = args.named("no-env-libs").as_bool(); + let format = args.named("format").default("debug").as_string(); let print_tree = args.named("tree").as_bool(); let dry_run = args.named("dry-run").short('n').as_bool(); - let only_resolve = args.named("resolve").as_bool(); - let _export_symbols = args.named("symbols").as_bool(); + + let Ok(format) = Format::from_str(format.as_str()) else { + fatal!("Unknown format '{format}', expected 'debug', 'inhx32', 'raw', or 'source'. "); + }; // ----------------------------------------------------------------------- @@ -86,10 +92,11 @@ fn main() { std::process::exit(1); } - let merged_source = compiler.get_compiled_source().unwrap_or_else( - |error| { error.report(); std::process::exit(1); } - ); - if only_resolve && !dry_run { + let merged_source = compiler.get_compiled_source().unwrap_or_else(|error| { + error.report(); + std::process::exit(1); + }); + if format == Format::Source && !dry_run { write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); } @@ -97,12 +104,44 @@ fn main() { // Parse syntactic tokens from merged source code. let path = Some("<merged source>"); - let parser = SyntacticParser::from_source_code(&merged_source, path); - let syntactic_tokens: Vec<_> = parser.collect(); + let syntactic_tokens = SyntacticParser::new(&merged_source, path).parse(); report_syntactic_errors(&syntactic_tokens, &merged_source); - let program = ProgramParser::new(syntactic_tokens).parse(); + let program = SemanticParser::new(syntactic_tokens).parse(); report_semantic_errors(&program, &merged_source); + + // program.print_definitions(); + let assembled_tokens = program.assemble(); + report_assembler_errors(&assembled_tokens, &merged_source); + + let bytecode = BytecodeGenerator::new(&assembled_tokens).generate(); + report_bytecode_errors(&bytecode, &merged_source); + + if !dry_run { + match format { + Format::Debug => { + let mut output = String::new(); + for word in &bytecode.words { + output.push_str(&word.to_string()); + output.push('\n'); + } + write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); + } + Format::Inhx32 => { + let output = format_inhx32(&bytecode.words); + write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); + } + Format::Raw => { + let mut output = Vec::new(); + for word in &bytecode.words { + let value = word.value as u16; + output.extend(value.to_be_bytes()); + } + write_bytes_and_exit(&output, destination_path.as_ref()); + } + Format::Source => unreachable!(), + } + } } @@ -120,133 +159,23 @@ fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { std::process::exit(0); } - -fn report_syntactic_errors(syntactic_tokens: &[syntactic::Token], source_code: &str) { - use syntactic::*; - - for token in syntactic_tokens { - let context = Context { source_code: &source_code, source: &token.source }; - match &token.variant { - TokenVariant::ConstantExpression(expr) => for t in &expr.tokens { - let context = Context { source_code: &source_code, source: &t.source }; - if let ConstantExpressionTokenVariant::Error(err) = &t.variant { - let ConstantExpressionParseError::InvalidHexadecimalLiteral(hex) = err; - let message = format!("Invalid hexadecimal literal {hex:?} in constant expression"); - report_source_issue(LogLevel::Error, &context, &message); - } - } - TokenVariant::PackedBinaryLiteral(pbl) => for e in &pbl.errors { - let context = Context { source_code: &source_code, source: &e.source }; - match e.variant { - PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => { - let message = format!("Duplicate field name {name:?} in packed binary literal"); - report_source_issue(LogLevel::Error, &context, &message); - } - PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => { - let message = format!("Invalid character {c:?} in packed binary literal"); - report_source_issue(LogLevel::Error, &context, &message); - } - } - } - TokenVariant::Error(err) => match err { - ParseError::InvalidHexadecimalLiteral(hex) => { - let message = format!("Invalid hexadecimal literal {hex:?}"); - report_source_issue(LogLevel::Error, &context, &message); - } - ParseError::InvalidSymbolIdentifier(name) => { - let message = format!("Invalid identifier {name:?}"); - report_source_issue(LogLevel::Error, &context, &message); - } - ParseError::UnterminatedComment => { - let message = format!("Unterminated comment"); - report_source_issue(LogLevel::Error, &context, &message); - } - ParseError::UnterminatedConstantExpression => { - let message = format!("Unterminated constant expression"); - report_source_issue(LogLevel::Error, &context, &message); - } - } - _ => (), - } - } -} - - -fn report_semantic_errors(program: &semantic::Program, source_code: &str) { - for error in &program.errors { - report_parse_error(error, source_code); - } - for definition in &program.definitions { - report_definition_errors(&definition.variant, source_code); - } - for invocation in &program.invocations { - report_invocation_errors(invocation, source_code); - } +#[derive(PartialEq)] +enum Format { + Debug, + Inhx32, + Raw, + Source, } -fn report_parse_error(error: &semantic::ParseError, source_code: &str) { - use semantic::*; - let message = match &error.variant { - ParseErrorVariant::UnterminatedMacroDefinition(name) => - format!("The macro definition '{name}' is missing a terminating ';' character"), - ParseErrorVariant::UnterminatedBlockDefinition => - format!("Block literal is missing a terminating '}}' character"), - ParseErrorVariant::InvalidArgumentDefinition(name) => - format!("The macro definition '{name}' has an invalid argument definition"), - ParseErrorVariant::InvalidToken => - format!("Invalid token"), - }; - let context = Context { source: &error.source, source_code}; - report_source_issue(LogLevel::Error, &context, &message); -} - -fn report_definition_errors(definition: &semantic::DefinitionVariant, source_code: &str) { - use semantic::*; - match definition { - DefinitionVariant::Integer(integer) => match &integer.variant { - IntegerDefinitionVariant::Constant(expr) => for token in &expr.tokens { - if let ConstantExpressionTokenVariant::Error(error) = &token.variant { - let message = match error { - ConstantExpressionParseError::InvalidHexadecimalLiteral(hex) => - format!("Invalid hexadecimal literal '{hex}' in constant expression"), - }; - let context = Context { source: &token.source, source_code}; - report_source_issue(LogLevel::Error, &context, &message); - } - } - _ => (), +impl FromStr for Format { + type Err = (); + fn from_str(string: &str) -> Result<Self, ()> { + match string { + "debug" => Ok(Self::Debug), + "inhx32" => Ok(Self::Inhx32), + "raw" => Ok(Self::Raw), + "source" => Ok(Self::Source), + _ => Err(()), } - DefinitionVariant::Block(block) => { - for error in &block.errors { - report_parse_error(&error, source_code); - } - for token in &block.tokens { - match &token.variant { - BlockTokenVariant::Word(pbl) => for error in &pbl.errors { - let message = match error.variant { - PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => - format!("Duplicate field name '{name}' in packed binary literal"), - PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => - format!("Invalid character '{c}' in packed binary literal"), - }; - let context = Context { source: &error.source, source_code }; - report_source_issue(LogLevel::Error, &context, &message); - } - BlockTokenVariant::Invocation(invocation) => - report_invocation_errors(invocation, source_code), - BlockTokenVariant::Comment(_) => (), - } - } - } - DefinitionVariant::Reference(_) => (), - } -} - -fn report_invocation_errors(invocation: &semantic::Invocation, source_code: &str) { - for error in &invocation.errors { - report_parse_error(&error, source_code); - } - for argument in &invocation.arguments { - report_definition_errors(argument, source_code); } } diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs new file mode 100644 index 0000000..eb180e3 --- /dev/null +++ b/src/parsers/assembler.rs @@ -0,0 +1,282 @@ +use crate::*; +use AssemblerErrorVariant as ErrVar; + +use indexmap::IndexMap; + + +static mut ID: usize = 0; +macro_rules! new_id { + () => { unsafe { + let id = ID; + ID += 1; + id + }}; +} + + +impl SemanticProgram { + pub fn assemble(&self) -> Vec<AssembledToken> { + let environment = Environment { + macro_definitions: &self.macro_definitions, + label_definitions: &self.label_definitions, + arguments: &IndexMap::new(), + id: new_id!(), + }; + let mut assembled_tokens = Vec::new(); + for token in &self.body { + let tokens = environment.reify_semantic_token(token); + assembled_tokens.extend(tokens); + } + return assembled_tokens; + } +} + + +pub struct Environment<'a> { + pub macro_definitions: &'a IndexMap<String, MacroDefinition>, + pub label_definitions: &'a IndexMap<String, LabelDefinition>, + pub arguments: &'a IndexMap<String, Argument>, + pub id: usize, +} + +impl<'a> Environment<'a> { + // This is only ever called for the highest level body tokens, never for invocations. + fn reify_semantic_token(&self, token: &SemanticToken) -> Vec<AssembledToken> { + let mut assembled_tokens = Vec::new(); + match token { + SemanticToken::Word(pbl) => { + let word = self.reify_packed_binary_literal(pbl); + assembled_tokens.push(AssembledToken::Word(word)); + } + SemanticToken::Invocation(invocation) => { + match self.reify_invocation(invocation) { + Ok(argument) => match argument { + Argument::Block(block) => assembled_tokens.extend(block), + Argument::Integer(_) => { + let variant = AssemblerErrorVariant::NotABlock; + let source = invocation.source.clone(); + let error = AssemblerError { source, variant }; + assembled_tokens.push(AssembledToken::Error(error)) + } + } + Err(error) => assembled_tokens.push(AssembledToken::Error(error)), + } + } + SemanticToken::LabelDefinition(definition) => { + assembled_tokens.push(AssembledToken::LabelDefinition(definition.clone())); + } + SemanticToken::PinnedAddress(address) => { + assembled_tokens.push(AssembledToken::PinnedAddress(address.clone())); + } + SemanticToken::Error(_) => (), + } + return assembled_tokens; + } + + fn reify_packed_binary_literal(&self, pbl: &PackedBinaryLiteral) -> AssembledWord { + let mut assembled_fields = Vec::new(); + let mut errors = Vec::new(); + for field in &pbl.fields { + let name = field.name.to_string(); + match self.reify_integer_reference(&name, &field.source) { + Ok(value) => assembled_fields.push( + AssembledField { + source: field.source.clone(), + value, + bits: field.bits, + shift: field.shift, + } + ), + Err(error) => errors.push(error), + }; + } + let source = pbl.source.clone(); + let value = pbl.value; + let bits = pbl.bits; + AssembledWord { source, bits, fields: assembled_fields, value, errors } + } + + fn reify_integer_reference(&self, name: &str, source: &SourceSpan) -> Result<IntegerArgument, AssemblerError> { + match self.reify_reference(name, source)? { + Argument::Integer(integer) => Ok(integer), + Argument::Block(_) => Err( + AssemblerError { + source: source.clone(), + variant: ErrVar::NotAnInteger, + } + ), + } + } + + fn reify_reference(&self, name: &str, source: &SourceSpan) -> Result<Argument, AssemblerError> { + let source = source.clone(); + if let Some(argument) = self.arguments.get(name) { + Ok(argument.clone()) + } else if let Some(definition) = self.macro_definitions.get(name) { + self.reify_value(&definition.value) + } else if let Some(label) = self.label_definitions.get(name) { + let name = Tracked::from(self.tag_label_name(&label.name), &source); + Ok(Argument::Integer(IntegerArgument::LabelReference(name))) + } else { + let variant = ErrVar::DefinitionNotFound(name.to_string()); + Err(AssemblerError { source, variant }) + } + } + + fn tag_label_name(&self, name: &str) -> String { + match name.contains(':') { + true => format!("{name}:{}", self.id), + false => name.to_string(), + } + } + + fn reify_value(&self, value: &Value) -> Result<Argument, AssemblerError> { + match value { + Value::Integer(integer) => { + let value = match &integer { + Integer::Literal(integer) => { + IntegerArgument::Integer(integer.clone()) + } + Integer::Expression(expr) => { + let expr = self.reify_constant_expression(expr)?; + IntegerArgument::Expression(expr) + } + Integer::LabelReference(name) => { + let name = Tracked::from(self.tag_label_name(name), &name.source); + IntegerArgument::LabelReference(name) + } + }; + Ok(Argument::Integer(value)) + } + Value::Block(block) => { + let mut assembled_tokens = Vec::new(); + for token in block { + match &token { + SemanticToken::Word(pbl) => { + let word = self.reify_packed_binary_literal(pbl); + assembled_tokens.push(AssembledToken::Word(word)); + } + SemanticToken::Invocation(invocation) => { + match self.reify_invocation(invocation)? { + Argument::Block(block) => assembled_tokens.extend(block), + Argument::Integer(_) => { + let source = invocation.source.clone(); + let variant = AssemblerErrorVariant::IntegerInBlock; + return Err(AssemblerError { source, variant}); + } + } + } + SemanticToken::LabelDefinition(definition) => { + let mut definition = definition.clone(); + definition.name.push_str(&format!(":{}", self.id)); + let token = AssembledToken::LabelDefinition(definition); + assembled_tokens.push(token); + } + SemanticToken::PinnedAddress(address) => { + let token = AssembledToken::PinnedAddress(address.to_owned()); + assembled_tokens.push(token); + } + SemanticToken::Error(_) => (), + } + } + Ok(Argument::Block(assembled_tokens)) + } + Value::Invocation(invocation) => { + self.reify_invocation(invocation) + } + } + } + + fn reify_invocation(&self, invocation: &Invocation) -> Result<Argument, AssemblerError> { + macro_rules! err { + ($variant:expr) => { Err(AssemblerError { + source: invocation.source.clone(), variant: $variant + }) }; + } + if let Some(argument) = self.arguments.get(&invocation.name) { + let expected = 0; + let received = invocation.arguments.len(); + if received != expected { + return err!(ErrVar::IncorrectArgumentCount(expected, received)); + } + Ok(argument.clone()) + } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { + // Check that the correct number of arguments were provided. + let received = invocation.arguments.len(); + let expected = definition.arguments.len(); + if received != expected { + return err!(ErrVar::IncorrectArgumentCount(expected, received)); + } + let mut arguments = IndexMap::new(); + for (i, argument) in invocation.arguments.iter().enumerate() { + // Check that the correct types of arguments were provided. + let arg_invocation = self.reify_value(&argument.value)?; + let arg_invocation_type = match &arg_invocation { + Argument::Integer(_) => ArgumentVariant::Integer, + Argument::Block(_) => ArgumentVariant::Block, + }; + let arg_definition_type = definition.arguments[i].variant; + if arg_invocation_type != arg_definition_type { + let variant = ErrVar::IncorrectArgumentType( + arg_definition_type, arg_invocation_type + ); + return Err(AssemblerError { source: argument.source.clone(), variant }); + } + let name = definition.arguments[i].name.clone(); + arguments.insert(name, arg_invocation); + } + let environment = Environment { + macro_definitions: &self.macro_definitions, + label_definitions: &self.label_definitions, + arguments: &arguments, + id: new_id!(), + }; + environment.reify_value(&definition.value) + } else if let Some(label) = self.label_definitions.get(&invocation.name) { + let expected = 0; + let received = invocation.arguments.len(); + if received != expected { + return err!(ErrVar::IncorrectArgumentCount(expected, received)); + } + let name = Tracked::from(self.tag_label_name(&label.name), &label.source); + Ok(Argument::Integer(IntegerArgument::LabelReference(name))) + } else { + err!(ErrVar::DefinitionNotFound(invocation.name.to_string())) + } + } + + fn reify_constant_expression(&self, expr: &Expression) -> Result<AssembledExpression, AssemblerError> { + use ExpressionTokenVariant as ExprVar; + + let mut assembled_tokens = Vec::new(); + for token in &expr.tokens { + let assembled_token = match &token.variant { + ExprVar::Literal(value) => { + let source = token.source.clone(); + let integer = TrackedInteger { source, value: *value }; + AssembledExpressionToken::Integer(integer) + } + ExprVar::Operator(operator) => { + AssembledExpressionToken::Operator(*operator) + } + ExprVar::Invocation(name) => { + match self.reify_integer_reference(&name, &token.source)? { + IntegerArgument::LabelReference(name) => { + AssembledExpressionToken::LabelReference(name) + } + IntegerArgument::Integer(integer) => { + AssembledExpressionToken::Integer(integer) + } + IntegerArgument::Expression(expr) => { + AssembledExpressionToken::Expression(Box::new(expr)) + }, + } + } + ExprVar::Error(_) => continue, + }; + assembled_tokens.push(assembled_token); + } + Ok(AssembledExpression { source: expr.source.clone(), tokens: assembled_tokens }) + } +} + diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs new file mode 100644 index 0000000..ec19d9f --- /dev/null +++ b/src/parsers/bytecode.rs @@ -0,0 +1,161 @@ +use crate::*; + +use std::collections::HashMap; + + +pub struct BytecodeGenerator<'a> { + tokens: &'a [AssembledToken], + addresses: HashMap<String, Tracked<usize>>, + words: Vec<Word>, + errors: Vec<BytecodeError>, +} + +impl<'a> BytecodeGenerator<'a> { + pub fn new(tokens: &'a [AssembledToken]) -> Self { + Self { + tokens, + addresses: HashMap::new(), + words: Vec::new(), + errors: Vec::new(), + } + } + + pub fn generate(mut self) -> Bytecode { + self.calculate_addresses(); + for token in self.tokens { + match token { + AssembledToken::Word(assembled_word) => { + let mut value = assembled_word.value; + for field in &assembled_word.fields { + let (field_value, source) = match &field.value { + IntegerArgument::Expression(expr) => + (self.resolve_expression(expr), expr.source.clone()), + IntegerArgument::LabelReference(name) => + (self.resolve_label_reference(name), name.source.clone()), + IntegerArgument::Integer(integer) => + (integer.value, integer.source.clone()), + }; + let bitcount = match field_value { + 0 => 0, + _ => (field_value.ilog2() + 1) as usize, + }; + if field.bits < bitcount { + let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount); + self.errors.push(BytecodeError { source, variant }); + } else { + value |= (field_value << field.shift) as usize; + } + } + self.words.push(Word { bits: assembled_word.bits, value }); + } + AssembledToken::PinnedAddress(pinned) => { + if self.words.len() > pinned.address { + let variant = BytecodeErrorVariant::PinnedAddressBacktrack( + pinned.address, self.words.len()); + let source = pinned.source.clone(); + self.errors.push(BytecodeError { source, variant }); + } else { + self.words.resize(pinned.address, Word { bits: 0, value: 0}); + } + } + AssembledToken::LabelDefinition(_) => (), + AssembledToken::Error(_) => (), + } + } + + return Bytecode { + words: self.words, + errors: self.errors, + } + } + + fn calculate_addresses(&mut self) { + let mut i = 0; + for token in self.tokens { + match token { + AssembledToken::LabelDefinition(definition) => { + let address = Tracked::from(i, &definition.source); + if let Some(_) = self.addresses.insert(definition.name.clone(), address) { + let name = definition.name.clone(); + let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name); + let source = definition.source.clone(); + self.errors.push(BytecodeError { source, variant }); + } + } + AssembledToken::Word(_) => { + i += 1; + } + AssembledToken::PinnedAddress(pinned) => { + i = pinned.address; + } + AssembledToken::Error(_) => (), + } + } + } + + fn resolve_expression(&mut self, expr: &AssembledExpression) -> isize { + let mut stack = Vec::new(); + macro_rules! push { + ($value:expr) => { stack.push($value) }; + } + macro_rules! pop { + ($name:ident) => { let $name = match stack.pop() { + Some(value) => value, + None => { + let variant = BytecodeErrorVariant::StackUnderflow; + self.errors.push(BytecodeError { source: expr.source.clone(), variant }); + return 0; + }, + }; }; + } + macro_rules! truth { + ($bool:expr) => { match $bool { true => 1, false => 0 } }; + } + + for token in &expr.tokens { + match &token { + AssembledExpressionToken::Integer(value) => { + push!(value.value) + } + AssembledExpressionToken::LabelReference(name) => { + push!(self.resolve_label_reference(name)) + } + AssembledExpressionToken::Expression(expr) => { + push!(self.resolve_expression(expr)) + } + AssembledExpressionToken::Operator(operator) => match operator { + Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) }, + Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) }, + Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) }, + Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) }, + Operator::Add => { pop!(b); pop!(a); push!(a + b) }, + Operator::Subtract => { pop!(b); pop!(a); push!(a - b) }, + Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) }, + Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) }, + Operator::And => { pop!(b); pop!(a); push!(a & b) }, + Operator::Or => { pop!(b); pop!(a); push!(a | b) }, + Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) }, + Operator::Not => { pop!(a); push!(!a) }, + } + } + } + + let variant = match stack.len() { + 0 => BytecodeErrorVariant::NoReturnValue, + 1 => return stack[0], + _ => BytecodeErrorVariant::MultipleReturnValues, + }; + self.errors.push(BytecodeError { source: expr.source.clone(), variant}); + 0 + } + + fn resolve_label_reference(&mut self, name: &Tracked<String>) -> isize { + if let Some(address) = self.addresses.get(&name.value) { + address.value as isize + } else { + let variant = BytecodeErrorVariant::DefinitionNotFound(name.value.clone()); + self.errors.push(BytecodeError { source: name.source.clone(), variant }); + 0 + } + } +} diff --git a/src/parsers/constant_expression.rs b/src/parsers/expression.rs index 78dc697..f902858 100644 --- a/src/parsers/constant_expression.rs +++ b/src/parsers/expression.rs @@ -1,17 +1,15 @@ use crate::*; -pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantExpression { - use ConstantExpressionTokenVariant as TokenVar; - use ConstantExpressionParseError as ParseError; +pub fn parse_constant_expression(mut t: Tokeniser, source: SourceSpan) -> Expression { + use ExpressionTokenVariant as TokenVar; + use ExpressionParseError as ParseError; let mut tokens = Vec::new(); - let mut t = Tokeniser::new_child(string, parent); - t.position.to_next_char(); // skip opening delimiter loop { - t.drop_whitespace(); - t.mark_start_position(); + t.eat_whitespace(); + t.mark_start(); let token = t.eat_token(); if token.is_empty() { break; @@ -19,7 +17,7 @@ pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantEx let variant = match token.as_str() { "=" => TokenVar::Operator(Operator::Equal), - "!" => TokenVar::Operator(Operator::NotEqual), + "!=" => TokenVar::Operator(Operator::NotEqual), "<" => TokenVar::Operator(Operator::LessThan), ">" => TokenVar::Operator(Operator::GreaterThan), "+" => TokenVar::Operator(Operator::Add), @@ -32,21 +30,21 @@ pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantEx "~" => TokenVar::Operator(Operator::Not), _ => if let Some(stripped) = token.strip_prefix("0x") { match usize::from_str_radix(stripped, 16) { - Ok(value) => TokenVar::IntegerLiteral(value), + Ok(value) => TokenVar::Literal(value as isize), Err(_) => TokenVar::Error( ParseError::InvalidHexadecimalLiteral(stripped.to_string())), } } else { match usize::from_str_radix(&token, 10) { - Ok(value) => TokenVar::IntegerLiteral(value), - Err(_) => TokenVar::SymbolReference(token.to_string()), + Ok(value) => TokenVar::Literal(value as isize), + Err(_) => TokenVar::Invocation(token.to_string()), } } }; - let source = t.mark_end_position(); - tokens.push(ConstantExpressionToken { source, variant }); + let source = t.get_source(); + tokens.push(ExpressionToken { source, variant }); } - return ConstantExpression { tokens }; + return Expression { source, tokens }; } diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index 91765a9..da2c23a 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -1,11 +1,15 @@ -mod constant_expression; -pub use constant_expression::*; - +mod expression; mod packed_binary_literal; + +pub use expression::*; pub use packed_binary_literal::*; mod syntactic; -pub use syntactic::*; - mod semantic; +mod assembler; +mod bytecode; + +pub use syntactic::*; pub use semantic::*; +pub use assembler::*; +pub use bytecode::*; diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs index 9704fc4..18f8da7 100644 --- a/src/parsers/packed_binary_literal.rs +++ b/src/parsers/packed_binary_literal.rs @@ -1,53 +1,54 @@ use crate::*; -pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBinaryLiteral { +/// t is a Tokeniser over the characters of the PBL, excluding the leading hash. +pub fn parse_packed_binary_literal(mut t: Tokeniser, source: SourceSpan) -> PackedBinaryLiteral { use PackedBinaryLiteralParseError as ParseError; use PackedBinaryLiteralParseErrorVariant as ParseErrorVar; let mut value = 0; let mut bits = 0; + let mut field_bits = 0; let mut name = '\0'; let mut fields: Vec<BitField> = Vec::new(); let mut errors: Vec<ParseError> = Vec::new(); macro_rules! push_field { - ($source:expr) => { + () => { if fields.iter().any(|f| f.name == name) { let variant = ParseErrorVar::DuplicateFieldName(name); - errors.push(ParseError { source: $source, variant }); + errors.push(ParseError { source: t.get_source(), variant }); } else { - fields.push(BitField { name, source: $source, bits, shift: 0 }); + fields.push(BitField { name, source: t.get_source(), bits: field_bits, shift: 0 }); } }; } - let mut t = Tokeniser::new_child(string, parent); - t.position.to_next_char(); // skip opening hash character - while let Some(c) = t.eat_char() { // Ignore underscores. if c == '_' { - t.prev_position = t.prev_prev_position; + t.mark.undo(); continue; } // Add a bit to the value; value <<= 1; + bits += 1; for field in &mut fields { field.shift += 1; } // Extend the current field. if c == name { - bits += 1; + field_bits += 1; continue; } // Commit the current field. - if bits > 0 { - push_field!(t.mark_prev_end_position()); - bits = 0; + if field_bits > 0 { + t.mark_end_prev(); + push_field!(); + field_bits = 0; name = '\0'; } @@ -60,21 +61,25 @@ pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBi continue; } - t.mark_prev_start_position(); + t.mark_start_prev(); if c.is_alphabetic() { name = c; - bits = 1; + field_bits = 1; continue; } else { - let source = t.mark_end_position(); - errors.push(ParseError { source, variant: ParseErrorVar::InvalidCharacter(c) }); + let source = t.get_source(); + let variant = ParseErrorVar::InvalidCharacter(c); + errors.push(ParseError { source, variant }); } } // Commit the final field. - if bits > 0 { - push_field!(t.mark_end_position()); + for field in &mut fields { + field.shift += 1; + } + if field_bits > 0 { + push_field!(); } - PackedBinaryLiteral { value, fields, errors } + PackedBinaryLiteral { source, bits, value, fields, errors } } diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs index 7ef4a4a..a58fb5f 100644 --- a/src/parsers/semantic.rs +++ b/src/parsers/semantic.rs @@ -1,136 +1,106 @@ use crate::*; - -use syntactic as syn; -use syn::TokenVariant as SynVar; -use semantic::*; +use SyntacticTokenVariant as SynVar; use std::collections::VecDeque; +use indexmap::IndexMap; + macro_rules! fn_is_syn_variant { ($name:ident, $variant:ty) => { paste::paste! { - fn [< is_ $name >](token: &syn::Token) -> bool { + fn [< is_ $name >](token: &SyntacticToken) -> bool { match token.variant { $variant => true, _ => false, } } } }; } -fn_is_syn_variant!(block_open, syn::TokenVariant::BlockOpen); -fn_is_syn_variant!(block_close, syn::TokenVariant::BlockClose); -fn_is_syn_variant!(separator, syn::TokenVariant::Separator); -fn_is_syn_variant!(terminator, syn::TokenVariant::MacroDefinitionTerminator); - - -pub struct Tokens { - tokens: VecDeque<syn::Token>, -} - -impl Tokens { - pub fn new<T: Into<VecDeque<syn::Token>>>(tokens: T) -> Self { - Self { tokens: tokens.into() } - } - - pub fn pop(&mut self) -> Option<syn::Token> { - self.tokens.pop_front() - } +fn_is_syn_variant!(block_open, SyntacticTokenVariant::BlockOpen); +fn_is_syn_variant!(block_close, SyntacticTokenVariant::BlockClose); +fn_is_syn_variant!(separator, SyntacticTokenVariant::Separator); +fn_is_syn_variant!(terminator, SyntacticTokenVariant::MacroDefinitionTerminator); - pub fn pop_if(&mut self, predicate: fn(&syn::Token) -> bool) -> Option<syn::Token> { - match predicate(self.tokens.front()?) { - true => self.tokens.pop_front(), - false => None, - } - } - pub fn unpop(&mut self, token: syn::Token) { - self.tokens.push_front(token); - } - - /// Pull tokens until the predicate returns true, otherwise return Err. - pub fn pull_until(&mut self, mut predicate: impl FnMut(&syn::Token) -> bool) -> Result<Self, ()> { - let mut output = VecDeque::new(); - while let Some(token) = self.tokens.pop_front() { - match predicate(&token) { - true => return Ok(Self::new(output)), - false => output.push_back(token), - }; - } - return Err(()); - } - - pub fn take(&mut self) -> Self { - Self { tokens: std::mem::take(&mut self.tokens) } - } - - pub fn len(&self) -> usize { - self.tokens.len() - } -} - - -pub struct ProgramParser { +pub struct SemanticParser { tokens: Tokens, - definitions: Vec<Definition>, - invocations: Vec<Invocation>, - errors: Vec<ParseError>, + macro_definitions: IndexMap<String, MacroDefinition>, + label_definitions: IndexMap<String, LabelDefinition>, + body: Vec<SemanticToken>, } -impl ProgramParser { - pub fn new(syntactic_tokens: Vec<syn::Token>) -> Self { +impl SemanticParser { + pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self { + // Gather all labels ahead of time. + let mut label_definitions = IndexMap::new(); + for token in &syntactic_tokens { + if let SyntacticTokenVariant::LabelDefinition(name) = &token.variant { + let definition = LabelDefinition { + source: token.source.clone(), + name: name.clone(), + }; + let None = label_definitions.insert(name.to_string(), definition) else { + unreachable!("Duplicate definition for label {name:?}"); + }; + } + } Self { tokens: Tokens::new(syntactic_tokens), - definitions: Vec::new(), - invocations: Vec::new(), - errors: Vec::new(), + macro_definitions: IndexMap::new(), + label_definitions, + body: Vec::new(), } } - pub fn parse(mut self) -> Program { + pub fn parse(mut self) -> SemanticProgram { while let Some(syn) = self.tokens.pop() { match syn.variant { SynVar::MacroDefinition(name) => { - // Collect all tokens up to the next definition terminator. let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else { - let variant = ParseErrorVariant::UnterminatedMacroDefinition(name); - self.errors.push(ParseError { source: syn.source, variant}); + let variant = SemanticParseErrorVariant::UnterminatedMacroDefinition(name); + let error = SemanticParseError { source: syn.source, variant }; + self.body.push(SemanticToken::Error(error)); break; }; - // Parse macro definition arguments. - match DefinitionParser::new(name, syn.source, definition_tokens).parse() { - Ok(definition) => self.definitions.push(definition), - Err(errors) => self.errors.extend(errors), + let definition = MacroDefinitionParser::new(syn.source, definition_tokens).parse(); + let None = self.macro_definitions.insert(name.clone(), definition) else { + unreachable!("Duplicate definition for macro {name}"); }; } - SynVar::Comment(_) => (), + SynVar::LabelDefinition(name) => { + let label_definition = LabelDefinition { source: syn.source, name }; + self.body.push(SemanticToken::LabelDefinition(label_definition)); + } + SynVar::PinnedAddress(address) => { + let pinned_address = PinnedAddress { source: syn.source, address }; + self.body.push(SemanticToken::PinnedAddress(pinned_address)); + } SynVar::Symbol(name) => { - let parser = InvocationParser::new(name, &mut self.tokens); - self.invocations.push(parser.parse()); + let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse(); + self.body.push(SemanticToken::Invocation(invocation)); } _ => { - let variant = ParseErrorVariant::InvalidToken; - self.errors.push(ParseError { source: syn.source, variant}); - break; + let variant = SemanticParseErrorVariant::InvalidToken; + let error = SemanticParseError { source: syn.source, variant }; + self.body.push(SemanticToken::Error(error)); } } } - Program { - definitions: self.definitions, - invocations: self.invocations, - errors: self.errors, + SemanticProgram { + macro_definitions: self.macro_definitions, + label_definitions: self.label_definitions, + body: self.body, } } } -pub struct DefinitionParser { - name: String, +pub struct MacroDefinitionParser { source: SourceSpan, tokens: Tokens, arguments: Vec<ArgumentDefinition>, - errors: Vec<ParseError>, + errors: Vec<SemanticParseError>, } -impl DefinitionParser { - pub fn new(name: String, source: SourceSpan, tokens: Tokens) -> Self { +impl MacroDefinitionParser { + pub fn new(source: SourceSpan, tokens: Tokens) -> Self { Self { - name, tokens, source, arguments: Vec::new(), @@ -138,20 +108,15 @@ impl DefinitionParser { } } - pub fn parse(mut self) -> Result<Definition, Vec<ParseError>> { + pub fn parse(mut self) -> MacroDefinition { while let Some(definition) = self.parse_argument_definition() { self.arguments.push(definition) } - if self.errors.is_empty() { - let variant = self.parse_body(); - Ok(Definition { - name: self.name, - source: self.source, - arguments: self.arguments, - variant, - }) - } else { - Err(self.errors) + MacroDefinition { + value: self.parse_body(), + source: self.source, + arguments: self.arguments, + errors: self.errors, } } @@ -172,47 +137,45 @@ impl DefinitionParser { let token = token?; let source = token.source; if let SynVar::Symbol(name) = token.variant { - let variant = ArgumentDefinitionVariant::Integer; + let variant = match is_block { + true => ArgumentVariant::Block, + false => ArgumentVariant::Integer, + }; Some(ArgumentDefinition { name, source, variant }) } else { - let name = self.name.clone(); - let variant = ParseErrorVariant::InvalidArgumentDefinition(name); - self.errors.push(ParseError { source, variant}); + let variant = SemanticParseErrorVariant::InvalidToken; + self.errors.push(SemanticParseError { source, variant}); None } } - fn parse_body(&mut self) -> DefinitionVariant { - // Attempt to parse an IntegerDefinition. + fn parse_body(&mut self) -> Value { + // Attempt to parse an Integer. if self.tokens.len() == 1 { let token = self.tokens.pop().unwrap(); match token.variant { - SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => { - return DefinitionVariant::Integer(IntegerDefinition { - source: token.source, - variant: IntegerDefinitionVariant::Literal(value), - }); + SynVar::IntegerLiteral(value) => { + let integer = TrackedInteger { source: token.source, value }; + return Value::Integer(Integer::Literal(integer)); } - SynVar::ConstantExpression(expr) => { - return DefinitionVariant::Integer(IntegerDefinition { - source: token.source, - variant: IntegerDefinitionVariant::Constant(expr), - }); - } - SynVar::Symbol(name) => { - return DefinitionVariant::Reference(ReferenceDefinition { - source: token.source, - name, - }); + SynVar::Expression(expr) => { + return Value::Integer(Integer::Expression(expr)); } _ => (), } self.tokens.unpop(token); } - - // Parse the remaining tokens as a BlockDefinition. - let block = BlockParser::new(self.tokens.take()).parse(); - return DefinitionVariant::Block(block); + // Parse a Block. + let mut block = BlockParser::new(self.tokens.take()).parse(); + // If the block contains a single invocation, unwrap it. + if block.len() == 1 { + match block.pop() { + Some(SemanticToken::Invocation(invocation)) => return Value::Invocation(invocation), + Some(other) => block.push(other), + None => (), + }; + } + return Value::Block(block); } } @@ -220,52 +183,52 @@ impl DefinitionParser { /// Parse an entire block, excluding delimiters. pub struct BlockParser { tokens: Tokens, - block_tokens: Vec<BlockToken>, - errors: Vec<ParseError>, + semantic_tokens: Vec<SemanticToken>, } impl BlockParser { pub fn new(tokens: Tokens) -> Self { - Self { tokens, block_tokens: Vec::new(), errors: Vec::new() } + Self { tokens, semantic_tokens: Vec::new() } } - pub fn parse(mut self) -> BlockDefinition { + pub fn parse(mut self) -> Vec<SemanticToken> { while let Some(token) = self.tokens.pop() { let source = token.source; match token.variant { SynVar::Symbol(name) => { - let parser = InvocationParser::new(name, &mut self.tokens); - let invocation = parser.parse(); - let variant = BlockTokenVariant::Invocation(invocation); - let block_token = BlockToken { source, variant }; - self.block_tokens.push(block_token); + let invocation = InvocationParser::new(name, source, &mut self.tokens).parse(); + self.semantic_tokens.push(SemanticToken::Invocation(invocation)); } SynVar::PackedBinaryLiteral(pbl) => { - let variant = BlockTokenVariant::Word(pbl); - let block_token = BlockToken { source, variant }; - self.block_tokens.push(block_token); + self.semantic_tokens.push(SemanticToken::Word(pbl)); + } + SynVar::LabelDefinition(name) => { + let label_definition = LabelDefinition { source, name }; + self.semantic_tokens.push(SemanticToken::LabelDefinition(label_definition)); } _ => { - let variant = ParseErrorVariant::InvalidToken; - self.errors.push(ParseError { source, variant }) + let variant = SemanticParseErrorVariant::InvalidToken; + let error = SemanticParseError { source, variant }; + self.semantic_tokens.push(SemanticToken::Error(error)); } } } - BlockDefinition { tokens: self.block_tokens, errors: self.errors } + return self.semantic_tokens; } } struct InvocationParser<'a> { name: String, + source: SourceSpan, tokens: &'a mut Tokens, - arguments: Vec<DefinitionVariant>, - errors: Vec<ParseError>, + arguments: Vec<ArgumentInvocation>, + errors: Vec<SemanticParseError>, } impl<'a> InvocationParser<'a> { - pub fn new(name: String, tokens: &'a mut Tokens) -> Self { - Self { name, tokens, arguments: Vec::new(), errors: Vec::new() } + pub fn new(name: String, source: SourceSpan, tokens: &'a mut Tokens) -> Self { + Self { name, source, tokens, arguments: Vec::new(), errors: Vec::new() } } pub fn parse(mut self) -> Invocation { @@ -274,33 +237,34 @@ impl<'a> InvocationParser<'a> { } Invocation { name: self.name, + source: self.source, arguments: self.arguments, errors: self.errors, } } - fn parse_invocation_argument(&mut self) -> Option<DefinitionVariant> { + fn parse_invocation_argument(&mut self) -> Option<ArgumentInvocation> { // Only continue if the first token is a separator. self.tokens.pop_if(is_separator)?; if let Some(block_open) = self.tokens.pop_if(is_block_open) { let source = block_open.source; let mut depth = 1; - let is_matching_block_close = |token: &syntactic::Token| { + let is_matching_block_close = |token: &SyntacticToken| { match token.variant { - syntactic::TokenVariant::BlockOpen => { + SyntacticTokenVariant::BlockOpen => { depth += 1; false } - syntactic::TokenVariant::BlockClose => { + SyntacticTokenVariant::BlockClose => { depth -= 1; depth == 0 } _ => false, } }; if let Ok(block_tokens) = self.tokens.pull_until(is_matching_block_close) { let block = BlockParser::new(block_tokens).parse(); - Some(DefinitionVariant::Block(block)) + Some(ArgumentInvocation { source, value: Value::Block(block) }) } else { - let variant = ParseErrorVariant::UnterminatedBlockDefinition; - self.errors.push(ParseError { source, variant }); + let variant = SemanticParseErrorVariant::UnterminatedBlock; + self.errors.push(SemanticParseError { source, variant }); None } } else { @@ -308,25 +272,74 @@ impl<'a> InvocationParser<'a> { let source = token.source; match token.variant { SynVar::Symbol(name) => { - let reference = ReferenceDefinition { source, name }; - Some(DefinitionVariant::Reference(reference)) + let arguments = Vec::new(); + let errors = Vec::new(); + let invocation = Invocation { source: source.clone(), name, arguments, errors }; + let value = Value::Invocation(invocation); + Some(ArgumentInvocation { source, value }) } - SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => { - let variant = IntegerDefinitionVariant::Literal(value); - let integer = IntegerDefinition { source, variant }; - Some(DefinitionVariant::Integer(integer)) + SynVar::IntegerLiteral(value) => { + let integer = TrackedInteger { source: source.clone(), value }; + let value = Value::Integer(Integer::Literal(integer)); + Some(ArgumentInvocation { source, value }) } - SynVar::ConstantExpression(expr) => { - let variant = IntegerDefinitionVariant::Constant(expr); - let integer = IntegerDefinition { source, variant }; - Some(DefinitionVariant::Integer(integer)) + SynVar::Expression(expr) => { + let value = Value::Integer(Integer::Expression(expr)); + Some(ArgumentInvocation { source, value }) } _ => { - let variant = ParseErrorVariant::InvalidToken; - self.errors.push(ParseError { source, variant }); + let variant = SemanticParseErrorVariant::InvalidToken; + self.errors.push(SemanticParseError { source, variant }); None } } } } } + + +pub struct Tokens { + tokens: VecDeque<SyntacticToken>, +} + +impl Tokens { + pub fn new<T: Into<VecDeque<SyntacticToken>>>(tokens: T) -> Self { + Self { tokens: tokens.into() } + } + + pub fn pop(&mut self) -> Option<SyntacticToken> { + self.tokens.pop_front() + } + + pub fn pop_if(&mut self, predicate: fn(&SyntacticToken) -> bool) -> Option<SyntacticToken> { + match predicate(self.tokens.front()?) { + true => self.tokens.pop_front(), + false => None, + } + } + + pub fn unpop(&mut self, token: SyntacticToken) { + self.tokens.push_front(token); + } + + /// Pull tokens until the predicate returns true, otherwise return Err. + pub fn pull_until(&mut self, mut predicate: impl FnMut(&SyntacticToken) -> bool) -> Result<Self, ()> { + let mut output = VecDeque::new(); + while let Some(token) = self.tokens.pop_front() { + match predicate(&token) { + true => return Ok(Self::new(output)), + false => output.push_back(token), + }; + } + return Err(()); + } + + pub fn take(&mut self) -> Self { + Self { tokens: std::mem::take(&mut self.tokens) } + } + + pub fn len(&self) -> usize { + self.tokens.len() + } +} + diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs index 909dbaa..37f8e6c 100644 --- a/src/parsers/syntactic.rs +++ b/src/parsers/syntactic.rs @@ -1,106 +1,147 @@ use crate::*; -use syntactic::*; pub struct SyntacticParser { tokeniser: Tokeniser, - /// The name of the most recently parsed label. - label_name: String, + tokens: Vec<SyntacticToken>, /// The name of the macro being parsed. macro_name: Option<String>, + /// The name of the most recent label. + label_name: String, } impl SyntacticParser { - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { let mut tokeniser = Tokeniser::new(source_code, path); - tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']); + tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']); Self { tokeniser, - label_name: String::new(), + tokens: Vec::new(), macro_name: None, + label_name: String::new(), } } -} - -impl Iterator for SyntacticParser { - type Item = Token; - - /// Sequentially parse tokens from the source code. - fn next(&mut self) -> Option<Token> { + pub fn parse(mut self) -> Vec<SyntacticToken> { + use SyntacticTokenVariant as SynVar; + use SyntacticParseError as SynErr; let t = &mut self.tokeniser; - t.drop_whitespace(); - t.mark_start_position(); - let variant = match t.eat_char()? { - '@' => { - self.label_name = t.eat_token(); - TokenVariant::LabelDefinition(self.label_name.clone()) - } - '&' => { - let token = t.eat_token(); - TokenVariant::LabelDefinition(format!("{}/{token}", self.label_name)) - } - '%' => { - let macro_name = t.eat_token(); - self.macro_name = Some(macro_name.clone()); - TokenVariant::MacroDefinition(macro_name) - } - ';' => { - self.macro_name = None; - TokenVariant::MacroDefinitionTerminator - } - '[' => match t.eat_to_delimiter(']') { - Some(string) => { - let constant = ConstantExpression::from_str(&string, t); - TokenVariant::ConstantExpression(constant) + loop { + t.eat_whitespace(); + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let variant = match c { + ':' => SynVar::Separator, + '{' => SynVar::BlockOpen, + '}' => SynVar::BlockClose, + '@' => match &self.macro_name { + Some(_) => { + t.eat_token(); + SynVar::Error(SynErr::LabelInMacroDefinition) + } + None => { + self.label_name = t.eat_token(); + SynVar::LabelDefinition(self.label_name.clone()) + } } - None => TokenVariant::Error(ParseError::UnterminatedConstantExpression), - } - '{' => TokenVariant::BlockOpen, - '}' => TokenVariant::BlockClose, - '(' => match t.eat_to_delimiter(')') { - Some(string) => TokenVariant::Comment(string), - None => TokenVariant::Error(ParseError::UnterminatedComment), - } - '#' => { - let token = t.eat_token(); - let pbl = PackedBinaryLiteral::from_str(&token, t); - TokenVariant::PackedBinaryLiteral(pbl) - }, - '~' => { - let token = t.eat_token(); - TokenVariant::Symbol(format!("{}/{token}", self.label_name)) - } - ':' => TokenVariant::Separator, - c => { - let token = format!("{c}{}", t.eat_token()); - if let Some(hex_string) = token.strip_prefix("0x") { - match usize::from_str_radix(hex_string, 16) { - Ok(hex) => TokenVariant::HexadecimalLiteral(hex), - Err(_) => TokenVariant::Error(ParseError::InvalidHexadecimalLiteral(token)), + '&' => match &self.macro_name { + Some(macro_name) => { + let label_name = format!("{macro_name}:{}", t.eat_token()); + SynVar::LabelDefinition(label_name) } - } else { - match usize::from_str_radix(&token, 10) { - Ok(value) => TokenVariant::DecimalLiteral(value), - Err(_) => TokenVariant::Symbol(token), + None => { + let label_name = &self.label_name; + let sublabel_name = format!("{label_name}/{}", t.eat_token()); + SynVar::LabelDefinition(sublabel_name) } } - } - }; - - // Parse source path comments. - if let TokenVariant::Comment(comment) = &variant { - // Check if the comment fills the entire line. - if t.start_position.column == 0 && t.end_of_line() { - if let Some(path) = comment.strip_prefix(": ") { - t.embedded_path = Some(PathBuf::from(path.trim())); - t.embedded_first_line = t.start_position.line + 1; + '%' => { + let macro_name = t.eat_token(); + self.macro_name = Some(macro_name.clone()); + SynVar::MacroDefinition(macro_name) + } + ';' => { + self.macro_name = None; + SynVar::MacroDefinitionTerminator } - } + '[' => { + t.mark_child(); + match t.eat_to_delimiter(']') { + Some(_) => { + let child = t.subtokenise(); + t.mark_end(); + let expr = parse_constant_expression(child, t.get_source()); + SynVar::Expression(expr) + } + None => SynVar::Error(SynErr::UnterminatedExpression), + } + } + '(' => match t.eat_to_delimiter(')') { + Some(string) => { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + } + } + continue; + }, + None => SynVar::Error(SynErr::UnterminatedComment), + } + '|' => { + let token = t.eat_token(); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(addr) => SynVar::PinnedAddress(addr), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(addr) => SynVar::PinnedAddress(addr), + Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)), + } + } + } + '#' => { + t.mark_child(); + t.eat_token(); + let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source()); + SynVar::PackedBinaryLiteral(pbl) + }, + '~' => match &self.macro_name { + Some(macro_name) => { + let symbol_name = format!("{macro_name}:{}", t.eat_token()); + SynVar::Symbol(symbol_name) + } + None => { + let label_name = &self.label_name; + let symbol_name = format!("{label_name}/{}", t.eat_token()); + SynVar::Symbol(symbol_name) + } + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(value) => SynVar::IntegerLiteral(value as isize), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(value) => SynVar::IntegerLiteral(value as isize), + Err(_) => SynVar::Symbol(token), + } + } + } + }; + + t.mark_end(); + let source = t.get_source(); + self.tokens.push(SyntacticToken { source, variant }); } - let source = t.mark_end_position(); - Some( Token { source, variant } ) + return self.tokens; } } diff --git a/src/report.rs b/src/report.rs new file mode 100644 index 0000000..2acdddc --- /dev/null +++ b/src/report.rs @@ -0,0 +1,229 @@ +use crate::*; + + +static mut ERROR_REPORTED: bool = false; + +macro_rules! report_source_error { + ($context:expr, $message:expr) => { + report_source_issue(LogLevel::Error, $context, $message); + unsafe { ERROR_REPORTED = true; } + }; +} + +macro_rules! exit_if_error_reported { + () => { + if unsafe { ERROR_REPORTED } { + std::process::exit(1); + } + }; +} + +pub fn report_syntactic_errors(syntactic_tokens: &[SyntacticToken], source_code: &str) { + use SyntacticTokenVariant as SynVar; + for token in syntactic_tokens { + let context = Context { source_code: &source_code, source: &token.source }; + match &token.variant { + SynVar::Expression(expr) => for t in &expr.tokens { + let context = Context { source_code: &source_code, source: &t.source }; + if let ExpressionTokenVariant::Error(err) = &t.variant { + let ExpressionParseError::InvalidHexadecimalLiteral(hex) = err; + let message = format!("Invalid hexadecimal literal {hex:?} in constant expression"); + report_source_error!(&context, &message); + } + } + SynVar::PackedBinaryLiteral(pbl) => for e in &pbl.errors { + let context = Context { source_code: &source_code, source: &e.source }; + match &e.variant { + PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => { + let message = format!("Duplicate field name {name:?} in packed binary literal"); + report_source_error!(&context, &message); + } + PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => { + let message = format!("Invalid character {c:?} in packed binary literal"); + report_source_error!(&context, &message); + } + } + } + SynVar::Error(err) => match err { + SyntacticParseError::InvalidHexadecimalLiteral(hex) => { + let message = format!("Invalid hexadecimal literal {hex:?}"); + report_source_error!(&context, &message); + } + SyntacticParseError::InvalidDecimalLiteral(dec) => { + let message = format!("Invalid decimal literal {dec:?}"); + report_source_error!(&context, &message); + } + SyntacticParseError::InvalidSymbolIdentifier(name) => { + let message = format!("Invalid identifier {name:?}"); + report_source_error!(&context, &message); + } + SyntacticParseError::UnterminatedComment => { + let message = format!("Unterminated comment"); + report_source_error!(&context, &message); + } + SyntacticParseError::UnterminatedExpression => { + let message = format!("Unterminated constant expression"); + report_source_error!(&context, &message); + } + SyntacticParseError::LabelInMacroDefinition => { + let message = format!("Only sublabels can be used in macro definitions"); + report_source_error!(&context, &message); + } + } + _ => (), + } + } + exit_if_error_reported!(); +} + + +pub fn report_semantic_errors(program: &SemanticProgram, source_code: &str) { + for (_, definition) in &program.macro_definitions { + report_value_errors(&definition.value, source_code); + } + for token in &program.body { + report_semantic_token_errors(token, source_code); + } + exit_if_error_reported!(); +} + +fn report_value_errors(definition: &Value, source_code: &str) { + match definition { + Value::Integer(integer) => match integer { + Integer::Expression(expr) => for token in &expr.tokens { + if let ExpressionTokenVariant::Error(error) = &token.variant { + let message = match error { + ExpressionParseError::InvalidHexadecimalLiteral(hex) => + format!("Invalid hexadecimal literal '{hex}' in constant expression"), + }; + let context = Context { source: &token.source, source_code}; + report_source_error!(&context, &message); + } + } + _ => (), + } + Value::Block(block) => { + for token in block { + report_semantic_token_errors(token, source_code); + } + } + Value::Invocation(invocation) => report_invocation_errors(invocation, source_code), + } +} + +fn report_semantic_token_errors(token: &SemanticToken, source_code: &str) { + match &token { + SemanticToken::Word(pbl) => for error in &pbl.errors { + let message = match &error.variant { + PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => + format!("Duplicate field name '{name}' in packed binary literal"), + PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => + format!("Invalid character '{c}' in packed binary literal"), + }; + let context = Context { source: &error.source, source_code }; + report_source_error!(&context, &message); + } + SemanticToken::Invocation(invocation) => { + report_invocation_errors(invocation, source_code) + } + SemanticToken::Error(error) => { + report_semantic_error(error, source_code) + } + SemanticToken::LabelDefinition(_) => (), + SemanticToken::PinnedAddress(_) => (), + } +} + +fn report_invocation_errors(invocation: &Invocation, source_code: &str) { + for error in &invocation.errors { + report_semantic_error(&error, source_code); + } + for argument in &invocation.arguments { + report_value_errors(&argument.value, source_code); + } +} + +fn report_semantic_error(error: &SemanticParseError, source_code: &str) { + let message = match &error.variant { + SemanticParseErrorVariant::UnterminatedMacroDefinition(name) => + format!("The macro definition '{name}' is missing a terminating ';' character"), + SemanticParseErrorVariant::UnterminatedBlock => + format!("Block literal is missing a terminating '}}' character"), + SemanticParseErrorVariant::InvalidToken => + format!("Invalid token"), + }; + let context = Context { source: &error.source, source_code}; + report_source_error!(&context, &message); +} + + +pub fn report_assembler_errors(tokens: &[AssembledToken], source_code: &str) { + for token in tokens { + match token { + AssembledToken::Word(word) => { + for error in &word.errors { + report_assembler_error(&error, source_code); + } + } + AssembledToken::Error(error) => { + report_assembler_error(error, source_code); + }, + _ => (), + } + } + exit_if_error_reported!(); +} + +fn report_assembler_error(error: &AssemblerError, source_code: &str) { + let message = match &error.variant { + AssemblerErrorVariant::DefinitionNotFound(name) => + format!("Definition not found for name '{name}'"), + AssemblerErrorVariant::NotABlock => + format!("Value of type block was expected here"), + AssemblerErrorVariant::NotAnInteger => + format!("Value of type integer was expected here"), + AssemblerErrorVariant::IntegerInBlock => + format!("Integer in block"), + AssemblerErrorVariant::IncorrectArgumentCount(expected, received) => + format!("Expected {expected} arguments, but received {received} instead"), + AssemblerErrorVariant::IncorrectArgumentType(expected, received) => + format!("Expected {expected} argument but received {received} instead"), + }; + let context = Context { + source_code: &source_code, + source: &error.source, + }; + report_source_error!(&context, &message); +} + + +pub fn report_bytecode_errors(bytecode: &Bytecode, source_code: &str) { + for error in &bytecode.errors { + report_bytecode_error(error, source_code); + } + exit_if_error_reported!(); +} + +pub fn report_bytecode_error(error: &BytecodeError, source_code: &str) { + let message = match &error.variant { + BytecodeErrorVariant::DefinitionNotFound(name) => + format!("Could not find definition for label reference '{name}'"), + BytecodeErrorVariant::DuplicateLabelDefinition(name) => + format!("Duplicate definition for label '{name}'"), + BytecodeErrorVariant::PinnedAddressBacktrack(expected, received) => + format!("Cannot pin back to address {expected} when already at address {received}"), + BytecodeErrorVariant::ValueTooLarge(expected, received) => + format!("Expected {expected}-bit value, but received {received}-bit value instead"), + BytecodeErrorVariant::StackUnderflow => + format!("Stack underflow when evaluating expression"), + BytecodeErrorVariant::NoReturnValue => + format!("No value left on stack when evaluating expression"), + BytecodeErrorVariant::MultipleReturnValues => + format!("More than one value left on stack when evaluating expression"), + }; + let context = Context { + source_code: &source_code, + source: &error.source, + }; + report_source_error!(&context, &message); +} diff --git a/src/tokens/assembler.rs b/src/tokens/assembler.rs new file mode 100644 index 0000000..04ecd38 --- /dev/null +++ b/src/tokens/assembler.rs @@ -0,0 +1,140 @@ +use crate::*; + + +#[derive(Clone)] +pub enum AssembledToken { + Word(AssembledWord), + LabelDefinition(LabelDefinition), + PinnedAddress(PinnedAddress), + Error(AssemblerError), +} + +#[derive(Clone)] +pub struct AssembledWord { + pub source: SourceSpan, + pub value: usize, + pub bits: usize, + pub fields: Vec<AssembledField>, + pub errors: Vec<AssemblerError>, +} + +#[derive(Clone)] +pub struct AssembledField { + pub source: SourceSpan, + pub value: IntegerArgument, + /// Length of field in bits + pub bits: usize, + /// Distance to left-shift field in value + pub shift: usize, +} + +#[derive(Clone)] +pub struct AssembledExpression { + pub source: SourceSpan, + pub tokens: Vec<AssembledExpressionToken>, +} + +#[derive(Clone)] +pub enum AssembledExpressionToken { + Integer(TrackedInteger), + LabelReference(Tracked<String>), + Operator(Operator), + Expression(Box<AssembledExpression>), +} + +#[derive(Clone)] +pub enum Argument { + Integer(IntegerArgument), + Block(Vec<AssembledToken>), +} + +#[derive(Clone)] +pub enum IntegerArgument { + LabelReference(Tracked<String>), + Integer(TrackedInteger), + Expression(AssembledExpression), +} + +#[derive(Clone)] +pub struct AssemblerError { + pub source: SourceSpan, + pub variant: AssemblerErrorVariant, +} + +#[derive(Clone, Debug)] +pub enum AssemblerErrorVariant { + DefinitionNotFound(String), + NotAnInteger, + NotABlock, + IntegerInBlock, + /// expected, received + IncorrectArgumentCount(usize, usize), + /// expected, received, index + IncorrectArgumentType(ArgumentVariant, ArgumentVariant), +} + +// ------------------------------------------------------------------------ // + +macro_rules! indent { + ($indent:expr => $($tokens:tt)*) => {{ + for _ in 0..$indent { print!(" "); } + println!($($tokens)*); + }}; +} + +pub fn print_assembled_tokens(tokens: &[AssembledToken]) { + println!(); + println!("--------------------------------------------------------------"); + println!(); + for token in tokens { + match token { + AssembledToken::LabelDefinition(definition) => { + println!("LABEL {}", definition.name) + } + AssembledToken::PinnedAddress(address) => { + println!("PINNED {}", address.address) + } + AssembledToken::Word(word) => { + println!("WORD {:b}", word.value); + for field in &word.fields { + print!(" FIELD ({} << {}) ", field.bits, field.shift); + match &field.value { + IntegerArgument::LabelReference(name) => { + println!("LABEL '{name}'"); + } + IntegerArgument::Integer(integer) => { + println!("INTEGER '{}'", integer.value); + } + IntegerArgument::Expression(expr) => { + println!("EXPRESSION"); + print_assembled_expression(2, expr); + } + } + } + } + AssembledToken::Error(error) => { + println!("ERROR {:?}", error.variant) + } + } + } +} + +fn print_assembled_expression(indent: usize, expr: &AssembledExpression) { + for token in &expr.tokens { + match token { + AssembledExpressionToken::Integer(integer) => { + indent!(indent => "INTEGER {}", integer.value) + } + AssembledExpressionToken::LabelReference(name) => { + indent!(indent => "LABEL '{name}'") + } + AssembledExpressionToken::Operator(operator) => { + indent!(indent => "OPERATOR {operator:?}") + } + AssembledExpressionToken::Expression(expr) => { + indent!(indent => "EXPRESSION"); + print_assembled_expression(indent+1, expr); + } + } + } +} diff --git a/src/tokens/bytecode.rs b/src/tokens/bytecode.rs new file mode 100644 index 0000000..9ac340e --- /dev/null +++ b/src/tokens/bytecode.rs @@ -0,0 +1,49 @@ +use crate::*; + + +pub struct Bytecode { + pub words: Vec<Word>, + pub errors: Vec<BytecodeError>, +} + +#[derive(Clone, Copy)] +pub struct Word { + pub bits: usize, + pub value: usize, +} + +impl std::fmt::Display for Word { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + for i in (0..self.bits).rev() { + let is_first_bit = i+1 == self.bits; + if !is_first_bit && (i+1) % 4 == 0 { + write!(f, "_")?; + } + match (self.value >> i) & 1 { + 0 => write!(f, "0")?, + _ => write!(f, "1")?, + } + } + if self.bits == 0 { + write!(f, "0")?; + } + return Ok(()); + } +} + +pub struct BytecodeError { + pub source: SourceSpan, + pub variant: BytecodeErrorVariant, +} + +pub enum BytecodeErrorVariant { + DefinitionNotFound(String), + DuplicateLabelDefinition(String), + /// pin, real + PinnedAddressBacktrack(usize, usize), + /// expected, received + ValueTooLarge(usize, usize), + StackUnderflow, + MultipleReturnValues, + NoReturnValue, +} diff --git a/src/tokens/constant_expression.rs b/src/tokens/constant_expression.rs deleted file mode 100644 index e4aa099..0000000 --- a/src/tokens/constant_expression.rs +++ /dev/null @@ -1,134 +0,0 @@ -use crate::*; - - -pub struct ConstantExpression { - pub tokens: Vec<ConstantExpressionToken>, -} - -impl ConstantExpression { - pub fn from_str(string: &str, tokeniser: &Tokeniser) -> Self { - parse_constant_expression(string, tokeniser) - } -} - -pub struct ConstantExpressionToken { - pub source: SourceSpan, - pub variant: ConstantExpressionTokenVariant, -} - -pub enum ConstantExpressionTokenVariant { - SymbolReference(String), - IntegerLiteral(usize), - Operator(Operator), - Error(ConstantExpressionParseError), -} - -pub enum Operator { - Equal, - NotEqual, - LessThan, - GreaterThan, - Add, - Subtract, - LeftShift, - RightShift, - And, - Or, - Xor, - Not, -} - -pub enum ConstantExpressionParseError { - InvalidHexadecimalLiteral(String), -} - - -impl ConstantExpression { - pub fn evaluate(&self, environment: &Environment) -> Result<usize, ConstantExpressionEvaluationError> { - use ConstantExpressionTokenVariant as Token; - use ConstantExpressionEvaluationError as EvalErr; - - let mut stack = Vec::new(); - macro_rules! push { - ($value:expr) => { stack.push($value) }; - } - macro_rules! pop { - ($name:ident) => { let $name = match stack.pop() { - Some(value) => value, - None => return Err(EvalErr::StackUnderflow), - }; }; - } - macro_rules! truth { - ($bool:expr) => { match $bool { true => 1, false => 0 } }; - } - - for token in &self.tokens { - match &token.variant { - Token::IntegerLiteral(value) => push!(*value), - Token::SymbolReference(name) => match environment.get_integer(name) { - Ok(value) => push!(value), - Err(_) => todo!(), - } - Token::Operator(operator) => match operator { - Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) }, - Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) }, - Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) }, - Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) }, - Operator::Add => { pop!(b); pop!(a); push!(a + b) }, - Operator::Subtract => { pop!(b); pop!(a); push!(a - b) }, - Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) }, - Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) }, - Operator::And => { pop!(b); pop!(a); push!(a & b) }, - Operator::Or => { pop!(b); pop!(a); push!(a | b) }, - Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) }, - Operator::Not => { pop!(a); push!(!a) }, - } - Token::Error(_) => (), - } - } - match stack.len() { - 0 => Err(EvalErr::NoReturnValue), - 1 => Ok(stack[0]), - _ => Err(EvalErr::MultipleReturnValues), - } - } -} - -pub enum ConstantExpressionEvaluationError { - StackUnderflow, - MultipleReturnValues, - NoReturnValue, -} - - -impl std::fmt::Debug for ConstantExpression { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - use ConstantExpressionTokenVariant as TokenVar; - for (i, token) in self.tokens.iter().enumerate() { - let string = match &token.variant { - TokenVar::SymbolReference(name) => name, - TokenVar::IntegerLiteral(value) => &value.to_string(), - TokenVar::Operator(operator) => match operator { - Operator::Equal => "=", - Operator::NotEqual => "!", - Operator::LessThan => "<", - Operator::GreaterThan => ">", - Operator::Add => "+", - Operator::Subtract => "-", - Operator::LeftShift => "<<", - Operator::RightShift => ">>", - Operator::And => "&", - Operator::Or => "|", - Operator::Xor => "^", - Operator::Not => "~", - } - TokenVar::Error(_) => "<error>", - }; - match i { - 0 => write!(f, "{string}")?, - _ => write!(f, " {string}")?, - } - } - return Ok(()); - } -} diff --git a/src/tokens/expression.rs b/src/tokens/expression.rs new file mode 100644 index 0000000..ff2d82d --- /dev/null +++ b/src/tokens/expression.rs @@ -0,0 +1,74 @@ +use crate::*; + + +#[derive(Clone)] +pub struct Expression { + pub source: SourceSpan, + pub tokens: Vec<ExpressionToken>, +} + +#[derive(Clone)] +pub struct ExpressionToken { + pub source: SourceSpan, + pub variant: ExpressionTokenVariant, +} + +#[derive(Clone)] +pub enum ExpressionTokenVariant { + Invocation(String), + Literal(isize), + Operator(Operator), + Error(ExpressionParseError), +} + +#[derive(Clone, Copy, Debug)] +pub enum Operator { + Equal, + NotEqual, + LessThan, + GreaterThan, + Add, + Subtract, + LeftShift, + RightShift, + And, + Or, + Xor, + Not, +} + +#[derive(Clone)] +pub enum ExpressionParseError { + InvalidHexadecimalLiteral(String), +} + +impl std::fmt::Debug for Expression { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + for (i, token) in self.tokens.iter().enumerate() { + let string = match &token.variant { + ExpressionTokenVariant::Invocation(name) => name, + ExpressionTokenVariant::Literal(value) => &value.to_string(), + ExpressionTokenVariant::Operator(operator) => match operator { + Operator::Equal => "=", + Operator::NotEqual => "!=", + Operator::LessThan => "<", + Operator::GreaterThan => ">", + Operator::Add => "+", + Operator::Subtract => "-", + Operator::LeftShift => "<<", + Operator::RightShift => ">>", + Operator::And => "&", + Operator::Or => "|", + Operator::Xor => "^", + Operator::Not => "~", + } + ExpressionTokenVariant::Error(_) => "<error>", + }; + match i { + 0 => write!(f, "{string}")?, + _ => write!(f, " {string}")?, + } + } + return Ok(()); + } +} diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs index edb7c19..53ccc6e 100644 --- a/src/tokens/mod.rs +++ b/src/tokens/mod.rs @@ -1,9 +1,19 @@ -pub mod syntactic; +mod expression; +mod packed_binary_literal; +mod tracked_integer; +mod tracked; -pub mod semantic; +pub use expression::*; +pub use packed_binary_literal::*; +pub use tracked_integer::*; +pub use tracked::*; -mod constant_expression; -pub use constant_expression::*; +mod syntactic; +mod semantic; +mod assembler; +mod bytecode; -mod packed_binary_literal; -pub use packed_binary_literal::*; +pub use syntactic::*; +pub use semantic::*; +pub use assembler::*; +pub use bytecode::*; diff --git a/src/tokens/packed_binary_literal.rs b/src/tokens/packed_binary_literal.rs index 1252398..a2720b7 100644 --- a/src/tokens/packed_binary_literal.rs +++ b/src/tokens/packed_binary_literal.rs @@ -2,17 +2,13 @@ use crate::*; pub struct PackedBinaryLiteral { + pub source: SourceSpan, pub value: usize, + pub bits: usize, pub fields: Vec<BitField>, pub errors: Vec<PackedBinaryLiteralParseError>, } -impl PackedBinaryLiteral { - pub fn from_str(string: &str, parent: &Tokeniser) -> Self { - parse_packed_binary_literal(string, parent) - } -} - pub struct BitField { pub name: char, pub source: SourceSpan, @@ -40,7 +36,8 @@ impl std::fmt::Display for PackedBinaryLiteral { } else { let bitcount = (self.value.ilog2() + 1) as usize; 'bit: for i in (0..bitcount).rev() { - if (i+1) % 4 == 0 { + let is_first_bit = i+1 == bitcount; + if !is_first_bit && (i+1) % 4 == 0 { write!(f, "_")?; } for field in &self.fields { diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs index 7d5d327..d61ad8e 100644 --- a/src/tokens/semantic.rs +++ b/src/tokens/semantic.rs @@ -1,90 +1,100 @@ use crate::*; +use indexmap::IndexMap; + /// The entire semantic program, ready to generate bytecode. -pub struct Program { - pub definitions: Vec<Definition>, - pub invocations: Vec<Invocation>, - pub errors: Vec<ParseError>, +pub struct SemanticProgram { + pub macro_definitions: IndexMap<String, MacroDefinition>, + pub label_definitions: IndexMap<String, LabelDefinition>, + pub body: Vec<SemanticToken>, } /// A symbol definition. -pub struct Definition { - pub name: String, +pub struct MacroDefinition { pub source: SourceSpan, pub arguments: Vec<ArgumentDefinition>, - pub variant: DefinitionVariant, + pub value: Value, + pub errors: Vec<SemanticParseError>, } pub struct ArgumentDefinition { pub name: String, pub source: SourceSpan, - pub variant: ArgumentDefinitionVariant, + pub variant: ArgumentVariant, } -pub enum ArgumentDefinitionVariant { +#[derive(PartialEq, Clone, Copy, Debug)] +pub enum ArgumentVariant { Integer, Block, } -pub enum DefinitionVariant { - Integer(IntegerDefinition), - Block(BlockDefinition), - Reference(ReferenceDefinition), -} - -pub struct IntegerDefinition { +pub struct ArgumentInvocation { pub source: SourceSpan, - pub variant: IntegerDefinitionVariant, + pub value: Value, } -pub enum IntegerDefinitionVariant { - Literal(usize), - Constant(ConstantExpression), +pub enum Value { + Integer(Integer), + Block(Vec<SemanticToken>), + Invocation(Invocation), } -pub struct BlockDefinition { - pub tokens: Vec<BlockToken>, - pub errors: Vec<ParseError>, +pub enum Integer { + Literal(TrackedInteger), + Expression(Expression), + LabelReference(Tracked<String>), } -pub struct BlockToken { - pub source: SourceSpan, - pub variant: BlockTokenVariant, +pub enum SemanticToken { + Word(PackedBinaryLiteral), + Invocation(Invocation), + LabelDefinition(LabelDefinition), + PinnedAddress(PinnedAddress), + Error(SemanticParseError), } -pub enum BlockTokenVariant { - Invocation(Invocation), - Comment(String), - Word(PackedBinaryLiteral), +pub struct Invocation { + pub name: String, + pub source: SourceSpan, + pub arguments: Vec<ArgumentInvocation>, + pub errors: Vec<SemanticParseError>, } -/// References aren't necessarily an integer or a block -pub struct ReferenceDefinition { +#[derive(Clone)] +pub struct LabelDefinition { pub source: SourceSpan, pub name: String, } -pub struct Invocation { - pub name: String, - pub arguments: Vec<DefinitionVariant>, - pub errors: Vec<ParseError>, +#[derive(Clone)] +pub struct PinnedAddress { + pub source: SourceSpan, + pub address: usize, } -pub struct ParseError { +pub struct SemanticParseError { pub source: SourceSpan, - pub variant: ParseErrorVariant, + pub variant: SemanticParseErrorVariant, } -pub enum ParseErrorVariant { +pub enum SemanticParseErrorVariant { UnterminatedMacroDefinition(String), - UnterminatedBlockDefinition, - /// Name of the macro. - InvalidArgumentDefinition(String), + UnterminatedBlock, InvalidToken, } +impl std::fmt::Display for ArgumentVariant { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match self { + ArgumentVariant::Integer => write!(f, "integer"), + ArgumentVariant::Block => write!(f, "block"), + } + } +} + // ------------------------------------------------------------------------ // macro_rules! indent { @@ -94,77 +104,85 @@ macro_rules! indent { }}; } -impl Program { +impl SemanticProgram { pub fn print_definitions(&self) { - for definition in &self.definitions { - let variant = match &definition.variant { - DefinitionVariant::Integer(_) => "INTEGER", - DefinitionVariant::Block(_) => "BLOCK", - DefinitionVariant::Reference(_) => "REFERENCE", + for (name, definition) in &self.macro_definitions { + let variant = match &definition.value { + Value::Integer(_) => "INTEGER", + Value::Block(_) => "BLOCK", + Value::Invocation(_) => "INVOCATION", }; - println!("DEFINE {variant} '{}'", definition.name); + println!("DEFINE {variant} '{name}'"); for argument in &definition.arguments { self.print_argument_definition(argument); } - match &definition.variant { - DefinitionVariant::Integer(integer) => - self.print_integer_definition(1, integer), - DefinitionVariant::Block(block) => - self.print_block_definition(1, block), - DefinitionVariant::Reference(reference) => - indent!(1 => "REFERENCE '{}'", reference.name), + match &definition.value { + Value::Integer(integer) => + self.print_integer(1, integer), + Value::Block(block) => + self.print_block(1, block), + Value::Invocation(invocation) => + indent!(1 => "INVOCATION '{}'", invocation.name), }; println!(); } - for invocation in &self.invocations { - self.print_invocation(0, invocation); + println!("LABELS"); + for (name, _) in &self.label_definitions { + println!(" @{name}"); } + println!(); + + self.print_block(0, &self.body); } fn print_argument_definition(&self, argument: &ArgumentDefinition) { let variant = match argument.variant { - ArgumentDefinitionVariant::Integer => "INTEGER", - ArgumentDefinitionVariant::Block => "BLOCK", + ArgumentVariant::Integer => "INTEGER", + ArgumentVariant::Block => "BLOCK", }; println!(" ARGUMENT {variant} '{}'", argument.name); } - fn print_integer_definition(&self, indent: usize, definition: &IntegerDefinition) { - match &definition.variant { - IntegerDefinitionVariant::Literal(value) => + fn print_integer(&self, indent: usize, integer: &Integer) { + match &integer { + Integer::Literal(value) => indent!(indent => "LITERAL {value}"), - IntegerDefinitionVariant::Constant(expr) => - indent!(indent => "CONSTANT [{expr:?}]"), + Integer::Expression(expr) => + indent!(indent => "EXPRESSION [{expr:?}]"), + Integer::LabelReference(name) => + indent!(indent => "LABEL REFERENCE '{name}'"), } } - fn print_block_definition(&self, indent: usize, definition: &BlockDefinition) { + fn print_block(&self, indent: usize, block: &[SemanticToken]) { indent!(indent => "BLOCK"); - let indent = indent + 1; - for token in &definition.tokens { - match &token.variant { - BlockTokenVariant::Invocation(invocation) => - self.print_invocation(indent, invocation), - BlockTokenVariant::Comment(_) => - indent!(indent => "COMMENT"), - BlockTokenVariant::Word(word) => - indent!(indent => "WORD #{word}"), + for semantic_token in block { + match &semantic_token { + SemanticToken::Word(word) => + indent!(indent+1 => "WORD #{word}"), + SemanticToken::Invocation(invocation) => + self.print_invocation(indent+1, invocation), + SemanticToken::LabelDefinition(definition) => + indent!(indent+1 => "LABEL DEFINITION @{}", definition.name), + SemanticToken::PinnedAddress(addr) => + indent!(indent+1 => "PINNED ADDRESS {}", addr.address), + SemanticToken::Error(_) => + indent!(indent+1 => "ERROR"), } } } fn print_invocation(&self, indent: usize, invocation: &Invocation) { indent!(indent => "INVOCATION '{}'", invocation.name); - let indent = indent + 1; for argument in &invocation.arguments { - match &argument { - DefinitionVariant::Integer(integer) => - self.print_integer_definition(indent, integer), - DefinitionVariant::Block(block) => - self.print_block_definition(indent, block), - DefinitionVariant::Reference(reference) => - indent!(indent => "REFERENCE '{}'", reference.name), + match &argument.value { + Value::Integer(integer) => + self.print_integer(indent+1, integer), + Value::Block(block) => + self.print_block(indent+1, block), + Value::Invocation(invocation) => + self.print_invocation(indent+1, invocation), }; } } diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs index 162f1c0..eb33806 100644 --- a/src/tokens/syntactic.rs +++ b/src/tokens/syntactic.rs @@ -1,22 +1,21 @@ use crate::*; -pub struct Token { +pub struct SyntacticToken { pub source: SourceSpan, - pub variant: TokenVariant, + pub variant: SyntacticTokenVariant, } -pub enum TokenVariant { +pub enum SyntacticTokenVariant { LabelDefinition(String), MacroDefinition(String), MacroDefinitionTerminator, - DecimalLiteral(usize), - HexadecimalLiteral(usize), + IntegerLiteral(isize), PackedBinaryLiteral(PackedBinaryLiteral), + PinnedAddress(usize), - Comment(String), - ConstantExpression(ConstantExpression), + Expression(Expression), BlockOpen, BlockClose, @@ -24,33 +23,34 @@ pub enum TokenVariant { Symbol(String), - Error(ParseError), + Error(SyntacticParseError), } #[derive(Debug)] -pub enum ParseError { +pub enum SyntacticParseError { InvalidHexadecimalLiteral(String), + InvalidDecimalLiteral(String), InvalidSymbolIdentifier(String), UnterminatedComment, - UnterminatedConstantExpression, + UnterminatedExpression, + LabelInMacroDefinition, } -impl std::fmt::Debug for Token { +impl std::fmt::Debug for SyntacticToken { fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - use TokenVariant::*; + use SyntacticTokenVariant::*; let start = &self.source.in_merged; let name = match &self.variant { LabelDefinition(name) => format!("LabelDefinition({name})"), MacroDefinition(name) => format!("MacroDefinition({name})"), MacroDefinitionTerminator => format!("MacroDefinitionTerminator"), - DecimalLiteral(value) => format!("DecimalLiteral({value})"), - HexadecimalLiteral(value) => format!("HexadecimalLiteral(0x{value:x})"), + IntegerLiteral(value) => format!("IntegerLiteral({value})"), PackedBinaryLiteral(pbl) => format!("PackedBinaryLiteral({pbl})"), + PinnedAddress(value) => format!("PinnedAddress({value})"), - Comment(_) => format!("Comment"), - ConstantExpression(expr) => format!("ConstantExpression({expr:?})"), + Expression(expr) => format!("Expression({expr:?})"), BlockOpen => format!("BlockOpen"), BlockClose => format!("BlockClose"), diff --git a/src/tokens/tracked.rs b/src/tokens/tracked.rs new file mode 100644 index 0000000..049c8f8 --- /dev/null +++ b/src/tokens/tracked.rs @@ -0,0 +1,47 @@ +use crate::*; + + +#[derive(Clone)] +pub struct Tracked<T> { + pub source: SourceSpan, + pub value: T, +} + +impl<T> Tracked<T> { + pub fn from(value: T, source: &SourceSpan) -> Self { + Self { source: source.clone(), value } + } +} + +impl<T> std::ops::Deref for Tracked<T> { + type Target = T; + fn deref(&self) -> &T { + &self.value + } +} + +impl<T> std::ops::DerefMut for Tracked<T> { + fn deref_mut(&mut self) -> &mut T { + &mut self.value + } +} + +impl<T: std::fmt::Display> std::fmt::Display for Tracked<T> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", self.value) + } +} + +impl<T: std::fmt::Debug> std::fmt::Debug for Tracked<T> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{:?}", self.value) + } +} + +impl<T: PartialEq> PartialEq for Tracked<T> { + fn eq(&self, other: &Tracked<T>) -> bool { + self.value.eq(&other.value) + } +} + +impl<T: Eq> Eq for Tracked<T> {} diff --git a/src/tokens/tracked_integer.rs b/src/tokens/tracked_integer.rs new file mode 100644 index 0000000..fa55f09 --- /dev/null +++ b/src/tokens/tracked_integer.rs @@ -0,0 +1,14 @@ +use crate::*; + + +#[derive(Clone)] +pub struct TrackedInteger { + pub source: SourceSpan, + pub value: isize, +} + +impl std::fmt::Display for TrackedInteger { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}", self.value) + } +} |