diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-03-06 20:33:27 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-03-11 16:59:26 +1300 |
commit | 1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch) | |
tree | 472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 | |
parent | f2ed89083f5326a7a6f0a1720033d3388aa431fb (diff) | |
download | torque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip |
Rewrite entire assembler
The language is now more general, the code is better structured, error
reporting is more detailed, and many new language features have
been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
39 files changed, 2951 insertions, 2243 deletions
@@ -9,8 +9,8 @@ source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c [[package]] name = "assembler" -version = "2.0.1" -source = "git+git://benbridle.com/assembler?tag=v2.0.1#00858f0b4bdfa7de838a21d27aef5e76be310828" +version = "2.1.0" +source = "git+git://benbridle.com/assembler?tag=v2.1.0#c5f60b7ff45ced7c8b8519bc8fcf681486ad09fa" dependencies = [ "ansi", "log 1.1.2", @@ -60,8 +60,8 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "switchboard" -version = "1.0.0" -source = "git+git://benbridle.com/switchboard?tag=v1.0.0#ea70fa89659e5cf1a9d4ca6ea31fb67f7a2cc633" +version = "2.1.0" +source = "git+git://benbridle.com/switchboard?tag=v2.1.0#e6435712ba5b3ca36e99fc8cbe7755940f8b1f3f" dependencies = [ "log 1.1.1", "paste", @@ -4,9 +4,9 @@ version = "1.2.0" edition = "2021" [dependencies] -assembler = { git = "git://benbridle.com/assembler", tag = "v2.0.1" } +assembler = { git = "git://benbridle.com/assembler", tag = "v2.1.0" } log = { git = "git://benbridle.com/log", tag = "v1.1.2" } -switchboard = { git = "git://benbridle.com/switchboard", tag = "v1.0.0" } +switchboard = { git = "git://benbridle.com/switchboard", tag = "v2.1.0" } paste = "1.0.15" indexmap = "2.7.1" diff --git a/src/bin/tq.rs b/src/bin/tq.rs index f22bd14..d1e51f3 100644 --- a/src/bin/tq.rs +++ b/src/bin/tq.rs @@ -1,43 +1,100 @@ use torque_asm::*; +use assembler::FileError; use log::{info, fatal}; -use switchboard::{Switchboard, SwitchQuery}; +use switchboard::*; use std::io::{Read, Write}; -use std::str::FromStr; +use std::path::Path; -fn print_version() -> ! 
{ - let version = env!("CARGO_PKG_VERSION"); - eprintln!("torque assembler, version {version}"); - eprintln!("written by ben bridle"); - std::process::exit(0); -} - fn main() { let mut args = Switchboard::from_env(); - if args.named("version").as_bool() { - print_version(); + args.positional("source"); + args.positional("destination"); + args.positional("extension").default("tq"); + args.named("no-libs"); + args.named("no-project-libs"); + args.named("no-env-libs"); + args.named("format").default("debug"); + args.named("width"); + args.named("dry-run").short('n'); + args.named("tree"); + args.named("help").short('h'); + args.named("version"); + args.named("verbose").short('v'); + args.raise_errors(); + + let source_path = args.get("source").as_path_opt().map( + |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); + let destination = args.get("destination").as_path_opt(); + let extension = args.get("extension").as_string(); + let no_libs = args.get("no-libs").as_bool(); + let no_project_libs = args.get("no-project-libs").as_bool(); + let no_env_libs = args.get("no-env-libs").as_bool(); + let format = Format::from_str(&args.get("format").as_string()); + let width = args.get("width").as_u32_opt(); + let dry_run = args.get("dry-run").as_bool(); + let print_tree = args.get("tree").as_bool(); + let print_help = args.get("help").as_bool(); + let print_version = args.get("version").as_bool(); + let verbose = args.get("verbose").as_bool(); + + if verbose { log::set_log_level(log::LogLevel::Info) } + if print_version { + let version = env!("CARGO_PKG_VERSION"); + eprintln!("torque assembler, version {version}"); + eprintln!("written by ben bridle"); + std::process::exit(0); } - if args.named("verbose").short('v').as_bool() { - log::set_log_level(log::LogLevel::Info); + if print_help { + eprintln!("\ +Usage: tq [source] [destination] + +Torque multi-assembler, see http://benbridle.com/torque for documentation. 
+ +Arguments: + [source] Path to a source file to assemble + [destination] Path to which output will be written + [extension] File extension to identify library files (default is 'tq') + +Switches: + --format=<fmt> Format to apply to assembled bytecode (default is 'debug') + --width=<width> Force a fixed width for all assembled words + --no-project-libs Don't search for libraries in the source parent folder + --no-env-libs Don't search for libraries in the TORQUE_LIBS path variable + --no-libs Combination of --no-project-libs and --no-env-libs + --tree Display a tree visualisation of all included library files + --dry-run (-n) Assemble and show errors only, don't write any output + --help (-h) Prints help + --verbose, (-v) Print additional debug information + --version Print the assembler version and exit + +Environment variables: + TORQUE_LIBS + A list of colon-separated paths which will be searched to find + Torque source code files to use as libraries when assembling a + Torque program. If a library file resolves an unresolved symbol + in the program being assembled, the library file will be merged + into the program. + +Output formats: + <debug> + Print assembled words as human-readable binary literals. + <inhx> + Original 8-bit Intel hex format. + <inhx32> + Modified 16-bit Intel hex format used by Microchip. + <raw> + Assembled words are converted to big-endian bytestrings and concatenated. + Each word is padded to the nearest byte. Words must all be the same width. + <source> + Print the source file before assembly, with symbols resolved. + +Created by Ben Bridle. 
+ "); + std::process::exit(0); } - let source_path = args.positional("source").as_path_opt().map( - |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); - let destination_path = args.positional("destination").as_path_opt(); - let extension = args.named("ext").default("tq").as_string(); - - let no_libs = args.named("no-libs").as_bool(); - let no_project_libs = args.named("no-project-libs").as_bool(); - let no_environment_libs = args.named("no-env-libs").as_bool(); - - let format = args.named("format").default("debug").as_string(); - let print_tree = args.named("tree").as_bool(); - let dry_run = args.named("dry-run").short('n').as_bool(); - - let Ok(format) = Format::from_str(format.as_str()) else { - fatal!("Unknown format '{format}', expected 'debug', 'inhx', 'inhx32', 'raw', or 'source'. "); - }; // ----------------------------------------------------------------------- @@ -68,14 +125,13 @@ fn main() { if compiler.error().is_some() && !no_libs && !no_project_libs { compiler.include_libs_from_parent(&extension); } - if compiler.error().is_some() && !no_libs && !no_environment_libs { + if compiler.error().is_some() && !no_libs && !no_env_libs { compiler.include_libs_from_path_variable("TORQUE_LIBS", &extension); } if print_tree { compiler.resolver.hierarchy().report() } - if let Some(error) = compiler.error() { error.report(); std::process::exit(1); @@ -85,54 +141,58 @@ fn main() { error.report(); std::process::exit(1); }); - if format == Format::Source && !dry_run { - write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); + + if !dry_run && format == Format::Source { + write_bytes_and_exit(merged_source.as_bytes(), destination.as_ref()); } // ----------------------------------------------------------------------- - // Parse syntactic tokens from merged source code. 
let path = Some("<merged source>"); - let syntactic_tokens = SyntacticParser::new(&merged_source, path).parse(); - report_syntactic_errors(&syntactic_tokens, &merged_source); + let syntactic = match parse_syntactic(&merged_source, path) { + Ok(tokens) => tokens, + Err(errors) => { + report_syntactic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; - let program = SemanticParser::new(syntactic_tokens).parse(); - report_semantic_errors(&program, &merged_source); + let semantic = match parse_semantic(syntactic) { + Ok(tokens) => tokens, + Err(errors) => { + report_semantic_errors(&errors, &merged_source); + std::process::exit(1); + } + }; - // program.print_definitions(); - let assembled_tokens = program.assemble(); - report_assembler_errors(&assembled_tokens, &merged_source); + let intermediate = match parse_intermediate(semantic) { + Ok(tokens) => tokens, + Err(errors) => { + report_intermediate_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + let segments = match parse_bytecode(intermediate, width) { + Ok(segments) => segments, + Err(errors) => { + report_bytecode_errors(&errors, &merged_source); + std::process::exit(1); + } + }; - let bytecode = BytecodeGenerator::new(&assembled_tokens).generate(); - report_bytecode_errors(&bytecode, &merged_source); if !dry_run { - match format { - Format::Debug => { - let mut output = String::new(); - for word in &bytecode.words { - output.push_str(&word.to_string()); - output.push('\n'); - } - write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); - } - Format::Inhx => { - let output = format_inhx(&bytecode.words); - write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); - } - Format::Inhx32 => { - let output = format_inhx32(&bytecode.words); - write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); - } - Format::Raw => { - let mut output = Vec::new(); - for word in &bytecode.words { - let value = word.value as u16; - 
output.extend(value.to_be_bytes()); - } - write_bytes_and_exit(&output, destination_path.as_ref()); - } - Format::Source => unreachable!(), + let result = match format { + Format::Debug => format_debug(&segments), + Format::Inhx => format_inhx(&segments), + Format::Inhx32 => format_inhx32(&segments), + Format::Raw => format_raw(&segments, width), + Format::Source => unreachable!("Source output is handled before merged assembly"), + }; + match result { + Ok(bytes) => write_bytes_and_exit(&bytes, destination.as_ref()), + Err(error) => report_format_error(&error, format, &merged_source), } } } @@ -151,26 +211,3 @@ fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { } std::process::exit(0); } - -#[derive(PartialEq)] -enum Format { - Debug, - Inhx, - Inhx32, - Raw, - Source, -} - -impl FromStr for Format { - type Err = (); - fn from_str(string: &str) -> Result<Self, ()> { - match string { - "debug" => Ok(Self::Debug), - "inhx" => Ok(Self::Inhx), - "inhx32" => Ok(Self::Inhx32), - "raw" => Ok(Self::Raw), - "source" => Ok(Self::Source), - _ => Err(()), - } - } -} diff --git a/src/compiler.rs b/src/compiler.rs index 10f1433..c0caae0 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1,5 +1,9 @@ use crate::*; +use assembler::*; +use assembler::DefinitionType::*; +use assembler::SymbolRole::*; + /// Compiles multiple source code files into one. pub struct Compiler { @@ -50,82 +54,184 @@ impl Compiler { self.resolver.error() } - pub fn get_compiled_source(&self) -> Result<String, MergeError> { + pub fn get_compiled_source(&mut self) -> Result<String, MergeError> { + self.resolver.calculate_hierarchy(); self.resolver.get_merged_source_code(push_source_code) } } /// Parse all symbols from a source code string. 
-fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> { - use SyntacticTokenVariant as SynVar; - use DefinitionType::*; - use SymbolRole::*; - let mut symbols = Vec::new(); - let mut macro_name: Option<String> = None; - let mut parse_arg_list = false; // true if parsing macro argument list - let mut after_separator = false; // true if prev token was separator - - macro_rules! push { - ($name:expr, $source:expr, $role:expr) => { - symbols.push(Symbol { - name: $name, - source: $source, - role: $role, - namespace: match &macro_name { - Some(name) => vec![name.to_owned()], - None => vec![], - } - }) +fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { + let syntactic = match parse_syntactic(source_code, path) { + Ok(syntactic) => syntactic, + Err(_errors) => return None, + }; + let semantic = match parse_semantic(syntactic) { + Ok(semantic) => semantic, + Err(_errors) => return None, + }; + Some(SymbolParser::new().parse(&semantic)) +} + + +// Extract symbol definitions from a list of semantic tokens. 
+pub struct SymbolParser { + pub macro_name: Option<String>, + pub symbols: Vec<Symbol>, +} + +impl SymbolParser { + pub fn new() -> Self { + Self { + macro_name: None, + symbols: Vec::new(), } } - let syntactic_tokens = SyntacticParser::new(&source_code, path).parse(); - for token in syntactic_tokens { - match token.variant { - SynVar::MacroDefinition(name) => { - push!(name.clone(), token.source, Definition(MustPrecedeReference)); - macro_name = Some(name); - parse_arg_list = true; + fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { + let name = name.to_string(); + let namespace = match &self.macro_name { + Some(macro_name) => vec![macro_name.to_owned()], + None => vec![], + }; + let source = source.to_owned(); + self.symbols.push(Symbol { name, namespace, source, role }); + + } + + pub fn parse(mut self, semantic: &[Tracked<SemanticToken>]) -> Vec<Symbol> { + for token in semantic { + let source = &token.source; + match &token.value { + SemanticToken::MacroDefinition(definition) => { + // Record macro definition. 
+ self.record_symbol( + &definition.name, + &definition.name.source, + Definition(MustPrecedeReference), + ); + self.macro_name = Some(definition.name.to_string()); + + for argument in &definition.arguments { + self.record_symbol( + &argument.name, + &argument.source, + Definition(MustPrecedeReference), + ); + } + match &definition.body { + MacroDefinitionBody::Integer(integer) => { + self.parse_integer_token(&integer, &integer.source) + } + MacroDefinitionBody::Invocation(invocation) => { + self.parse_invocation(&invocation, &invocation.source) + } + MacroDefinitionBody::Block(tokens) => { + for token in tokens { + self.parse_block_token(&token, &token.source); + } + } + } + self.macro_name = None; + } + SemanticToken::BlockToken(token) => { + self.parse_block_token(token, &source); + } } - SynVar::MacroDefinitionTerminator => { - macro_name = None; + } + return self.symbols; + } + + fn parse_expression(&mut self, expression: &Expression, _source: &SourceSpan) { + for token in &expression.tokens { + let source = &token.source; + match &token.value { + ExpressionToken::IntegerToken(integer) => { + self.parse_integer_token(integer, source); + } + ExpressionToken::Invocation(invocation) => { + self.parse_invocation(invocation, source); + } + ExpressionToken::Operator(_) => (), } - SynVar::LabelDefinition(name) => { - push!(name.clone(), token.source, Definition(CanFollowReference)); + } + } + + fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) { + self.record_symbol( + &invocation.name, + &source, + Reference, + ); + + for argument in &invocation.arguments { + let source = &argument.source; + match &argument.value { + InvocationArgument::IntegerToken(integer) => { + self.parse_integer_token(integer, &source); + } + InvocationArgument::BlockToken(block) => { + self.parse_block_token(block, &source); + } + InvocationArgument::Invocation(invocation) => { + self.parse_invocation(invocation, &source); + } + InvocationArgument::String(_) => (), } 
- SynVar::Symbol(name) => if parse_arg_list && after_separator { - push!(name, token.source, Definition(MustPrecedeReference)); - } else { - parse_arg_list = false; - push!(name, token.source, Reference); + } + } + + fn parse_block_token(&mut self, token: &BlockToken, source: &SourceSpan) { + match token { + BlockToken::LabelDefinition(name) => { + self.record_symbol( + &name, + &source, + Definition(CanFollowReference), + ); } - SynVar::Separator => { - after_separator = true; - continue; + BlockToken::PinnedAddress(integer) => { + self.parse_integer_token(integer, &integer.source); } - SynVar::BlockOpen | SynVar::BlockClose => { - continue; + BlockToken::ConditionalBlock(condition) => { + self.parse_integer_token(&condition.predicate, &condition.predicate.source); + self.parse_block_token(&condition.body, &condition.body.source); } - SynVar::PackedBinaryLiteral(pbl) => { - for field in pbl.fields { - push!(field.name.to_string(), field.source, Reference) + BlockToken::WordTemplate(word_template) => { + for field in &word_template.fields { + self.record_symbol( + &field.name.to_string(), + &field.source, + Reference, + ); } } - SynVar::Expression(expr) => { - for token in expr.tokens { - if let ExpressionTokenVariant::Invocation(name) = token.variant { - push!(name, token.source, Reference); - } + BlockToken::Block(tokens) => { + for token in tokens { + self.parse_block_token(token, &token.source); } } - _ => () - }; - after_separator = false; + BlockToken::Invocation(invocation) => { + self.parse_invocation(invocation, source); + } + } + } + + fn parse_integer_token(&mut self, token: &IntegerToken, source: &SourceSpan) { + match &token { + IntegerToken::Expression(expression) => { + self.parse_expression(&expression, source) + } + IntegerToken::Invocation(invocation) => { + self.parse_invocation(&invocation, source) + } + IntegerToken::IntegerLiteral(_) => (), + } } - return symbols; } + /// Push source code to a source compilation string. 
fn push_source_code(compilation: &mut String, source_file: &SourceFile) { // Skip blank files. diff --git a/src/formats/debug.rs b/src/formats/debug.rs new file mode 100644 index 0000000..23fd34f --- /dev/null +++ b/src/formats/debug.rs @@ -0,0 +1,18 @@ +use crate::*; + + +pub fn format_debug(segments: &[Segment]) -> Result<Vec<u8>, FormatError> { + let mut output = String::new(); + for segment in segments { + // Find maximum width of all words in the segment. + let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0); + let address = &segment.address; + output.push_str(&format!("SEGMENT: 0x{address:>04x}\n")); + for word in &segment.words { + let string = word.to_string(); + let w = width as usize; + output.push_str(&format!(" {string:>w$}\n")); + } + } + return Ok(output.as_bytes().to_vec()); +} diff --git a/src/formats/inhx.rs b/src/formats/inhx.rs index e83e870..fc4791b 100644 --- a/src/formats/inhx.rs +++ b/src/formats/inhx.rs @@ -1,10 +1,15 @@ use crate::*; -pub fn format_inhx(words: &[Word]) -> String { +pub fn format_inhx(segments: &[Segment]) -> Result<Vec<u8>, FormatError> { let mut records = Vec::new(); - for (i, chunk) in words.chunks(16).enumerate() { - records.push(data_record(chunk, (i * 16) as u16)); + let mut address; + for segment in segments { + address = segment.address; + for chunk in segment.words.chunks(16) { + records.push(data_record(chunk, address)?); + address += 16; + } } records.push(terminating_record()); @@ -12,21 +17,24 @@ pub fn format_inhx(words: &[Word]) -> String { for record in records { output.push_str(&record.to_string()); } - return output; + return Ok(output.as_bytes().to_vec()); } -fn data_record(words: &[Word], address: u16) -> InhxRecord { +fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> { + let Ok(address) = u16::try_from(address) else { + return Err(FormatError::AddressTooLarge(u16::MAX as usize, address)); + }; let mut record = 
InhxRecord::new(); record.byte((words.len()) as u8); record.be_double(address); record.byte(0x00); for word in words { - match word.bits <= 8 { - true => record.byte(word.value as u8), - false => panic!("Word '{word}' has more than 8 bits."), - }; + if word.value.width > 8 { + return Err(FormatError::WordTooWide(8, word.width, word.source.clone())); + } + record.byte(word.value.value as u8); } - return record; + return Ok(record); } fn terminating_record() -> InhxRecord { diff --git a/src/formats/inhx32.rs b/src/formats/inhx32.rs index fd7fd7b..8febeae 100644 --- a/src/formats/inhx32.rs +++ b/src/formats/inhx32.rs @@ -1,11 +1,19 @@ use crate::*; -pub fn format_inhx32(words: &[Word]) -> String { +pub fn format_inhx32(segments: &[Segment]) -> Result<Vec<u8>, FormatError> { let mut records = Vec::new(); - records.push(extended_linear_address(0x0000)); - for (i, chunk) in words.chunks(8).enumerate() { - records.push(data_record(chunk, (i * 8) as u16)); + let mut address = 0; + records.push(extended_linear_address(0)); + for segment in segments { + if (segment.address >> 16) != (address >> 16) { + records.push(extended_linear_address(segment.address)); + } + address = segment.address; + for chunk in segment.words.chunks(8) { + records.push(data_record(chunk, address)?); + address += 8; + } } records.push(terminating_record()); @@ -13,24 +21,29 @@ pub fn format_inhx32(words: &[Word]) -> String { for record in records { output.push_str(&record.to_string()); } - return output; + return Ok(output.as_bytes().to_vec()); } -fn data_record(words: &[Word], address: u16) -> InhxRecord { +fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> { + let Ok(address) = u32::try_from(address * 2) else { + return Err(FormatError::AddressTooLarge(u32::MAX as usize / 2, address)); + }; + let address = address as u16; let mut record = InhxRecord::new(); record.byte((words.len() * 2) as u8); - record.be_double(address * 2); + record.be_double(address); 
record.byte(0x00); for word in words { - match word.bits <= 16 { - true => record.le_double(word.value as u16), - false => panic!("Word '{word}' has more than 16 bits."), - }; + if word.value.width > 16 { + return Err(FormatError::WordTooWide(16, word.width, word.source.clone())); + } + record.le_double(word.value.value as u16); } - return record; + return Ok(record); } -fn extended_linear_address(address: u16) -> InhxRecord { +fn extended_linear_address(address: usize) -> InhxRecord { + let address = (address >> 16) as u16; let mut record = InhxRecord::new(); record.byte(0x02); record.be_double(0x0000); diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 82f19f1..132001a 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -1,8 +1,78 @@ mod inhx; mod inhx32; +mod raw; +mod debug; pub use inhx::*; pub use inhx32::*; +pub use raw::*; +pub use debug::*; + +use crate::*; + +use log::*; + + +#[derive(Clone, Copy, PartialEq)] +pub enum Format { + Debug, + Inhx, + Inhx32, + Raw, + Source, +} + +impl Format { + pub fn from_str(string: &str) -> Self { + match string { + "debug" => Self::Debug, + "inhx" => Self::Inhx, + "inhx32" => Self::Inhx32, + "raw" => Self::Raw, + "source" => Self::Source, + _ => fatal!("Unknown format '{string}', expected 'debug', 'inhx', 'inhx32', 'raw', or 'source'. 
"), + } + } +} + +impl std::fmt::Display for Format { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + Self::Debug => "debug", + Self::Inhx => "inhx", + Self::Inhx32 => "inhx32", + Self::Raw => "raw", + Self::Source => "source", + }; + write!(f, "{string}") + } +} + + +pub enum FormatError { + /// (expected, received) + AddressTooLarge(usize, usize), + /// (expected, received) + WordTooWide(u32, u32, SourceSpan), + /// + ExpectedFixedWidth, +} + +pub fn report_format_error(error: &FormatError, format: Format, source_code: &str) { + match error { + FormatError::AddressTooLarge(expected, received) => + error!("The {format} format requires that addresses do not exceed {expected}, but the address {received} was reached"), + FormatError::WordTooWide(expected, received, source) => { + let message = format!("The {format} format requires that words are no wider than {expected} bits, but a {received} bit word was found"); + let context = Context { source_code, source }; + report_source_issue(LogLevel::Error, &context, &message); + } + FormatError::ExpectedFixedWidth => + error!("The {format} format requires all words to be the same width"), + } + std::process::exit(1); +} + pub struct InhxRecord { @@ -43,3 +113,20 @@ impl InhxRecord { format!(":{output}{checksum:0>2X}\n") } } + + +pub fn calculate_fixed_width(segments: &[Segment]) -> Option<u32> { + let mut width = None; + for segment in segments { + for word in &segment.words { + let word_width = word.value.width; + match width { + Some(width) => if word_width != width { + return None; + } + None => width = Some(word_width), + } + } + } + return width.or(Some(0)); +} diff --git a/src/formats/raw.rs b/src/formats/raw.rs new file mode 100644 index 0000000..ecc6473 --- /dev/null +++ b/src/formats/raw.rs @@ -0,0 +1,29 @@ +use crate::*; + + +pub fn format_raw(segments: &[Segment], width: Option<u32>) -> Result<Vec<u8>, FormatError> { + let Some(width) = width.or_else(|| 
calculate_fixed_width(&segments)) else { + return Err(FormatError::ExpectedFixedWidth); + }; + + let mut address = 0; + let bytes_per_word = ((width + 7) / 8) as usize; + let mut bytes = Vec::new(); + + for segment in segments { + // Pad to the segment start address. + let padding = segment.address.saturating_sub(address); + bytes.resize(bytes.len() + (padding * bytes_per_word), 0); + for word in &segment.words { + // Decompose word value into bytes. + let value = word.value.value; + for i in (0..bytes_per_word).rev() { + let byte = (value >> (i*8) & 0xff) as u8; + bytes.push(byte); + } + address += 1; + } + } + + return Ok(bytes); +} @@ -1,13 +1,14 @@ -mod compiler; -mod parsers; -mod report; -mod tokens; +mod stages; +mod types; mod formats; +mod compiler; -pub use compiler::*; -pub use parsers::*; -pub use report::*; -pub use tokens::*; +pub use stages::*; +pub use types::*; pub use formats::*; +pub use compiler::*; + +use assembler::{Context, Tracked, SourceSpan, report_source_issue}; +use log::LogLevel; -pub use assembler::*; +use std::path::{PathBuf}; diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs deleted file mode 100644 index 61e1a84..0000000 --- a/src/parsers/assembler.rs +++ /dev/null @@ -1,290 +0,0 @@ -use crate::*; -use AssemblerErrorVariant as ErrVar; - -use indexmap::IndexMap; - - -static mut ID: usize = 0; -macro_rules! 
new_id { - () => { unsafe { - let id = ID; - ID += 1; - id - }}; -} - - -impl SemanticProgram { - pub fn assemble(&self) -> Vec<AssembledToken> { - let environment = Environment { - macro_definitions: &self.macro_definitions, - label_definitions: &self.label_definitions, - arguments: &IndexMap::new(), - id: new_id!(), - }; - let mut assembled_tokens = Vec::new(); - for token in &self.body { - let tokens = environment.reify_semantic_token(token); - assembled_tokens.extend(tokens); - } - return assembled_tokens; - } -} - - -pub struct Environment<'a> { - pub macro_definitions: &'a IndexMap<String, MacroDefinition>, - pub label_definitions: &'a IndexMap<String, LabelDefinition>, - pub arguments: &'a IndexMap<String, Argument>, - pub id: usize, -} - -impl<'a> Environment<'a> { - // This is only ever called for the highest level body tokens, never for invocations. - fn reify_semantic_token(&self, token: &SemanticToken) -> Vec<AssembledToken> { - let mut assembled_tokens = Vec::new(); - match token { - SemanticToken::Word(pbl) => { - let word = self.reify_packed_binary_literal(pbl); - assembled_tokens.push(AssembledToken::Word(word)); - } - SemanticToken::Invocation(invocation) => { - match self.reify_invocation(invocation) { - Ok(argument) => match argument { - Argument::Block(block) => assembled_tokens.extend(block), - Argument::Integer(_) => { - let variant = AssemblerErrorVariant::NotABlock; - let source = invocation.source.clone(); - let error = AssemblerError { source, variant }; - assembled_tokens.push(AssembledToken::Error(error)) - } - } - Err(error) => assembled_tokens.push(AssembledToken::Error(error)), - } - } - SemanticToken::LabelDefinition(definition) => { - assembled_tokens.push(AssembledToken::LabelDefinition(definition.clone())); - } - SemanticToken::PinnedAddress(address) => { - assembled_tokens.push(AssembledToken::PinnedAddress(address.clone())); - } - SemanticToken::Error(_) => (), - } - return assembled_tokens; - } - - fn 
reify_packed_binary_literal(&self, pbl: &PackedBinaryLiteral) -> AssembledWord { - let mut assembled_fields = Vec::new(); - let mut errors = Vec::new(); - for field in &pbl.fields { - let name = field.name.to_string(); - match self.reify_integer_reference(&name, &field.source) { - Ok(value) => assembled_fields.push( - AssembledField { - source: field.source.clone(), - value, - bits: field.bits, - shift: field.shift, - } - ), - Err(error) => errors.push(error), - }; - } - let source = pbl.source.clone(); - let value = pbl.value; - let bits = pbl.bits; - AssembledWord { source, bits, fields: assembled_fields, value, errors } - } - - fn reify_integer_reference(&self, name: &str, source: &SourceSpan) -> Result<IntegerArgument, AssemblerError> { - match self.reify_reference(name, source)? { - Argument::Integer(integer) => Ok(integer), - Argument::Block(_) => Err( - AssemblerError { - source: source.clone(), - variant: ErrVar::NotAnInteger, - } - ), - } - } - - fn reify_reference(&self, name: &str, source: &SourceSpan) -> Result<Argument, AssemblerError> { - let source = source.clone(); - if let Some(argument) = self.arguments.get(name) { - Ok(argument.clone()) - } else if let Some(definition) = self.macro_definitions.get(name) { - self.reify_value(&definition.value) - } else if let Some(label) = self.label_definitions.get(name) { - let name = Tracked::from(self.tag_label_name(&label.name), source); - Ok(Argument::Integer(IntegerArgument::LabelReference(name))) - } else { - let variant = ErrVar::DefinitionNotFound(name.to_string()); - Err(AssemblerError { source, variant }) - } - } - - fn tag_label_name(&self, name: &str) -> String { - match name.contains(':') { - true => format!("{name}:{}", self.id), - false => name.to_string(), - } - } - - fn reify_value(&self, value: &Value) -> Result<Argument, AssemblerError> { - match value { - Value::Integer(integer) => { - let value = match &integer { - Integer::Literal(integer) => { - IntegerArgument::Integer(integer.clone()) - 
} - Integer::Expression(expr) => { - let expr = self.reify_constant_expression(expr)?; - IntegerArgument::Expression(expr) - } - Integer::LabelReference(name) => { - let name = Tracked::from(self.tag_label_name(name), name.source.clone()); - IntegerArgument::LabelReference(name) - } - Integer::String(string) => { - IntegerArgument::String(string.clone()) - } - }; - Ok(Argument::Integer(value)) - } - Value::Block(block) => { - let mut assembled_tokens = Vec::new(); - for token in block { - match &token { - SemanticToken::Word(pbl) => { - let word = self.reify_packed_binary_literal(pbl); - assembled_tokens.push(AssembledToken::Word(word)); - } - SemanticToken::Invocation(invocation) => { - match self.reify_invocation(invocation)? { - Argument::Block(block) => assembled_tokens.extend(block), - Argument::Integer(_) => { - let source = invocation.source.clone(); - let variant = AssemblerErrorVariant::IntegerInBlock; - return Err(AssemblerError { source, variant}); - } - } - } - SemanticToken::LabelDefinition(definition) => { - let mut definition = definition.clone(); - definition.name.push_str(&format!(":{}", self.id)); - let token = AssembledToken::LabelDefinition(definition); - assembled_tokens.push(token); - } - SemanticToken::PinnedAddress(address) => { - let token = AssembledToken::PinnedAddress(address.to_owned()); - assembled_tokens.push(token); - } - SemanticToken::Error(_) => (), - } - } - Ok(Argument::Block(assembled_tokens)) - } - Value::Invocation(invocation) => { - self.reify_invocation(invocation) - } - } - } - - fn reify_invocation(&self, invocation: &Invocation) -> Result<Argument, AssemblerError> { - macro_rules! 
err { - ($variant:expr) => { Err(AssemblerError { - source: invocation.source.clone(), variant: $variant - }) }; - } - if let Some(argument) = self.arguments.get(&invocation.name) { - let expected = 0; - let received = invocation.arguments.len(); - if received != expected { - return err!(ErrVar::IncorrectArgumentCount(expected, received)); - } - Ok(argument.clone()) - } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { - // Check that the correct number of arguments were provided. - let received = invocation.arguments.len(); - let expected = definition.arguments.len(); - if received != expected { - return err!(ErrVar::IncorrectArgumentCount(expected, received)); - } - let mut arguments = IndexMap::new(); - for (i, argument) in invocation.arguments.iter().enumerate() { - // Check that the correct types of arguments were provided. - let arg_invocation = self.reify_value(&argument.value)?; - let arg_invocation_type = match &arg_invocation { - Argument::Integer(_) => ArgumentVariant::Integer, - Argument::Block(_) => ArgumentVariant::Block, - }; - let arg_definition_type = definition.arguments[i].variant; - if arg_invocation_type != arg_definition_type { - let variant = ErrVar::IncorrectArgumentType( - arg_definition_type, arg_invocation_type - ); - return Err(AssemblerError { source: argument.source.clone(), variant }); - } - let name = definition.arguments[i].name.clone(); - arguments.insert(name, arg_invocation); - } - let environment = Environment { - macro_definitions: &self.macro_definitions, - label_definitions: &self.label_definitions, - arguments: &arguments, - id: new_id!(), - }; - environment.reify_value(&definition.value) - } else if let Some(label) = self.label_definitions.get(&invocation.name) { - let expected = 0; - let received = invocation.arguments.len(); - if received != expected { - return err!(ErrVar::IncorrectArgumentCount(expected, received)); - } - let name = Tracked::from(self.tag_label_name(&label.name), 
label.source.clone()); - Ok(Argument::Integer(IntegerArgument::LabelReference(name))) - } else { - err!(ErrVar::DefinitionNotFound(invocation.name.to_string())) - } - } - - fn reify_constant_expression(&self, expr: &Expression) -> Result<AssembledExpression, AssemblerError> { - use ExpressionTokenVariant as ExprVar; - - let mut assembled_tokens = Vec::new(); - for token in &expr.tokens { - let assembled_token = match &token.variant { - ExprVar::Literal(value) => { - let source = token.source.clone(); - let integer = TrackedInteger { source, value: *value }; - AssembledExpressionToken::Integer(integer) - } - ExprVar::Operator(operator) => { - AssembledExpressionToken::Operator(*operator) - } - ExprVar::Invocation(name) => { - match self.reify_integer_reference(&name, &token.source)? { - IntegerArgument::LabelReference(name) => { - AssembledExpressionToken::LabelReference(name) - } - IntegerArgument::Integer(integer) => { - AssembledExpressionToken::Integer(integer) - } - IntegerArgument::Expression(expr) => { - AssembledExpressionToken::Expression(Box::new(expr)) - }, - IntegerArgument::String(string) => { - let source = string.source.clone(); - let variant = AssemblerErrorVariant::StringInExpression; - return Err(AssemblerError { source, variant }) - } - } - } - ExprVar::Error(_) => continue, - }; - assembled_tokens.push(assembled_token); - } - Ok(AssembledExpression { source: expr.source.clone(), tokens: assembled_tokens }) - } -} - diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs deleted file mode 100644 index ed16e22..0000000 --- a/src/parsers/bytecode.rs +++ /dev/null @@ -1,191 +0,0 @@ -use crate::*; - -use std::collections::HashMap; - - -pub struct BytecodeGenerator<'a> { - tokens: &'a [AssembledToken], - addresses: HashMap<String, Tracked<usize>>, - words: Vec<Word>, - errors: Vec<BytecodeError>, -} - -impl<'a> BytecodeGenerator<'a> { - pub fn new(tokens: &'a [AssembledToken]) -> Self { - Self { - tokens, - addresses: HashMap::new(), - words: 
Vec::new(), - errors: Vec::new(), - } - } - - pub fn generate(mut self) -> Bytecode { - self.calculate_addresses(); - for token in self.tokens { - match token { - AssembledToken::Word(assembled_word) => { - self.assemble_word(assembled_word); - } - AssembledToken::PinnedAddress(pinned) => { - if self.words.len() > pinned.address { - let variant = BytecodeErrorVariant::PinnedAddressBacktrack( - pinned.address, self.words.len()); - let source = pinned.source.clone(); - self.errors.push(BytecodeError { source, variant }); - } else { - self.words.resize(pinned.address, Word { bits: 0, value: 0}); - } - } - AssembledToken::LabelDefinition(_) => (), - AssembledToken::Error(_) => (), - } - } - - return Bytecode { - words: self.words, - errors: self.errors, - } - } - - fn calculate_addresses(&mut self) { - let mut i = 0; - for token in self.tokens { - match token { - AssembledToken::LabelDefinition(definition) => { - let address = Tracked::from(i, definition.source.clone()); - if let Some(_) = self.addresses.insert(definition.name.clone(), address) { - let name = definition.name.clone(); - let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name); - let source = definition.source.clone(); - self.errors.push(BytecodeError { source, variant }); - } - } - AssembledToken::Word(word) => { - i += word.count(); - } - AssembledToken::PinnedAddress(pinned) => { - i = pinned.address; - } - AssembledToken::Error(_) => (), - } - } - } - - fn resolve_expression(&mut self, expr: &AssembledExpression) -> isize { - let mut stack = Vec::new(); - macro_rules! push { - ($value:expr) => { stack.push($value) }; - } - macro_rules! pop { - ($name:ident) => { let $name = match stack.pop() { - Some(value) => value, - None => { - let variant = BytecodeErrorVariant::StackUnderflow; - self.errors.push(BytecodeError { source: expr.source.clone(), variant }); - return 0; - }, - }; }; - } - macro_rules! 
truth { - ($bool:expr) => { match $bool { true => 1, false => 0 } }; - } - - for token in &expr.tokens { - match &token { - AssembledExpressionToken::Integer(value) => { - push!(value.value) - } - AssembledExpressionToken::LabelReference(name) => { - push!(self.resolve_label_reference(name)) - } - AssembledExpressionToken::Expression(expr) => { - push!(self.resolve_expression(expr)) - } - AssembledExpressionToken::Operator(operator) => match operator { - Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) }, - Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) }, - Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) }, - Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) }, - Operator::LessThanEqual => { pop!(b); pop!(a); push!(truth!(a <= b)) }, - Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) }, - Operator::Add => { pop!(b); pop!(a); push!(a + b) }, - Operator::Subtract => { pop!(b); pop!(a); push!(a - b) }, - Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) }, - Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) }, - Operator::And => { pop!(b); pop!(a); push!(a & b) }, - Operator::Or => { pop!(b); pop!(a); push!(a | b) }, - Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) }, - Operator::Not => { pop!(a); push!(!a) }, - } - } - } - - let variant = match stack.len() { - 0 => BytecodeErrorVariant::NoReturnValue, - 1 => return stack[0], - _ => BytecodeErrorVariant::MultipleReturnValues, - }; - self.errors.push(BytecodeError { source: expr.source.clone(), variant}); - 0 - } - - fn resolve_label_reference(&mut self, name: &Tracked<String>) -> isize { - if let Some(address) = self.addresses.get(&name.value) { - address.value as isize - } else { - let variant = BytecodeErrorVariant::DefinitionNotFound(name.value.clone()); - self.errors.push(BytecodeError { source: name.source.clone(), variant }); - 0 - } - } - - fn assemble_word(&mut self, assembled_word: &AssembledWord) { - let mut 
field_values = Vec::new(); - for field in &assembled_word.fields { - match &field.value { - IntegerArgument::Expression(expr) => { - let source = expr.source.clone(); - let value = self.resolve_expression(expr); - field_values.push(vec![Tracked::from(value, source)]) - } - IntegerArgument::LabelReference(name) => { - let source = name.source.clone(); - let value = self.resolve_label_reference(name); - field_values.push(vec![Tracked::from(value, source)]) - } - IntegerArgument::Integer(integer) => { - let source = integer.source.clone(); - let value = integer.value; - field_values.push(vec![Tracked::from(value, source)]) - } - IntegerArgument::String(string) => { - let values = string.chars.iter() - .map(|c| Tracked::from(c.value as isize, c.source.clone())) - .collect(); - field_values.push(values); - } - }; - } - for i in 0..assembled_word.count() { - let mut value = assembled_word.value; - for (f, field) in assembled_word.fields.iter().enumerate() { - let (field_value, source) = match field_values[f].get(i) { - Some(tracked) => (tracked.value, Some(tracked.source.clone())), - None => (0, None), - }; - let bitcount = match field_value { - 0 => 0, - _ => (field_value.ilog2() + 1) as usize, - }; - if field.bits < bitcount { - let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount); - self.errors.push(BytecodeError { source: source.unwrap(), variant }); - } else { - value |= (field_value << field.shift) as usize; - } - } - self.words.push(Word { bits: assembled_word.bits, value }); - } - } -} diff --git a/src/parsers/expression.rs b/src/parsers/expression.rs deleted file mode 100644 index e938881..0000000 --- a/src/parsers/expression.rs +++ /dev/null @@ -1,52 +0,0 @@ -use crate::*; - - -pub fn parse_constant_expression(mut t: Tokeniser, source: SourceSpan) -> Expression { - use ExpressionTokenVariant as TokenVar; - use ExpressionParseError as ParseError; - - let mut tokens = Vec::new(); - - loop { - t.eat_whitespace(); - t.mark_start(); - let token = 
t.eat_token(); - if token.is_empty() { - break; - } - - let variant = match token.as_str() { - "=" => TokenVar::Operator(Operator::Equal), - "!=" => TokenVar::Operator(Operator::NotEqual), - "<" => TokenVar::Operator(Operator::LessThan), - ">" => TokenVar::Operator(Operator::GreaterThan), - "<=" => TokenVar::Operator(Operator::LessThanEqual), - ">=" => TokenVar::Operator(Operator::GreaterThanEqual), - "+" => TokenVar::Operator(Operator::Add), - "-" => TokenVar::Operator(Operator::Subtract), - "<<" => TokenVar::Operator(Operator::LeftShift), - ">>" => TokenVar::Operator(Operator::RightShift), - "&" => TokenVar::Operator(Operator::And), - "|" => TokenVar::Operator(Operator::Or), - "^" => TokenVar::Operator(Operator::Xor), - "~" => TokenVar::Operator(Operator::Not), - _ => if let Some(stripped) = token.strip_prefix("0x") { - match usize::from_str_radix(stripped, 16) { - Ok(value) => TokenVar::Literal(value as isize), - Err(_) => TokenVar::Error( - ParseError::InvalidHexadecimalLiteral(stripped.to_string())), - } - } else { - match usize::from_str_radix(&token, 10) { - Ok(value) => TokenVar::Literal(value as isize), - Err(_) => TokenVar::Invocation(token.to_string()), - } - } - }; - - let source = t.get_source(); - tokens.push(ExpressionToken { source, variant }); - } - - return Expression { source, tokens }; -} diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs deleted file mode 100644 index da2c23a..0000000 --- a/src/parsers/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -mod expression; -mod packed_binary_literal; - -pub use expression::*; -pub use packed_binary_literal::*; - -mod syntactic; -mod semantic; -mod assembler; -mod bytecode; - -pub use syntactic::*; -pub use semantic::*; -pub use assembler::*; -pub use bytecode::*; diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs deleted file mode 100644 index 18f8da7..0000000 --- a/src/parsers/packed_binary_literal.rs +++ /dev/null @@ -1,85 +0,0 @@ -use crate::*; - - -/// t is a 
Tokeniser over the characters of the PBL, excluding the leading hash. -pub fn parse_packed_binary_literal(mut t: Tokeniser, source: SourceSpan) -> PackedBinaryLiteral { - use PackedBinaryLiteralParseError as ParseError; - use PackedBinaryLiteralParseErrorVariant as ParseErrorVar; - - let mut value = 0; - let mut bits = 0; - let mut field_bits = 0; - let mut name = '\0'; - let mut fields: Vec<BitField> = Vec::new(); - let mut errors: Vec<ParseError> = Vec::new(); - - macro_rules! push_field { - () => { - if fields.iter().any(|f| f.name == name) { - let variant = ParseErrorVar::DuplicateFieldName(name); - errors.push(ParseError { source: t.get_source(), variant }); - } else { - fields.push(BitField { name, source: t.get_source(), bits: field_bits, shift: 0 }); - } - }; - } - - while let Some(c) = t.eat_char() { - // Ignore underscores. - if c == '_' { - t.mark.undo(); - continue; - } - - // Add a bit to the value; - value <<= 1; - bits += 1; - for field in &mut fields { - field.shift += 1; - } - - // Extend the current field. - if c == name { - field_bits += 1; - continue; - } - - // Commit the current field. - if field_bits > 0 { - t.mark_end_prev(); - push_field!(); - field_bits = 0; - name = '\0'; - } - - // Parse bit literals. - if c == '0' { - continue; - } - if c == '1' { - value |= 1; - continue; - } - - t.mark_start_prev(); - if c.is_alphabetic() { - name = c; - field_bits = 1; - continue; - } else { - let source = t.get_source(); - let variant = ParseErrorVar::InvalidCharacter(c); - errors.push(ParseError { source, variant }); - } - } - - // Commit the final field. 
- for field in &mut fields { - field.shift += 1; - } - if field_bits > 0 { - push_field!(); - } - - PackedBinaryLiteral { source, bits, value, fields, errors } -} diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs deleted file mode 100644 index 00cfc80..0000000 --- a/src/parsers/semantic.rs +++ /dev/null @@ -1,352 +0,0 @@ -use crate::*; -use SyntacticTokenVariant as SynVar; - -use std::collections::VecDeque; - -use indexmap::IndexMap; - - -macro_rules! fn_is_syn_variant { - ($name:ident, $variant:ty) => { paste::paste! { - fn [< is_ $name >](token: &SyntacticToken) -> bool { - match token.variant { $variant => true, _ => false, } - } } }; } -fn_is_syn_variant!(block_open, SyntacticTokenVariant::BlockOpen); -fn_is_syn_variant!(block_close, SyntacticTokenVariant::BlockClose); -fn_is_syn_variant!(separator, SyntacticTokenVariant::Separator); -fn_is_syn_variant!(terminator, SyntacticTokenVariant::MacroDefinitionTerminator); - - -pub struct SemanticParser { - tokens: Tokens, - macro_definitions: IndexMap<String, MacroDefinition>, - label_definitions: IndexMap<String, LabelDefinition>, - body: Vec<SemanticToken>, -} - -impl SemanticParser { - pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self { - // Gather all labels ahead of time. 
- let mut label_definitions = IndexMap::new(); - for token in &syntactic_tokens { - if let SyntacticTokenVariant::LabelDefinition(name) = &token.variant { - let definition = LabelDefinition { - source: token.source.clone(), - name: name.clone(), - }; - let None = label_definitions.insert(name.to_string(), definition) else { - unreachable!("Duplicate definition for label {name:?}"); - }; - } - } - Self { - tokens: Tokens::new(syntactic_tokens), - macro_definitions: IndexMap::new(), - label_definitions, - body: Vec::new(), - } - } - - pub fn parse(mut self) -> SemanticProgram { - while let Some(syn) = self.tokens.pop() { - match syn.variant { - SynVar::MacroDefinition(name) => { - let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else { - let variant = SemanticParseErrorVariant::UnterminatedMacroDefinition(name); - let error = SemanticParseError { source: syn.source, variant }; - self.body.push(SemanticToken::Error(error)); - break; - }; - let definition = MacroDefinitionParser::new(syn.source, definition_tokens).parse(); - let None = self.macro_definitions.insert(name.clone(), definition) else { - unreachable!("Duplicate definition for macro {name}"); - }; - } - SynVar::LabelDefinition(name) => { - let label_definition = LabelDefinition { source: syn.source, name }; - self.body.push(SemanticToken::LabelDefinition(label_definition)); - } - SynVar::PinnedAddress(address) => { - let pinned_address = PinnedAddress { source: syn.source, address }; - self.body.push(SemanticToken::PinnedAddress(pinned_address)); - } - SynVar::Symbol(name) => { - let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse(); - self.body.push(SemanticToken::Invocation(invocation)); - } - SynVar::PackedBinaryLiteral(pbl) => { - self.body.push(SemanticToken::Word(pbl)); - } - _ => { - let variant = SemanticParseErrorVariant::InvalidToken; - let error = SemanticParseError { source: syn.source, variant }; - self.body.push(SemanticToken::Error(error)); - } - } 
- } - - SemanticProgram { - macro_definitions: self.macro_definitions, - label_definitions: self.label_definitions, - body: self.body, - } - } -} - - -pub struct MacroDefinitionParser { - source: SourceSpan, - tokens: Tokens, - arguments: Vec<ArgumentDefinition>, - errors: Vec<SemanticParseError>, -} - -impl MacroDefinitionParser { - pub fn new(source: SourceSpan, tokens: Tokens) -> Self { - Self { - tokens, - source, - arguments: Vec::new(), - errors: Vec::new(), - } - } - - pub fn parse(mut self) -> MacroDefinition { - while let Some(definition) = self.parse_argument_definition() { - self.arguments.push(definition) - } - MacroDefinition { - value: self.parse_body(), - source: self.source, - arguments: self.arguments, - errors: self.errors, - } - } - - fn parse_argument_definition(&mut self) -> Option<ArgumentDefinition> { - // Only continue if the first token is a separator. - self.tokens.pop_if(is_separator)?; - - // Pop argument tokens. - let is_block = match self.tokens.pop_if(is_block_open) { - Some(_) => true, - None => false, - }; - let token = self.tokens.pop(); - if is_block { - self.tokens.pop_if(is_block_close); - } - // Parse argument token. - let token = token?; - let source = token.source; - if let SynVar::Symbol(name) = token.variant { - let variant = match is_block { - true => ArgumentVariant::Block, - false => ArgumentVariant::Integer, - }; - Some(ArgumentDefinition { name, source, variant }) - } else { - let variant = SemanticParseErrorVariant::InvalidToken; - self.errors.push(SemanticParseError { source, variant}); - None - } - } - - fn parse_body(&mut self) -> Value { - // Attempt to parse an Integer. 
- if self.tokens.len() == 1 { - let token = self.tokens.pop().unwrap(); - match token.variant { - SynVar::IntegerLiteral(value) => { - let integer = TrackedInteger { source: token.source, value }; - return Value::Integer(Integer::Literal(integer)); - } - SynVar::Expression(expr) => { - return Value::Integer(Integer::Expression(expr)); - } - _ => (), - } - self.tokens.unpop(token); - } - // Parse a Block. - let mut block = BlockParser::new(self.tokens.take()).parse(); - // If the block contains a single invocation, unwrap it. - if block.len() == 1 { - match block.pop() { - Some(SemanticToken::Invocation(invocation)) => return Value::Invocation(invocation), - Some(other) => block.push(other), - None => (), - }; - } - return Value::Block(block); - } -} - - -/// Parse an entire block, excluding delimiters. -pub struct BlockParser { - tokens: Tokens, - semantic_tokens: Vec<SemanticToken>, -} - -impl BlockParser { - pub fn new(tokens: Tokens) -> Self { - Self { tokens, semantic_tokens: Vec::new() } - } - - pub fn parse(mut self) -> Vec<SemanticToken> { - while let Some(token) = self.tokens.pop() { - let source = token.source; - match token.variant { - SynVar::Symbol(name) => { - let invocation = InvocationParser::new(name, source, &mut self.tokens).parse(); - self.semantic_tokens.push(SemanticToken::Invocation(invocation)); - } - SynVar::PackedBinaryLiteral(pbl) => { - self.semantic_tokens.push(SemanticToken::Word(pbl)); - } - SynVar::LabelDefinition(name) => { - let label_definition = LabelDefinition { source, name }; - self.semantic_tokens.push(SemanticToken::LabelDefinition(label_definition)); - } - _ => { - let variant = SemanticParseErrorVariant::InvalidToken; - let error = SemanticParseError { source, variant }; - self.semantic_tokens.push(SemanticToken::Error(error)); - } - } - } - return self.semantic_tokens; - } -} - - -struct InvocationParser<'a> { - name: String, - source: SourceSpan, - tokens: &'a mut Tokens, - arguments: Vec<ArgumentInvocation>, - errors: 
Vec<SemanticParseError>, -} - -impl<'a> InvocationParser<'a> { - pub fn new(name: String, source: SourceSpan, tokens: &'a mut Tokens) -> Self { - Self { name, source, tokens, arguments: Vec::new(), errors: Vec::new() } - } - - pub fn parse(mut self) -> Invocation { - while let Some(argument) = self.parse_invocation_argument() { - self.arguments.push(argument); - } - Invocation { - name: self.name, - source: self.source, - arguments: self.arguments, - errors: self.errors, - } - } - - fn parse_invocation_argument(&mut self) -> Option<ArgumentInvocation> { - // Only continue if the first token is a separator. - self.tokens.pop_if(is_separator)?; - - if let Some(block_open) = self.tokens.pop_if(is_block_open) { - let source = block_open.source; - let mut depth = 1; - let is_matching_block_close = |token: &SyntacticToken| { - match token.variant { - SyntacticTokenVariant::BlockOpen => { - depth += 1; false } - SyntacticTokenVariant::BlockClose => { - depth -= 1; depth == 0 } - _ => false, - } - }; - if let Ok(block_tokens) = self.tokens.pull_until(is_matching_block_close) { - let block = BlockParser::new(block_tokens).parse(); - Some(ArgumentInvocation { source, value: Value::Block(block) }) - } else { - let variant = SemanticParseErrorVariant::UnterminatedBlock; - self.errors.push(SemanticParseError { source, variant }); - None - } - } else { - let token = self.tokens.pop()?; - let source = token.source; - match token.variant { - SynVar::Symbol(name) => { - let arguments = Vec::new(); - let errors = Vec::new(); - let invocation = Invocation { source: source.clone(), name, arguments, errors }; - let value = Value::Invocation(invocation); - Some(ArgumentInvocation { source, value }) - } - SynVar::IntegerLiteral(value) => { - let integer = TrackedInteger { source: source.clone(), value }; - let value = Value::Integer(Integer::Literal(integer)); - Some(ArgumentInvocation { source, value }) - } - SynVar::String(string) => { - let value = 
Value::Integer(Integer::String(string)); - Some(ArgumentInvocation { source, value }) - } - SynVar::Expression(expr) => { - let value = Value::Integer(Integer::Expression(expr)); - Some(ArgumentInvocation { source, value }) - } - _ => { - let variant = SemanticParseErrorVariant::InvalidToken; - self.errors.push(SemanticParseError { source, variant }); - None - } - } - } - } -} - - -pub struct Tokens { - tokens: VecDeque<SyntacticToken>, -} - -impl Tokens { - pub fn new<T: Into<VecDeque<SyntacticToken>>>(tokens: T) -> Self { - Self { tokens: tokens.into() } - } - - pub fn pop(&mut self) -> Option<SyntacticToken> { - self.tokens.pop_front() - } - - pub fn pop_if(&mut self, predicate: fn(&SyntacticToken) -> bool) -> Option<SyntacticToken> { - match predicate(self.tokens.front()?) { - true => self.tokens.pop_front(), - false => None, - } - } - - pub fn unpop(&mut self, token: SyntacticToken) { - self.tokens.push_front(token); - } - - /// Pull tokens until the predicate returns true, otherwise return Err. - pub fn pull_until(&mut self, mut predicate: impl FnMut(&SyntacticToken) -> bool) -> Result<Self, ()> { - let mut output = VecDeque::new(); - while let Some(token) = self.tokens.pop_front() { - match predicate(&token) { - true => return Ok(Self::new(output)), - false => output.push_back(token), - }; - } - return Err(()); - } - - pub fn take(&mut self) -> Self { - Self { tokens: std::mem::take(&mut self.tokens) } - } - - pub fn len(&self) -> usize { - self.tokens.len() - } -} - diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs deleted file mode 100644 index f3fcec1..0000000 --- a/src/parsers/syntactic.rs +++ /dev/null @@ -1,172 +0,0 @@ -use crate::*; - - -pub struct SyntacticParser { - tokeniser: Tokeniser, - tokens: Vec<SyntacticToken>, - /// The name of the macro being parsed. - macro_name: Option<String>, - /// The name of the most recent label. 
- label_name: String, -} - -impl SyntacticParser { - pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { - let mut tokeniser = Tokeniser::new(source_code, path); - tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']); - Self { - tokeniser, - tokens: Vec::new(), - macro_name: None, - label_name: String::new(), - } - } - - pub fn parse(mut self) -> Vec<SyntacticToken> { - use SyntacticTokenVariant as SynVar; - use SyntacticParseError as SynErr; - let t = &mut self.tokeniser; - - loop { - t.eat_whitespace(); - t.mark_start(); - let Some(c) = t.eat_char() else { break }; - let variant = match c { - ':' => SynVar::Separator, - '{' => SynVar::BlockOpen, - '}' => SynVar::BlockClose, - '@' => match &self.macro_name { - Some(_) => { - t.eat_token(); - SynVar::Error(SynErr::LabelInMacroDefinition) - } - None => { - self.label_name = t.eat_token(); - SynVar::LabelDefinition(self.label_name.clone()) - } - } - '&' => match &self.macro_name { - Some(macro_name) => { - let label_name = format!("{macro_name}:{}", t.eat_token()); - SynVar::LabelDefinition(label_name) - } - None => { - let label_name = &self.label_name; - let sublabel_name = format!("{label_name}/{}", t.eat_token()); - SynVar::LabelDefinition(sublabel_name) - } - } - '%' => { - let macro_name = t.eat_token(); - self.macro_name = Some(macro_name.clone()); - SynVar::MacroDefinition(macro_name) - } - ';' => { - self.macro_name = None; - SynVar::MacroDefinitionTerminator - } - '[' => { - t.mark_child(); - match t.eat_to_delimiter(']') { - Some(_) => { - let child = t.subtokenise(); - t.mark_end(); - let expr = parse_constant_expression(child, t.get_source()); - SynVar::Expression(expr) - } - None => SynVar::Error(SynErr::UnterminatedExpression), - } - } - '"' => { - t.mark_child(); - match t.eat_to_delimiter('"') { - Some(string) => { - let child = t.subtokenise(); - t.mark_end(); - let chars = parse_tracked_chars(child); - let tracked_string = TrackedString { - source: 
t.get_source(), string, chars, - }; - SynVar::String(tracked_string) - } - None => SynVar::Error(SynErr::UnterminatedString), - } - } - '(' => match t.eat_to_delimiter(')') { - Some(string) => { - // Check if the comment fills the entire line. - if t.start.position.column == 0 && t.end_of_line() { - if let Some(path) = string.strip_prefix(": ") { - t.embedded_path = Some(PathBuf::from(path.trim())); - t.embedded_first_line = t.start.position.line + 1; - } - } - continue; - }, - None => SynVar::Error(SynErr::UnterminatedComment), - } - '|' => { - let token = t.eat_token(); - if let Some(hex_string) = token.strip_prefix("0x") { - match usize::from_str_radix(hex_string, 16) { - Ok(addr) => SynVar::PinnedAddress(addr), - Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), - } - } else { - match usize::from_str_radix(&token, 10) { - Ok(addr) => SynVar::PinnedAddress(addr), - Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)), - } - } - } - '#' => { - t.mark_child(); - t.eat_token(); - let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source()); - SynVar::PackedBinaryLiteral(pbl) - }, - '~' => match &self.macro_name { - Some(macro_name) => { - let symbol_name = format!("{macro_name}:{}", t.eat_token()); - SynVar::Symbol(symbol_name) - } - None => { - let label_name = &self.label_name; - let symbol_name = format!("{label_name}/{}", t.eat_token()); - SynVar::Symbol(symbol_name) - } - } - c => { - let token = format!("{c}{}", t.eat_token()); - if let Some(hex_string) = token.strip_prefix("0x") { - match usize::from_str_radix(hex_string, 16) { - Ok(value) => SynVar::IntegerLiteral(value as isize), - Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), - } - } else { - match usize::from_str_radix(&token, 10) { - Ok(value) => SynVar::IntegerLiteral(value as isize), - Err(_) => SynVar::Symbol(token), - } - } - } - }; - - t.mark_end(); - let source = t.get_source(); - self.tokens.push(SyntacticToken { source, variant }); - } - - 
return self.tokens; - } -} - - -fn parse_tracked_chars(mut t: Tokeniser) -> Vec<Tracked<char>> { - let mut output = Vec::new(); - while let Some(c) = t.eat_char() { - output.push(Tracked::from(c, t.get_source())); - t.mark_start(); - } - return output; -} diff --git a/src/report.rs b/src/report.rs deleted file mode 100644 index a88de4f..0000000 --- a/src/report.rs +++ /dev/null @@ -1,235 +0,0 @@ -use crate::*; - - -static mut ERROR_REPORTED: bool = false; - -macro_rules! report_source_error { - ($context:expr, $message:expr) => { - report_source_issue(LogLevel::Error, $context, $message); - unsafe { ERROR_REPORTED = true; } - }; -} - -macro_rules! exit_if_error_reported { - () => { - if unsafe { ERROR_REPORTED } { - std::process::exit(1); - } - }; -} - -pub fn report_syntactic_errors(syntactic_tokens: &[SyntacticToken], source_code: &str) { - use SyntacticTokenVariant as SynVar; - for token in syntactic_tokens { - let context = Context { source_code: &source_code, source: &token.source }; - match &token.variant { - SynVar::Expression(expr) => for t in &expr.tokens { - let context = Context { source_code: &source_code, source: &t.source }; - if let ExpressionTokenVariant::Error(err) = &t.variant { - let ExpressionParseError::InvalidHexadecimalLiteral(hex) = err; - let message = format!("Invalid hexadecimal literal {hex:?} in constant expression"); - report_source_error!(&context, &message); - } - } - SynVar::PackedBinaryLiteral(pbl) => for e in &pbl.errors { - let context = Context { source_code: &source_code, source: &e.source }; - match &e.variant { - PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => { - let message = format!("Duplicate field name {name:?} in packed binary literal"); - report_source_error!(&context, &message); - } - PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => { - let message = format!("Invalid character {c:?} in packed binary literal"); - report_source_error!(&context, &message); - } - } - } - SynVar::Error(err) => 
match err { - SyntacticParseError::InvalidHexadecimalLiteral(hex) => { - let message = format!("Invalid hexadecimal literal {hex:?}"); - report_source_error!(&context, &message); - } - SyntacticParseError::InvalidDecimalLiteral(dec) => { - let message = format!("Invalid decimal literal {dec:?}"); - report_source_error!(&context, &message); - } - SyntacticParseError::InvalidSymbolIdentifier(name) => { - let message = format!("Invalid identifier {name:?}"); - report_source_error!(&context, &message); - } - SyntacticParseError::UnterminatedComment => { - let message = format!("Unterminated comment"); - report_source_error!(&context, &message); - } - SyntacticParseError::UnterminatedString => { - let message = format!("Unterminated string"); - report_source_error!(&context, &message); - } - SyntacticParseError::UnterminatedExpression => { - let message = format!("Unterminated assembler expression"); - report_source_error!(&context, &message); - } - SyntacticParseError::LabelInMacroDefinition => { - let message = format!("Only sublabels can be used in macro definitions"); - report_source_error!(&context, &message); - } - } - _ => (), - } - } - exit_if_error_reported!(); -} - - -pub fn report_semantic_errors(program: &SemanticProgram, source_code: &str) { - for (_, definition) in &program.macro_definitions { - report_value_errors(&definition.value, source_code); - } - for token in &program.body { - report_semantic_token_errors(token, source_code); - } - exit_if_error_reported!(); -} - -fn report_value_errors(definition: &Value, source_code: &str) { - match definition { - Value::Integer(integer) => match integer { - Integer::Expression(expr) => for token in &expr.tokens { - if let ExpressionTokenVariant::Error(error) = &token.variant { - let message = match error { - ExpressionParseError::InvalidHexadecimalLiteral(hex) => - format!("Invalid hexadecimal literal '{hex}' in constant expression"), - }; - let context = Context { source: &token.source, source_code}; - 
report_source_error!(&context, &message); - } - } - _ => (), - } - Value::Block(block) => { - for token in block { - report_semantic_token_errors(token, source_code); - } - } - Value::Invocation(invocation) => report_invocation_errors(invocation, source_code), - } -} - -fn report_semantic_token_errors(token: &SemanticToken, source_code: &str) { - match &token { - SemanticToken::Word(pbl) => for error in &pbl.errors { - let message = match &error.variant { - PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => - format!("Duplicate field name '{name}' in packed binary literal"), - PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => - format!("Invalid character '{c}' in packed binary literal"), - }; - let context = Context { source: &error.source, source_code }; - report_source_error!(&context, &message); - } - SemanticToken::Invocation(invocation) => { - report_invocation_errors(invocation, source_code) - } - SemanticToken::Error(error) => { - report_semantic_error(error, source_code) - } - SemanticToken::LabelDefinition(_) => (), - SemanticToken::PinnedAddress(_) => (), - } -} - -fn report_invocation_errors(invocation: &Invocation, source_code: &str) { - for error in &invocation.errors { - report_semantic_error(&error, source_code); - } - for argument in &invocation.arguments { - report_value_errors(&argument.value, source_code); - } -} - -fn report_semantic_error(error: &SemanticParseError, source_code: &str) { - let message = match &error.variant { - SemanticParseErrorVariant::UnterminatedMacroDefinition(name) => - format!("The macro definition '{name}' is missing a terminating ';' character"), - SemanticParseErrorVariant::UnterminatedBlock => - format!("Block literal is missing a terminating '}}' character"), - SemanticParseErrorVariant::InvalidToken => - format!("Invalid token"), - }; - let context = Context { source: &error.source, source_code}; - report_source_error!(&context, &message); -} - - -pub fn report_assembler_errors(tokens: 
&[AssembledToken], source_code: &str) { - for token in tokens { - match token { - AssembledToken::Word(word) => { - for error in &word.errors { - report_assembler_error(&error, source_code); - } - } - AssembledToken::Error(error) => { - report_assembler_error(error, source_code); - }, - _ => (), - } - } - exit_if_error_reported!(); -} - -fn report_assembler_error(error: &AssemblerError, source_code: &str) { - let message = match &error.variant { - AssemblerErrorVariant::DefinitionNotFound(name) => - format!("Definition not found for name '{name}'"), - AssemblerErrorVariant::NotABlock => - format!("Value of type block was expected here"), - AssemblerErrorVariant::NotAnInteger => - format!("Value of type integer was expected here"), - AssemblerErrorVariant::IntegerInBlock => - format!("Integer in block"), - AssemblerErrorVariant::StringInExpression => - format!("Expressions cannot contain strings"), - AssemblerErrorVariant::IncorrectArgumentCount(expected, received) => - format!("Expected {expected} arguments, but received {received} instead"), - AssemblerErrorVariant::IncorrectArgumentType(expected, received) => - format!("Expected {expected} argument but received {received} instead"), - }; - let context = Context { - source_code: &source_code, - source: &error.source, - }; - report_source_error!(&context, &message); -} - - -pub fn report_bytecode_errors(bytecode: &Bytecode, source_code: &str) { - for error in &bytecode.errors { - report_bytecode_error(error, source_code); - } - exit_if_error_reported!(); -} - -pub fn report_bytecode_error(error: &BytecodeError, source_code: &str) { - let message = match &error.variant { - BytecodeErrorVariant::DefinitionNotFound(name) => - format!("Could not find definition for label reference '{name}'"), - BytecodeErrorVariant::DuplicateLabelDefinition(name) => - format!("Duplicate definition for label '{name}'"), - BytecodeErrorVariant::PinnedAddressBacktrack(expected, received) => - format!("Cannot pin back to address {expected} 
when already at address {received}"), - BytecodeErrorVariant::ValueTooLarge(expected, received) => - format!("Expected {expected}-bit value, but received {received}-bit value instead"), - BytecodeErrorVariant::StackUnderflow => - format!("Stack underflow when evaluating expression"), - BytecodeErrorVariant::NoReturnValue => - format!("No value left on stack when evaluating expression"), - BytecodeErrorVariant::MultipleReturnValues => - format!("More than one value left on stack when evaluating expression"), - }; - let context = Context { - source_code: &source_code, - source: &error.source, - }; - report_source_error!(&context, &message); -} diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..3618b26 --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,182 @@ +use crate::*; + +use std::collections::HashMap; + + +pub fn parse_bytecode(tokens: Vec<Tracked<IntermediateToken>>, width: Option<u32>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> { + BytecodeParser::new(width).parse(tokens) +} + + +pub struct BytecodeParser { + width: Option<u32>, + addresses: HashMap<String, Tracked<usize>>, + address: usize, + segment_address: usize, + segment_source: Option<SourceSpan>, + segments: Vec<Segment>, + words: Vec<Tracked<Word>>, + errors: Vec<Tracked<BytecodeError>>, +} + +impl BytecodeParser { + pub fn new(width: Option<u32>) -> Self { + Self { + width, + addresses: HashMap::new(), + address: 0, + segment_address: 0, + segment_source: None, + segments: Vec::new(), + words: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(mut self, tokens: Vec<Tracked<IntermediateToken>>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> { + // Calculate all label addresses ahead of time. 
+ let mut address = 0; + for token in &tokens { + let source = &token.source; + match &token.value { + IntermediateToken::LabelDefinition(name) => { + let tracked = Tracked::from(address, source.clone()); + if let Some(_) = self.addresses.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + IntermediateToken::Word(_) => { + address += 1; + } + IntermediateToken::PinnedAddress(pinned) => { + address = pinned.value; + } + } + } + for token in &tokens { + let source = &token.source; + match &token.value { + IntermediateToken::Word(word) => { + let word = self.evaluate_word(word, source); + // Check that the word width fits the provided width. + if let Some(width) = self.width { + if word.width != width { + let error = BytecodeError::IncorrectWidth(width, word.width); + self.errors.push(Tracked::from(error, source.clone())); + } + } + self.words.push(word); + self.address += 1; + } + IntermediateToken::PinnedAddress(address) => { + let current = self.address; + let pinned = address.value; + if current > pinned { + let error = BytecodeError::PinnedAddressBacktrack(pinned, current); + self.errors.push(Tracked::from(error, address.source.clone())); + } else { + let words = std::mem::take(&mut self.words); + if !words.is_empty() { + let address = self.segment_address; + let source = std::mem::take(&mut self.segment_source); + let segment = Segment { address, source, words }; + self.segments.push(segment); + } + self.segment_source = Some(address.source.clone()); + self.address = pinned; + self.segment_address = pinned; + } + } + IntermediateToken::LabelDefinition(_) => (), + } + } + // Finish final segment. 
+ let words = std::mem::take(&mut self.words); + if !words.is_empty() { + let address = self.segment_address; + let source = std::mem::take(&mut self.segment_source); + let segment = Segment { address, source, words }; + self.segments.push(segment); + } + + match self.errors.is_empty() { + true => Ok(self.segments), + false => Err(self.errors), + } + } + + fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize { + let mut stack = ExpressionStack::new(); + for token in &expression.tokens { + let source = &token.source; + match &token.value { + IntermediateExpressionToken::Integer(integer) => match integer { + IntermediateInteger::Integer(value) => { + stack.push(*value); + } + IntermediateInteger::Expression(expression) => { + stack.push(self.evaluate_expression(expression, source)); + } + IntermediateInteger::LabelReference(name) => { + stack.push(self.evaluate_label_reference(name)); + } + } + IntermediateExpressionToken::Operator(operator) => { + if let Err(err) = stack.apply(*operator, source) { + let error = BytecodeError::StackError(err); + self.errors.push(Tracked::from(error, source.clone())) + } + } + } + } + match stack.pull_result() { + Ok(value) => value, + Err(err) => { + let error = BytecodeError::StackError(Tracked::from(err, source.clone())); + self.errors.push(Tracked::from(error, source.clone())); + 0 + } + } + } + + fn evaluate_label_reference(&mut self, name: &Tracked<String>) -> isize { + if let Some(address) = self.addresses.get(&name.to_string()) { + address.value as isize + } else { + unreachable!("Uncaught unresolved label reference '{name}'") + } + } + + fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked<Word> { + let mut word_value = word.value; + for field in &word.fields { + let field_source = &field.value.value.source; + let field_value = match &field.value.value.value { + IntermediateInteger::Expression(expression) => { + 
self.evaluate_expression(expression, source) + } + IntermediateInteger::LabelReference(name) => { + self.evaluate_label_reference(name) + } + IntermediateInteger::Integer(value) => { + *value + } + }; + let value_width = match field_value.cmp(&0) { + std::cmp::Ordering::Less => (-field_value).ilog2() + 1, + std::cmp::Ordering::Equal => 0, + std::cmp::Ordering::Greater => field_value.ilog2() + 1, + }; + if field.width < value_width { + let error = BytecodeError::ValueTooWide(field.width, value_width); + self.errors.push(Tracked::from(error, field_source.clone())); + } else { + let mask = 2_usize.pow(field.width as u32) - 1; + let clamped_value = (field_value as usize) & mask; + word_value |= (clamped_value << field.shift) as usize; + } + } + let word = Word { width: word.width, value: word_value }; + return Tracked::from(word, source.clone()); + } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..b54cb0e --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,78 @@ +use crate::*; + + +pub struct Segment { + pub address: usize, + /// Source of the address value. 
/// A single assembled word: a raw value and the number of bits it occupies.
pub struct Word {
    pub value: usize,
    /// Width of the word in bits.
    pub width: u32,
}

/// Formats the word as binary digits, most significant bit first, with an
/// underscore between each group of four bits counted from the least
/// significant end. A zero-width word is written as `0`.
impl std::fmt::Display for Word {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        if self.width == 0 {
            return write!(f, "0");
        }
        for i in (0..self.width).rev() {
            let is_first_bit = i + 1 == self.width;
            if !is_first_bit && (i + 1) % 4 == 0 {
                write!(f, "_")?;
            }
            // Bit positions beyond the size of `usize` are always zero;
            // `checked_shr` avoids a shift-overflow panic for such widths.
            let bit = self.value.checked_shr(i).unwrap_or(0) & 1;
            write!(f, "{bit}")?;
        }
        Ok(())
    }
}
/// Counter backing `next_id!`; starts at zero and advances by one per use.
static ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

/// Return the current value of the global ID counter and advance it by one.
macro_rules! next_id {
    () => {
        // An atomic counter needs no `unsafe` block and cannot race, unlike
        // a read-modify-write on a `static mut`.
        ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
    };
}
+ if symbol.namespace.is_empty() { + if !macro_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { + unreachable!("Uncaught duplicate macro definition '{}'", symbol.name); + } + } + } + SymbolRole::Definition(DefinitionType::CanFollowReference) => { + if !label_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { + unreachable!("Uncaught duplicate label definition '{}'", symbol.name); + } + } + SymbolRole::Reference => (), + } + } + + Self { + semantic, + label_names, + macro_names, + macro_definitions: IndexMap::new(), + intermediate: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(mut self) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> { + for token in self.semantic { + let source = &token.source; + match token.value { + SemanticToken::MacroDefinition(definition) => { + // Invoke the body to see if it contains undefined macros. + let error_count = self.errors.len(); + let mut arguments = IndexMap::new(); + // Prepare dummy argument values. 
+ let null = SourceSpan { + string: String::new(), + in_merged: SourceLocation { + path: None, + start: SourcePosition::ZERO, + end: SourcePosition::ZERO, + }, + in_source: None, + child: None, + }; + for argument in &definition.arguments { + let value = match argument.variant { + ArgumentType::Integer => { + let integer = IntermediateInteger::Integer(0); + let tracked = Tracked::from(integer, null.clone()); + IntermediateValue::Integer(tracked) + } + ArgumentType::Block => { + IntermediateValue::Block(Vec::new()) + } + }; + let tracked = Tracked::from(value, null.clone()); + arguments.insert(argument.name.clone(), tracked); + } + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments, + errors: &mut self.errors, + id: next_id!(), + }; + env.parse_macro_definition_body(&definition.body, source); + if self.errors.len() != error_count { + break; + } + + let name = definition.name.to_string(); + if self.macro_definitions.insert(name.clone(), definition).is_some() { + unreachable!("Uncaught duplicate macro definition '{}'", name); + } + } + SemanticToken::BlockToken(block_token) => { + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments: IndexMap::new(), + errors: &mut self.errors, + id: next_id!(), + }; + let mut tokens = env.parse_block_token(&block_token, source); + self.intermediate.append(&mut tokens); + } + } + } + match self.errors.is_empty() { + true => Ok(self.intermediate), + false => Err(self.errors), + } + } +} + + +struct Environment<'a> { + label_names: &'a IndexSet<Tracked<String>>, + macro_names: &'a IndexSet<Tracked<String>>, + macro_definitions: &'a IndexMap<String, MacroDefinition>, + arguments: IndexMap<String, Tracked<IntermediateValue>>, + errors: &'a mut Vec<Tracked<IntermediateError>>, + id: usize, +} + +impl<'a> Environment<'a> { + // Attach the 
invocation ID to every macro label name + fn tag_name(&self, name: &str) -> String { + match name.contains(':') { + true => format!("{name}:{}", self.id), + false => name.to_string(), + } + } + + fn parse_macro_definition_body(&mut self, body: &MacroDefinitionBody, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { + match &body { + MacroDefinitionBody::Integer(integer) => { + let token = self.parse_integer_token(&integer, &source)?; + let integer = IntermediateValue::Integer(token); + Some(Tracked::from(integer, source.clone())) + } + MacroDefinitionBody::Invocation(invocation) => { + self.parse_invocation(&invocation, &invocation.source) + } + MacroDefinitionBody::Block(blocks) => { + let mut tokens = Vec::new(); + for block in blocks { + tokens.append(&mut self.parse_block_token(block, &block.source)); + } + let value = IntermediateValue::Block(tokens); + Some(Tracked::from(value, source.clone())) + } + } + } + + fn parse_block_token(&mut self, block: &BlockToken, source: &SourceSpan) -> Vec<Tracked<IntermediateToken>> { + let mut intermediate = Vec::new(); + match block { + BlockToken::LabelDefinition(name) => { + let token = IntermediateToken::LabelDefinition(self.tag_name(name)); + intermediate.push(Tracked::from(token, source.clone())); + } + BlockToken::PinnedAddress(address) => { + if let Some(integer) = self.parse_integer_token(address, &address.source) { + if let Some(source) = integer_contains_label_reference(&integer) { + let error = IntermediateError::LabelReferenceInPinnedAddress; + let new_source = address.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + } else { + match evaluate_integer(&integer, source) { + Ok(value) => { + let value = usize::try_from(value).unwrap_or(0); + let tracked = Tracked::from(value, address.source.clone()); + let token = IntermediateToken::PinnedAddress(tracked); + intermediate.push(Tracked::from(token, source.clone())); + } + Err(error) => self.errors.push(error), + } + } + } 
+ } + BlockToken::ConditionalBlock(cond) => { + let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source); + let mut body = self.parse_block_token(&cond.body, &cond.body.source); + if let Some(predicate) = predicate { + let mut found_error = false; + if let Some(source) = integer_contains_label_reference(&predicate) { + let error = IntermediateError::LabelReferenceInConditionPredicate; + let new_source = cond.predicate.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + found_error = true; + }; + if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) { + let error = IntermediateError::LabelDefinitionInConditionBody; + let new_source = cond.body.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + found_error = true; + } + if !found_error { + match evaluate_integer(&predicate, &cond.predicate.source) { + Ok(value) => if value != 0 { intermediate.append(&mut body) }, + Err(error) => self.errors.push(error), + } + } + } + } + BlockToken::WordTemplate(word_template) => { + let mut fields = Vec::new(); + for bit_field in &word_template.fields { + let name = bit_field.name.to_string(); + let source = &bit_field.source; + let invocation = Invocation { name, arguments: Vec::new() }; + if let Some(value) = self.parse_integer_invocation(&invocation, source) { + let field = IntermediateField { + width: bit_field.width, + shift: bit_field.shift, + value, + }; + fields.push(Tracked::from(field, bit_field.source.clone())); + } + } + let word = IntermediateWord { + value: word_template.value, + width: word_template.width, + fields, + }; + let token = IntermediateToken::Word(word); + intermediate.push(Tracked::from(token, source.clone())); + } + BlockToken::Block(blocks) => { + for block in blocks { + let mut tokens = self.parse_block_token(block, &block.source); + intermediate.append(&mut tokens); + } + } + BlockToken::Invocation(invocation) => { + if let 
Some(mut tokens) = self.parse_block_invocation(invocation, source) { + intermediate.append(&mut tokens); + } + } + } + + return intermediate; + } + + fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { + match integer { + IntegerToken::IntegerLiteral(value) => { + let integer = IntermediateInteger::Integer(*value); + Some(Tracked::from(integer, source.clone())) + } + IntegerToken::Expression(expression) => { + self.parse_expression(expression, source) + } + IntegerToken::Invocation(invocation) => { + self.parse_integer_invocation(invocation, source) + } + } + } + + fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { + match self.parse_invocation(invocation, source)?.value { + IntermediateValue::Integer(integer) => Some(integer), + IntermediateValue::Block(_) => { + let error = IntermediateError::ExpectedInteger; + self.errors.push(Tracked::from(error, source.clone())); + None + } + } + } + + fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> { + match self.parse_invocation(invocation, source)?.value { + IntermediateValue::Block(tokens) => Some(tokens), + IntermediateValue::Integer(_) => { + let error = IntermediateError::ExpectedBlock; + self.errors.push(Tracked::from(error, source.clone())); + None + } + } + } + + fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { + let received_count = invocation.arguments.len(); + if let Some(argument) = self.arguments.get(&invocation.name) { + if received_count != 0 { + let error = IntermediateError::IncorrectArgumentCount(0, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + Some(argument.clone()) + } + } else if let Some(label_name) = self.label_names.get(&invocation.name) { + if received_count != 0 { + 
let error = IntermediateError::IncorrectArgumentCount(0, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + let name = self.tag_name(label_name); + let tracked = Tracked::from(name, label_name.source.clone()); + let integer = IntermediateInteger::LabelReference(tracked); + let tracked = Tracked::from(integer, source.clone()); + let value = IntermediateValue::Integer(tracked); + Some(Tracked::from(value, source.clone())) + } + } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { + // Check that the correct number of arguments were provided. + let expected_count = definition.arguments.len(); + if received_count != expected_count { + let error = IntermediateError::IncorrectArgumentCount(expected_count, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + // Gather and type-check the provided arguments. + let mut arguments = Vec::new(); + for (i, argument) in invocation.arguments.iter().enumerate() { + let received_type = match &argument.value { + InvocationArgument::String(string) => { + let mut values = Vec::new(); + for c in &string.chars { + let integer = IntermediateInteger::Integer(**c); + let tracked = Tracked::from(integer, c.source.clone()); + values.push(IntermediateValue::Integer(tracked)); + } + arguments.push(RepeatedArgument::List(values)); + ArgumentType::Integer + } + InvocationArgument::IntegerToken(integer) => { + let tracked = self.parse_integer_token(&integer, &argument.source)?; + let value = IntermediateValue::Integer(tracked); + arguments.push(RepeatedArgument::Loop(value)); + ArgumentType::Integer + } + InvocationArgument::BlockToken(block) => { + let tokens = self.parse_block_token(&block, &argument.source); + let value = IntermediateValue::Block(tokens); + arguments.push(RepeatedArgument::Loop(value)); + ArgumentType::Block + } + InvocationArgument::Invocation(invocation) => { + let value = self.parse_invocation(&invocation, 
&argument.source)?; + let received_type = match &value.value { + IntermediateValue::Integer(_) => ArgumentType::Integer, + IntermediateValue::Block(_) => ArgumentType::Block, + }; + arguments.push(RepeatedArgument::Loop(value.value)); + received_type + } + }; + let expected_type = match received_type { + ArgumentType::Integer => ArgumentType::Block, + ArgumentType::Block => ArgumentType::Integer, + }; + if definition.arguments[i].variant != received_type { + let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); + self.errors.push(Tracked::from(error, argument.source.clone())); + return None; + } + } + // Invoke the invocation multiple times. + let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1); + let mut values = Vec::new(); + for i in 0..repetitions { + // Construct an argument map for this invocation. + let mut argument_map = IndexMap::new(); + for (a, argument) in arguments.iter().enumerate() { + let name = definition.arguments[a].name.clone(); + let source = invocation.arguments[a].source.clone(); + let value = match argument { + RepeatedArgument::Loop(value) => { + Tracked::from(value.clone(), source) + } + RepeatedArgument::List(list) => match list.get(i) { + Some(value) => { + Tracked::from(value.clone(), source) + } + None => { + let error = IntermediateError::ListExhausted; + let source = invocation.arguments[a].source.clone(); + self.errors.push(Tracked::from(error, source)); + return None; + } + } + }; + if argument_map.insert(name.clone(), value).is_some() { + unreachable!("Uncaught duplicate macro argument name '{name}'"); + }; + } + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments: argument_map, + errors: &mut self.errors, + id: next_id!(), + }; + values.push(env.parse_macro_definition_body(&definition.body, source)?); + } + if values.len() == 1 { + values.pop() + } else { + // Flatten all values 
into a list of block tokens. + let mut block = Vec::new(); + for value in values { + match value.value { + IntermediateValue::Integer(_) => { + let error = IntermediateError::ExpectedBlock; + self.errors.push(Tracked::from(error, value.source)); + return None; + } + IntermediateValue::Block(mut tokens) => { + block.append(&mut tokens); + } + } + } + Some(Tracked::from(IntermediateValue::Block(block), source.clone())) + } + } + } else if let Some(macro_name) = self.macro_names.get(&invocation.name) { + let error = IntermediateError::InvocationBeforeDefinition; + let source = source.clone().wrap(macro_name.source.clone()); + self.errors.push(Tracked::from(error, source)); + None + } else { + unreachable!("Uncaught unresolved reference '{}'", invocation.name); + } + } + + fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { + let mut intermediate = Vec::new(); + let mut error = false; + + for token in &expression.tokens { + let source = &token.source; + match &token.value { + ExpressionToken::IntegerToken(integer) => { + let Some(integer) = self.parse_integer_token(integer, source) else { + error = true; continue; + }; + let token = IntermediateExpressionToken::Integer(integer.value); + intermediate.push(Tracked::from(token, integer.source)); + } + ExpressionToken::Operator(operator) => { + let token = IntermediateExpressionToken::Operator(*operator); + intermediate.push(Tracked::from(token, source.clone())); + } + ExpressionToken::Invocation(invocation) => { + let Some(integer) = self.parse_integer_invocation(invocation, source) else { + error = true; continue; + }; + let token = IntermediateExpressionToken::Integer(integer.value); + intermediate.push(Tracked::from(token, integer.source)); + } + } + } + + if error { return None; } + let expression = IntermediateExpression { tokens: intermediate }; + let integer = IntermediateInteger::Expression(expression); + Some(Tracked::from(integer, 
source.clone())) + } +} + + +macro_rules! return_some { + ($option:expr) => { + if $option.is_some() { return $option; } + }; +} + +fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option<SourceSpan> { + match integer { + IntermediateInteger::Integer(_) => None, + IntermediateInteger::LabelReference(label) => Some(label.source.clone()), + IntermediateInteger::Expression(expr) => expression_contains_label_reference(expr), + } +} + +fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option<SourceSpan> { + for token in &expression.tokens { + if let IntermediateExpressionToken::Integer(integer) = &token.value { + if let Some(child) = integer_contains_label_reference(&integer) { + return Some(token.source.clone().wrap(child)); + } + } + } + return None; +} + +fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option<SourceSpan> { + match &block { + BlockToken::LabelDefinition(_) => { + return Some(source.clone()); + } + BlockToken::Invocation(invocation) => { + return_some!(invocation_contains_label_definition(invocation)) + } + BlockToken::Block(blocks) => { + for block in blocks { + return_some!(block_contains_label_definition(block, &block.source)) + } + } + _ => (), + } + return None; +} + +fn invocation_contains_label_definition(invocation: &Invocation) -> Option<SourceSpan> { + for argument in &invocation.arguments { + match &argument.value { + InvocationArgument::BlockToken(block) => { + return_some!(block_contains_label_definition(&block, &argument.source)) + } + InvocationArgument::Invocation(invocation) => { + return_some!(invocation_contains_label_definition(&invocation)) + } + _ => (), + } + } + return None; +} + +fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { + match integer { + IntermediateInteger::Integer(value) => Ok(*value), + IntermediateInteger::LabelReference(name) => + unreachable!("Uncaught label 
reference '{name}' in condition predicate or pinned address value"), + IntermediateInteger::Expression(expr) => evaluate_expression(expr, source), + } +} + +fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { + let mut stack = ExpressionStack::new(); + for token in &expression.tokens { + let source = &token.source; + match &token.value { + IntermediateExpressionToken::Integer(integer) => match integer { + IntermediateInteger::Integer(value) => { + stack.push(*value); + } + IntermediateInteger::Expression(expression) => { + stack.push(evaluate_expression(&expression, source)?); + } + IntermediateInteger::LabelReference(name) => { + unreachable!("Uncaught label reference '{name}' in condition predicate"); + } + } + IntermediateExpressionToken::Operator(operator) => { + if let Err(stack_error) = stack.apply(*operator, source) { + let error = IntermediateError::StackError(stack_error); + return Err(Tracked::from(error, token.source.clone())); + } + } + } + } + match stack.pull_result() { + Ok(value) => Ok(value), + Err(err) => { + let error = Tracked::from(err, source.clone()); + Err(Tracked::from(IntermediateError::StackError(error), source.clone())) + } + } +} diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs new file mode 100644 index 0000000..a09581e --- /dev/null +++ b/src/stages/intermediate_tokens.rs @@ -0,0 +1,149 @@ +use crate::*; + + +#[derive(Clone)] +pub enum IntermediateToken { + Word(IntermediateWord), + PinnedAddress(Tracked<usize>), + LabelDefinition(String), +} + +#[derive(Clone)] +pub struct IntermediateWord { + pub value: usize, + /// Width of the word in bits. + pub width: u32, + pub fields: Vec<Tracked<IntermediateField>>, +} + +#[derive(Clone)] +pub struct IntermediateField { + pub value: Tracked<IntermediateInteger>, + /// Width of the field in bits. + pub width: u32, + /// Number of bits to the right of the field in the word. 
+ pub shift: u32, +} + +#[derive(Clone)] +pub enum IntermediateInteger { + Integer(isize), + Expression(IntermediateExpression), + LabelReference(Tracked<String>), +} + +#[derive(Clone)] +pub struct IntermediateExpression { + pub tokens: Vec<Tracked<IntermediateExpressionToken>>, +} + +#[derive(Clone)] +pub enum IntermediateExpressionToken { + Integer(IntermediateInteger), + Operator(Operator), +} + +#[derive(Clone)] +pub enum IntermediateValue { + Integer(Tracked<IntermediateInteger>), + Block(Vec<Tracked<IntermediateToken>>), +} + +pub enum RepeatedArgument { + Loop(IntermediateValue), + List(Vec<IntermediateValue>), +} + +impl RepeatedArgument { + pub fn len(&self) -> usize { + match self { + Self::Loop(_) => 1, + Self::List(list) => list.len(), + } + } +} + +pub enum IntermediateError { + ExpectedInteger, + ExpectedBlock, + ListExhausted, + LabelReferenceInConditionPredicate, + LabelDefinitionInConditionBody, + LabelReferenceInPinnedAddress, + StackError(Tracked<StackError>), + InvocationBeforeDefinition, + /// expected, received + IncorrectArgumentCount(usize, usize), + /// expected, received + IncorrectArgumentType(ArgumentType, ArgumentType), +} + +pub fn report_intermediate_errors(errors: &[Tracked<IntermediateError>], source_code: &str) { + for error in errors { + report_intermediate_error(error, source_code); + } +} + +fn report_intermediate_error(error: &Tracked<IntermediateError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + IntermediateError::ExpectedInteger => + "An integer value was expected here", + IntermediateError::ExpectedBlock => + "A block value was expected here", + IntermediateError::ListExhausted => + "This string is shorter than another string passed to the same invocation", + IntermediateError::LabelReferenceInConditionPredicate => + "The predicate of a conditional block cannot contain a label reference", + 
IntermediateError::LabelDefinitionInConditionBody => + "The body of a conditional block cannot contain a label definition", + IntermediateError::LabelReferenceInPinnedAddress => + "The value of a pinned address cannot contain a label reference", + IntermediateError::StackError(stack_error) => { + report_stack_error(stack_error, source_code); return; }, + IntermediateError::InvocationBeforeDefinition => + &format!("Macro cannot be invoked before it has been defined"), + IntermediateError::IncorrectArgumentCount(expected, received) => + &format!("Expected {expected} arguments, but received {received} instead"), + IntermediateError::IncorrectArgumentType(expected, received) => + &format!("Expected {expected} value but received {received} value instead"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_intermediate_token(i: usize, token: &IntermediateToken) { + match token { + IntermediateToken::Word(word) => { + indent!(i, "Word({:>0w$b})", word.value, w = word.width as usize); + for field in &word.fields { + print_intermediate_integer(i+1, &field.value.value); + } + } + IntermediateToken::PinnedAddress(address) => + indent!(i, "PinnedAddress({address})"), + IntermediateToken::LabelDefinition(name) => + indent!(i, "LabelDefinition({name})"), + } +} + +fn print_intermediate_integer(i: usize, integer: &IntermediateInteger) { + match integer { + IntermediateInteger::Integer(value) => + indent!(i, "Integer({value})"), + IntermediateInteger::LabelReference(name) => + indent!(i, "LabelReference({name})"), + IntermediateInteger::Expression(expression) => { + indent!(i, "Expression"); + for token in &expression.tokens { + match &token.value { + IntermediateExpressionToken::Integer(integer) => + print_intermediate_integer(i+1, integer), + IntermediateExpressionToken::Operator(operator) => + indent!(i+1, "Operator({operator})"), + } + } + } + } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 
0000000..e735f05 --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,31 @@ +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod intermediate; +mod intermediate_tokens; +mod bytecode; +mod bytecode_tokens; + +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use intermediate::*; +pub use intermediate_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! indent { + (0, $($tokens:tt)*) => {{ + println!($($tokens)*); + }}; + ($indent:expr, $($tokens:tt)*) => {{ + for _ in 0..$indent { print!(" "); } + println!($($tokens)*); + }}; +} + + diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..e225608 --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,478 @@ +use crate::*; + +use std::collections::VecDeque; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { + SemanticParser::from(syntactic, Namespace::None).parse() +} + +#[derive(Clone)] +enum Namespace { + Macro(String), + Label(String), + None, +} + + +struct SemanticParser { + namespace: Namespace, + syntactic: SyntacticTokenStream, + semantic: Vec<Tracked<SemanticToken>>, + errors: Vec<Tracked<SemanticError>>, +} + +impl SemanticParser { + pub fn from(syntactic: Vec<Tracked<SyntacticToken>>, namespace: Namespace) -> Self { + Self { + namespace, + syntactic: SyntacticTokenStream::from(syntactic), + semantic: Vec::new(), + errors: Vec::new(), + } + } + + fn pull_from(&mut self, mut other: SemanticParser) { + self.errors.append(&mut other.errors); + if let Namespace::Macro(_) = other.namespace { + () + } else { + self.namespace = other.namespace; + } + } + + fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { + match symbol { + ScopedSymbol::Global(name) => match &self.namespace { + Namespace::Macro(_) => { + let error = 
SemanticError::LabelInMacroDefinition; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + Namespace::Label(_) | Namespace::None => { + self.namespace = Namespace::Label(name.clone()); + Some(name) + } + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::SublabelWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { + match symbol { + ScopedSymbol::Global(name) => { + Some(name) + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::LocalSymbolWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + /// Parse the remaining syntactic tokens as a full program. 
+ pub fn parse(mut self) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { + while let Some(token) = self.syntactic.pop() { + if let SyntacticToken::MacroDefinition(definition) = token.value { + let namespace = Namespace::Macro(definition.name.to_string()); + let mut parser = SemanticParser::from(definition.tokens, namespace); + let mut arguments = Vec::new(); + while let Some(argument) = parser.pull_argument_definition() { + arguments.push(argument); + } + let body = parser.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody); + self.pull_from(parser); + let definition = MacroDefinition { name: definition.name, arguments, body }; + let semantic = SemanticToken::MacroDefinition(definition); + self.semantic.push(Tracked::from(semantic, token.source)); + } else { + self.syntactic.unpop(token); + if let Some(token) = self.pull_block_token(SemanticLocation::Program) { + let semantic = SemanticToken::BlockToken(token.value); + self.semantic.push(Tracked::from(semantic, token.source)); + } + } + } + match self.errors.is_empty() { + true => Ok(self.semantic), + false => Err(self.errors), + } + } + + /// Parse the remaining syntactic tokens as a macro definition body. 
+ fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_macro_definition_body_token() { + tokens.push(token); + } + } + if tokens.is_empty() { + MacroDefinitionBody::Block(Vec::new()) + } else if tokens.len() == 1 { + tokens.pop().unwrap() + } else { + let mut block_tokens = Vec::new(); + for token in tokens { + match token { + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedInteger(location); + let tracked = Tracked::from(error, integer.source); + self.errors.push(tracked); + } + MacroDefinitionBody::Block(mut tokens) => { + block_tokens.append(&mut tokens); + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + block_tokens.push(Tracked::from(token, invocation.source)); + } + } + } + MacroDefinitionBody::Block(block_tokens) + } + } + + /// Attempt to pull a MacroDefinitionBody token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + /// Each BodyToken is wrapped in a separate MacroDefinitionBody. 
+ fn pull_macro_definition_body_token(&mut self) -> Option<MacroDefinitionBody> { + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::LabelDefinition(symbol) => { + let name = self.resolve_label_name(symbol, &source)?; + let token = BlockToken::LabelDefinition(name); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::MacroDefinition(_) => { + let error = SemanticError::MisplacedMacroDefinition; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::IntegerLiteral(value) => { + let token = IntegerToken::IntegerLiteral(value); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::StringLiteral(_) => { + let error = SemanticError::MisplacedStringLiteral; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::WordTemplate(word_template) => { + let token = BlockToken::WordTemplate(word_template); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let token = BlockToken::Block(tokens); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let token = IntegerToken::Expression(expression); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let arguments = self.pull_all_invocation_arguments(); + // Extend invocation source span to cover all arguments. 
+ let mut source = source; + if let Some(last) = arguments.last() { + source.in_merged.end = last.source.in_merged.end; + if let Some(last_in_source) = &last.source.in_source { + if let Some(in_source) = &mut source.in_source { + in_source.end = last_in_source.end.clone(); + } + } + } + let invocation = Invocation { name, arguments }; + let tracked = Tracked::from(invocation, source); + Some(MacroDefinitionBody::Invocation(tracked)) + } + SyntacticToken::Separator => { + let error = SemanticError::MisplacedSeparator; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::Condition => { + let conditional = self.pull_conditional_block()?; + let token = BlockToken::ConditionalBlock(Box::new(conditional)); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Pin => { + let integer = self.pull_integer_token(SemanticLocation::PinAddress)?; + let token = BlockToken::PinnedAddress(integer); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + } + } + + /// Attempt to pull an integer token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_integer_token(&mut self, location: SemanticLocation) -> Option<Tracked<IntegerToken>> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Integer(integer) => { + Some(integer) + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to an integer invocation. + let token = IntegerToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + let token = tokens.pop().unwrap(); + let error = SemanticError::ExpectedInteger(location); + self.errors.push(Tracked::from(error, token.source)); + None + } + } + } + + /// Attempt to pull a BlockToken from the token stream. 
+ /// Invalid values are noted and dropped, and a None is returned. + fn pull_block_token(&mut self, location: SemanticLocation) -> Option<Tracked<BlockToken>> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + tokens.pop() + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedBlock(location); + self.errors.push(Tracked::from(error, integer.source)); + None + } + } + } + + /// Parse the remaining syntactic tokens as the contents of a block. + fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_block_token(SemanticLocation::BlockLiteral) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as a list of integer tokens. + fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_integer_token(location) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as the contents of an expression. 
+ fn parse_expression(&mut self) -> Expression { + let mut tokens = Vec::new(); + for token in self.parse_integer_list(SemanticLocation::Expression) { + let source = token.source; + match token.value { + IntegerToken::IntegerLiteral(value) => { + let integer = Box::new(IntegerToken::IntegerLiteral(value)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Expression(expression) => { + let integer = Box::new(IntegerToken::Expression(expression)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Invocation(invocation) => { + // Parse the invocation as an operator instead. + if invocation.arguments.is_empty() { + if let Some(operator) = Operator::from_str(&invocation.name) { + let token = ExpressionToken::Operator(operator); + tokens.push(Tracked::from(token, source)); + continue; + } + } + // Parse the invocation as an invocation. + let integer = Box::new(IntegerToken::Invocation(invocation)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + } + } + Expression { tokens } + } + + /// Attempt to pull a conditional block from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_conditional_block(&mut self) -> Option<ConditionalBlock> { + let predicate = self.pull_integer_token(SemanticLocation::ConditionPredicate)?; + let body = self.pull_block_token(SemanticLocation::ConditionBody)?; + Some(ConditionalBlock { predicate, body }) + } + + /// Attempt to pull an invocation argument from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. 
+ fn pull_invocation_argument(&mut self) -> Option<Tracked<InvocationArgument>> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::StringLiteral(string_literal) => { + let argument = InvocationArgument::String(string_literal); + Some(Tracked::from(argument, source)) + } + SyntacticToken::IntegerLiteral(value) => { + let integer = IntegerToken::IntegerLiteral(value); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let integer = IntegerToken::Expression(expression); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let block = BlockToken::Block(tokens); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let invocation = Invocation { name, arguments: Vec::new() }; + let argument = InvocationArgument::Invocation(invocation); + Some(Tracked::from(argument, source)) + } + SyntacticToken::WordTemplate(word_template) => { + let block = BlockToken::WordTemplate(word_template); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + _ => { + let error = SemanticError::InvalidInvocationArgument; + self.errors.push(Tracked::from(error, source)); + return None; + } + } + } + + fn pull_all_invocation_arguments(&mut self) -> Vec<Tracked<InvocationArgument>> { + let mut arguments = Vec::new(); + while let Some(argument) = 
self.pull_invocation_argument() { + arguments.push(argument); + } + return arguments; + } + + /// Attempt to pull an argument definition from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. + fn pull_argument_definition(&mut self) -> Option<Tracked<ArgumentDefinition>> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::Symbol(ScopedSymbol::Global(name)) => { + let variant = ArgumentType::Integer; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + SyntacticToken::BlockLiteral(mut tokens) => { + if tokens.len() == 1 { + let token = tokens.pop().unwrap(); + if let SyntacticToken::Symbol(ScopedSymbol::Global(name)) = token.value { + let variant = ArgumentType::Block; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + } + } + _ => (), + }; + let error = SemanticError::InvalidArgumentDefinition; + self.errors.push(Tracked::from(error, source)); + return None; + } +} + + + +struct SyntacticTokenStream { + tokens: VecDeque<Tracked<SyntacticToken>>, +} + +impl SyntacticTokenStream { + pub fn from<T: Into<VecDeque<Tracked<SyntacticToken>>>>(tokens: T) -> Self { + Self { tokens: tokens.into() } + } + + pub fn pop(&mut self) -> Option<Tracked<SyntacticToken>> { + self.tokens.pop_front() + } + + pub fn pop_if(&mut self, predicate: fn(&Tracked<SyntacticToken>) -> bool) -> Option<Tracked<SyntacticToken>> { + match predicate(self.tokens.front()?) 
/// The kind of value that a macro argument accepts.
#[derive(PartialEq)]
pub enum ArgumentType {
    /// The argument takes an integer value.
    Integer,
    /// The argument takes a block value.
    Block,
}

/// Formats the type as a noun phrase with its article ("an integer",
/// "a block") so that it can be embedded directly in an error message.
impl std::fmt::Display for ArgumentType {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::Integer => "an integer",
            Self::Block => "a block",
        };
        formatter.write_str(description)
    }
}
+} + +pub struct Invocation { + pub name: String, + pub arguments: Vec<Tracked<InvocationArgument>>, +} + +pub enum InvocationArgument { + String(StringLiteral), + IntegerToken(IntegerToken), + BlockToken(BlockToken), + Invocation(Invocation), +} + +pub enum SemanticError { + MisplacedStringLiteral, + MisplacedListLiteral, + MisplacedSeparator, + MisplacedMacroDefinition, + + ExpectedInteger(SemanticLocation), + ExpectedBlock(SemanticLocation), + + InvalidArgumentDefinition, + InvalidInvocationArgument, + + LabelInMacroDefinition, + SublabelWithoutNamespace, + LocalSymbolWithoutNamespace, +} + +#[derive(Clone, Copy)] +pub enum SemanticLocation { + MacroDefinitionBody, + Expression, + ConditionPredicate, + ConditionBody, + Program, + BlockLiteral, + PinAddress, +} + +impl std::fmt::Display for SemanticLocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + SemanticLocation::Expression => + "inside this expression", + SemanticLocation::ConditionPredicate => + "as the predicate of this conditional block", + SemanticLocation::ConditionBody => + "as the body of this conditional block", + SemanticLocation::Program => + "at the outermost level of the program", + SemanticLocation::BlockLiteral => + "inside this block literal", + SemanticLocation::MacroDefinitionBody => + "inside the body of this macro definition", + SemanticLocation::PinAddress => + "as the address of this pin", + }; + write!(f, "{string}") + } +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { + for error in errors { + report_semantic_error(error, source_code); + } +} + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SemanticError::MisplacedStringLiteral => + "A string literal can only be used as an invocation argument", + 
SemanticError::MisplacedListLiteral => + "A list literal can only be used as an invocation argument", + SemanticError::MisplacedSeparator => + "A separator can only be used to construct an argument list", + SemanticError::MisplacedMacroDefinition => + "A macro definition must be used at the outermost level of the program", + + SemanticError::ExpectedInteger(location) => + &format!("An integer value was expected {location}"), + SemanticError::ExpectedBlock(location) => + &format!("A block value was expected {location}"), + + SemanticError::InvalidArgumentDefinition => + "Argument definitions must be in the form 'name' or '{{name}}'", + SemanticError::InvalidInvocationArgument => + "This token cannot be used in an invocation argument", + + SemanticError::LabelInMacroDefinition => + &format!("Only sublabels can be defined inside macro definitions"), + SemanticError::SublabelWithoutNamespace => + &format!("Sublabel was not defined inside a macro definition or after a label"), + SemanticError::LocalSymbolWithoutNamespace => + &format!("Local symbol was not defined inside a macro definition or after a label"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken) { + match token { + SemanticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", definition.name); + for argument in &definition.arguments { + print_argument_definition(i+1, argument); + } + match &definition.body { + MacroDefinitionBody::Integer(integer) => { + print_integer_token(i+1, integer) + } + MacroDefinitionBody::Block(tokens) => { + print_block(i+1, tokens); + } + MacroDefinitionBody::Invocation(invocation) => { + print_invocation(i+1, invocation); + } + } + } + SemanticToken::BlockToken(block) => print_block_token(0, block), + } +} + +fn print_argument_definition(i: usize, argument: &ArgumentDefinition) { + match argument.variant { + ArgumentType::Integer => { + indent!(i, "Argument({}, integer)", 
argument.name) + } + ArgumentType::Block => { + indent!(i, "Argument({}, block)", argument.name) + } + } +} + +fn print_block_token(i: usize, block: &BlockToken) { + match block { + BlockToken::Invocation(invocation) => { + print_invocation(i, invocation) + } + BlockToken::LabelDefinition(name) => { + indent!(i, "LabelDefinition({name})") + } + BlockToken::Block(block) => { + print_block(i, block); + } + BlockToken::PinnedAddress(integer) => { + indent!(i, "PinnedAddress"); + print_integer_token(i+1, integer); + } + BlockToken::ConditionalBlock(condition) => { + indent!(i, "ConditionalBlock"); + indent!(i+1, "Predicate"); + print_integer_token(i+2, &condition.predicate); + indent!(i+1, "Body"); + print_block_token(i+2, &condition.body); + } + BlockToken::WordTemplate(word_template) => { + indent!(i, "WordTemplate({word_template})") + } + } +} + +fn print_block(i: usize, tokens: &[Tracked<BlockToken>]) { + indent!(i, "Block"); + for token in tokens { + print_block_token(i+1, token); + } +} + +fn print_invocation(i: usize, invocation: &Invocation) { + indent!(i, "Invocation({})", invocation.name); + for argument in &invocation.arguments { + print_invocation_argument(i+1, argument); + } +} + +fn print_invocation_argument(i: usize, argument: &InvocationArgument) { + match &argument { + InvocationArgument::String(string_literal) => { + indent!(i, "String({string_literal})") + } + InvocationArgument::IntegerToken(integer) => { + print_integer_token(i, integer) + } + InvocationArgument::BlockToken(block) => { + print_block_token(i, block) + } + InvocationArgument::Invocation(invocation) => { + print_invocation(i, invocation) + } + } +} + +fn print_integer_token(i: usize, integer: &IntegerToken) { + match integer { + IntegerToken::IntegerLiteral(value) => { + indent!(i, "IntegerValue({value})") + } + IntegerToken::Expression(expression) => { + print_expression(i, expression) + } + IntegerToken::Invocation(invocation) => { + print_invocation(i, invocation) + } + } +} + +fn 
print_expression(i: usize, expression: &Expression) { + indent!(i, "Expression"); + for token in &expression.tokens { + match &token.value { + ExpressionToken::IntegerToken(integer) => { + print_integer_token(i+1, &integer) + } + ExpressionToken::Invocation(invocation) => { + print_invocation(i+1, &invocation); + } + ExpressionToken::Operator(operator) => { + indent!(i+1, "Operator({operator})") + } + } + } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..2e7f959 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,323 @@ +use crate::*; + +use assembler::Tokeniser; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path)) +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']); + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + macro_rules! 
push_err { + ($error:expr) => {{ + push_err!($error, t.get_source()); + }}; + ($error:expr, $source:expr) => {{ + errors.push(Tracked::from($error, $source)); + continue; + }}; + } + + loop { + t.eat_whitespace(); + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let token = match c { + '"' => { + let source = t.get_source(); + t.mark_child(); + let is_any_close = |t: &mut Tokeniser| { + t.eat_char() == Some('"') + }; + if let Some(_) = t.track_until(is_any_close) { + let child = t.tokenise_child_span(); + SyntacticToken::StringLiteral(parse_string_literal(child)) + } else { + push_err!(SyntacticError::UnterminatedStringLiteral, source); + } + } + '\'' => { + let source = t.get_source(); + let is_any_close = |t: &mut Tokeniser| { + t.eat_char() == Some('\'') + }; + if let Some(string) = t.track_until(is_any_close) { + let mut chars: Vec<char> = string.chars().collect(); + if chars.len() == 1 { + let value = parse_char(chars.pop().unwrap()); + SyntacticToken::IntegerLiteral(value) + } else { + t.mark_end(); + push_err!(SyntacticError::ExpectedSingleCharacter, t.get_source()); + } + } else { + push_err!(SyntacticError::UnterminatedCharacterLiteral, source); + } + } + + '{' => { + let source = t.get_source(); + t.mark_child(); + let mut depth = 1; + let is_matching_close = |t: &mut Tokeniser| { + match t.eat_char() { + Some('{') => { depth += 1; false } + Some('}') => { depth -= 1; depth == 0 } + _ => false, + } + }; + if let Some(_) = t.track_until(is_matching_close) { + let child = t.tokenise_child_span(); + match parse_syntactic_from_tokeniser(child) { + Ok(tokens) => SyntacticToken::BlockLiteral(tokens), + Err(mut parse_errors) => { + errors.append(&mut parse_errors); + continue; + } + } + } else { + push_err!(SyntacticError::UnterminatedBlock, source); + } + } + '[' => { + let source = t.get_source(); + t.mark_child(); + let mut depth = 1; + let is_matching_close = |t: &mut Tokeniser| { + match t.eat_char() { + Some('[') => { depth += 1; false } + 
Some(']') => { depth -= 1; depth == 0 } + _ => false, + } + }; + if let Some(_) = t.track_until(is_matching_close) { + let child = t.tokenise_child_span(); + match parse_syntactic_from_tokeniser(child) { + Ok(tokens) => SyntacticToken::Expression(tokens), + Err(mut parse_errors) => { + errors.append(&mut parse_errors); + continue; + } + } + } else { + push_err!(SyntacticError::UnterminatedExpression, source); + } + } + '(' => { + let source = t.get_source(); + let mut depth = 1; + let is_matching_close = |t: &mut Tokeniser| { + match t.eat_char() { + Some('(') => { depth += 1; false } + Some(')') => { depth -= 1; depth == 0 } + _ => false, + } + }; + if let Some(string) = t.track_until(is_matching_close) { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + } + } + continue; + } else { + push_err!(SyntacticError::UnterminatedComment, source); + } + } + '%' => { + let name = t.eat_token(); + let source = t.get_source(); + t.mark_child(); + let is_any_close = |t: &mut Tokeniser| t.eat_char() == Some(';'); + if let Some(_) = t.track_until(is_any_close) { + let child = t.tokenise_child_span(); + match parse_syntactic_from_tokeniser(child) { + Ok(tokens) => { + let name = Tracked::from(name, source); + let def = SyntacticMacroDefinition { name, tokens }; + SyntacticToken::MacroDefinition(def) + } + Err(mut parse_errors) => { + errors.append(&mut parse_errors); + continue; + } + } + } else { + push_err!(SyntacticError::UnterminatedMacroDefinition(name), source); + } + } + + '}' => push_err!(SyntacticError::UnmatchedBlockTerminator), + ']' => push_err!(SyntacticError::UnmatchedExpressionTerminator), + ')' => push_err!(SyntacticError::UnmatchedCommentTerminator), + ';' => push_err!(SyntacticError::UnmatchedMacroTerminator), + + '@' => 
/// Parse the digits of an integer literal in the given radix.
///
/// Underscores are ignored, so they can be used as digit separators.
///
/// # Errors
/// The error value tells the caller how to treat the token:
/// - `Err(true)`: the token is a number, but its value does not fit in an
///   `isize`. This includes values too large to fit in a `usize`.
/// - `Err(false)`: the token is not a number in this radix (for example it
///   is empty or contains a character that is not a digit).
fn parse_integer_literal(token: &str, radix: u32) -> Result<isize, bool> {
    let digits = token.replace('_', "");
    match usize::from_str_radix(&digits, radix) {
        Ok(value) => isize::try_from(value).map_err(|_| true),
        // An overflowing literal is still a number; reporting it as
        // "not a number" would let the caller mistake it for a symbol.
        Err(error) => Err(matches!(error.kind(), std::num::IntErrorKind::PosOverflow)),
    }
}
+ string.push(c); + chars.push(Tracked::from(parse_char(c), t.get_source())); + t.mark_start(); + } + StringLiteral { string, chars } +} + +fn parse_char(c: char) -> isize { + c as u32 as isize +} + + +fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> { + let mut value = 0; // Value of the whole word template. + let mut value_width = 0; // Bit width of the whole word template. + let mut field_width = 0; // Width of the current bit field. + let mut field_name = '\0'; // Name of the current bit field. + let mut fields: Vec<Tracked<BitField>> = Vec::new(); + let mut errors: Vec<Tracked<SyntacticError>> = Vec::new(); + + macro_rules! push_field { + () => { + if fields.iter().any(|f| f.name == field_name) { + let error = SyntacticError::DuplicateFieldNameInWord(field_name); + errors.push(Tracked::from(error, t.get_source())); + } else { + let field = BitField { name: field_name, width: field_width, shift: 0}; + fields.push(Tracked::from(field, t.get_source())); + } + }; + } + + while let Some(c) = t.eat_char() { + // Ignore underscores. + if c == '_' { + t.mark.undo(); + continue; + } + + // Add a bit to the value; + value <<= 1; + value_width += 1; + for field in &mut fields { + field.shift += 1; + } + + // Extend the current field. + if c == field_name { + field_width += 1; + continue; + } + + // Commit the current field. + if field_width > 0 { + t.mark_end_prev(); + push_field!(); + field_width = 0; + field_name = '\0'; + } + + // Parse bit literals. + if c == '0' { + continue; + } + if c == '1' { + value |= 1; + continue; + } + + t.mark_start_prev(); + if c.is_alphabetic() { + field_name = c; + field_width = 1; + continue; + } else { + t.mark_end(); + let error = SyntacticError::InvalidCharacterInWord(c); + errors.push(Tracked::from(error, t.get_source())); + } + } + + // Commit the final field. 
+ for field in &mut fields { + field.shift += 1; + } + if field_width > 0 { + t.mark_end(); + push_field!(); + } + + match errors.is_empty() { + true => Ok(WordTemplate { value, width: value_width, fields }), + false => Err(errors), + } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..eabf34b --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,160 @@ +use crate::*; + +pub enum SyntacticToken { + LabelDefinition(ScopedSymbol), + MacroDefinition(SyntacticMacroDefinition), + + IntegerLiteral(isize), + StringLiteral(StringLiteral), + WordTemplate(WordTemplate), + + BlockLiteral(Vec<Tracked<SyntacticToken>>), + Expression(Vec<Tracked<SyntacticToken>>), + + Symbol(ScopedSymbol), + + Separator, + Condition, + Pin, +} + +pub struct SyntacticMacroDefinition { + pub name: Tracked<String>, + pub tokens: Vec<Tracked<SyntacticToken>>, +} + +pub struct StringLiteral { + pub string: String, + pub chars: Vec<Tracked<isize>>, +} + +impl std::fmt::Display for StringLiteral { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + self.string.fmt(f) + } +} + +pub enum ScopedSymbol { + Local(String), + Global(String), +} + +impl std::fmt::Display for ScopedSymbol { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match self { + ScopedSymbol::Local(name) => write!(f, "~{name}"), + ScopedSymbol::Global(name) => write!(f, "{name}"), + } + } +} + + +pub enum SyntacticError { + UnterminatedBlock, + UnterminatedExpression, + UnterminatedComment, + UnterminatedCharacterLiteral, + UnterminatedStringLiteral, + UnterminatedMacroDefinition(String), + + UnmatchedBlockTerminator, + UnmatchedExpressionTerminator, + UnmatchedCommentTerminator, + UnmatchedMacroTerminator, + + ExpectedSingleCharacter, + + DuplicateFieldNameInWord(char), + InvalidCharacterInWord(char), + + InvalidDecimalLiteral(String), + InvalidHexadecimalLiteral(String), + InvalidBinaryLiteral(String), 
+} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { + for error in errors { + report_syntactic_error(error, source_code); + } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SyntacticError::UnterminatedBlock => + "Block was not terminated, add a '}}' character to terminate", + SyntacticError::UnterminatedExpression => + "Expression was not terminated, add a ']' character to terminate", + SyntacticError::UnterminatedComment => + "Comment was not terminated, add a ')' character to terminate", + SyntacticError::UnterminatedCharacterLiteral => + "Character was not terminated, add a ' character to terminate", + SyntacticError::UnterminatedStringLiteral => + "String was not terminated, add a '\"' character to terminate", + SyntacticError::UnterminatedMacroDefinition(name) => + &format!("The '{name}' macro definition was not terminated, add a ';' character to terminate"), + + SyntacticError::UnmatchedBlockTerminator => + "Attempted to terminate a block, but no block was in progress", + SyntacticError::UnmatchedExpressionTerminator => + "Attempted to terminate an expression, but no expression was in progress", + SyntacticError::UnmatchedCommentTerminator => + "Attempted to terminate a comment, but no comment was in progress", + SyntacticError::UnmatchedMacroTerminator => + "Attempted to terminate a macro definition, but no macro definition was in progress", + + SyntacticError::ExpectedSingleCharacter => + "A character literal must contain exactly one character", + + SyntacticError::DuplicateFieldNameInWord(name) => + &format!("The field '{name}' has already been used in this word"), + SyntacticError::InvalidCharacterInWord(c) => + &format!("The character '{c}' cannot be used in a word"), + + SyntacticError::InvalidDecimalLiteral(string) => + &format!("The string '{string}' is 
not a valid decimal literal"), + SyntacticError::InvalidHexadecimalLiteral(string) => + &format!("The string '{string}' is not a valid hexadecimal literal"), + SyntacticError::InvalidBinaryLiteral(string) => + &format!("The string '{string}' is not a valid binary literal"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { + match token { + SyntacticToken::LabelDefinition(symbol) => indent!(i, "LabelDefinition({symbol})"), + SyntacticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", definition.name); + for token in &definition.tokens { + print_syntactic_token(i+1, token); + } + } + + SyntacticToken::IntegerLiteral(value) => indent!(i, "IntegerLiteral({value})"), + SyntacticToken::StringLiteral(literal) => indent!(i, "StringLiteral({literal})"), + SyntacticToken::WordTemplate(template) => indent!(i, "WordTemplate({template})"), + + SyntacticToken::BlockLiteral(tokens) => { + indent!(i, "BlockLiteral"); + for token in tokens { + print_syntactic_token(i+1, token); + } + } + SyntacticToken::Expression(tokens) => { + indent!(i, "Expression"); + for token in tokens { + print_syntactic_token(i+1, token); + } + } + + SyntacticToken::Symbol(symbol) => indent!(i, "Symbol({symbol})"), + + SyntacticToken::Separator => indent!(i, "Separator"), + SyntacticToken::Condition => indent!(i, "Condition"), + SyntacticToken::Pin => indent!(i, "Pin"), + } +} diff --git a/src/tokens/assembler.rs b/src/tokens/assembler.rs deleted file mode 100644 index 048062b..0000000 --- a/src/tokens/assembler.rs +++ /dev/null @@ -1,162 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub enum AssembledToken { - Word(AssembledWord), - LabelDefinition(LabelDefinition), - PinnedAddress(PinnedAddress), - Error(AssemblerError), -} - -#[derive(Clone)] -pub struct AssembledWord { - pub source: SourceSpan, - pub value: usize, - pub bits: usize, - pub fields: Vec<AssembledField>, - pub errors: 
Vec<AssemblerError>, -} - -impl AssembledWord { - pub fn count(&self) -> usize { - // If there is at least one field, and all fields have empty string - // values, then count will be zero. Else count will be at least one. - let mut count = 0; - let mut all_strings = !self.fields.is_empty(); - for field in &self.fields { - if let IntegerArgument::String(string) = &field.value { - count = std::cmp::max(count, string.chars.len()); - } else { - all_strings = false; - } - } - if !all_strings { - count = std::cmp::max(count, 1); - } - return count; - } -} - -#[derive(Clone)] -pub struct AssembledField { - pub source: SourceSpan, - pub value: IntegerArgument, - /// Length of field in bits - pub bits: usize, - /// Distance to left-shift field in value - pub shift: usize, -} - -#[derive(Clone)] -pub struct AssembledExpression { - pub source: SourceSpan, - pub tokens: Vec<AssembledExpressionToken>, -} - -#[derive(Clone)] -pub enum AssembledExpressionToken { - Integer(TrackedInteger), - LabelReference(Tracked<String>), - Operator(Operator), - Expression(Box<AssembledExpression>), -} - -#[derive(Clone)] -pub enum Argument { - Integer(IntegerArgument), - Block(Vec<AssembledToken>), -} - -#[derive(Clone)] -pub enum IntegerArgument { - LabelReference(Tracked<String>), - Integer(TrackedInteger), - Expression(AssembledExpression), - String(TrackedString), -} - -#[derive(Clone)] -pub struct AssemblerError { - pub source: SourceSpan, - pub variant: AssemblerErrorVariant, -} - -#[derive(Clone, Debug)] -pub enum AssemblerErrorVariant { - DefinitionNotFound(String), - NotAnInteger, - NotABlock, - IntegerInBlock, - StringInExpression, - /// expected, received - IncorrectArgumentCount(usize, usize), - /// expected, received, index - IncorrectArgumentType(ArgumentVariant, ArgumentVariant), -} - -// ------------------------------------------------------------------------ // - -macro_rules! 
indent { - ($indent:expr => $($tokens:tt)*) => {{ - for _ in 0..$indent { print!(" "); } - println!($($tokens)*); - }}; -} - -pub fn print_assembled_tokens(tokens: &[AssembledToken]) { - for token in tokens { - match token { - AssembledToken::LabelDefinition(definition) => { - println!("LABEL {}", definition.name) - } - AssembledToken::PinnedAddress(address) => { - println!("PINNED {}", address.address) - } - AssembledToken::Word(word) => { - println!("WORD {:b}", word.value); - for field in &word.fields { - print!(" FIELD ({} << {}) ", field.bits, field.shift); - match &field.value { - IntegerArgument::LabelReference(name) => { - println!("LABEL '{name}'"); - } - IntegerArgument::Integer(integer) => { - println!("INTEGER '{}'", integer.value); - } - IntegerArgument::String(string) => { - println!("STRING {string}"); - } - IntegerArgument::Expression(expr) => { - println!("EXPRESSION"); - print_assembled_expression(2, expr); - } - } - } - } - AssembledToken::Error(error) => { - println!("ERROR {:?}", error.variant) - } - } - } -} - -fn print_assembled_expression(indent: usize, expr: &AssembledExpression) { - for token in &expr.tokens { - match token { - AssembledExpressionToken::Integer(integer) => { - indent!(indent => "INTEGER {}", integer.value) - } - AssembledExpressionToken::LabelReference(name) => { - indent!(indent => "LABEL '{name}'") - } - AssembledExpressionToken::Operator(operator) => { - indent!(indent => "OPERATOR {operator:?}") - } - AssembledExpressionToken::Expression(expr) => { - indent!(indent => "EXPRESSION"); - print_assembled_expression(indent+1, expr); - } - } - } -} diff --git a/src/tokens/bytecode.rs b/src/tokens/bytecode.rs deleted file mode 100644 index 9ac340e..0000000 --- a/src/tokens/bytecode.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::*; - - -pub struct Bytecode { - pub words: Vec<Word>, - pub errors: Vec<BytecodeError>, -} - -#[derive(Clone, Copy)] -pub struct Word { - pub bits: usize, - pub value: usize, -} - -impl 
std::fmt::Display for Word { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - for i in (0..self.bits).rev() { - let is_first_bit = i+1 == self.bits; - if !is_first_bit && (i+1) % 4 == 0 { - write!(f, "_")?; - } - match (self.value >> i) & 1 { - 0 => write!(f, "0")?, - _ => write!(f, "1")?, - } - } - if self.bits == 0 { - write!(f, "0")?; - } - return Ok(()); - } -} - -pub struct BytecodeError { - pub source: SourceSpan, - pub variant: BytecodeErrorVariant, -} - -pub enum BytecodeErrorVariant { - DefinitionNotFound(String), - DuplicateLabelDefinition(String), - /// pin, real - PinnedAddressBacktrack(usize, usize), - /// expected, received - ValueTooLarge(usize, usize), - StackUnderflow, - MultipleReturnValues, - NoReturnValue, -} diff --git a/src/tokens/expression.rs b/src/tokens/expression.rs deleted file mode 100644 index 1d8a336..0000000 --- a/src/tokens/expression.rs +++ /dev/null @@ -1,78 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct Expression { - pub source: SourceSpan, - pub tokens: Vec<ExpressionToken>, -} - -#[derive(Clone)] -pub struct ExpressionToken { - pub source: SourceSpan, - pub variant: ExpressionTokenVariant, -} - -#[derive(Clone)] -pub enum ExpressionTokenVariant { - Invocation(String), - Literal(isize), - Operator(Operator), - Error(ExpressionParseError), -} - -#[derive(Clone, Copy, Debug)] -pub enum Operator { - Equal, - NotEqual, - LessThan, - GreaterThan, - LessThanEqual, - GreaterThanEqual, - Add, - Subtract, - LeftShift, - RightShift, - And, - Or, - Xor, - Not, -} - -#[derive(Clone)] -pub enum ExpressionParseError { - InvalidHexadecimalLiteral(String), -} - -impl std::fmt::Debug for Expression { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - for (i, token) in self.tokens.iter().enumerate() { - let string = match &token.variant { - ExpressionTokenVariant::Invocation(name) => name, - ExpressionTokenVariant::Literal(value) => &value.to_string(), - 
ExpressionTokenVariant::Operator(operator) => match operator { - Operator::Equal => "=", - Operator::NotEqual => "!=", - Operator::LessThan => "<", - Operator::GreaterThan => ">", - Operator::LessThanEqual => "<=", - Operator::GreaterThanEqual => ">=", - Operator::Add => "+", - Operator::Subtract => "-", - Operator::LeftShift => "<<", - Operator::RightShift => ">>", - Operator::And => "&", - Operator::Or => "|", - Operator::Xor => "^", - Operator::Not => "~", - } - ExpressionTokenVariant::Error(_) => "<error>", - }; - match i { - 0 => write!(f, "{string}")?, - _ => write!(f, " {string}")?, - } - } - return Ok(()); - } -} diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs deleted file mode 100644 index 53ccc6e..0000000 --- a/src/tokens/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -mod expression; -mod packed_binary_literal; -mod tracked_integer; -mod tracked; - -pub use expression::*; -pub use packed_binary_literal::*; -pub use tracked_integer::*; -pub use tracked::*; - -mod syntactic; -mod semantic; -mod assembler; -mod bytecode; - -pub use syntactic::*; -pub use semantic::*; -pub use assembler::*; -pub use bytecode::*; diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs deleted file mode 100644 index 225cd6b..0000000 --- a/src/tokens/semantic.rs +++ /dev/null @@ -1,192 +0,0 @@ -use crate::*; - -use indexmap::IndexMap; - - -/// The entire semantic program, ready to generate bytecode. -pub struct SemanticProgram { - pub macro_definitions: IndexMap<String, MacroDefinition>, - pub label_definitions: IndexMap<String, LabelDefinition>, - pub body: Vec<SemanticToken>, -} - -/// A symbol definition. 
-pub struct MacroDefinition { - pub source: SourceSpan, - pub arguments: Vec<ArgumentDefinition>, - pub value: Value, - pub errors: Vec<SemanticParseError>, -} - -pub struct ArgumentDefinition { - pub name: String, - pub source: SourceSpan, - pub variant: ArgumentVariant, -} - -#[derive(PartialEq, Clone, Copy, Debug)] -pub enum ArgumentVariant { - Integer, - Block, -} - -pub struct ArgumentInvocation { - pub source: SourceSpan, - pub value: Value, -} - -pub enum Value { - Integer(Integer), - Block(Vec<SemanticToken>), - Invocation(Invocation), -} - -pub enum Integer { - Literal(TrackedInteger), - String(TrackedString), - Expression(Expression), - LabelReference(Tracked<String>), -} - -pub enum SemanticToken { - Word(PackedBinaryLiteral), - Invocation(Invocation), - LabelDefinition(LabelDefinition), - PinnedAddress(PinnedAddress), - Error(SemanticParseError), -} - -pub struct Invocation { - pub name: String, - pub source: SourceSpan, - pub arguments: Vec<ArgumentInvocation>, - pub errors: Vec<SemanticParseError>, -} - -#[derive(Clone)] -pub struct LabelDefinition { - pub source: SourceSpan, - pub name: String, -} - -#[derive(Clone)] -pub struct PinnedAddress { - pub source: SourceSpan, - pub address: usize, -} - -pub struct SemanticParseError { - pub source: SourceSpan, - pub variant: SemanticParseErrorVariant, -} - -pub enum SemanticParseErrorVariant { - UnterminatedMacroDefinition(String), - UnterminatedBlock, - InvalidToken, -} - - -impl std::fmt::Display for ArgumentVariant { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - match self { - ArgumentVariant::Integer => write!(f, "integer"), - ArgumentVariant::Block => write!(f, "block"), - } - } -} - -// ------------------------------------------------------------------------ // - -macro_rules! 
indent { - ($indent:expr => $($tokens:tt)*) => {{ - for _ in 0..$indent { print!(" "); } - println!($($tokens)*); - }}; -} - -impl SemanticProgram { - pub fn print_definitions(&self) { - for (name, definition) in &self.macro_definitions { - let variant = match &definition.value { - Value::Integer(_) => "INTEGER", - Value::Block(_) => "BLOCK", - Value::Invocation(_) => "INVOCATION", - }; - println!("DEFINE {variant} '{name}'"); - for argument in &definition.arguments { - self.print_argument_definition(argument); - } - match &definition.value { - Value::Integer(integer) => - self.print_integer(1, integer), - Value::Block(block) => - self.print_block(1, block), - Value::Invocation(invocation) => - indent!(1 => "INVOCATION '{}'", invocation.name), - }; - println!(); - } - - println!("LABELS"); - for (name, _) in &self.label_definitions { - println!(" @{name}"); - } - println!(); - - self.print_block(0, &self.body); - } - - fn print_argument_definition(&self, argument: &ArgumentDefinition) { - let variant = match argument.variant { - ArgumentVariant::Integer => "INTEGER", - ArgumentVariant::Block => "BLOCK", - }; - println!(" ARGUMENT {variant} '{}'", argument.name); - } - - fn print_integer(&self, indent: usize, integer: &Integer) { - match &integer { - Integer::Literal(value) => - indent!(indent => "LITERAL {value}"), - Integer::Expression(expr) => - indent!(indent => "EXPRESSION [{expr:?}]"), - Integer::String(string) => - indent!(indent => "STRING '{string}'"), - Integer::LabelReference(name) => - indent!(indent => "LABEL REFERENCE '{name}'"), - } - } - - fn print_block(&self, indent: usize, block: &[SemanticToken]) { - indent!(indent => "BLOCK"); - for semantic_token in block { - match &semantic_token { - SemanticToken::Word(word) => - indent!(indent+1 => "WORD #{word}"), - SemanticToken::Invocation(invocation) => - self.print_invocation(indent+1, invocation), - SemanticToken::LabelDefinition(definition) => - indent!(indent+1 => "LABEL DEFINITION @{}", 
definition.name), - SemanticToken::PinnedAddress(addr) => - indent!(indent+1 => "PINNED ADDRESS {}", addr.address), - SemanticToken::Error(_) => - indent!(indent+1 => "ERROR"), - } - } - } - - fn print_invocation(&self, indent: usize, invocation: &Invocation) { - indent!(indent => "INVOCATION '{}'", invocation.name); - for argument in &invocation.arguments { - match &argument.value { - Value::Integer(integer) => - self.print_integer(indent+1, integer), - Value::Block(block) => - self.print_block(indent+1, block), - Value::Invocation(invocation) => - self.print_invocation(indent+1, invocation), - }; - } - } -} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs deleted file mode 100644 index 780c950..0000000 --- a/src/tokens/syntactic.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::*; - - -pub struct SyntacticToken { - pub source: SourceSpan, - pub variant: SyntacticTokenVariant, -} - -pub enum SyntacticTokenVariant { - LabelDefinition(String), - MacroDefinition(String), - MacroDefinitionTerminator, - - IntegerLiteral(isize), - PackedBinaryLiteral(PackedBinaryLiteral), - PinnedAddress(usize), - - Expression(Expression), - - String(TrackedString), - - BlockOpen, - BlockClose, - Separator, - - Symbol(String), - - Error(SyntacticParseError), -} - -#[derive(Clone)] -pub struct TrackedString { - pub source: SourceSpan, - pub string: String, - pub chars: Vec<Tracked<char>>, -} - -impl std::fmt::Display for TrackedString { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - self.string.fmt(f) - } -} - -#[derive(Debug)] -pub enum SyntacticParseError { - InvalidHexadecimalLiteral(String), - InvalidDecimalLiteral(String), - InvalidSymbolIdentifier(String), - UnterminatedComment, - UnterminatedString, - UnterminatedExpression, - LabelInMacroDefinition, -} - - -impl std::fmt::Debug for SyntacticToken { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - use SyntacticTokenVariant::*; - let start = 
&self.source.in_merged; - let name = match &self.variant { - LabelDefinition(name) => format!("LabelDefinition({name})"), - MacroDefinition(name) => format!("MacroDefinition({name})"), - MacroDefinitionTerminator => format!("MacroDefinitionTerminator"), - - IntegerLiteral(value) => format!("IntegerLiteral({value})"), - PackedBinaryLiteral(pbl) => format!("PackedBinaryLiteral({pbl})"), - PinnedAddress(value) => format!("PinnedAddress({value})"), - - Expression(expr) => format!("Expression({expr:?})"), - - String(string) => format!("String('{string}')"), - - BlockOpen => format!("BlockOpen"), - BlockClose => format!("BlockClose"), - Separator => format!("Separator"), - - Symbol(name) => format!("Symbol({name})"), - - Error(error) => format!("Error({error:?})"), - }; - - write!(f, "{start} {name}") - } -} diff --git a/src/tokens/tracked.rs b/src/tokens/tracked.rs deleted file mode 100644 index ea37047..0000000 --- a/src/tokens/tracked.rs +++ /dev/null @@ -1,47 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct Tracked<T> { - pub source: SourceSpan, - pub value: T, -} - -impl<T> Tracked<T> { - pub fn from(value: T, source: SourceSpan) -> Self { - Self { source, value } - } -} - -impl<T> std::ops::Deref for Tracked<T> { - type Target = T; - fn deref(&self) -> &T { - &self.value - } -} - -impl<T> std::ops::DerefMut for Tracked<T> { - fn deref_mut(&mut self) -> &mut T { - &mut self.value - } -} - -impl<T: std::fmt::Display> std::fmt::Display for Tracked<T> { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", self.value) - } -} - -impl<T: std::fmt::Debug> std::fmt::Debug for Tracked<T> { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{:?}", self.value) - } -} - -impl<T: PartialEq> PartialEq for Tracked<T> { - fn eq(&self, other: &Tracked<T>) -> bool { - self.value.eq(&other.value) - } -} - -impl<T: Eq> Eq for Tracked<T> {} diff --git a/src/tokens/tracked_integer.rs 
b/src/tokens/tracked_integer.rs deleted file mode 100644 index fa55f09..0000000 --- a/src/tokens/tracked_integer.rs +++ /dev/null @@ -1,14 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct TrackedInteger { - pub source: SourceSpan, - pub value: isize, -} - -impl std::fmt::Display for TrackedInteger { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", self.value) - } -} diff --git a/src/types/expression_stack.rs b/src/types/expression_stack.rs new file mode 100644 index 0000000..4d26eb2 --- /dev/null +++ b/src/types/expression_stack.rs @@ -0,0 +1,89 @@ +use crate::*; + + +pub struct ExpressionStack { + stack: Vec<isize>, +} + +impl ExpressionStack { + pub fn new() -> Self { + Self { + stack: Vec::new(), + } + } + + pub fn pull_result(mut self) -> Result<isize, StackError> { + match self.stack.len() { + 0 => Err(StackError::NoReturnValue), + 1 => Ok(self.stack.pop().unwrap()), + _ => Err(StackError::MultipleReturnValues), + } + } + + pub fn push(&mut self, value: isize) { + self.stack.push(value); + } + + pub fn apply(&mut self, operator: Operator, source: &SourceSpan) -> Result<(), Tracked<StackError>> { + macro_rules! push { + ($val:expr) => { self.stack.push($val) } + } + macro_rules! pop { + ($name:ident) => { + let $name = match self.stack.pop() { + Some(value) => value, + None => return Err(Tracked::from(StackError::Underflow, source.clone())), + }; + } + } + macro_rules! 
truth { + ($bool:expr) => { match $bool { true => 1, false => 0 } }; + } + match operator { + Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) }, + Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) }, + Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) }, + Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) }, + Operator::LessThanEqual => { pop!(b); pop!(a); push!(truth!(a <= b)) }, + Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) }, + Operator::Add => { pop!(b); pop!(a); push!(a + b) }, + Operator::Subtract => { pop!(b); pop!(a); push!(a - b) }, + Operator::Multiply => { pop!(b); pop!(a); push!(a * b) }, + Operator::Divide => { pop!(b); pop!(a); push!(a / b) }, + Operator::Modulo => { pop!(b); pop!(a); push!(a % b) }, + Operator::Exponent => { pop!(b); pop!(a); push!( + if let Ok(b) = u32::try_from(b) { a.saturating_pow(b) } else { 0 } ) }, + Operator::LeftShift => { pop!(b); pop!(a); push!( + if b < 0 { a >> -b } else { a << b } ) }, + Operator::RightShift => { pop!(b); pop!(a); push!( + if b < 0 { a << -b } else { a >> b } ) }, + Operator::BitAnd => { pop!(b); pop!(a); push!(a & b) }, + Operator::BitOr => { pop!(b); pop!(a); push!(a | b) }, + Operator::BitXor => { pop!(b); pop!(a); push!(a ^ b) }, + Operator::BitNot => { pop!(a); push!(!a) }, + } + return Ok(()); + } +} + + +pub enum StackError { + Underflow, + MultipleReturnValues, + NoReturnValue, +} + + +pub fn report_stack_error(error: &Tracked<StackError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + StackError::Underflow => + "A stack underflow occurred while evaluating this operator", + StackError::MultipleReturnValues => + "More than one value was left on the stack after this expression was evaluated", + StackError::NoReturnValue => + "No value was left on the stack after this expression was evaluated", + }; + + 
report_source_issue(LogLevel::Error, &context, message); +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..623d525 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,7 @@ +mod expression_stack; +mod operator; +mod word_template; + +pub use expression_stack::*; +pub use operator::*; +pub use word_template::*; diff --git a/src/types/operator.rs b/src/types/operator.rs new file mode 100644 index 0000000..a7e7b9b --- /dev/null +++ b/src/types/operator.rs @@ -0,0 +1,87 @@ +#[derive(Clone, Copy)] +pub enum Operator { + Equal, + NotEqual, + LessThan, + GreaterThan, + LessThanEqual, + GreaterThanEqual, + Add, + Subtract, + Multiply, + Divide, + Modulo, + Exponent, + LeftShift, + RightShift, + BitAnd, + BitOr, + BitXor, + BitNot, +} + +impl Operator { + pub fn from_str(string: &str) -> Option<Self> { + match string { + "=" => Some(Operator::Equal), + "==" => Some(Operator::Equal), + "<eq>" => Some(Operator::Equal), + "!=" => Some(Operator::NotEqual), + "<neq>" => Some(Operator::NotEqual), + "<" => Some(Operator::LessThan), + "<lth>" => Some(Operator::LessThan), + ">" => Some(Operator::GreaterThan), + "<gth>" => Some(Operator::GreaterThan), + "<=" => Some(Operator::LessThanEqual), + "<leq>" => Some(Operator::LessThanEqual), + ">=" => Some(Operator::GreaterThanEqual), + "<geq>" => Some(Operator::GreaterThanEqual), + "+" => Some(Operator::Add), + "<add>" => Some(Operator::Add), + "-" => Some(Operator::Subtract), + "<sub>" => Some(Operator::Subtract), + "*" => Some(Operator::Multiply), + "<mul>" => Some(Operator::Multiply), + "/" => Some(Operator::Divide), + "<div>" => Some(Operator::Divide), + "<mod>" => Some(Operator::Modulo), + "**" => Some(Operator::Exponent), + "<exp>" => Some(Operator::Exponent), + "<<" => Some(Operator::LeftShift), + "<shl>" => Some(Operator::LeftShift), + ">>" => Some(Operator::RightShift), + "<shr>" => Some(Operator::RightShift), + "<and>" => Some(Operator::BitAnd), + "<or>" => Some(Operator::BitOr), + "<xor>" => 
Some(Operator::BitXor), + "<not>" => Some(Operator::BitNot), + _ => None, + } + } +} + +impl std::fmt::Display for Operator { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + Operator::Equal => "<eq>", + Operator::NotEqual => "<neq>", + Operator::LessThan => "<lth>", + Operator::GreaterThan => "<gth>", + Operator::LessThanEqual => "<leq>", + Operator::GreaterThanEqual => "<geq>", + Operator::Add => "<add>", + Operator::Subtract => "<sub>", + Operator::Multiply => "<mul>", + Operator::Divide => "<div>", + Operator::Modulo => "<mod>", + Operator::Exponent => "<exp>", + Operator::LeftShift => "<shl>", + Operator::RightShift => "<shr>", + Operator::BitAnd => "<and>", + Operator::BitOr => "<or>", + Operator::BitXor => "<xor>", + Operator::BitNot => "<not>", + }; + write!(f, "{string}") + } +} diff --git a/src/tokens/packed_binary_literal.rs b/src/types/word_template.rs index a2720b7..33d5933 100644 --- a/src/tokens/packed_binary_literal.rs +++ b/src/types/word_template.rs @@ -1,35 +1,23 @@ use crate::*; -pub struct PackedBinaryLiteral { - pub source: SourceSpan, +pub struct WordTemplate { pub value: usize, - pub bits: usize, - pub fields: Vec<BitField>, - pub errors: Vec<PackedBinaryLiteralParseError>, + /// Width of the word in bits. + pub width: u32, + pub fields: Vec<Tracked<BitField>>, } pub struct BitField { pub name: char, - pub source: SourceSpan, - /// Length of field in bits - pub bits: usize, - /// Distance to left-shift field in value - pub shift: usize, -} - -pub struct PackedBinaryLiteralParseError { - pub source: SourceSpan, - pub variant: PackedBinaryLiteralParseErrorVariant, -} - -pub enum PackedBinaryLiteralParseErrorVariant { - DuplicateFieldName(char), - InvalidCharacter(char), + /// Width of the field in bits. + pub width: u32, + /// Number of bits to the right of the field in the word. 
+ pub shift: u32, } -impl std::fmt::Display for PackedBinaryLiteral { +impl std::fmt::Display for WordTemplate { fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { if self.value == 0 { write!(f, "0")?; @@ -41,7 +29,8 @@ impl std::fmt::Display for PackedBinaryLiteral { write!(f, "_")?; } for field in &self.fields { - if i <= field.bits + field.shift - 1 && i >= field.shift { + let i = i as u32; + if i <= field.width + field.shift - 1 && i >= field.shift { write!(f, "{}", field.name)?; continue 'bit; } |