diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/bin/tq.rs | 62 | ||||
-rw-r--r-- | src/formats/cmd.rs | 54 | ||||
-rw-r--r-- | src/formats/mod.rs | 15 | ||||
-rw-r--r-- | src/lib.rs | 7 | ||||
-rw-r--r-- | src/stages/bytecode.rs | 197 | ||||
-rw-r--r-- | src/stages/bytecode_tokens.rs | 58 | ||||
-rw-r--r-- | src/stages/compiler.rs | 108 | ||||
-rw-r--r-- | src/stages/intermediate.rs | 701 | ||||
-rw-r--r-- | src/stages/intermediate_tokens.rs | 123 | ||||
-rw-r--r-- | src/stages/semantic.rs | 143 | ||||
-rw-r--r-- | src/stages/semantic_tokens.rs | 126 | ||||
-rw-r--r-- | src/stages/syntactic.rs | 43 | ||||
-rw-r--r-- | src/stages/syntactic_tokens.rs | 9 | ||||
-rw-r--r-- | src/types/expression_stack.rs | 279 | ||||
-rw-r--r-- | src/types/operator.rs | 20 | ||||
-rw-r--r-- | src/types/word_template.rs | 2 |
16 files changed, 1161 insertions, 786 deletions
diff --git a/src/bin/tq.rs b/src/bin/tq.rs index b27f702..885c46c 100644 --- a/src/bin/tq.rs +++ b/src/bin/tq.rs @@ -42,56 +42,64 @@ fn main() { if verbose { log::set_log_level(log::LogLevel::Info) } if print_version { let version = env!("CARGO_PKG_VERSION"); - eprintln!("torque assembler, version {version}"); - eprintln!("written by ben bridle"); + eprintln!("Torque meta-assembler, version {version}"); + eprintln!("by Ben Bridle (https://derelict.engineering)"); std::process::exit(0); } if print_help { eprintln!("\ Usage: tq [source] [destination] -Torque multi-assembler, see http://benbridle.com/torque for documentation. +Torque is a lightweight meta-assembler (see http://benbridle.com/torque). + +Usage: + To assemble a Torque program from a source file and write to an output + file, run `tq [source] [destination]`, where [source] is the path + of the source file and [destination] is the path to write to. + + If [destination] is omitted, the assembled program will be written to + standard output. If [source] is omitted, the program source code will + be read from standard input. + +Environment variables: + TORQUE_LIBS + A list of colon-separated paths that will be searched to find Torque + source code files to use as libraries when assembling a Torque program. + If a library file resolves an unresolved symbol in the program being + assembled, the library file will be merged into the program. 
Arguments: - [source] Path to a source file to assemble - [destination] Path to which output will be written + [source] Torque source code file to assemble + [destination] Destination path for assembler output Switches: - --extension=<ext> File extension to identify library files (default is 'tq') - --format=<fmt> Format to apply to assembled bytecode (default is 'debug') + --dry-run (-n) Assemble and show errors only, don't write any output + --extension=<ext> File extension to identify source files (default is 'tq') + --format=<fmt> Output format to use for assembled program (default is 'debug') --width=<width> Force a fixed width for all assembled words --no-project-libs Don't search for libraries in the source parent folder --no-env-libs Don't search for libraries in the TORQUE_LIBS path variable --no-libs Combination of --no-project-libs and --no-env-libs - --tree Display a tree visualisation of all included library files - --dry-run (-n) Assemble and show errors only, don't write any output + --tree Show a tree diagram of all included library files --help (-h) Print this help information --verbose, (-v) Print additional information - --version Print the assembler version and exit - -Environment variables: - TORQUE_LIBS - A list of colon-separated paths which will be searched to find - Torque source code files to use as libraries when assembling a - Torque program. If a library file resolves an unresolved symbol - in the program being assembled, the library file will be merged - into the program. + --version Print the program version and exit Output formats: - <debug> + cmd + CMD module load format used by the CP/M operating system. + debug Print assembled words as human-readable binary literals. - <inhx> + inhx Original 8-bit Intel hex format. - <inhx32> + inhx32 Modified 16-bit Intel hex format used by Microchip. - <raw> + raw Assembled words are converted to big-endian bytestrings and concatenated. Each word is padded to the nearest byte. 
Words must all be the same width. - <source> + source Print the source file before assembly, with symbols resolved. - -Created by Ben Bridle. - "); +"); std::process::exit(0); } @@ -169,9 +177,9 @@ Created by Ben Bridle. } }; - if !dry_run { let result = match format { + Format::Cmd => format_cmd(&segments), Format::Debug => format_debug(&segments), Format::Inhx => format_inhx(&segments), Format::Inhx32 => format_inhx32(&segments), diff --git a/src/formats/cmd.rs b/src/formats/cmd.rs new file mode 100644 index 0000000..2ff2097 --- /dev/null +++ b/src/formats/cmd.rs @@ -0,0 +1,54 @@ +// CP/M CMD format, also the TRS-80 Load Module Format (LMF) +// https://en.wikipedia.org/wiki/CMD_file_(CP/M) +// https://www.tim-mann.org/trs80/doc/ldosq1-4.pdf (page 43) +use crate::*; + + +pub fn format_cmd(segments: &[Segment]) -> Result<Vec<u8>, FormatError> { + let mut records = Vec::new(); + let mut address; + for segment in segments { + address = segment.address; + for chunk in segment.words.chunks(16) { + records.push(data_record(chunk, address)?); + address += 16; + } + } + let start_address = segments.first().map(|s| s.address).unwrap_or(0); + records.push(terminating_record(start_address)?); + + let mut output = String::new(); + for record in records { + output.push_str(&record.to_string_plain()); + } + return Ok(output.into_bytes()); +} + +fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> { + let Ok(address) = u16::try_from(address) else { + return Err(FormatError::AddressTooLarge(u16::MAX as usize, address)); + }; + let mut record = InhxRecord::new(); + record.byte(0x01); + let data_bytes = words.len() as u8; + record.byte(data_bytes.wrapping_add(2)); + record.le_double(address); + for word in words { + if word.value.width > 8 { + return Err(FormatError::WordTooWide(8, word.width, word.source.clone())); + } + record.byte(word.value.value as u8); + } + return Ok(record); +} + +fn terminating_record(address: usize) -> 
Result<InhxRecord, FormatError> { + let Ok(address) = u16::try_from(address) else { + return Err(FormatError::AddressTooLarge(u16::MAX as usize, address)); + }; + let mut record = InhxRecord::new(); + record.byte(0x02); + record.byte(0x02); + record.le_double(address); + return Ok(record); +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs index a77bd72..e15bfbd 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -1,8 +1,10 @@ +mod cmd; mod inhx; mod inhx32; mod raw; mod debug; +pub use cmd::*; pub use inhx::*; pub use inhx32::*; pub use raw::*; @@ -10,11 +12,10 @@ pub use debug::*; use crate::*; -use log::*; - #[derive(Clone, Copy, PartialEq)] pub enum Format { + Cmd, Debug, Inhx, Inhx32, @@ -25,6 +26,7 @@ pub enum Format { impl Format { pub fn from_str(string: &str) -> Self { match string { + "cmd" => Self::Cmd, "debug" => Self::Debug, "inhx" => Self::Inhx, "inhx32" => Self::Inhx32, @@ -38,6 +40,7 @@ impl Format { impl std::fmt::Display for Format { fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { let string = match self { + Self::Cmd => "cmd", Self::Debug => "debug", Self::Inhx => "inhx", Self::Inhx32 => "inhx32", @@ -112,6 +115,14 @@ impl InhxRecord { } format!(":{output}{checksum:0>2X}\n") } + + pub fn to_string_plain(self) -> String { + let mut output = String::new(); + for byte in &self.bytes { + output.push_str(&format!("{byte:0>2X}")); + } + format!("{output}\n") + } } @@ -1,3 +1,6 @@ +const MAX_ITERATIONS_TO_STABILISE: usize = 4; +const MAX_RECURSION_DEPTH: usize = 1024; + mod stages; mod types; mod formats; @@ -7,3 +10,7 @@ pub use types::*; pub use formats::*; pub use assembler::*; + +use log::*; + + diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs index 3618b26..d4e729b 100644 --- a/src/stages/bytecode.rs +++ b/src/stages/bytecode.rs @@ -1,182 +1,47 @@ use crate::*; -use std::collections::HashMap; - -pub fn parse_bytecode(tokens: Vec<Tracked<IntermediateToken>>, width: Option<u32>) -> 
Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> { - BytecodeParser::new(width).parse(tokens) -} +pub fn parse_bytecode(intermediate: Vec<Tracked<IntermediateToken>>, width: Option<u32>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> { + let mut segments = Vec::new(); + let mut errors = Vec::new(); + let mut current_segment = Vec::new(); + let mut segment_source = None; + let mut segment_address = 0; -pub struct BytecodeParser { - width: Option<u32>, - addresses: HashMap<String, Tracked<usize>>, - address: usize, - segment_address: usize, - segment_source: Option<SourceSpan>, - segments: Vec<Segment>, - words: Vec<Tracked<Word>>, - errors: Vec<Tracked<BytecodeError>>, -} - -impl BytecodeParser { - pub fn new(width: Option<u32>) -> Self { - Self { - width, - addresses: HashMap::new(), - address: 0, - segment_address: 0, - segment_source: None, - segments: Vec::new(), - words: Vec::new(), - errors: Vec::new(), - } - } - - pub fn parse(mut self, tokens: Vec<Tracked<IntermediateToken>>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> { - // Calculate all label addresses ahead of time. 
- let mut address = 0; - for token in &tokens { - let source = &token.source; - match &token.value { - IntermediateToken::LabelDefinition(name) => { - let tracked = Tracked::from(address, source.clone()); - if let Some(_) = self.addresses.insert(name.clone(), tracked) { - unreachable!("Uncaught duplicate label definition '{name}'"); + for token in intermediate { + match token.value { + IntermediateToken::Word(word) => { + if let Some(width) = width { + if word.width != width { + let error = BytecodeError::IncorrectWidth(width, word.width); + errors.push(Tracked::from(error, token.source.clone())); } } - IntermediateToken::Word(_) => { - address += 1; - } - IntermediateToken::PinnedAddress(pinned) => { - address = pinned.value; - } + let source = token.source.clone(); + current_segment.push(Tracked::from(word, source)); } - } - for token in &tokens { - let source = &token.source; - match &token.value { - IntermediateToken::Word(word) => { - let word = self.evaluate_word(word, source); - // Check that the word width fits the provided width. 
- if let Some(width) = self.width { - if word.width != width { - let error = BytecodeError::IncorrectWidth(width, word.width); - self.errors.push(Tracked::from(error, source.clone())); - } - } - self.words.push(word); - self.address += 1; + IntermediateToken::PinnedAddress(pinned_address) => { + if !current_segment.is_empty() { + let address = segment_address; + let words = std::mem::take(&mut current_segment); + let source = std::mem::take(&mut segment_source); + segments.push(Segment { address, source, words }); } - IntermediateToken::PinnedAddress(address) => { - let current = self.address; - let pinned = address.value; - if current > pinned { - let error = BytecodeError::PinnedAddressBacktrack(pinned, current); - self.errors.push(Tracked::from(error, address.source.clone())); - } else { - let words = std::mem::take(&mut self.words); - if !words.is_empty() { - let address = self.segment_address; - let source = std::mem::take(&mut self.segment_source); - let segment = Segment { address, source, words }; - self.segments.push(segment); - } - self.segment_source = Some(address.source.clone()); - self.address = pinned; - self.segment_address = pinned; - } - } - IntermediateToken::LabelDefinition(_) => (), + segment_address = pinned_address; } } - // Finish final segment. 
- let words = std::mem::take(&mut self.words); - if !words.is_empty() { - let address = self.segment_address; - let source = std::mem::take(&mut self.segment_source); - let segment = Segment { address, source, words }; - self.segments.push(segment); - } - - match self.errors.is_empty() { - true => Ok(self.segments), - false => Err(self.errors), - } } - - fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize { - let mut stack = ExpressionStack::new(); - for token in &expression.tokens { - let source = &token.source; - match &token.value { - IntermediateExpressionToken::Integer(integer) => match integer { - IntermediateInteger::Integer(value) => { - stack.push(*value); - } - IntermediateInteger::Expression(expression) => { - stack.push(self.evaluate_expression(expression, source)); - } - IntermediateInteger::LabelReference(name) => { - stack.push(self.evaluate_label_reference(name)); - } - } - IntermediateExpressionToken::Operator(operator) => { - if let Err(err) = stack.apply(*operator, source) { - let error = BytecodeError::StackError(err); - self.errors.push(Tracked::from(error, source.clone())) - } - } - } - } - match stack.pull_result() { - Ok(value) => value, - Err(err) => { - let error = BytecodeError::StackError(Tracked::from(err, source.clone())); - self.errors.push(Tracked::from(error, source.clone())); - 0 - } - } + // Finish final segment. 
+ if !current_segment.is_empty() { + let address = segment_address; + let words = std::mem::take(&mut current_segment); + let source = std::mem::take(&mut segment_source); + segments.push(Segment { address, source, words }); } - fn evaluate_label_reference(&mut self, name: &Tracked<String>) -> isize { - if let Some(address) = self.addresses.get(&name.to_string()) { - address.value as isize - } else { - unreachable!("Uncaught unresolved label reference '{name}'") - } - } - - fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked<Word> { - let mut word_value = word.value; - for field in &word.fields { - let field_source = &field.value.value.source; - let field_value = match &field.value.value.value { - IntermediateInteger::Expression(expression) => { - self.evaluate_expression(expression, source) - } - IntermediateInteger::LabelReference(name) => { - self.evaluate_label_reference(name) - } - IntermediateInteger::Integer(value) => { - *value - } - }; - let value_width = match field_value.cmp(&0) { - std::cmp::Ordering::Less => (-field_value).ilog2() + 1, - std::cmp::Ordering::Equal => 0, - std::cmp::Ordering::Greater => field_value.ilog2() + 1, - }; - if field.width < value_width { - let error = BytecodeError::ValueTooWide(field.width, value_width); - self.errors.push(Tracked::from(error, field_source.clone())); - } else { - let mask = 2_usize.pow(field.width as u32) - 1; - let clamped_value = (field_value as usize) & mask; - word_value |= (clamped_value << field.shift) as usize; - } - } - let word = Word { width: word.width, value: word_value }; - return Tracked::from(word, source.clone()); + match errors.is_empty() { + true => Ok(segments), + false => Err(errors), } } diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs index b54cb0e..5020827 100644 --- a/src/stages/bytecode_tokens.rs +++ b/src/stages/bytecode_tokens.rs @@ -1,6 +1,5 @@ use crate::*; - pub struct Segment { pub address: usize, /// Source of the 
address value. @@ -8,42 +7,22 @@ pub struct Segment { pub words: Vec<Tracked<Word>>, } -pub struct Word { - pub value: usize, - pub width: u32, -} - -impl std::fmt::Display for Word { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - if self.width == 0 { - write!(f, "0") - } else { - for i in (0..self.width).rev() { - let is_first_bit = i+1 == self.width; - if !is_first_bit && (i+1) % 4 == 0 { - write!(f, "_")?; - } - match (self.value >> i) & 1 { - 0 => write!(f, "0")?, - _ => write!(f, "1")?, - } - } - Ok(()) - } +pub fn print_segment(segment: &Segment) { + println!("SEGMENT: 0x{:>04x}", segment.address); + // Find maximum width of all words in the segment. + let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0); + for word in &segment.words { + let string = word.to_string(); + println!(" {string:>w$}", w=width as usize); } + } pub enum BytecodeError { - /// expected, received - IncorrectWidth(u32, u32), - /// pinned, real - PinnedAddressBacktrack(usize, usize), - /// expected, received - ValueTooWide(u32, u32), - StackError(Tracked<StackError>), + // (expected, received) + IncorrectWidth(u32, u32) } - pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { for error in errors { report_bytecode_error(error, source_code); @@ -54,25 +33,10 @@ fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { let context = Context { source_code: &source_code, source: &error.source }; let message = match &error.value { BytecodeError::IncorrectWidth(expected, received) => - &format!("Word is {received} bits wide, but was expected to have a fixed width of {expected} bits"), - BytecodeError::PinnedAddressBacktrack(pinned, real) => - &format!("Cannot pin to address {pinned} when address is already {real}"), - BytecodeError::StackError(stack_error) => { - report_stack_error(stack_error, source_code); return; }, - BytecodeError::ValueTooWide(expected, received) => - 
&format!("Field is {expected} bits wide, but received a value that is {received} bits wide"), + &format!("Word is {received} bits wide, but should be exactly {expected} bits wide"), }; report_source_issue(LogLevel::Error, &context, message); } -pub fn print_segment(segment: &Segment) { - println!("SEGMENT: 0x{:>04x}", segment.address); - // Find maximum width of all words in the segment. - let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0); - for word in &segment.words { - let string = word.to_string(); - println!(" {string:>w$}", w=width as usize); - } -} diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs index 44b7660..b4680c2 100644 --- a/src/stages/compiler.rs +++ b/src/stages/compiler.rs @@ -1,8 +1,6 @@ use crate::*; use assembler::{Symbol, SymbolRole, DefinitionType}; -use SymbolRole::*; -use DefinitionType::*; use std::path::Path; @@ -18,11 +16,33 @@ pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbo Ok(syntactic) => syntactic, Err(_errors) => return None, }; + let semantic = match parse_semantic(syntactic) { Ok(semantic) => semantic, Err(_errors) => return None, }; - Some(SymbolParser::new().parse(&semantic)) + + // Convert symbols to the format required by the assembler library. 
+ let parsed = SymbolParser::new().parse(&semantic); + let mut symbols = Vec::new(); + for symbol in parsed { + let name = format!("{}::{}", symbol.name, symbol.arg_count); + let namespace = match symbol.macro_name { + Some(macro_name) => vec![macro_name], + None => vec![], + }; + let source = symbol.source; + let role = match symbol.role { + SymbolRoleDetailed::MacroDefinition => + SymbolRole::Definition(DefinitionType::CanFollowReference), + SymbolRoleDetailed::LabelDefinition => + SymbolRole::Definition(DefinitionType::CanFollowReference), + SymbolRoleDetailed::Invocation => + SymbolRole::Reference, + }; + symbols.push(Symbol { name, namespace, source, role }); + } + Some(symbols) } /// Push source code to a source compilation string. @@ -43,10 +63,27 @@ pub fn push_code(compilation: &mut String, source_file: &SourceFile) { } -// Extract symbol definitions from a list of semantic tokens. +// Track additional information for each symbol. +pub struct SymbolDetailed { + pub name: String, + pub macro_name: Option<String>, + pub arg_count: usize, + pub role: SymbolRoleDetailed, + pub source: SourceSpan, +} + +pub enum SymbolRoleDetailed { + MacroDefinition, + LabelDefinition, + Invocation, +} + + +// Extract symbol definitions and invocations from a list of semantic tokens. pub struct SymbolParser { + /// Current macro definition name. 
pub macro_name: Option<String>, - pub symbols: Vec<Symbol>, + pub symbols: Vec<SymbolDetailed>, } impl SymbolParser { @@ -57,18 +94,19 @@ impl SymbolParser { } } - fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { - let name = name.to_string(); - let namespace = match &self.macro_name { - Some(macro_name) => vec![macro_name.to_owned()], - None => vec![], - }; - let source = source.to_owned(); - self.symbols.push(Symbol { name, namespace, source, role }); - + fn record_symbol(&mut self, name: &str, arg_count: usize, source: &SourceSpan, role: SymbolRoleDetailed) { + self.symbols.push( + SymbolDetailed { + name: name.to_string(), + macro_name: self.macro_name.clone(), + arg_count, + role, + source: source.clone(), + } + ); } - pub fn parse(mut self, semantic: &[Tracked<SemanticToken>]) -> Vec<Symbol> { + pub fn parse(mut self, semantic: &[Tracked<SemanticToken>]) -> Vec<SymbolDetailed> { for token in semantic { let source = &token.source; match &token.value { @@ -76,22 +114,28 @@ impl SymbolParser { // Record macro definition. self.record_symbol( &definition.name, + definition.arguments.len(), &definition.name.source, - Definition(MustPrecedeReference), + SymbolRoleDetailed::MacroDefinition, ); + // Track that we're currently inside a macro definition. 
self.macro_name = Some(definition.name.to_string()); for argument in &definition.arguments { self.record_symbol( &argument.name, + 0, &argument.source, - Definition(MustPrecedeReference), + SymbolRoleDetailed::MacroDefinition, ); } match &definition.body { MacroDefinitionBody::Integer(integer) => { self.parse_integer_token(&integer, &integer.source) } + MacroDefinitionBody::List(list) => { + self.parse_list_token(&list, &list.source) + } MacroDefinitionBody::Invocation(invocation) => { self.parse_invocation(&invocation, &invocation.source) } @@ -118,6 +162,9 @@ impl SymbolParser { ExpressionToken::IntegerToken(integer) => { self.parse_integer_token(integer, source); } + ExpressionToken::ListToken(list) => { + self.parse_list_token(list, source); + } ExpressionToken::Invocation(invocation) => { self.parse_invocation(invocation, source); } @@ -129,8 +176,9 @@ impl SymbolParser { fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) { self.record_symbol( &invocation.name, + invocation.arguments.len(), &source, - Reference, + SymbolRoleDetailed::Invocation, ); for argument in &invocation.arguments { @@ -142,10 +190,12 @@ impl SymbolParser { InvocationArgument::BlockToken(block) => { self.parse_block_token(block, &source); } + InvocationArgument::ListToken(list) => { + self.parse_list_token(list, &source); + }, InvocationArgument::Invocation(invocation) => { self.parse_invocation(invocation, &source); } - InvocationArgument::String(_) => (), } } } @@ -155,8 +205,9 @@ impl SymbolParser { BlockToken::LabelDefinition(name) => { self.record_symbol( &name, + 0, &source, - Definition(CanFollowReference), + SymbolRoleDetailed::LabelDefinition, ); } BlockToken::PinnedAddress(integer) => { @@ -170,8 +221,9 @@ impl SymbolParser { for field in &word_template.fields { self.record_symbol( &field.name.to_string(), + 0, &field.source, - Reference, + SymbolRoleDetailed::Invocation, ); } } @@ -197,4 +249,18 @@ impl SymbolParser { IntegerToken::IntegerLiteral(_) => 
(), } } + + fn parse_list_token(&mut self, token: &ListToken, source: &SourceSpan) { + match &token { + ListToken::Invocation(invocation) => { + self.parse_invocation(&invocation, source) + } + ListToken::ListLiteral(integers) => { + for integer in integers { + self.parse_integer_token(&integer, source) + } + } + ListToken::StringLiteral(_) => (), + } + } } diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs index 6853f62..8cabe26 100644 --- a/src/stages/intermediate.rs +++ b/src/stages/intermediate.rs @@ -1,147 +1,170 @@ use crate::*; -use assembler::{DefinitionType, SourceLocation, SourcePosition, SymbolRole}; - use indexmap::{IndexSet, IndexMap}; -static mut ID: usize = 0; -macro_rules! next_id { () => { unsafe { let id = ID; ID += 1; id }}; } +static mut ENVIRONMENT_ID: usize = 0; +macro_rules! next_id { + () => { + unsafe { + let id = ENVIRONMENT_ID; + ENVIRONMENT_ID += 1; + id + } + }; +} + +pub struct LabelAddress { + pub previous: usize, + pub current: usize, + pub touched: bool, +} + +#[derive(Eq, Hash, PartialEq, Clone)] +pub struct SymbolSignature { + pub name: String, + pub arg_count: usize, +} + +impl std::fmt::Display for SymbolSignature { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{}::{}", self.name, self.arg_count) + } +} + +struct Environment { + arguments: IndexMap<String, Tracked<IntermediateValue>>, + id: usize, +} + pub fn parse_intermediate(semantic: Vec<Tracked<SemanticToken>>) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> { IntermediateParser::new(semantic).parse() } - struct IntermediateParser { semantic: Vec<Tracked<SemanticToken>>, - label_names: IndexSet<Tracked<String>>, - macro_names: IndexSet<Tracked<String>>, - macro_definitions: IndexMap<String, MacroDefinition>, + label_names: IndexSet<Tracked<SymbolSignature>>, + label_addresses: IndexMap<Tracked<String>, LabelAddress>, + macro_definitions: IndexMap<SymbolSignature, 
Tracked<MacroDefinition>>, + environment_stack: Vec<Environment>, + address: usize, intermediate: Vec<Tracked<IntermediateToken>>, errors: Vec<Tracked<IntermediateError>>, } impl IntermediateParser { - pub fn new(semantic: Vec<Tracked<SemanticToken>>) -> Self { + /// Initialise a new parser. + pub fn new(mut semantic: Vec<Tracked<SemanticToken>>) -> Self { + // Collect all label and macro definitions ahead of time. let mut label_names = IndexSet::new(); - let mut macro_names = IndexSet::new(); + let mut macro_definitions = IndexMap::new(); + // Get the signatures of all defined labels. for symbol in SymbolParser::new().parse(&semantic) { - match symbol.role { - SymbolRole::Definition(DefinitionType::MustPrecedeReference) => { - // Only consider macro definitions, not macro argument definitions. - if symbol.namespace.is_empty() { - if !macro_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { - unreachable!("Uncaught duplicate macro definition '{}'", symbol.name); - } - } + if let SymbolRoleDetailed::LabelDefinition = symbol.role { + let signature = SymbolSignature { name: symbol.name.clone(), arg_count: symbol.arg_count }; + if !label_names.insert(Tracked::from(signature.clone(), symbol.source)) { + unreachable!("Uncaught duplicate label definition '{signature}'"); } - SymbolRole::Definition(DefinitionType::CanFollowReference) => { - if !label_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { - unreachable!("Uncaught duplicate label definition '{}'", symbol.name); - } - } - SymbolRole::Reference => (), } } - + // Strip all semantic macro definitions from the semantic tokens. 
+ let definitions = semantic.extract_if(.., + |t| if let SemanticToken::MacroDefinition(_) = t.value { true } else { false }); + for definition in definitions { + let source = definition.source; + let SemanticToken::MacroDefinition(definition) = definition.value else { unreachable!() }; + let name = definition.name.value.clone(); + let arg_count = definition.arguments.len(); + let signature = SymbolSignature { name, arg_count }; + if macro_definitions.insert(signature.clone(), Tracked::from(definition, source)).is_some() { + unreachable!("Uncaught duplicate macro definition '{signature}'") + } + } Self { semantic, label_names, - macro_names, - macro_definitions: IndexMap::new(), + label_addresses: IndexMap::new(), + macro_definitions, + environment_stack: Vec::new(), + address: 0, intermediate: Vec::new(), errors: Vec::new(), } } pub fn parse(mut self) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> { - for token in self.semantic { - let source = &token.source; - match token.value { - SemanticToken::MacroDefinition(definition) => { - // Invoke the body to see if it contains undefined macros. - let error_count = self.errors.len(); - let mut arguments = IndexMap::new(); - // Prepare dummy argument values. 
- let null = SourceSpan { - string: String::new(), - in_merged: SourceLocation { - path: None, - start: SourcePosition::ZERO, - end: SourcePosition::ZERO, - }, - in_source: None, - child: None, - }; - for argument in &definition.arguments { - let value = match argument.variant { - ArgumentType::Integer => { - let integer = IntermediateInteger::Integer(0); - let tracked = Tracked::from(integer, null.clone()); - IntermediateValue::Integer(tracked) - } - ArgumentType::Block => { - IntermediateValue::Block(Vec::new()) - } - }; - let tracked = Tracked::from(value, null.clone()); - arguments.insert(argument.name.clone(), tracked); - } - let mut env = Environment { - label_names: &self.label_names, - macro_names: &self.macro_names, - macro_definitions: &self.macro_definitions, - arguments, - errors: &mut self.errors, - id: next_id!(), - }; - env.parse_macro_definition_body(&definition.body, source); - if self.errors.len() != error_count { - break; - } + let semantic = std::mem::take(&mut self.semantic); - let name = definition.name.to_string(); - if self.macro_definitions.insert(name.clone(), definition).is_some() { - unreachable!("Uncaught duplicate macro definition '{}'", name); - } - } - SemanticToken::BlockToken(block_token) => { - let mut env = Environment { - label_names: &self.label_names, - macro_names: &self.macro_names, - macro_definitions: &self.macro_definitions, - arguments: IndexMap::new(), - errors: &mut self.errors, - id: next_id!(), - }; - let mut tokens = env.parse_block_token(&block_token, source); - self.intermediate.append(&mut tokens); - } + for i in 0..MAX_ITERATIONS_TO_STABILISE { + info!("Attempting iteration {} of intermediate assembly stage", i+1); + // Erase the previous parse attempt. + self.address = 0; + self.intermediate.clear(); + self.errors.clear(); + self.environment_stack.clear(); + unsafe { ENVIRONMENT_ID = 0; } + // Update label addresses. 
+ for (_, address) in &mut self.label_addresses { + address.previous = address.current; + address.touched = false; } + // Attempt to parse the program (which is now all block tokens). + for token in &semantic { + let source = &token.source; + let SemanticToken::BlockToken(ref block_token) = token.value else { unreachable!() }; + let env = Environment { arguments: IndexMap::new(), id: next_id!() }; + self.environment_stack.push(env); + let mut tokens = self.parse_block_token(&block_token, source); + self.intermediate.append(&mut tokens); + } + // Return unrecoverable errors. + if !self.errors.is_empty() { + return Err(self.errors); + } + // Check label stability + if self.check_for_instability(false) { + continue; + } + // Program is stable, return. + info!("Stabilised in iteration {} of intermediate assembly stage", i+1); + return Ok(self.intermediate); } - match self.errors.is_empty() { - true => Ok(self.intermediate), - false => Err(self.errors), - } + + self.check_for_instability(true); + return Err(self.errors); } -} + /// Check if any label is still stabilising. + fn check_for_instability(&mut self, create_error: bool) -> bool { + for (name, address) in &self.label_addresses { + if address.touched && address.current != address.previous { + info!("Label '{name}' was unstable, moving from address 0x{:04x} to 0x{:04x}", + address.previous, address.current); + if create_error { + let error = IntermediateError::LabelNeverStabilised(name.to_string()); + self.errors.push(Tracked::from(error, name.source.clone())); + } + return true; + } + } + return false; + } -struct Environment<'a> { - label_names: &'a IndexSet<Tracked<String>>, - macro_names: &'a IndexSet<Tracked<String>>, - macro_definitions: &'a IndexMap<String, MacroDefinition>, - arguments: IndexMap<String, Tracked<IntermediateValue>>, - errors: &'a mut Vec<Tracked<IntermediateError>>, - id: usize, -} + /// Get the current environment (the environment at the top of the stack). 
+ fn env(&self) -> &Environment { + self.environment_stack.last().unwrap_or_else(|| + unreachable!("No environment on the stack")) + } -impl<'a> Environment<'a> { - // Attach the invocation ID to every macro label name + // Attach the environment ID to a local label name. fn tag_name(&self, name: &str) -> String { + // If a local label belongs to a macro, the name of that macro + // has been prefixed with the local label name in the + // resolve_label_name method during the semantic parsing stage, + // using a ':' character as a separator. match name.contains(':') { - true => format!("{name}:{}", self.id), + true => format!("{name}:{}", self.env().id), false => name.to_string(), } } @@ -150,7 +173,7 @@ impl<'a> Environment<'a> { match &body { MacroDefinitionBody::Integer(integer) => { let token = self.parse_integer_token(&integer, &source)?; - let integer = IntermediateValue::Integer(token); + let integer = IntermediateValue::Integer(token.value); Some(Tracked::from(integer, source.clone())) } MacroDefinitionBody::Invocation(invocation) => { @@ -164,6 +187,11 @@ impl<'a> Environment<'a> { let value = IntermediateValue::Block(tokens); Some(Tracked::from(value, source.clone())) } + MacroDefinitionBody::List(list) => { + let list = self.parse_list_token(list, &source)?; + let integer = IntermediateValue::List(list.value); + Some(Tracked::from(integer, source.clone())) + } } } @@ -171,75 +199,77 @@ impl<'a> Environment<'a> { let mut intermediate = Vec::new(); match block { BlockToken::LabelDefinition(name) => { - let token = IntermediateToken::LabelDefinition(self.tag_name(name)); - intermediate.push(Tracked::from(token, source.clone())); + let signature = SymbolSignature { name: name.to_string(), arg_count: 0 }; + if !self.label_names.contains(&signature) { + unreachable!("Unrecognised name for label definition"); + } + let tagged_name = self.tag_name(name); + let tracked = Tracked::from(tagged_name.clone(), source.clone()); + self.label_addresses.entry(tracked) + 
.and_modify(|a| { + if a.touched { unreachable!("Label '{tagged_name}' was already touched during this cycle.") } + a.previous = a.current; + a.current = self.address; + a.touched = true; + }) + .or_insert(LabelAddress { + previous: 0, + current: self.address, + touched: true, + }); } - BlockToken::PinnedAddress(address) => { - if let Some(integer) = self.parse_integer_token(address, &address.source) { - if let Some(source) = integer_contains_label_reference(&integer) { - let error = IntermediateError::LabelReferenceInPinnedAddress; - let new_source = address.source.clone().wrap(source); - self.errors.push(Tracked::from(error, new_source)); - } else { - match evaluate_integer(&integer, source) { - Ok(value) => { - let value = usize::try_from(value).unwrap_or(0); - let tracked = Tracked::from(value, address.source.clone()); - let token = IntermediateToken::PinnedAddress(tracked); - intermediate.push(Tracked::from(token, source.clone())); - } - Err(error) => self.errors.push(error), - } + BlockToken::PinnedAddress(integer) => { + if let Some(pinned) = self.parse_integer_token(integer, &integer.source) { + let pinned = **pinned as usize; + if pinned < self.address { + let error = IntermediateError::PinnedAddressBacktrack(pinned, self.address); + self.errors.push(Tracked::from(error, source.clone())); } + self.address = pinned; + let token = IntermediateToken::PinnedAddress(pinned); + intermediate.push(Tracked::from(token, source.clone())); } } BlockToken::ConditionalBlock(cond) => { - let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source); - let mut body = self.parse_block_token(&cond.body, &cond.body.source); - if let Some(predicate) = predicate { - let mut found_error = false; - if let Some(source) = integer_contains_label_reference(&predicate) { - let error = IntermediateError::LabelReferenceInConditionPredicate; - let new_source = cond.predicate.source.clone().wrap(source); - self.errors.push(Tracked::from(error, new_source)); - 
found_error = true; - }; - if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) { - let error = IntermediateError::LabelDefinitionInConditionBody; - let new_source = cond.body.source.clone().wrap(source); - self.errors.push(Tracked::from(error, new_source)); - found_error = true; - } - if !found_error { - match evaluate_integer(&predicate, &cond.predicate.source) { - Ok(value) => if value != 0 { intermediate.append(&mut body) }, - Err(error) => self.errors.push(error), + if let Some(predicate) = self.parse_integer_token(&cond.predicate, &cond.predicate.source) { + if **predicate != 0 { + if let Some(label_source) = block_contains_label_definition(&cond.body, &cond.body.source) { + let error = IntermediateError::LabelDefinitionInConditionBody; + let new_source = cond.body.source.clone().wrap(label_source); + self.errors.push(Tracked::from(error, new_source)); + } else { + let mut body = self.parse_block_token(&cond.body, &cond.body.source); + intermediate.append(&mut body); } } } } BlockToken::WordTemplate(word_template) => { - let mut fields = Vec::new(); - for bit_field in &word_template.fields { - let name = bit_field.name.to_string(); - let source = &bit_field.source; + let word_width = word_template.width; + let mut word_value = word_template.value; + for field in &word_template.fields { + let name = field.name.to_string(); + let field_source = &field.source; let invocation = Invocation { name, arguments: Vec::new() }; - if let Some(value) = self.parse_integer_invocation(&invocation, source) { - let field = IntermediateField { - width: bit_field.width, - shift: bit_field.shift, - value, - }; - fields.push(Tracked::from(field, bit_field.source.clone())); + if let Some(result) = self.parse_integer_invocation(&invocation, field_source) { + let field_value = **result; + // Inject field value into real value. 
+ let value_width = width(field_value); + if field.width < value_width { + let error = IntermediateError::ValueTooWide(field.width, value_width); + let new_source = field_source.wrap(result.source); + self.errors.push(Tracked::from(error, new_source)); + } else { + let mask = 2_usize.pow(field.width as u32) - 1; + let clamped_value = (field_value as usize) & mask; + word_value |= (clamped_value << field.shift) as usize; + } } } - let word = IntermediateWord { - value: word_template.value, - width: word_template.width, - fields, - }; + let word = Word { width: word_width, value: word_value }; let token = IntermediateToken::Word(word); intermediate.push(Tracked::from(token, source.clone())); + self.address += 1; } BlockToken::Block(blocks) => { for block in blocks { @@ -253,15 +283,14 @@ impl<'a> Environment<'a> { } } } - return intermediate; } fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { match integer { IntegerToken::IntegerLiteral(value) => { - let integer = IntermediateInteger::Integer(*value); - Some(Tracked::from(integer, source.clone())) + let tracked = Tracked::from(*value, source.clone()); + Some(Tracked::from(tracked, source.clone())) } IntegerToken::Expression(expression) => { self.parse_expression(expression, source) @@ -272,23 +301,61 @@ impl<'a> Environment<'a> { } } + fn parse_list_token(&mut self, list: &ListToken, source: &SourceSpan) -> Option<Tracked<IntermediateList>> { + match list { + ListToken::StringLiteral(literal) => { + Some(Tracked::from(literal.chars.clone(), source.clone())) + } + ListToken::ListLiteral(literal) => { + let mut integers = Vec::new(); + for token in literal { + let integer = self.parse_integer_token(&token.value, &token.source)?; + integers.push(integer.value); + } + Some(Tracked::from(integers, source.clone())) + } + ListToken::Invocation(invocation) => { + self.parse_list_invocation(&invocation, source) + } + } + } + fn 
parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { - match self.parse_invocation(invocation, source)?.value { - IntermediateValue::Integer(integer) => Some(integer), - IntermediateValue::Block(_) => { + let result = self.parse_invocation(invocation, source)?; + match result.value { + IntermediateValue::Integer(integer) => { + let source = integer.source.clone(); + Some(Tracked::from(integer, source)) + } + IntermediateValue::Block(_) | IntermediateValue::List(_) => { let error = IntermediateError::ExpectedInteger; - self.errors.push(Tracked::from(error, source.clone())); + self.errors.push(Tracked::from(error, result.source)); None } } } fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> { - match self.parse_invocation(invocation, source)?.value { + let result = self.parse_invocation(invocation, source)?; + let source = result.source; + match result.value { IntermediateValue::Block(tokens) => Some(tokens), - IntermediateValue::Integer(_) => { + IntermediateValue::Integer(_) | IntermediateValue::List(_) => { let error = IntermediateError::ExpectedBlock; - self.errors.push(Tracked::from(error, source.clone())); + self.errors.push(Tracked::from(error, source)); + None + } + } + } + + fn parse_list_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateList>> { + let result = self.parse_invocation(invocation, source)?; + let source = result.source; + match result.value { + IntermediateValue::List(literal) => Some(Tracked::from(literal, source)), + IntermediateValue::Integer(_) | IntermediateValue::Block(_) => { + let error = IntermediateError::ExpectedList; + self.errors.push(Tracked::from(error, source)); None } } @@ -296,7 +363,13 @@ impl<'a> Environment<'a> { fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { let 
received_count = invocation.arguments.len(); - if let Some(argument) = self.arguments.get(&invocation.name) { + let signature = SymbolSignature { + name: invocation.name.clone(), + arg_count: invocation.arguments.len(), + }; + if let Some(argument) = self.env().arguments.get(&invocation.name) { + // This invocation is a macro argument. + // Check that no arguments were provided. if received_count != 0 { let error = IntermediateError::IncorrectArgumentCount(0, received_count); self.errors.push(Tracked::from(error, source.clone())); @@ -304,20 +377,24 @@ impl<'a> Environment<'a> { } else { Some(argument.clone()) } - } else if let Some(label_name) = self.label_names.get(&invocation.name) { + } else if let Some(label) = self.label_names.get(&signature) { + // This invocation is a label reference. + // Check that no arguments were provided. if received_count != 0 { let error = IntermediateError::IncorrectArgumentCount(0, received_count); self.errors.push(Tracked::from(error, source.clone())); None } else { - let name = self.tag_name(label_name); - let tracked = Tracked::from(name, label_name.source.clone()); - let integer = IntermediateInteger::LabelReference(tracked); - let tracked = Tracked::from(integer, source.clone()); - let value = IntermediateValue::Integer(tracked); - Some(Tracked::from(value, source.clone())) + let tagged_name = self.tag_name(&signature.name); + let address = self.label_addresses.get(&tagged_name) + .and_then(|a| Some(a.current)).or(Some(0)).unwrap(); + let tracked = Tracked::from(address as isize, label.source.clone()); + let integer = IntermediateValue::Integer(tracked); + Some(Tracked::from(integer, source.clone())) } - } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { + } else if let Some(definition) = self.macro_definitions.get(&signature) { + // This invocation is a macro reference. + let definition = definition.clone(); // Check that the correct number of arguments were provided. 
let expected_count = definition.arguments.len(); if received_count != expected_count { @@ -328,53 +405,14 @@ impl<'a> Environment<'a> { // Gather and type-check the provided arguments. let mut arguments = Vec::new(); for (i, argument) in invocation.arguments.iter().enumerate() { - let received_type = match &argument.value { - InvocationArgument::String(string) => { - let mut values = Vec::new(); - for c in &string.chars { - let integer = IntermediateInteger::Integer(**c); - let tracked = Tracked::from(integer, c.source.clone()); - values.push(IntermediateValue::Integer(tracked)); - } - arguments.push(RepeatedArgument::List(values)); - ArgumentType::Integer - } - InvocationArgument::IntegerToken(integer) => { - let tracked = self.parse_integer_token(&integer, &argument.source)?; - let value = IntermediateValue::Integer(tracked); - arguments.push(RepeatedArgument::Loop(value)); - ArgumentType::Integer - } - InvocationArgument::BlockToken(block) => { - let tokens = self.parse_block_token(&block, &argument.source); - let value = IntermediateValue::Block(tokens); - arguments.push(RepeatedArgument::Loop(value)); - ArgumentType::Block - } - InvocationArgument::Invocation(invocation) => { - let value = self.parse_invocation(&invocation, &argument.source)?; - let received_type = match &value.value { - IntermediateValue::Integer(_) => ArgumentType::Integer, - IntermediateValue::Block(_) => ArgumentType::Block, - }; - arguments.push(RepeatedArgument::Loop(value.value)); - received_type - } - }; - let expected_type = match received_type { - ArgumentType::Integer => ArgumentType::Block, - ArgumentType::Block => ArgumentType::Integer, - }; - if definition.arguments[i].variant != received_type { - let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); - self.errors.push(Tracked::from(error, argument.source.clone())); - return None; - } + let expected_type = definition.arguments[i].variant; + let received_value = 
self.parse_invocation_argument(argument, expected_type)?; + arguments.push(received_value); } // Invoke the invocation multiple times. - let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1); + let invocations = arguments.iter().map(|a| a.len()).max().unwrap_or(1); let mut values = Vec::new(); - for i in 0..repetitions { + for i in 0..invocations { // Construct an argument map for this invocation. let mut argument_map = IndexMap::new(); for (a, argument) in arguments.iter().enumerate() { @@ -400,24 +438,29 @@ impl<'a> Environment<'a> { unreachable!("Uncaught duplicate macro argument name '{name}'"); }; } - let mut env = Environment { - label_names: &self.label_names, - macro_names: &self.macro_names, - macro_definitions: &self.macro_definitions, - arguments: argument_map, - errors: &mut self.errors, - id: next_id!(), - }; - values.push(env.parse_macro_definition_body(&definition.body, source)?); + // Test the current recursion depth. + if self.environment_stack.len() == MAX_RECURSION_DEPTH { + let error = IntermediateError::MaxRecursionDepthExceeded; + self.errors.push(Tracked::from(error, source.clone())); + return None; + } + // Invoke the macro once. + let env = Environment { arguments: argument_map, id: next_id!() }; + self.environment_stack.push(env); + let result = self.parse_macro_definition_body(&definition.body, source); + self.environment_stack.pop().unwrap(); + values.push(result?); } if values.len() == 1 { + // If the macro was invoked once, return the value. values.pop() } else { - // Flatten all values into a list of block tokens. + // If the macro was invoked multiple times, create a list of + // block tokens from the returned values. 
let mut block = Vec::new(); for value in values { match value.value { - IntermediateValue::Integer(_) => { + IntermediateValue::Integer(_) | IntermediateValue::List(_) => { let error = IntermediateError::ExpectedBlock; self.errors.push(Tracked::from(error, value.source)); return None; @@ -430,95 +473,135 @@ impl<'a> Environment<'a> { Some(Tracked::from(IntermediateValue::Block(block), source.clone())) } } - } else if let Some(macro_name) = self.macro_names.get(&invocation.name) { - let error = IntermediateError::InvocationBeforeDefinition; - let source = source.clone().wrap(macro_name.source.clone()); - self.errors.push(Tracked::from(error, source)); - None } else { - unreachable!("Uncaught unresolved reference '{}'", invocation.name); + unreachable!("Uncaught unresolved reference '{signature}'"); } } - fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { - let mut intermediate = Vec::new(); - let mut error = false; + fn parse_invocation_argument(&mut self, argument: &Tracked<InvocationArgument>, expected_type: ArgumentType) -> Option<RepeatedArgument> { + let source = &argument.source; + let (received_value, received_type) = match &argument.value { + InvocationArgument::ListToken(list) => { + let list = self.parse_list_token(list, source)?; + self.parse_invocation_list_argument(list, expected_type)? 
+ } + InvocationArgument::IntegerToken(integer) => { + let tracked = self.parse_integer_token(&integer, &argument.source)?; + let value = IntermediateValue::Integer(tracked.value); + (RepeatedArgument::Loop(value), ArgumentType::Integer) + } + InvocationArgument::BlockToken(block) => { + let tokens = self.parse_block_token(&block, &argument.source); + let value = IntermediateValue::Block(tokens); + (RepeatedArgument::Loop(value), ArgumentType::Block) + } + InvocationArgument::Invocation(invocation) => { + let value = self.parse_invocation(&invocation, &argument.source)?; + match value.value { + IntermediateValue::Integer(_) => + (RepeatedArgument::Loop(value.value), ArgumentType::Integer), + IntermediateValue::Block(_) => + (RepeatedArgument::Loop(value.value), ArgumentType::Block), + IntermediateValue::List(list) => + self.parse_invocation_list_argument(Tracked::from(list, value.source), expected_type)? + } + } + }; + if expected_type != received_type { + let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); + self.errors.push(Tracked::from(error, argument.source.clone())); + return None; + } + return Some(received_value); + } + + fn parse_invocation_list_argument(&mut self, list: Tracked<IntermediateList>, expected_type: ArgumentType) -> Option<(RepeatedArgument, ArgumentType)> { + if let ArgumentType::Integer = expected_type { + let mut values = Vec::new(); + for value in &list.value { + values.push(IntermediateValue::Integer(value.clone())); + } + Some((RepeatedArgument::List(values), ArgumentType::Integer)) + } else { + let value = IntermediateValue::List(list.value); + Some((RepeatedArgument::Loop(value), ArgumentType::List)) + } + } + fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { + let mut stack = ExpressionStack::new(); for token in &expression.tokens { let source = &token.source; match &token.value { - ExpressionToken::IntegerToken(integer) => { - let 
Some(integer) = self.parse_integer_token(integer, source) else { - error = true; continue; - }; - let token = IntermediateExpressionToken::Integer(integer.value); - intermediate.push(Tracked::from(token, integer.source)); + ExpressionToken::IntegerToken(token) => { + let integer = self.parse_integer_token(token, source)?; + stack.push(IntermediateValue::Integer(integer.value)); } - ExpressionToken::Operator(operator) => { - let token = IntermediateExpressionToken::Operator(*operator); - intermediate.push(Tracked::from(token, source.clone())); + ExpressionToken::ListToken(token) => { + let list = self.parse_list_token(token, source)?; + stack.push(IntermediateValue::List(list.value)); } ExpressionToken::Invocation(invocation) => { - let Some(integer) = self.parse_integer_invocation(invocation, source) else { - error = true; continue; - }; - let token = IntermediateExpressionToken::Integer(integer.value); - intermediate.push(Tracked::from(token, integer.source)); + if let Some(value) = self.parse_invocation(invocation, source) { + if let IntermediateValue::Block(_) = &value.value { + let error = IntermediateError::InvalidBlockInExpression; + let new_source = source.clone().wrap(value.source.clone()); + self.errors.push(Tracked::from(error, new_source)); + return None; + } + stack.push(value.value); + } + } + ExpressionToken::Operator(operator) => { + if let Err(expr_error) = stack.apply(*operator, source) { + let error = IntermediateError::ExpressionError(expr_error); + self.errors.push(Tracked::from(error, source.clone())); + return None; + } } } } - - if error { return None; } - let expression = IntermediateExpression { tokens: intermediate }; - let integer = IntermediateInteger::Expression(expression); - Some(Tracked::from(integer, source.clone())) + match stack.pull_result() { + Ok(value) => { + let tracked = Tracked::from(value, source.clone()); + Some(Tracked::from(tracked, source.clone())) + } + Err(expr_error) => { + let tracked = Tracked::from(expr_error, 
source.clone()); + let error = IntermediateError::ExpressionError(tracked); + self.errors.push(Tracked::from(error, source.clone())); + None + } + } } } + macro_rules! return_some { ($option:expr) => { if $option.is_some() { return $option; } }; } -fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option<SourceSpan> { - match integer { - IntermediateInteger::Integer(_) => None, - IntermediateInteger::LabelReference(label) => Some(label.source.clone()), - IntermediateInteger::Expression(expr) => expression_contains_label_reference(expr), - } -} - -fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option<SourceSpan> { - for token in &expression.tokens { - if let IntermediateExpressionToken::Integer(integer) = &token.value { - if let Some(child) = integer_contains_label_reference(&integer) { - return Some(token.source.clone().wrap(child)); - } - } - } - return None; -} +// Check if a block token contains a label definition. fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option<SourceSpan> { match &block { - BlockToken::LabelDefinition(_) => { - return Some(source.clone()); - } - BlockToken::Invocation(invocation) => { - return_some!(invocation_contains_label_definition(invocation)) - } - BlockToken::Block(blocks) => { + BlockToken::LabelDefinition(_) => + return Some(source.clone()), + BlockToken::Invocation(invocation) => + return_some!(invocation_contains_label_definition(invocation)), + BlockToken::Block(blocks) => for block in blocks { return_some!(block_contains_label_definition(block, &block.source)) - } - } + }, _ => (), } return None; } +// Check if the arguments passed to an invocation contain a label definition. 
fn invocation_contains_label_definition(invocation: &Invocation) -> Option<SourceSpan> { for argument in &invocation.arguments { match &argument.value { @@ -533,45 +616,3 @@ fn invocation_contains_label_definition(invocation: &Invocation) -> Option<Sourc } return None; } - -fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { - match integer { - IntermediateInteger::Integer(value) => Ok(*value), - IntermediateInteger::LabelReference(name) => - unreachable!("Uncaught label reference '{name}' in condition predicate or pinned address value"), - IntermediateInteger::Expression(expr) => evaluate_expression(expr, source), - } -} - -fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { - let mut stack = ExpressionStack::new(); - for token in &expression.tokens { - let source = &token.source; - match &token.value { - IntermediateExpressionToken::Integer(integer) => match integer { - IntermediateInteger::Integer(value) => { - stack.push(*value); - } - IntermediateInteger::Expression(expression) => { - stack.push(evaluate_expression(&expression, source)?); - } - IntermediateInteger::LabelReference(name) => { - unreachable!("Uncaught label reference '{name}' in condition predicate"); - } - } - IntermediateExpressionToken::Operator(operator) => { - if let Err(stack_error) = stack.apply(*operator, source) { - let error = IntermediateError::StackError(stack_error); - return Err(Tracked::from(error, token.source.clone())); - } - } - } - } - match stack.pull_result() { - Ok(value) => Ok(value), - Err(err) => { - let error = Tracked::from(err, source.clone()); - Err(Tracked::from(IntermediateError::StackError(error), source.clone())) - } - } -} diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs index a09581e..d796299 100644 --- a/src/stages/intermediate_tokens.rs +++ b/src/stages/intermediate_tokens.rs @@ -3,51 
+3,47 @@ use crate::*; #[derive(Clone)] pub enum IntermediateToken { - Word(IntermediateWord), - PinnedAddress(Tracked<usize>), - LabelDefinition(String), + Word(Word), + PinnedAddress(usize), } #[derive(Clone)] -pub struct IntermediateWord { +pub struct Word { pub value: usize, /// Width of the word in bits. pub width: u32, - pub fields: Vec<Tracked<IntermediateField>>, } -#[derive(Clone)] -pub struct IntermediateField { - pub value: Tracked<IntermediateInteger>, - /// Width of the field in bits. - pub width: u32, - /// Number of bits to the right of the field in the word. - pub shift: u32, -} - -#[derive(Clone)] -pub enum IntermediateInteger { - Integer(isize), - Expression(IntermediateExpression), - LabelReference(Tracked<String>), -} - -#[derive(Clone)] -pub struct IntermediateExpression { - pub tokens: Vec<Tracked<IntermediateExpressionToken>>, +impl std::fmt::Display for Word { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + if self.width == 0 { + write!(f, "0") + } else { + for i in (0..self.width).rev() { + let is_first_bit = i+1 == self.width; + if !is_first_bit && (i+1) % 4 == 0 { + write!(f, "_")?; + } + match (self.value >> i) & 1 { + 0 => write!(f, "0")?, + _ => write!(f, "1")?, + } + } + Ok(()) + } + } } #[derive(Clone)] -pub enum IntermediateExpressionToken { +pub enum IntermediateValue { Integer(IntermediateInteger), - Operator(Operator), + List(IntermediateList), + Block(IntermediateBlock), } -#[derive(Clone)] -pub enum IntermediateValue { - Integer(Tracked<IntermediateInteger>), - Block(Vec<Tracked<IntermediateToken>>), -} +pub type IntermediateInteger = Tracked<isize>; +pub type IntermediateList = Vec<Tracked<isize>>; +pub type IntermediateBlock = Vec<Tracked<IntermediateToken>>; pub enum RepeatedArgument { Loop(IntermediateValue), @@ -66,16 +62,22 @@ impl RepeatedArgument { pub enum IntermediateError { ExpectedInteger, ExpectedBlock, + ExpectedList, ListExhausted, - LabelReferenceInConditionPredicate, 
LabelDefinitionInConditionBody, LabelReferenceInPinnedAddress, - StackError(Tracked<StackError>), - InvocationBeforeDefinition, + InvalidBlockInExpression, + ExpressionError(Tracked<ExpressionError>), /// expected, received IncorrectArgumentCount(usize, usize), /// expected, received IncorrectArgumentType(ArgumentType, ArgumentType), + /// pinned, real + PinnedAddressBacktrack(usize, usize), + /// expected, received + ValueTooWide(u32, u32), + LabelNeverStabilised(String), + MaxRecursionDepthExceeded, } pub fn report_intermediate_errors(errors: &[Tracked<IntermediateError>], source_code: &str) { @@ -91,22 +93,32 @@ fn report_intermediate_error(error: &Tracked<IntermediateError>, source_code: &s "An integer value was expected here", IntermediateError::ExpectedBlock => "A block value was expected here", + IntermediateError::ExpectedList => + "A string value was expected here", IntermediateError::ListExhausted => - "This string is shorter than another string passed to the same invocation", - IntermediateError::LabelReferenceInConditionPredicate => - "The predicate of a conditional block cannot contain a label reference", + "This list is shorter than another list passed to the same invocation", IntermediateError::LabelDefinitionInConditionBody => "The body of a conditional block cannot contain a label definition", IntermediateError::LabelReferenceInPinnedAddress => "The value of a pinned address cannot contain a label reference", - IntermediateError::StackError(stack_error) => { - report_stack_error(stack_error, source_code); return; }, - IntermediateError::InvocationBeforeDefinition => - &format!("Macro cannot be invoked before it has been defined"), + IntermediateError::InvalidBlockInExpression => + "Expression cannot contain an invocation that expands to a block value", + + IntermediateError::ExpressionError(expression_error) => { + report_expression_error(expression_error, source_code); return; }, IntermediateError::IncorrectArgumentCount(expected, received) => 
&format!("Expected {expected} arguments, but received {received} instead"), IntermediateError::IncorrectArgumentType(expected, received) => &format!("Expected {expected} value but received {received} value instead"), + IntermediateError::PinnedAddressBacktrack(pinned, real) => + &format!("Cannot pin to address {pinned} when address is already {real}"), + IntermediateError::ValueTooWide(expected, received) => + &format!("Field is {expected} bits wide, but received a value that is {received} bits wide"), + + IntermediateError::LabelNeverStabilised(name) => + &format!("Label '{name}' never stabilised"), + IntermediateError::MaxRecursionDepthExceeded => + &format!("Macro invocation exceededs the maximum recursion depth of {MAX_RECURSION_DEPTH}"), }; report_source_issue(LogLevel::Error, &context, message); @@ -117,33 +129,10 @@ pub fn print_intermediate_token(i: usize, token: &IntermediateToken) { match token { IntermediateToken::Word(word) => { indent!(i, "Word({:>0w$b})", word.value, w = word.width as usize); - for field in &word.fields { - print_intermediate_integer(i+1, &field.value.value); - } } - IntermediateToken::PinnedAddress(address) => - indent!(i, "PinnedAddress({address})"), - IntermediateToken::LabelDefinition(name) => - indent!(i, "LabelDefinition({name})"), - } -} - -fn print_intermediate_integer(i: usize, integer: &IntermediateInteger) { - match integer { - IntermediateInteger::Integer(value) => - indent!(i, "Integer({value})"), - IntermediateInteger::LabelReference(name) => - indent!(i, "LabelReference({name})"), - IntermediateInteger::Expression(expression) => { - indent!(i, "Expression"); - for token in &expression.tokens { - match &token.value { - IntermediateExpressionToken::Integer(integer) => - print_intermediate_integer(i+1, integer), - IntermediateExpressionToken::Operator(operator) => - indent!(i+1, "Operator({operator})"), - } - } + IntermediateToken::PinnedAddress(address) => { + indent!(i, "PinnedAddress({address})"); } } } + diff --git 
a/src/stages/semantic.rs b/src/stages/semantic.rs index 3c98192..96cf7af 100644 --- a/src/stages/semantic.rs +++ b/src/stages/semantic.rs @@ -45,7 +45,7 @@ impl SemanticParser { match symbol { ScopedSymbol::Global(name) => match &self.namespace { Namespace::Macro(_) => { - let error = SemanticError::LabelInMacroDefinition; + let error = SemanticError::GlobalLabelInMacroDefinition; self.errors.push(Tracked::from(error, source.to_owned())); None } @@ -62,7 +62,7 @@ impl SemanticParser { Some(format!("{label_ns}/{name}")) } Namespace::None => { - let error = SemanticError::SublabelWithoutNamespace; + let error = SemanticError::LocalLabelWithoutNamespace; self.errors.push(Tracked::from(error, source.to_owned())); None } @@ -137,13 +137,18 @@ impl SemanticParser { for token in tokens { match token { MacroDefinitionBody::Integer(integer) => { - let error = SemanticError::ExpectedInteger(location); + let error = SemanticError::ExpectedBlock(location); let tracked = Tracked::from(error, integer.source); self.errors.push(tracked); } MacroDefinitionBody::Block(mut tokens) => { block_tokens.append(&mut tokens); } + MacroDefinitionBody::List(list) => { + let error = SemanticError::ExpectedBlock(location); + let tracked = Tracked::from(error, list.source); + self.errors.push(tracked); + } MacroDefinitionBody::Invocation(invocation) => { // Convert invocation to a block invocation. 
let token = BlockToken::Invocation(invocation.value); @@ -178,10 +183,10 @@ impl SemanticParser { let tracked = Tracked::from(token, source); Some(MacroDefinitionBody::Integer(tracked)) } - SyntacticToken::StringLiteral(_) => { - let error = SemanticError::MisplacedStringLiteral; - self.errors.push(Tracked::from(error, source)); - None + SyntacticToken::StringLiteral(value) => { + let token = ListToken::StringLiteral(value); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::List(tracked)) } SyntacticToken::WordTemplate(word_template) => { let token = BlockToken::WordTemplate(word_template); @@ -200,9 +205,16 @@ impl SemanticParser { let mut parser = SemanticParser::from(tokens, self.namespace.clone()); let expression = parser.parse_expression(); self.pull_from(parser); - let token = IntegerToken::Expression(expression); - let tracked = Tracked::from(token, source); - Some(MacroDefinitionBody::Integer(tracked)) + if expression.is_list() { + let list = expression.to_list(); + let token = ListToken::ListLiteral(list); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::List(tracked)) + } else { + let token = IntegerToken::Expression(expression); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } } SyntacticToken::Symbol(symbol) => { let name = self.resolve_symbol_name(symbol, &source)?; @@ -260,6 +272,11 @@ impl SemanticParser { self.errors.push(Tracked::from(error, token.source)); None } + MacroDefinitionBody::List(list) => { + let error = SemanticError::ExpectedInteger(location); + self.errors.push(Tracked::from(error, list.source)); + None + } } } @@ -281,25 +298,52 @@ impl SemanticParser { self.errors.push(Tracked::from(error, integer.source)); None } + MacroDefinitionBody::List(list) => { + let error = SemanticError::ExpectedBlock(location); + self.errors.push(Tracked::from(error, list.source)); + None + } } } - /// Parse the remaining syntactic tokens as the contents of a 
block. - fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> { - let mut tokens = Vec::new(); - while !self.syntactic.is_empty() { - if let Some(token) = self.pull_block_token(SemanticLocation::BlockLiteral) { - tokens.push(token); + /// Attempt to pull a token that can be used in an expression. + fn pull_expression_token(&mut self) -> Option<Tracked<ExpressionToken>> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + let token = tokens.pop().unwrap(); + let error = SemanticError::InvalidBlockInExpression; + self.errors.push(Tracked::from(error, token.source)); + None + } + MacroDefinitionBody::Invocation(invocation) => { + // Attempt to parse the invocation as an operator. + if invocation.arguments.is_empty() { + if let Some(operator) = Operator::from_str(&invocation.name) { + let expr = ExpressionToken::Operator(operator); + return Some(Tracked::from(expr, invocation.source)) + } + } + // Parse invocation as an invocation. + let expr = ExpressionToken::Invocation(invocation.value); + Some(Tracked::from(expr, invocation.source)) + } + MacroDefinitionBody::Integer(integer) => { + let expr = ExpressionToken::IntegerToken(Box::new(integer.value)); + Some(Tracked::from(expr, integer.source)) + } + MacroDefinitionBody::List(list) => { + let expr = ExpressionToken::ListToken(list.value); + Some(Tracked::from(expr, list.source)) } } - tokens } - /// Parse the remaining syntactic tokens as a list of integer tokens. - fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> { + /// Parse the remaining syntactic tokens as the contents of a block. 
+ fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> { let mut tokens = Vec::new(); while !self.syntactic.is_empty() { - if let Some(token) = self.pull_integer_token(location) { + if let Some(token) = self.pull_block_token(SemanticLocation::BlockLiteral) { tokens.push(token); } } @@ -309,34 +353,8 @@ impl SemanticParser { /// Parse the remaining syntactic tokens as the contents of an expression. fn parse_expression(&mut self) -> Expression { let mut tokens = Vec::new(); - for token in self.parse_integer_list(SemanticLocation::Expression) { - let source = token.source; - match token.value { - IntegerToken::IntegerLiteral(value) => { - let integer = Box::new(IntegerToken::IntegerLiteral(value)); - let token = ExpressionToken::IntegerToken(integer); - tokens.push(Tracked::from(token, source)); - } - IntegerToken::Expression(expression) => { - let integer = Box::new(IntegerToken::Expression(expression)); - let token = ExpressionToken::IntegerToken(integer); - tokens.push(Tracked::from(token, source)); - } - IntegerToken::Invocation(invocation) => { - // Parse the invocation as an operator instead. - if invocation.arguments.is_empty() { - if let Some(operator) = Operator::from_str(&invocation.name) { - let token = ExpressionToken::Operator(operator); - tokens.push(Tracked::from(token, source)); - continue; - } - } - // Parse the invocation as an invocation. 
- let integer = Box::new(IntegerToken::Invocation(invocation)); - let token = ExpressionToken::IntegerToken(integer); - tokens.push(Tracked::from(token, source)); - } - } + while let Some(token) = self.pull_expression_token() { + tokens.push(token); } Expression { tokens } } @@ -357,7 +375,8 @@ impl SemanticParser { let source = token.source; match token.value { SyntacticToken::StringLiteral(string_literal) => { - let argument = InvocationArgument::String(string_literal); + let string = ListToken::StringLiteral(string_literal); + let argument = InvocationArgument::ListToken(string); Some(Tracked::from(argument, source)) } SyntacticToken::IntegerLiteral(value) => { @@ -369,9 +388,16 @@ impl SemanticParser { let mut parser = SemanticParser::from(tokens, self.namespace.clone()); let expression = parser.parse_expression(); self.pull_from(parser); - let integer = IntegerToken::Expression(expression); - let argument = InvocationArgument::IntegerToken(integer); - Some(Tracked::from(argument, source)) + if expression.is_list() { + let list = expression.to_list(); + let token = ListToken::ListLiteral(list); + let argument = InvocationArgument::ListToken(token); + Some(Tracked::from(argument, source)) + } else { + let integer = IntegerToken::Expression(expression); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } } SyntacticToken::BlockLiteral(tokens) => { let mut parser = SemanticParser::from(tokens, self.namespace.clone()); @@ -415,11 +441,13 @@ impl SemanticParser { let token = self.syntactic.pop()?; let source = token.source; match token.value { + // Integer-type argument. SyntacticToken::Symbol(ScopedSymbol::Global(name)) => { let variant = ArgumentType::Integer; let definition = ArgumentDefinition { name, variant }; return Some(Tracked::from(definition, source)); } + // Block-type argument. 
SyntacticToken::BlockLiteral(mut tokens) => { if tokens.len() == 1 { let token = tokens.pop().unwrap(); @@ -430,6 +458,17 @@ impl SemanticParser { } } } + // List-type argument. + SyntacticToken::Expression(mut tokens) => { + if tokens.len() == 1 { + let token = tokens.pop().unwrap(); + if let SyntacticToken::Symbol(ScopedSymbol::Global(name)) = token.value { + let variant = ArgumentType::List; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + } + } _ => (), }; let error = SemanticError::InvalidArgumentDefinition; diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs index dfbea1a..69c6c98 100644 --- a/src/stages/semantic_tokens.rs +++ b/src/stages/semantic_tokens.rs @@ -6,21 +6,24 @@ pub enum SemanticToken { BlockToken(BlockToken), } +#[derive(Clone)] pub struct MacroDefinition { pub name: Tracked<String>, pub arguments: Vec<Tracked<ArgumentDefinition>>, pub body: MacroDefinitionBody, } +#[derive(Clone)] pub struct ArgumentDefinition { pub name: String, pub variant: ArgumentType, } -#[derive(PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum ArgumentType { Integer, Block, + List, } impl std::fmt::Display for ArgumentType { @@ -28,37 +31,46 @@ impl std::fmt::Display for ArgumentType { match self { ArgumentType::Integer => write!(f, "an integer"), ArgumentType::Block => write!(f, "a block"), + ArgumentType::List => write!(f, "a list"), } } } +#[derive(Clone)] pub enum MacroDefinitionBody { Integer(Tracked<IntegerToken>), Block(Vec<Tracked<BlockToken>>), + List(Tracked<ListToken>), Invocation(Tracked<Invocation>), } +#[derive(Clone)] pub struct ConditionalBlock { pub predicate: Tracked<IntegerToken>, pub body: Tracked<BlockToken>, } +#[derive(Clone)] pub enum IntegerToken { IntegerLiteral(isize), Expression(Expression), Invocation(Invocation), } +#[derive(Clone)] pub struct Expression { pub tokens: Vec<Tracked<ExpressionToken>>, } +#[derive(Clone)] pub enum ExpressionToken 
{ IntegerToken(Box<IntegerToken>), + ListToken(ListToken), Invocation(Invocation), Operator(Operator), } +#[derive(Clone)] pub enum BlockToken { LabelDefinition(String), PinnedAddress(Tracked<IntegerToken>), @@ -68,32 +80,78 @@ pub enum BlockToken { Invocation(Invocation), } +#[derive(Clone)] +pub enum ListToken { + StringLiteral(StringLiteral), + ListLiteral(Vec<Tracked<IntegerToken>>), + Invocation(Invocation), +} + +#[derive(Clone)] pub struct Invocation { pub name: String, pub arguments: Vec<Tracked<InvocationArgument>>, } +#[derive(Clone)] pub enum InvocationArgument { - String(StringLiteral), IntegerToken(IntegerToken), BlockToken(BlockToken), + ListToken(ListToken), Invocation(Invocation), } + +impl Expression { + pub fn is_list(&self) -> bool { + self.tokens.iter().all(|t| { + match t.value { + ExpressionToken::IntegerToken(_) => true, + ExpressionToken::Invocation(_) => true, + ExpressionToken::ListToken(_) => false, + ExpressionToken::Operator(_) => false, + } + }) + } + + pub fn to_list(self) -> Vec<Tracked<IntegerToken>> { + let mut list = Vec::new(); + for token in self.tokens { + let source = token.source; + match token.value { + ExpressionToken::IntegerToken(token) => { + let tracked = Tracked::from(*token, source); + list.push(tracked); + } + ExpressionToken::Invocation(invocation) => { + let token = IntegerToken::Invocation(invocation); + list.push(Tracked::from(token, source)); + } + ExpressionToken::ListToken(_) => unreachable!( + "Could not convert expression containing a list token to a list"), + ExpressionToken::Operator(_) => unreachable!( + "Could not convert expression containing an operator to a list"), + }; + } + return list; + } +} + + pub enum SemanticError { - MisplacedStringLiteral, - MisplacedListLiteral, MisplacedSeparator, MisplacedMacroDefinition, ExpectedInteger(SemanticLocation), ExpectedBlock(SemanticLocation), + ExpectedString(SemanticLocation), InvalidArgumentDefinition, InvalidInvocationArgument, + InvalidBlockInExpression, 
- LabelInMacroDefinition, - SublabelWithoutNamespace, + GlobalLabelInMacroDefinition, + LocalLabelWithoutNamespace, LocalSymbolWithoutNamespace, } @@ -140,31 +198,31 @@ pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &s fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { let context = Context { source_code: &source_code, source: &error.source }; let message = match &error.value { - SemanticError::MisplacedStringLiteral => - "A string literal can only be used as an invocation argument", - SemanticError::MisplacedListLiteral => - "A list literal can only be used as an invocation argument", SemanticError::MisplacedSeparator => - "A separator can only be used to construct an argument list", + "Separators can only be used for constructing an argument list", SemanticError::MisplacedMacroDefinition => - "A macro definition must be used at the outermost level of the program", + "Macro definition must be placed at the outermost level of a program", SemanticError::ExpectedInteger(location) => &format!("An integer value was expected {location}"), SemanticError::ExpectedBlock(location) => &format!("A block value was expected {location}"), + SemanticError::ExpectedString(location) => + &format!("A string value was expected {location}"), SemanticError::InvalidArgumentDefinition => - "Argument definitions must be in the form 'name' or '{{name}}'", + "Argument definition must take one of the following forms: name, {name}, or [name]", SemanticError::InvalidInvocationArgument => "This token cannot be used in an invocation argument", + SemanticError::InvalidBlockInExpression => + "Expression cannot contain a block token", - SemanticError::LabelInMacroDefinition => - &format!("Only sublabels can be defined inside macro definitions"), - SemanticError::SublabelWithoutNamespace => - &format!("Sublabel was not defined inside a macro definition or after a label"), + SemanticError::GlobalLabelInMacroDefinition => + &format!("Macro 
definition cannot contain a global label"), + SemanticError::LocalLabelWithoutNamespace => + &format!("Local label must be placed inside a macro definition or after a global label"), SemanticError::LocalSymbolWithoutNamespace => - &format!("Local symbol was not defined inside a macro definition or after a label"), + &format!("Local symbol must be placed inside a macro definition or after a global label"), }; report_source_issue(LogLevel::Error, &context, message); @@ -185,12 +243,15 @@ pub fn print_semantic_token(i: usize, token: &SemanticToken) { MacroDefinitionBody::Block(tokens) => { print_block(i+1, tokens); } + MacroDefinitionBody::List(list) => { + print_list_token(i+1, list); + } MacroDefinitionBody::Invocation(invocation) => { print_invocation(i+1, invocation); } } } - SemanticToken::BlockToken(block) => print_block_token(0, block), + SemanticToken::BlockToken(block) => print_block_token(i, block), } } @@ -202,6 +263,9 @@ fn print_argument_definition(i: usize, argument: &ArgumentDefinition) { ArgumentType::Block => { indent!(i, "Argument({}, block)", argument.name) } + ArgumentType::List => { + indent!(i, "Argument({}, list)", argument.name) + } } } @@ -249,8 +313,8 @@ fn print_invocation(i: usize, invocation: &Invocation) { fn print_invocation_argument(i: usize, argument: &InvocationArgument) { match &argument { - InvocationArgument::String(string_literal) => { - indent!(i, "String({string_literal})") + InvocationArgument::ListToken(list) => { + print_list_token(i, list) } InvocationArgument::IntegerToken(integer) => { print_integer_token(i, integer) @@ -278,6 +342,23 @@ fn print_integer_token(i: usize, integer: &IntegerToken) { } } +fn print_list_token(i: usize, string: &ListToken) { + match string { + ListToken::StringLiteral(string_literal) => { + indent!(i, "StringLiteral({string_literal})") + } + ListToken::ListLiteral(integers) => { + indent!(i, "ListLiteral"); + for integer in integers { + print_integer_token(i+1, integer); + } + } + 
ListToken::Invocation(invocation) => { + print_invocation(i, invocation) + } + } +} + fn print_expression(i: usize, expression: &Expression) { indent!(i, "Expression"); for token in &expression.tokens { @@ -285,6 +366,9 @@ fn print_expression(i: usize, expression: &Expression) { ExpressionToken::IntegerToken(integer) => { print_integer_token(i+1, &integer) } + ExpressionToken::ListToken(list) => { + print_list_token(i+1, &list) + } ExpressionToken::Invocation(invocation) => { print_invocation(i+1, &invocation); } diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs index 14f8815..227b399 100644 --- a/src/stages/syntactic.rs +++ b/src/stages/syntactic.rs @@ -4,10 +4,10 @@ use assembler::Tokeniser; pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { - parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path)) + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), false) } -fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +fn parse_syntactic_from_tokeniser(mut t: Tokeniser, in_macro: bool) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']); let mut tokens = Vec::new(); let mut errors = Vec::new(); @@ -77,7 +77,7 @@ fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<Syntac t.mark_child(); if let Some(_) = t.track_until(is_matching!('{','}')) { let child = t.tokenise_child_span(); - match parse_syntactic_from_tokeniser(child) { + match parse_syntactic_from_tokeniser(child, in_macro) { Ok(tokens) => SyntacticToken::BlockLiteral(tokens), Err(mut parse_errors) => { errors.append(&mut parse_errors); @@ -93,7 +93,7 @@ fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<Syntac t.mark_child(); if let Some(_) = 
t.track_until(is_matching!('[',']')) { let child = t.tokenise_child_span(); - match parse_syntactic_from_tokeniser(child) { + match parse_syntactic_from_tokeniser(child, in_macro) { Ok(tokens) => SyntacticToken::Expression(tokens), Err(mut parse_errors) => { errors.append(&mut parse_errors); @@ -121,11 +121,14 @@ fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<Syntac } '%' => { let name = t.eat_token(); + if in_macro { + err!(SyntacticError::MacroDefinitionInMacroDefinition); + } let source = t.get_source(); t.mark_child(); if let Some(_) = t.track_until(is_any!(';')) { let child = t.tokenise_child_span(); - match parse_syntactic_from_tokeniser(child) { + match parse_syntactic_from_tokeniser(child, true) { Ok(body) => { let name = Tracked::from(name, source); let definition = SyntacticMacroDefinition { name, body }; @@ -168,22 +171,33 @@ fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<Syntac c => { let token = format!("{c}{}", t.eat_token()); - if let Some(hex_string) = token.strip_prefix("0x") { + let (stripped, neg) = match token.strip_prefix('-') { + Some(stripped) => (stripped, true), + None => (token.as_str(), false), + }; + if let Some(hex_string) = stripped.strip_prefix("0x") { let hex_string = hex_string.to_string(); - match parse_integer_literal(&hex_string, 16) { + match parse_integer_literal(&hex_string, 16, neg) { Ok(value) => SyntacticToken::IntegerLiteral(value), Err(_) => err!(SyntacticError::InvalidHexadecimalLiteral(hex_string)), } - } else if let Some(binary_string) = token.strip_prefix("0b") { + } else if let Some(binary_string) = stripped.strip_prefix("0b") { let binary_string = binary_string.to_string(); - match parse_integer_literal(&binary_string, 2) { + match parse_integer_literal(&binary_string, 2, neg) { Ok(value) => SyntacticToken::IntegerLiteral(value), Err(_) => err!(SyntacticError::InvalidBinaryLiteral(binary_string)), } + } else if let Some(octal_string) = stripped.strip_prefix("0o") 
/// Parse the digits of an integer literal written in the given radix.
///
/// `token` holds only the digits; underscores are removed before parsing.
/// When `neg` is true the parsed magnitude is negated.
///
/// Errors:
/// - `Err(true)`: the token is made purely of digits valid in `radix`, but
///   the value does not fit in an `isize`.
/// - `Err(false)`: the token could not be parsed as an integer at all.
fn parse_integer_literal(token: &str, radix: u32, neg: bool) -> Result<isize, bool> {
    let digits = token.replace('_', "");
    let magnitude = match usize::from_str_radix(&digits, radix) {
        Ok(magnitude) => magnitude,
        Err(_) => {
            // A non-empty, all-digit string can only fail by overflowing, so
            // report it as out of range rather than as a non-number.
            let out_of_range = !digits.is_empty()
                && digits.chars().all(|c| c.is_digit(radix));
            return Err(out_of_range);
        }
    };
    let value = if neg {
        // Subtracting from zero lets the magnitude of `isize::MIN` through,
        // which converting to `isize` before negating would reject.
        0isize.checked_sub_unsigned(magnitude)
    } else {
        isize::try_from(magnitude).ok()
    };
    value.ok_or(true)
}
report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { "String was not terminated, add a '\"' character to terminate", SyntacticError::UnterminatedMacroDefinition(name) => &format!("The '{name}' macro definition was not terminated, add a ';' character to terminate"), + SyntacticError::MacroDefinitionInMacroDefinition => + &format!("Attempted to define a macro inside another macro definition"), SyntacticError::UnmatchedBlockTerminator => "Attempted to terminate a block, but no block was in progress", @@ -105,7 +110,7 @@ fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { "Attempted to terminate a macro definition, but no macro definition was in progress", SyntacticError::ExpectedSingleCharacter => - "A character literal must contain exactly one character", + "Character literal must contain exactly one character", SyntacticError::DuplicateFieldNameInWord(name) => &format!("The field '{name}' has already been used in this word"), @@ -118,6 +123,8 @@ fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { &format!("The string '{string}' is not a valid hexadecimal literal"), SyntacticError::InvalidBinaryLiteral(string) => &format!("The string '{string}' is not a valid binary literal"), + SyntacticError::InvalidOctalLiteral(string) => + &format!("The string '{string}' is not a valid octal literal"), }; report_source_issue(LogLevel::Error, &context, message); diff --git a/src/types/expression_stack.rs b/src/types/expression_stack.rs index 4d26eb2..1036842 100644 --- a/src/types/expression_stack.rs +++ b/src/types/expression_stack.rs @@ -2,7 +2,7 @@ use crate::*; pub struct ExpressionStack { - stack: Vec<isize>, + stack: Vec<IntermediateValue>, } impl ExpressionStack { @@ -12,77 +12,280 @@ impl ExpressionStack { } } - pub fn pull_result(mut self) -> Result<isize, StackError> { + pub fn pull_result(mut self) -> Result<isize, ExpressionError> { match self.stack.len() { - 0 => 
Err(StackError::NoReturnValue), - 1 => Ok(self.stack.pop().unwrap()), - _ => Err(StackError::MultipleReturnValues), + 0 => Err(ExpressionError::NoReturnValue), + 1 => { + match self.stack.pop().unwrap() { + IntermediateValue::Integer(value) => Ok(*value), + IntermediateValue::List(_) => Err(ExpressionError::InvalidReturnType("a list")), + IntermediateValue::Block(_) => Err(ExpressionError::InvalidReturnType("a block")), + } + } + _ => Err(ExpressionError::MultipleReturnValues), } } - pub fn push(&mut self, value: isize) { + pub fn push(&mut self, value: IntermediateValue) { self.stack.push(value); } - pub fn apply(&mut self, operator: Operator, source: &SourceSpan) -> Result<(), Tracked<StackError>> { + pub fn apply(&mut self, operator: Operator, source: &SourceSpan) -> Result<(), Tracked<ExpressionError>> { macro_rules! push { - ($val:expr) => { self.stack.push($val) } + ($res:expr) => { + match $res { + Ok(value) => self.stack.push(value), + Err(error) => return Err(Tracked::from(error, source.clone())), + } + } } macro_rules! pop { ($name:ident) => { let $name = match self.stack.pop() { Some(value) => value, - None => return Err(Tracked::from(StackError::Underflow, source.clone())), + None => return Err(Tracked::from(ExpressionError::Underflow, source.clone())), }; } } - macro_rules! 
truth { - ($bool:expr) => { match $bool { true => 1, false => 0 } }; - } + match operator { - Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) }, - Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) }, - Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) }, - Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) }, - Operator::LessThanEqual => { pop!(b); pop!(a); push!(truth!(a <= b)) }, - Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) }, - Operator::Add => { pop!(b); pop!(a); push!(a + b) }, - Operator::Subtract => { pop!(b); pop!(a); push!(a - b) }, - Operator::Multiply => { pop!(b); pop!(a); push!(a * b) }, - Operator::Divide => { pop!(b); pop!(a); push!(a / b) }, - Operator::Modulo => { pop!(b); pop!(a); push!(a % b) }, - Operator::Exponent => { pop!(b); pop!(a); push!( - if let Ok(b) = u32::try_from(b) { a.saturating_pow(b) } else { 0 } ) }, - Operator::LeftShift => { pop!(b); pop!(a); push!( - if b < 0 { a >> -b } else { a << b } ) }, - Operator::RightShift => { pop!(b); pop!(a); push!( - if b < 0 { a << -b } else { a >> b } ) }, - Operator::BitAnd => { pop!(b); pop!(a); push!(a & b) }, - Operator::BitOr => { pop!(b); pop!(a); push!(a | b) }, - Operator::BitXor => { pop!(b); pop!(a); push!(a ^ b) }, - Operator::BitNot => { pop!(a); push!(!a) }, + Operator::Equal => { pop!(b); pop!(a); push!(op_equal(a, b)) }, + Operator::NotEqual => { pop!(b); pop!(a); push!(op_not_equal(a, b)) }, + Operator::LessThan => { pop!(b); pop!(a); push!(op_less_than(a, b)) }, + Operator::GreaterThan => { pop!(b); pop!(a); push!(op_greater_than(a, b)) }, + Operator::LessThanEqual => { pop!(b); pop!(a); push!(op_less_than_equal(a, b)) }, + Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(op_greater_than_equal(a, b)) }, + Operator::Add => { pop!(b); pop!(a); push!(op_add(a, b)) }, + Operator::Subtract => { pop!(b); pop!(a); push!(op_subtract(a, b)) }, + Operator::Multiply => { pop!(b); pop!(a); 
push!(op_multiply(a, b)) }, + Operator::Divide => { pop!(b); pop!(a); push!(op_divide(a, b)) }, + Operator::Modulo => { pop!(b); pop!(a); push!(op_modulo(a, b)) }, + Operator::Exponent => { pop!(b); pop!(a); push!(op_exponent(a, b)) }, + Operator::LeftShift => { pop!(b); pop!(a); push!(op_left_shift(a, b)) }, + Operator::RightShift => { pop!(b); pop!(a); push!(op_right_shift(a, b)) }, + Operator::BitAnd => { pop!(b); pop!(a); push!(op_bit_and(a, b)) }, + Operator::BitOr => { pop!(b); pop!(a); push!(op_bit_or(a, b)) }, + Operator::BitXor => { pop!(b); pop!(a); push!(op_bit_xor(a, b)) }, + Operator::BitNot => { pop!(a); push!(op_bit_not(a)) }, + Operator::Length => { pop!(a); push!(op_length(a)) }, + Operator::Index => { pop!(b); pop!(a); push!(op_index(a, b)) }, + Operator::Find => { pop!(b); pop!(a); push!(op_find(a, b)) }, + Operator::Sum => { pop!(a); push!(op_sum(a)) }, + Operator::Absolute => { pop!(a); push!(op_absolute(a)) }, + Operator::Debug => { op_debug(&self.stack, &source); }, } return Ok(()); } } +// Generate fake tracking information for synthetic values. 
+fn null_span() -> SourceSpan { + SourceSpan { + string: String::new(), + in_merged: SourceLocation { + path: None, + start: SourcePosition { line: 0, column: 0 }, + end: SourcePosition { line: 0, column: 0 }, + }, + in_source: None, + child: None, + } +} + +fn to_isize(value: IntermediateValue) -> Result<isize, ExpressionError> { + let received = match value { + IntermediateValue::Integer(integer) => return Ok(integer.value), + IntermediateValue::List(_) => "a list", + IntermediateValue::Block(_) => "a block", + }; + Err(ExpressionError::InvalidArgumentType("an integer", received)) +} + +fn to_list(value: IntermediateValue) -> Result<Vec<isize>, ExpressionError> { + let received = match value { + IntermediateValue::List(list) => return Ok(list.into_iter().map(|t| t.value).collect()), + IntermediateValue::Integer(_) => "an integer", + IntermediateValue::Block(_) => "a block", + }; + Err(ExpressionError::InvalidArgumentType("a list", received)) +} -pub enum StackError { +fn from_isize(value: isize) -> IntermediateValue { + IntermediateValue::Integer(Tracked::from(value, null_span())) +} + +fn from_bool(value: bool) -> IntermediateValue { + // Source span isn't used by anything. + match value { + true => IntermediateValue::Integer(Tracked::from(1, null_span())), + false => IntermediateValue::Integer(Tracked::from(0, null_span())), + } +} + +fn op_equal(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_bool(to_isize(l)? == to_isize(r)?)) } +fn op_not_equal(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_bool(to_isize(l)? != to_isize(r)?)) } +fn op_less_than(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_bool(to_isize(l)? < to_isize(r)?)) } +fn op_greater_than(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_bool(to_isize(l)? 
> to_isize(r)?)) } +fn op_less_than_equal(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_bool(to_isize(l)? <= to_isize(r)?)) } +fn op_greater_than_equal(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_bool(to_isize(l)? >= to_isize(r)?)) } +fn op_add(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_isize(to_isize(l)? + to_isize(r)?)) } +fn op_subtract(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_isize(to_isize(l)? - to_isize(r)?)) } +fn op_multiply(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_isize(to_isize(l)? * to_isize(r)?)) } +fn op_divide(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let l = to_isize(l)?; let r = to_isize(r)?; + match r != 0 { + true => Ok(from_isize(l / r)), + false => Ok(from_isize(0)), + } +} +fn op_modulo(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let l = to_isize(l)?; let r = to_isize(r)?; + match r != 0 { + true => Ok(from_isize(l % r)), + false => Ok(from_isize(0)), + } +} +fn op_exponent(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let l = to_isize(l)?; let r = to_isize(r)?; + if let Ok(r) = u32::try_from(r) { + Ok(from_isize(l.saturating_pow(r))) + } else { + Ok(from_isize(0)) + } +} +fn op_left_shift(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let l = to_isize(l)?; let r = to_isize(r)?; + Ok(from_isize(if r < 0 { l >> -r } else { l << r })) +} +fn op_right_shift(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let l = to_isize(l)?; let r = to_isize(r)?; + Ok(from_isize(if r < 0 { l << -r } else { l >> r })) +} +fn 
op_bit_and(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_isize((to_isize(l)?) & (to_isize(r)?))) } +fn op_bit_or(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_isize((to_isize(l)?) | (to_isize(r)?))) } +fn op_bit_xor(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_isize((to_isize(l)?) ^ (to_isize(r)?))) } +fn op_bit_not(l: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + Ok(from_isize(!to_isize(l)?)) } +fn op_length(l: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let length = match l { + IntermediateValue::Integer(integer) => width(*integer) as isize, + IntermediateValue::List(list) => list.len() as isize, + IntermediateValue::Block(block) => block.len() as isize, + }; + Ok(from_isize(length)) +} +fn op_index(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let l = to_list(l)?; let r = to_isize(r)?; + match r >= 0 && r < l.len() as isize { + true => Ok(from_isize(l[r as usize])), + false => Err(ExpressionError::IndexError(l.len(), r)), + } +} +fn op_find(l: IntermediateValue, r: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let l = to_list(l)?; let r = to_isize(r)?; + match l.iter().position(|e| *e == r) { + Some(i) => Ok(from_isize(i as isize)), + None => Err(ExpressionError::FindError(l.len(), r)), + } +} +fn op_sum(l: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + let sum = match l { + IntermediateValue::Integer(integer) => ones(*integer) as isize, + IntermediateValue::List(list) => list.into_iter().map(|t| t.value).sum(), + IntermediateValue::Block(_) => return Err(ExpressionError::InvalidArgumentType("an integer or list", "a block")) + }; + Ok(from_isize(sum)) +} +fn op_absolute(l: IntermediateValue) -> Result<IntermediateValue, ExpressionError> { + 
Ok(from_isize(to_isize(l)?.wrapping_abs())) } +fn op_debug(stack: &Vec<IntermediateValue>, source: &SourceSpan) { + let mut output = String::new(); + for value in stack.iter().rev() { + let string = match value { + IntermediateValue::Integer(integer) => + integer.to_string(), + IntermediateValue::List(list) => { + let strings: Vec<String> = list.iter().map(|t| t.value.to_string()).collect(); + format!("[{}]", strings.join(" ")) + } + IntermediateValue::Block(_) => unreachable!("Block value in expression"), + }; + output.push_str(&format!(" --> {string}\n")); + } + let len = stack.len(); + let element = if len == 1 { "element" } else { "elements" }; + let message = format!("Printing expression stack with {len} {element} at {}", source.location()); + log_info(&message, Some(output)); +} + + +/// Find the number of bits required to hold an integer. +pub fn width(value: isize) -> u32 { + match value.cmp(&0) { + std::cmp::Ordering::Less => (-value).ilog2() + 2, + std::cmp::Ordering::Equal => 0, + std::cmp::Ordering::Greater => value.ilog2() + 1, + } +} + +/// Count the number of one bits in a value. 
+pub fn ones(value: isize) -> u32 { + let width = width(value); + let mask = 2i32.pow(width) -1; + let value = (value as usize) & (mask as usize); + return value.count_ones(); +} + +pub enum ExpressionError { Underflow, MultipleReturnValues, NoReturnValue, + InvalidReturnType(&'static str), + // (expected, received) + InvalidArgumentType(&'static str, &'static str), + // (length, index) + IndexError(usize, isize), + // (length, value) + FindError(usize, isize), } -pub fn report_stack_error(error: &Tracked<StackError>, source_code: &str) { +pub fn report_expression_error(error: &Tracked<ExpressionError>, source_code: &str) { let context = Context { source_code: &source_code, source: &error.source }; let message = match &error.value { - StackError::Underflow => + ExpressionError::Underflow => "A stack underflow occurred while evaluating this operator", - StackError::MultipleReturnValues => + ExpressionError::MultipleReturnValues => "More than one value was left on the stack after this expression was evaluated", - StackError::NoReturnValue => + ExpressionError::NoReturnValue => "No value was left on the stack after this expression was evaluated", + ExpressionError::InvalidReturnType(received) => + &format!("Expression must return an integer value, not {received} value"), + ExpressionError::InvalidArgumentType(expected, received) => + &format!("Operator expected {expected} value, not {received} value"), + ExpressionError::IndexError(length, index) => + &format!("Could not access element {index} from a list of length {length}"), + ExpressionError::FindError(length, value) => { + let mut character = String::new(); + if let Ok(value) = u32::try_from(*value) { + if let Some(c) = char::from_u32(value) { + character = format!(" (character '{c}')"); + } + } + &format!("Could not find value {value}{character} in list of length {length}") + } }; report_source_issue(LogLevel::Error, &context, message); diff --git a/src/types/operator.rs b/src/types/operator.rs index 
a7e7b9b..e94c67d 100644 --- a/src/types/operator.rs +++ b/src/types/operator.rs @@ -18,6 +18,12 @@ pub enum Operator { BitOr, BitXor, BitNot, + Length, + Index, + Find, + Sum, + Absolute, + Debug, } impl Operator { @@ -55,7 +61,13 @@ impl Operator { "<or>" => Some(Operator::BitOr), "<xor>" => Some(Operator::BitXor), "<not>" => Some(Operator::BitNot), - _ => None, + "<len>" => Some(Operator::Length), + "<nth>" => Some(Operator::Index), + "<fnd>" => Some(Operator::Find), + "<sum>" => Some(Operator::Sum), + "<abs>" => Some(Operator::Absolute), + "<dbg>" => Some(Operator::Debug), + _ => None, } } } @@ -81,6 +93,12 @@ impl std::fmt::Display for Operator { Operator::BitOr => "<or>", Operator::BitXor => "<xor>", Operator::BitNot => "<not>", + Operator::Length => "<len>", + Operator::Index => "<nth>", + Operator::Find => "<fnd>", + Operator::Sum => "<sum>", + Operator::Absolute => "<abs>", + Operator::Debug => "<dbg>", }; write!(f, "{string}") } diff --git a/src/types/word_template.rs b/src/types/word_template.rs index 33d5933..634e1d5 100644 --- a/src/types/word_template.rs +++ b/src/types/word_template.rs @@ -1,6 +1,7 @@ use crate::*; +#[derive(Clone)] pub struct WordTemplate { pub value: usize, /// Width of the word in bits. @@ -8,6 +9,7 @@ pub struct WordTemplate { pub fields: Vec<Tracked<BitField>>, } +#[derive(Clone)] pub struct BitField { pub name: char, /// Width of the field in bits. |