From 981bb70e5077bd30ef85a0092117a875dcc614fc Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Tue, 14 Oct 2025 20:40:39 +1300 Subject: Implement new intermediate stage Massive improvement. Label references can be used anywhere in the program, with the program being assembled repeatedly until all labels have stabilised. The bytecode stage will just be a tiny stage tacked onto the end, rather than the old bytecode stage that would resolve labels and expressions. --- src/stages/bytecode.rs | 272 ------------------------------------------------- 1 file changed, 272 deletions(-) delete mode 100644 src/stages/bytecode.rs (limited to 'src/stages/bytecode.rs') diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs deleted file mode 100644 index 71f1ff0..0000000 --- a/src/stages/bytecode.rs +++ /dev/null @@ -1,272 +0,0 @@ -use crate::*; - -use std::collections::HashMap; - - -pub fn parse_bytecode(tokens: Vec>, width: Option) -> Result, Vec>> { - BytecodeParser::new(width).parse(tokens) -} - - -pub struct BytecodeParser { - width: Option, - addresses: HashMap>, - address: usize, - segment_address: usize, - segment_source: Option, - segments: Vec, - words: Vec>, - errors: Vec>, -} - -impl BytecodeParser { - pub fn new(width: Option) -> Self { - Self { - width, - addresses: HashMap::new(), - address: 0, - segment_address: 0, - segment_source: None, - segments: Vec::new(), - words: Vec::new(), - errors: Vec::new(), - } - } - - pub fn parse(mut self, tokens: Vec>) -> Result, Vec>> { - // Register all labels with address 0. - for token in &tokens { - if let IntermediateToken::LabelDefinition(name) = &token.value { - let tracked = Tracked::from(0, token.source.clone()); - if let Some(_) = self.addresses.insert(name.clone(), tracked) { - unreachable!("Uncaught duplicate label definition '{name}'"); - } - } - } - // Attempt to calculate all label addresses naively ahead of time. - // This will give false results if we pin an address calculated from a label address. - let mut address = 0; - for token in &tokens { - let source = &token.source; - match &token.value { - IntermediateToken::LabelDefinition(name) => { - let tracked = Tracked::from(address, source.clone()); - self.addresses.insert(name.clone(), tracked); - } - IntermediateToken::Word(_) => { - address += 1; - } - IntermediateToken::PinnedAddress(pinned) => { - // Attempt to calculate a sane initial value for a pinned address. - match &pinned.value { - IntermediateInteger::Integer(value) => { - address = (*value).try_into().unwrap_or(0); - } - IntermediateInteger::Expression(expression) => { - let result = self.evaluate_expression(&expression, &pinned.source); - address = result.try_into().unwrap_or(0); - } - IntermediateInteger::LabelReference(_) => { - let error = BytecodeError::PinnedLabel; - self.errors.push(Tracked::from(error, source.clone())); - } - } - } - } - } - // Return unrecoverable errors. - if !self.errors.is_empty() { - return Err(self.errors); - } - - for i in 0..4 { - info!("Attempting iteration {} of bytecode assembly stage", i+1); - // Erase the previous parse attempt. - self.segments.clear(); - self.errors.clear(); - // Attempt to parse the program. - let previous_addresses = self.addresses.clone(); - self.parse_iteration(&tokens); - // Return unrecoverable errors. - if !self.errors.is_empty() { - return Err(self.errors); - } - // Check label stability - if self.check_for_instability(&previous_addresses) { - continue; - } - // Check for backtrack - if self.check_for_backtrack() { - continue; - }; - // Program is stable, return. - info!("Stabilised in {} iteration of bytecode assembly stage", i+1); - return Ok(self.segments); - } - - return Err(self.errors); - } - - /// Attempt to parse the full program using the current label values. - fn parse_iteration(&mut self, tokens: &[Tracked]) { - for token in tokens { - let source = &token.source; - match &token.value { - IntermediateToken::Word(word) => { - let word = self.evaluate_word(word, source); - // Check that the word width fits the provided width. - if let Some(width) = self.width { - if word.width != width { - let error = BytecodeError::IncorrectWidth(width, word.width); - self.errors.push(Tracked::from(error, source.clone())); - } - } - self.words.push(word); - self.address += 1; - } - IntermediateToken::PinnedAddress(integer) => { - // Calculate the address of the new segment. - let pinned = match &integer.value { - IntermediateInteger::Integer(value) => { - (*value).try_into().unwrap_or(0) - } - IntermediateInteger::Expression(expression) => { - let result = self.evaluate_expression(&expression, &integer.source); - result.try_into().unwrap_or(0) - } - IntermediateInteger::LabelReference(_) => - // Already handled when registering initial label values. - unreachable!(), - }; - // Start a new segment. - let words = std::mem::take(&mut self.words); - if !words.is_empty() { - let address = self.segment_address; - let source = std::mem::take(&mut self.segment_source); - let segment = Segment { address, source, words }; - self.segments.push(segment); - } - self.segment_source = Some(integer.source.clone()); - self.address = pinned; - self.segment_address = pinned; - } - IntermediateToken::LabelDefinition(name) => { - // Record the latest known address of this label. - let address = self.addresses.get_mut(name).unwrap(); - address.value = self.address; - } - } - } - // Finish final segment. - let words = std::mem::take(&mut self.words); - if !words.is_empty() { - let address = self.segment_address; - let source = std::mem::take(&mut self.segment_source); - let segment = Segment { address, source, words }; - self.segments.push(segment); - } - } - - fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize { - let mut stack = ExpressionStack::new(); - for token in &expression.tokens { - let source = &token.source; - match &token.value { - IntermediateExpressionToken::Integer(integer) => match integer { - IntermediateInteger::Integer(value) => { - stack.push(*value); - } - IntermediateInteger::Expression(expression) => { - stack.push(self.evaluate_expression(expression, source)); - } - IntermediateInteger::LabelReference(name) => { - stack.push(self.evaluate_label_reference(name)); - } - } - IntermediateExpressionToken::Operator(operator) => { - if let Err(err) = stack.apply(*operator, source) { - let error = BytecodeError::StackError(err); - self.errors.push(Tracked::from(error, source.clone())) - } - } - } - } - match stack.pull_result() { - Ok(value) => value, - Err(err) => { - let error = BytecodeError::StackError(Tracked::from(err, source.clone())); - self.errors.push(Tracked::from(error, source.clone())); - 0 - } - } - } - - fn evaluate_label_reference(&mut self, name: &Tracked) -> isize { - if let Some(address) = self.addresses.get(&name.to_string()) { - address.value as isize - } else { - unreachable!("Uncaught unresolved label reference '{name}'") - } - } - - fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked { - let mut word_value = word.value; - for field in &word.fields { - let field_source = &field.value.value.source; - let field_value = match &field.value.value.value { - IntermediateInteger::Expression(expression) => { - self.evaluate_expression(expression, source) - } - IntermediateInteger::LabelReference(name) => { - self.evaluate_label_reference(name) - } - IntermediateInteger::Integer(value) => { - *value - } - }; - let value_width = width(field_value); - if field.width < value_width { - let error = BytecodeError::ValueTooWide(field.width, value_width); - self.errors.push(Tracked::from(error, field_source.clone())); - } else { - let mask = 2_usize.pow(field.width as u32) - 1; - let clamped_value = (field_value as usize) & mask; - word_value |= (clamped_value << field.shift) as usize; - } - } - let word = Word { width: word.width, value: word_value }; - return Tracked::from(word, source.clone()); - } - - fn check_for_instability(&mut self, previous_addresses: &HashMap>) -> bool { - let mut instability_occurred = false; - for (name, previous_address) in previous_addresses.iter() { - let current_address = &self.addresses[name]; - if current_address != previous_address { - info!("Label '{name}' was unstable, moving from address 0x{:04x} to 0x{:04x}", - previous_address.value, current_address.value); - let error = BytecodeError::UnstableLabel(name.to_string()); - self.errors.push(Tracked::from(error, previous_address.source.clone())); - instability_occurred = true; - } - } - return instability_occurred; - } - - fn check_for_backtrack(&mut self) -> bool { - let mut backtrack_occurred = false; - let mut current_address = 0; - for segment in &self.segments { - if segment.address < current_address { - let error = BytecodeError::PinnedAddressBacktrack(segment.address, current_address); - if let Some(source) = &segment.source { - self.errors.push(Tracked::from(error, source.clone())); - } - info!("Backtrack occurred with segment at address 0x{:04x}", segment.address); - backtrack_occurred = true; - } - current_address = segment.address + segment.words.len(); - } - return backtrack_occurred; - } -} -- cgit v1.2.3-70-g09d2