use crate::*; use std::collections::HashMap; pub fn parse_bytecode(tokens: Vec>, width: Option) -> Result, Vec>> { BytecodeParser::new(width).parse(tokens) } pub struct BytecodeParser { width: Option, addresses: HashMap>, address: usize, segment_address: usize, segment_source: Option, segments: Vec, words: Vec>, errors: Vec>, } impl BytecodeParser { pub fn new(width: Option) -> Self { Self { width, addresses: HashMap::new(), address: 0, segment_address: 0, segment_source: None, segments: Vec::new(), words: Vec::new(), errors: Vec::new(), } } pub fn parse(mut self, tokens: Vec>) -> Result, Vec>> { // Register all labels with address 0. for token in &tokens { if let IntermediateToken::LabelDefinition(name) = &token.value { let tracked = Tracked::from(0, token.source.clone()); if let Some(_) = self.addresses.insert(name.clone(), tracked) { unreachable!("Uncaught duplicate label definition '{name}'"); } } } // Attempt to calculate all label addresses naively ahead of time. // This will give false results if we pin an address calculated from a label address. let mut address = 0; for token in &tokens { let source = &token.source; match &token.value { IntermediateToken::LabelDefinition(name) => { let tracked = Tracked::from(address, source.clone()); self.addresses.insert(name.clone(), tracked); } IntermediateToken::Word(_) => { address += 1; } IntermediateToken::PinnedAddress(pinned) => { // Attempt to calculate a sane initial value for a pinned address. match &pinned.value { IntermediateInteger::Integer(value) => { address = (*value).try_into().unwrap_or(0); } IntermediateInteger::Expression(expression) => { let result = self.evaluate_expression(&expression, &pinned.source); address = result.try_into().unwrap_or(0); } IntermediateInteger::LabelReference(_) => { let error = BytecodeError::PinnedLabel; self.errors.push(Tracked::from(error, source.clone())); } } } } } // Return unrecoverable errors. if !self.errors.is_empty() { return Err(self.errors); } for i in 0..4 { info!("Attempting iteration {} of bytecode assembly stage", i+1); // Erase the previous parse attempt. self.segments.clear(); self.errors.clear(); // Attempt to parse the program. let previous_addresses = self.addresses.clone(); self.parse_iteration(&tokens); // Return unrecoverable errors. if !self.errors.is_empty() { return Err(self.errors); } // Check label stability if self.check_for_instability(&previous_addresses) { continue; } // Check for backtrack if self.check_for_backtrack() { continue; }; // Program is stable, return. info!("Stabilised in {} iteration of bytecode assembly stage", i+1); return Ok(self.segments); } return Err(self.errors); } /// Attempt to parse the full program using the current label values. fn parse_iteration(&mut self, tokens: &[Tracked]) { for token in tokens { let source = &token.source; match &token.value { IntermediateToken::Word(word) => { let word = self.evaluate_word(word, source); // Check that the word width fits the provided width. if let Some(width) = self.width { if word.width != width { let error = BytecodeError::IncorrectWidth(width, word.width); self.errors.push(Tracked::from(error, source.clone())); } } self.words.push(word); self.address += 1; } IntermediateToken::PinnedAddress(integer) => { // Calculate the address of the new segment. let pinned = match &integer.value { IntermediateInteger::Integer(value) => { (*value).try_into().unwrap_or(0) } IntermediateInteger::Expression(expression) => { let result = self.evaluate_expression(&expression, &integer.source); result.try_into().unwrap_or(0) } IntermediateInteger::LabelReference(_) => // Already handled when registering initial label values. unreachable!(), }; // Start a new segment. let words = std::mem::take(&mut self.words); if !words.is_empty() { let address = self.segment_address; let source = std::mem::take(&mut self.segment_source); let segment = Segment { address, source, words }; self.segments.push(segment); } self.segment_source = Some(integer.source.clone()); self.address = pinned; self.segment_address = pinned; } IntermediateToken::LabelDefinition(name) => { // Record the latest known address of this label. let address = self.addresses.get_mut(name).unwrap(); address.value = self.address; } } } // Finish final segment. let words = std::mem::take(&mut self.words); if !words.is_empty() { let address = self.segment_address; let source = std::mem::take(&mut self.segment_source); let segment = Segment { address, source, words }; self.segments.push(segment); } } fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize { let mut stack = ExpressionStack::new(); for token in &expression.tokens { let source = &token.source; match &token.value { IntermediateExpressionToken::Integer(integer) => match integer { IntermediateInteger::Integer(value) => { stack.push(*value); } IntermediateInteger::Expression(expression) => { stack.push(self.evaluate_expression(expression, source)); } IntermediateInteger::LabelReference(name) => { stack.push(self.evaluate_label_reference(name)); } } IntermediateExpressionToken::Operator(operator) => { if let Err(err) = stack.apply(*operator, source) { let error = BytecodeError::StackError(err); self.errors.push(Tracked::from(error, source.clone())) } } } } match stack.pull_result() { Ok(value) => value, Err(err) => { let error = BytecodeError::StackError(Tracked::from(err, source.clone())); self.errors.push(Tracked::from(error, source.clone())); 0 } } } fn evaluate_label_reference(&mut self, name: &Tracked) -> isize { if let Some(address) = self.addresses.get(&name.to_string()) { address.value as isize } else { unreachable!("Uncaught unresolved label reference '{name}'") } } fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked { let mut word_value = word.value; for field in &word.fields { let field_source = &field.value.value.source; let field_value = match &field.value.value.value { IntermediateInteger::Expression(expression) => { self.evaluate_expression(expression, source) } IntermediateInteger::LabelReference(name) => { self.evaluate_label_reference(name) } IntermediateInteger::Integer(value) => { *value } }; let value_width = width(field_value); if field.width < value_width { let error = BytecodeError::ValueTooWide(field.width, value_width); self.errors.push(Tracked::from(error, field_source.clone())); } else { let mask = 2_usize.pow(field.width as u32) - 1; let clamped_value = (field_value as usize) & mask; word_value |= (clamped_value << field.shift) as usize; } } let word = Word { width: word.width, value: word_value }; return Tracked::from(word, source.clone()); } fn check_for_instability(&mut self, previous_addresses: &HashMap>) -> bool { let mut instability_occurred = false; for (name, previous_address) in previous_addresses.iter() { let current_address = &self.addresses[name]; if current_address != previous_address { info!("Label '{name}' was unstable, moving from address 0x{:04x} to 0x{:04x}", previous_address.value, current_address.value); let error = BytecodeError::UnstableLabel(name.to_string()); self.errors.push(Tracked::from(error, previous_address.source.clone())); instability_occurred = true; } } return instability_occurred; } fn check_for_backtrack(&mut self) -> bool { let mut backtrack_occurred = false; let mut current_address = 0; for segment in &self.segments { if segment.address < current_address { let error = BytecodeError::PinnedAddressBacktrack(segment.address, current_address); if let Some(source) = &segment.source { self.errors.push(Tracked::from(error, source.clone())); } info!("Backtrack occurred with segment at address 0x{:04x}", segment.address); backtrack_occurred = true; } current_address = segment.address + segment.words.len(); } return backtrack_occurred; } }