From 1ecee352f5844b0809d7ae66df52e34f42b44c8e Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Thu, 6 Mar 2025 20:33:27 +1300 Subject: Rewrite entire assembler The language is now more general, the code is better structured, error reporting is more detailed, and many new language features have been implemented: - conditional blocks - first-class strings - more expression operators - binary literals - negative values - invocations in constant expressions --- src/stages/intermediate.rs | 577 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 577 insertions(+) create mode 100644 src/stages/intermediate.rs (limited to 'src/stages/intermediate.rs') diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs new file mode 100644 index 0000000..6853f62 --- /dev/null +++ b/src/stages/intermediate.rs @@ -0,0 +1,577 @@ +use crate::*; + +use assembler::{DefinitionType, SourceLocation, SourcePosition, SymbolRole}; + +use indexmap::{IndexSet, IndexMap}; + + +static mut ID: usize = 0; +macro_rules! next_id { () => { unsafe { let id = ID; ID += 1; id }}; } + +pub fn parse_intermediate(semantic: Vec>) -> Result>, Vec>> { + IntermediateParser::new(semantic).parse() +} + + +struct IntermediateParser { + semantic: Vec>, + label_names: IndexSet>, + macro_names: IndexSet>, + macro_definitions: IndexMap, + intermediate: Vec>, + errors: Vec>, +} + +impl IntermediateParser { + pub fn new(semantic: Vec>) -> Self { + let mut label_names = IndexSet::new(); + let mut macro_names = IndexSet::new(); + for symbol in SymbolParser::new().parse(&semantic) { + match symbol.role { + SymbolRole::Definition(DefinitionType::MustPrecedeReference) => { + // Only consider macro definitions, not macro argument definitions. + if symbol.namespace.is_empty() { + if !macro_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { + unreachable!("Uncaught duplicate macro definition '{}'", symbol.name); + } + } + } + SymbolRole::Definition(DefinitionType::CanFollowReference) => { + if !label_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { + unreachable!("Uncaught duplicate label definition '{}'", symbol.name); + } + } + SymbolRole::Reference => (), + } + } + + Self { + semantic, + label_names, + macro_names, + macro_definitions: IndexMap::new(), + intermediate: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(mut self) -> Result>, Vec>> { + for token in self.semantic { + let source = &token.source; + match token.value { + SemanticToken::MacroDefinition(definition) => { + // Invoke the body to see if it contains undefined macros. + let error_count = self.errors.len(); + let mut arguments = IndexMap::new(); + // Prepare dummy argument values. + let null = SourceSpan { + string: String::new(), + in_merged: SourceLocation { + path: None, + start: SourcePosition::ZERO, + end: SourcePosition::ZERO, + }, + in_source: None, + child: None, + }; + for argument in &definition.arguments { + let value = match argument.variant { + ArgumentType::Integer => { + let integer = IntermediateInteger::Integer(0); + let tracked = Tracked::from(integer, null.clone()); + IntermediateValue::Integer(tracked) + } + ArgumentType::Block => { + IntermediateValue::Block(Vec::new()) + } + }; + let tracked = Tracked::from(value, null.clone()); + arguments.insert(argument.name.clone(), tracked); + } + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments, + errors: &mut self.errors, + id: next_id!(), + }; + env.parse_macro_definition_body(&definition.body, source); + if self.errors.len() != error_count { + break; + } + + let name = definition.name.to_string(); + if self.macro_definitions.insert(name.clone(), definition).is_some() { + unreachable!("Uncaught duplicate macro definition '{}'", name); + } + } + SemanticToken::BlockToken(block_token) => { + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments: IndexMap::new(), + errors: &mut self.errors, + id: next_id!(), + }; + let mut tokens = env.parse_block_token(&block_token, source); + self.intermediate.append(&mut tokens); + } + } + } + match self.errors.is_empty() { + true => Ok(self.intermediate), + false => Err(self.errors), + } + } +} + + +struct Environment<'a> { + label_names: &'a IndexSet>, + macro_names: &'a IndexSet>, + macro_definitions: &'a IndexMap, + arguments: IndexMap>, + errors: &'a mut Vec>, + id: usize, +} + +impl<'a> Environment<'a> { + // Attach the invocation ID to every macro label name + fn tag_name(&self, name: &str) -> String { + match name.contains(':') { + true => format!("{name}:{}", self.id), + false => name.to_string(), + } + } + + fn parse_macro_definition_body(&mut self, body: &MacroDefinitionBody, source: &SourceSpan) -> Option> { + match &body { + MacroDefinitionBody::Integer(integer) => { + let token = self.parse_integer_token(&integer, &source)?; + let integer = IntermediateValue::Integer(token); + Some(Tracked::from(integer, source.clone())) + } + MacroDefinitionBody::Invocation(invocation) => { + self.parse_invocation(&invocation, &invocation.source) + } + MacroDefinitionBody::Block(blocks) => { + let mut tokens = Vec::new(); + for block in blocks { + tokens.append(&mut self.parse_block_token(block, &block.source)); + } + let value = IntermediateValue::Block(tokens); + Some(Tracked::from(value, source.clone())) + } + } + } + + fn parse_block_token(&mut self, block: &BlockToken, source: &SourceSpan) -> Vec> { + let mut intermediate = Vec::new(); + match block { + BlockToken::LabelDefinition(name) => { + let token = IntermediateToken::LabelDefinition(self.tag_name(name)); + intermediate.push(Tracked::from(token, source.clone())); + } + BlockToken::PinnedAddress(address) => { + if let Some(integer) = self.parse_integer_token(address, &address.source) { + if let Some(source) = integer_contains_label_reference(&integer) { + let error = IntermediateError::LabelReferenceInPinnedAddress; + let new_source = address.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + } else { + match evaluate_integer(&integer, source) { + Ok(value) => { + let value = usize::try_from(value).unwrap_or(0); + let tracked = Tracked::from(value, address.source.clone()); + let token = IntermediateToken::PinnedAddress(tracked); + intermediate.push(Tracked::from(token, source.clone())); + } + Err(error) => self.errors.push(error), + } + } + } + } + BlockToken::ConditionalBlock(cond) => { + let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source); + let mut body = self.parse_block_token(&cond.body, &cond.body.source); + if let Some(predicate) = predicate { + let mut found_error = false; + if let Some(source) = integer_contains_label_reference(&predicate) { + let error = IntermediateError::LabelReferenceInConditionPredicate; + let new_source = cond.predicate.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + found_error = true; + }; + if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) { + let error = IntermediateError::LabelDefinitionInConditionBody; + let new_source = cond.body.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + found_error = true; + } + if !found_error { + match evaluate_integer(&predicate, &cond.predicate.source) { + Ok(value) => if value != 0 { intermediate.append(&mut body) }, + Err(error) => self.errors.push(error), + } + } + } + } + BlockToken::WordTemplate(word_template) => { + let mut fields = Vec::new(); + for bit_field in &word_template.fields { + let name = bit_field.name.to_string(); + let source = &bit_field.source; + let invocation = Invocation { name, arguments: Vec::new() }; + if let Some(value) = self.parse_integer_invocation(&invocation, source) { + let field = IntermediateField { + width: bit_field.width, + shift: bit_field.shift, + value, + }; + fields.push(Tracked::from(field, bit_field.source.clone())); + } + } + let word = IntermediateWord { + value: word_template.value, + width: word_template.width, + fields, + }; + let token = IntermediateToken::Word(word); + intermediate.push(Tracked::from(token, source.clone())); + } + BlockToken::Block(blocks) => { + for block in blocks { + let mut tokens = self.parse_block_token(block, &block.source); + intermediate.append(&mut tokens); + } + } + BlockToken::Invocation(invocation) => { + if let Some(mut tokens) = self.parse_block_invocation(invocation, source) { + intermediate.append(&mut tokens); + } + } + } + + return intermediate; + } + + fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option> { + match integer { + IntegerToken::IntegerLiteral(value) => { + let integer = IntermediateInteger::Integer(*value); + Some(Tracked::from(integer, source.clone())) + } + IntegerToken::Expression(expression) => { + self.parse_expression(expression, source) + } + IntegerToken::Invocation(invocation) => { + self.parse_integer_invocation(invocation, source) + } + } + } + + fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option> { + match self.parse_invocation(invocation, source)?.value { + IntermediateValue::Integer(integer) => Some(integer), + IntermediateValue::Block(_) => { + let error = IntermediateError::ExpectedInteger; + self.errors.push(Tracked::from(error, source.clone())); + None + } + } + } + + fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option>> { + match self.parse_invocation(invocation, source)?.value { + IntermediateValue::Block(tokens) => Some(tokens), + IntermediateValue::Integer(_) => { + let error = IntermediateError::ExpectedBlock; + self.errors.push(Tracked::from(error, source.clone())); + None + } + } + } + + fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option> { + let received_count = invocation.arguments.len(); + if let Some(argument) = self.arguments.get(&invocation.name) { + if received_count != 0 { + let error = IntermediateError::IncorrectArgumentCount(0, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + Some(argument.clone()) + } + } else if let Some(label_name) = self.label_names.get(&invocation.name) { + if received_count != 0 { + let error = IntermediateError::IncorrectArgumentCount(0, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + let name = self.tag_name(label_name); + let tracked = Tracked::from(name, label_name.source.clone()); + let integer = IntermediateInteger::LabelReference(tracked); + let tracked = Tracked::from(integer, source.clone()); + let value = IntermediateValue::Integer(tracked); + Some(Tracked::from(value, source.clone())) + } + } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { + // Check that the correct number of arguments were provided. + let expected_count = definition.arguments.len(); + if received_count != expected_count { + let error = IntermediateError::IncorrectArgumentCount(expected_count, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + // Gather and type-check the provided arguments. + let mut arguments = Vec::new(); + for (i, argument) in invocation.arguments.iter().enumerate() { + let received_type = match &argument.value { + InvocationArgument::String(string) => { + let mut values = Vec::new(); + for c in &string.chars { + let integer = IntermediateInteger::Integer(**c); + let tracked = Tracked::from(integer, c.source.clone()); + values.push(IntermediateValue::Integer(tracked)); + } + arguments.push(RepeatedArgument::List(values)); + ArgumentType::Integer + } + InvocationArgument::IntegerToken(integer) => { + let tracked = self.parse_integer_token(&integer, &argument.source)?; + let value = IntermediateValue::Integer(tracked); + arguments.push(RepeatedArgument::Loop(value)); + ArgumentType::Integer + } + InvocationArgument::BlockToken(block) => { + let tokens = self.parse_block_token(&block, &argument.source); + let value = IntermediateValue::Block(tokens); + arguments.push(RepeatedArgument::Loop(value)); + ArgumentType::Block + } + InvocationArgument::Invocation(invocation) => { + let value = self.parse_invocation(&invocation, &argument.source)?; + let received_type = match &value.value { + IntermediateValue::Integer(_) => ArgumentType::Integer, + IntermediateValue::Block(_) => ArgumentType::Block, + }; + arguments.push(RepeatedArgument::Loop(value.value)); + received_type + } + }; + let expected_type = match received_type { + ArgumentType::Integer => ArgumentType::Block, + ArgumentType::Block => ArgumentType::Integer, + }; + if definition.arguments[i].variant != received_type { + let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); + self.errors.push(Tracked::from(error, argument.source.clone())); + return None; + } + } + // Invoke the invocation multiple times. + let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1); + let mut values = Vec::new(); + for i in 0..repetitions { + // Construct an argument map for this invocation. + let mut argument_map = IndexMap::new(); + for (a, argument) in arguments.iter().enumerate() { + let name = definition.arguments[a].name.clone(); + let source = invocation.arguments[a].source.clone(); + let value = match argument { + RepeatedArgument::Loop(value) => { + Tracked::from(value.clone(), source) + } + RepeatedArgument::List(list) => match list.get(i) { + Some(value) => { + Tracked::from(value.clone(), source) + } + None => { + let error = IntermediateError::ListExhausted; + let source = invocation.arguments[a].source.clone(); + self.errors.push(Tracked::from(error, source)); + return None; + } + } + }; + if argument_map.insert(name.clone(), value).is_some() { + unreachable!("Uncaught duplicate macro argument name '{name}'"); + }; + } + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments: argument_map, + errors: &mut self.errors, + id: next_id!(), + }; + values.push(env.parse_macro_definition_body(&definition.body, source)?); + } + if values.len() == 1 { + values.pop() + } else { + // Flatten all values into a list of block tokens. + let mut block = Vec::new(); + for value in values { + match value.value { + IntermediateValue::Integer(_) => { + let error = IntermediateError::ExpectedBlock; + self.errors.push(Tracked::from(error, value.source)); + return None; + } + IntermediateValue::Block(mut tokens) => { + block.append(&mut tokens); + } + } + } + Some(Tracked::from(IntermediateValue::Block(block), source.clone())) + } + } + } else if let Some(macro_name) = self.macro_names.get(&invocation.name) { + let error = IntermediateError::InvocationBeforeDefinition; + let source = source.clone().wrap(macro_name.source.clone()); + self.errors.push(Tracked::from(error, source)); + None + } else { + unreachable!("Uncaught unresolved reference '{}'", invocation.name); + } + } + + fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option> { + let mut intermediate = Vec::new(); + let mut error = false; + + for token in &expression.tokens { + let source = &token.source; + match &token.value { + ExpressionToken::IntegerToken(integer) => { + let Some(integer) = self.parse_integer_token(integer, source) else { + error = true; continue; + }; + let token = IntermediateExpressionToken::Integer(integer.value); + intermediate.push(Tracked::from(token, integer.source)); + } + ExpressionToken::Operator(operator) => { + let token = IntermediateExpressionToken::Operator(*operator); + intermediate.push(Tracked::from(token, source.clone())); + } + ExpressionToken::Invocation(invocation) => { + let Some(integer) = self.parse_integer_invocation(invocation, source) else { + error = true; continue; + }; + let token = IntermediateExpressionToken::Integer(integer.value); + intermediate.push(Tracked::from(token, integer.source)); + } + } + } + + if error { return None; } + let expression = IntermediateExpression { tokens: intermediate }; + let integer = IntermediateInteger::Expression(expression); + Some(Tracked::from(integer, source.clone())) + } +} + + +macro_rules! return_some { + ($option:expr) => { + if $option.is_some() { return $option; } + }; +} + +fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option { + match integer { + IntermediateInteger::Integer(_) => None, + IntermediateInteger::LabelReference(label) => Some(label.source.clone()), + IntermediateInteger::Expression(expr) => expression_contains_label_reference(expr), + } +} + +fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option { + for token in &expression.tokens { + if let IntermediateExpressionToken::Integer(integer) = &token.value { + if let Some(child) = integer_contains_label_reference(&integer) { + return Some(token.source.clone().wrap(child)); + } + } + } + return None; +} + +fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option { + match &block { + BlockToken::LabelDefinition(_) => { + return Some(source.clone()); + } + BlockToken::Invocation(invocation) => { + return_some!(invocation_contains_label_definition(invocation)) + } + BlockToken::Block(blocks) => { + for block in blocks { + return_some!(block_contains_label_definition(block, &block.source)) + } + } + _ => (), + } + return None; +} + +fn invocation_contains_label_definition(invocation: &Invocation) -> Option { + for argument in &invocation.arguments { + match &argument.value { + InvocationArgument::BlockToken(block) => { + return_some!(block_contains_label_definition(&block, &argument.source)) + } + InvocationArgument::Invocation(invocation) => { + return_some!(invocation_contains_label_definition(&invocation)) + } + _ => (), + } + } + return None; +} + +fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result> { + match integer { + IntermediateInteger::Integer(value) => Ok(*value), + IntermediateInteger::LabelReference(name) => + unreachable!("Uncaught label reference '{name}' in condition predicate or pinned address value"), + IntermediateInteger::Expression(expr) => evaluate_expression(expr, source), + } +} + +fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result> { + let mut stack = ExpressionStack::new(); + for token in &expression.tokens { + let source = &token.source; + match &token.value { + IntermediateExpressionToken::Integer(integer) => match integer { + IntermediateInteger::Integer(value) => { + stack.push(*value); + } + IntermediateInteger::Expression(expression) => { + stack.push(evaluate_expression(&expression, source)?); + } + IntermediateInteger::LabelReference(name) => { + unreachable!("Uncaught label reference '{name}' in condition predicate"); + } + } + IntermediateExpressionToken::Operator(operator) => { + if let Err(stack_error) = stack.apply(*operator, source) { + let error = IntermediateError::StackError(stack_error); + return Err(Tracked::from(error, token.source.clone())); + } + } + } + } + match stack.pull_result() { + Ok(value) => Ok(value), + Err(err) => { + let error = Tracked::from(err, source.clone()); + Err(Tracked::from(IntermediateError::StackError(error), source.clone())) + } + } +} -- cgit v1.2.3-70-g09d2