summaryrefslogtreecommitdiff
path: root/src/stages/intermediate.rs
diff options
context:
space:
mode:
authorBen Bridle <ben@derelict.engineering>2025-03-06 20:33:27 +1300
committerBen Bridle <ben@derelict.engineering>2025-03-11 16:59:26 +1300
commit1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch)
tree472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 /src/stages/intermediate.rs
parentf2ed89083f5326a7a6f0a1720033d3388aa431fb (diff)
downloadtorque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip
Rewrite entire assembler
The language is now more general, the code is better structured, error reporting is more detailed, and many new language features have been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
Diffstat (limited to 'src/stages/intermediate.rs')
-rw-r--r--src/stages/intermediate.rs577
1 files changed, 577 insertions, 0 deletions
diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs
new file mode 100644
index 0000000..6853f62
--- /dev/null
+++ b/src/stages/intermediate.rs
@@ -0,0 +1,577 @@
+use crate::*;
+
+use assembler::{DefinitionType, SourceLocation, SourcePosition, SymbolRole};
+
+use indexmap::{IndexSet, IndexMap};
+
+
+/// Monotonic counter backing `next_id!`; every `Environment` built in this
+/// file takes its `id` from it.
+///
+/// An atomic is used instead of a `static mut` so that handing out an ID needs
+/// no `unsafe` block and cannot become a data race.
+static ID: ::std::sync::atomic::AtomicUsize = ::std::sync::atomic::AtomicUsize::new(0);
+
+/// Evaluates to the next unused ID, starting at zero and growing by one on
+/// every expansion.
+macro_rules! next_id {
+    () => {
+        // `fetch_add` yields the value held before the increment, which is the
+        // same read-then-increment sequence the counter has always used.
+        ID.fetch_add(1, ::std::sync::atomic::Ordering::Relaxed)
+    };
+}
+
+/// Converts a list of semantic tokens into a list of intermediate tokens.
+///
+/// Returns the intermediate tokens when no error was recorded, otherwise every
+/// error the parser collected.
+pub fn parse_intermediate(
+    semantic: Vec<Tracked<SemanticToken>>,
+) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> {
+    let parser = IntermediateParser::new(semantic);
+    parser.parse()
+}
+
+
+/// State for one run of the intermediate stage over a list of semantic tokens.
+struct IntermediateParser {
+ /// Input tokens, consumed by `parse`.
+ semantic: Vec<Tracked<SemanticToken>>,
+ /// Names of all label definitions reported by the symbol parser.
+ label_names: IndexSet<Tracked<String>>,
+ /// Names of all macro definitions reported by the symbol parser
+ /// (macro argument definitions are excluded).
+ macro_names: IndexSet<Tracked<String>>,
+ /// Macro definitions registered so far by `parse`, keyed by macro name.
+ macro_definitions: IndexMap<String, MacroDefinition>,
+ /// Output tokens accumulated by `parse`.
+ intermediate: Vec<Tracked<IntermediateToken>>,
+ /// Errors accumulated by `parse`.
+ errors: Vec<Tracked<IntermediateError>>,
+}
+
+impl IntermediateParser {
+ /// Builds a parser for `semantic`, first collecting the name of every label
+ /// definition and every macro definition found in the token list.
+ ///
+ /// Hits `unreachable!` if the same label name or the same macro name is
+ /// collected twice.
+ pub fn new(semantic: Vec<Tracked<SemanticToken>>) -> Self {
+     let mut macro_names = IndexSet::new();
+     let mut label_names = IndexSet::new();
+
+     for symbol in SymbolParser::new().parse(&semantic) {
+         match symbol.role {
+             SymbolRole::Reference => {}
+             SymbolRole::Definition(DefinitionType::CanFollowReference) => {
+                 let label = Tracked::from(symbol.name.clone(), symbol.source);
+                 let newly_added = label_names.insert(label);
+                 if !newly_added {
+                     unreachable!("Uncaught duplicate label definition '{}'", symbol.name);
+                 }
+             }
+             SymbolRole::Definition(DefinitionType::MustPrecedeReference) => {
+                 // Only consider macro definitions, not macro argument definitions.
+                 if !symbol.namespace.is_empty() {
+                     continue;
+                 }
+                 let name = Tracked::from(symbol.name.clone(), symbol.source);
+                 let newly_added = macro_names.insert(name);
+                 if !newly_added {
+                     unreachable!("Uncaught duplicate macro definition '{}'", symbol.name);
+                 }
+             }
+         }
+     }
+
+     Self {
+         errors: Vec::new(),
+         intermediate: Vec::new(),
+         macro_definitions: IndexMap::new(),
+         macro_names,
+         label_names,
+         semantic,
+     }
+ }
+
+ /// Consumes the parser and processes the semantic tokens in order.
+ ///
+ /// Macro definitions are test-invoked with dummy arguments and then
+ /// registered; block tokens are parsed and their output appended to the
+ /// result. Returns the accumulated intermediate tokens if no error was
+ /// recorded, otherwise the accumulated errors.
+ pub fn parse(mut self) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> {
+ for token in self.semantic {
+ let source = &token.source;
+ match token.value {
+ SemanticToken::MacroDefinition(definition) => {
+ // Invoke the body to see if it contains undefined macros.
+ let error_count = self.errors.len();
+ let mut arguments = IndexMap::new();
+ // Prepare dummy argument values.
+ // All dummy values share this empty, zero-positioned source span.
+ let null = SourceSpan {
+ string: String::new(),
+ in_merged: SourceLocation {
+ path: None,
+ start: SourcePosition::ZERO,
+ end: SourcePosition::ZERO,
+ },
+ in_source: None,
+ child: None,
+ };
+ // Integer arguments become the literal 0, block arguments an empty block.
+ for argument in &definition.arguments {
+ let value = match argument.variant {
+ ArgumentType::Integer => {
+ let integer = IntermediateInteger::Integer(0);
+ let tracked = Tracked::from(integer, null.clone());
+ IntermediateValue::Integer(tracked)
+ }
+ ArgumentType::Block => {
+ IntermediateValue::Block(Vec::new())
+ }
+ };
+ let tracked = Tracked::from(value, null.clone());
+ arguments.insert(argument.name.clone(), tracked);
+ }
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments,
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ // The resulting value is discarded; only the recorded errors matter here.
+ env.parse_macro_definition_body(&definition.body, source);
+ // Any new error stops the whole token loop: this definition is not
+ // registered and no later token is processed.
+ if self.errors.len() != error_count {
+ break;
+ }
+
+ let name = definition.name.to_string();
+ if self.macro_definitions.insert(name.clone(), definition).is_some() {
+ unreachable!("Uncaught duplicate macro definition '{}'", name);
+ }
+ }
+ SemanticToken::BlockToken(block_token) => {
+ // Top-level block tokens are parsed with no macro arguments in scope.
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments: IndexMap::new(),
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ let mut tokens = env.parse_block_token(&block_token, source);
+ self.intermediate.append(&mut tokens);
+ }
+ }
+ }
+ match self.errors.is_empty() {
+ true => Ok(self.intermediate),
+ false => Err(self.errors),
+ }
+ }
+}
+
+
+/// Context in which one macro body or one top-level block token is parsed.
+struct Environment<'a> {
+ /// Names of all label definitions, borrowed from the parser.
+ label_names: &'a IndexSet<Tracked<String>>,
+ /// Names of all macro definitions, borrowed from the parser.
+ macro_names: &'a IndexSet<Tracked<String>>,
+ /// Macro definitions registered so far, borrowed from the parser.
+ macro_definitions: &'a IndexMap<String, MacroDefinition>,
+ /// Values bound to the macro arguments visible in this environment, by name.
+ arguments: IndexMap<String, Tracked<IntermediateValue>>,
+ /// Shared error list that parsing appends to.
+ errors: &'a mut Vec<Tracked<IntermediateError>>,
+ /// ID obtained from `next_id!` when the environment was created; used by
+ /// `tag_name` as a name suffix.
+ id: usize,
+}
+
+impl<'a> Environment<'a> {
+ /// Attaches the invocation ID to every macro label name.
+ ///
+ /// A name containing `:` is returned with `:<id>` appended, where `<id>` is
+ /// this environment's ID; any other name is returned unchanged.
+ fn tag_name(&self, name: &str) -> String {
+     if name.contains(':') {
+         format!("{name}:{}", self.id)
+     } else {
+         name.to_string()
+     }
+ }
+
+ /// Evaluates a macro body in this environment.
+ ///
+ /// An integer body yields an integer value and a block body yields the
+ /// concatenated tokens of its blocks, both tracked with `source`. An
+ /// invocation body yields whatever the invocation resolves to, parsed
+ /// against the invocation's own source. Returns `None` when the integer or
+ /// the invocation could not be parsed.
+ fn parse_macro_definition_body(&mut self, body: &MacroDefinitionBody, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> {
+     let value = match body {
+         MacroDefinitionBody::Invocation(invocation) => {
+             return self.parse_invocation(invocation, &invocation.source);
+         }
+         MacroDefinitionBody::Integer(integer) => {
+             let parsed = self.parse_integer_token(integer, source)?;
+             IntermediateValue::Integer(parsed)
+         }
+         MacroDefinitionBody::Block(blocks) => {
+             let mut collected = Vec::new();
+             for block in blocks {
+                 collected.extend(self.parse_block_token(block, &block.source));
+             }
+             IntermediateValue::Block(collected)
+         }
+     };
+     Some(Tracked::from(value, source.clone()))
+ }
+
+ /// Converts one block token into zero or more intermediate tokens.
+ ///
+ /// Problems are appended to `self.errors`; the tokens that could still be
+ /// produced are returned.
+ fn parse_block_token(&mut self, block: &BlockToken, source: &SourceSpan) -> Vec<Tracked<IntermediateToken>> {
+ let mut intermediate = Vec::new();
+ match block {
+ BlockToken::LabelDefinition(name) => {
+ let token = IntermediateToken::LabelDefinition(self.tag_name(name));
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ BlockToken::PinnedAddress(address) => {
+ if let Some(integer) = self.parse_integer_token(address, &address.source) {
+ // A pinned address is evaluated here, so it may not contain a label reference.
+ // Note: this `source` shadows the parameter inside this branch only.
+ if let Some(source) = integer_contains_label_reference(&integer) {
+ let error = IntermediateError::LabelReferenceInPinnedAddress;
+ let new_source = address.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ } else {
+ match evaluate_integer(&integer, source) {
+ Ok(value) => {
+ // A value that does not fit in usize (e.g. a negative one) is
+ // replaced by 0 without reporting an error.
+ let value = usize::try_from(value).unwrap_or(0);
+ let tracked = Tracked::from(value, address.source.clone());
+ let token = IntermediateToken::PinnedAddress(tracked);
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ Err(error) => self.errors.push(error),
+ }
+ }
+ }
+ }
+ BlockToken::ConditionalBlock(cond) => {
+ // Both the predicate and the body are parsed up front, so errors in the
+ // body are recorded whether or not the body ends up being emitted.
+ let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source);
+ let mut body = self.parse_block_token(&cond.body, &cond.body.source);
+ if let Some(predicate) = predicate {
+ let mut found_error = false;
+ if let Some(source) = integer_contains_label_reference(&predicate) {
+ let error = IntermediateError::LabelReferenceInConditionPredicate;
+ let new_source = cond.predicate.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ found_error = true;
+ };
+ if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) {
+ let error = IntermediateError::LabelDefinitionInConditionBody;
+ let new_source = cond.body.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ found_error = true;
+ }
+ // The body is emitted only when the predicate evaluates to a non-zero value.
+ if !found_error {
+ match evaluate_integer(&predicate, &cond.predicate.source) {
+ Ok(value) => if value != 0 { intermediate.append(&mut body) },
+ Err(error) => self.errors.push(error),
+ }
+ }
+ }
+ }
+ BlockToken::WordTemplate(word_template) => {
+ let mut fields = Vec::new();
+ for bit_field in &word_template.fields {
+ // Each field is resolved as a zero-argument invocation of the field's name.
+ let name = bit_field.name.to_string();
+ let source = &bit_field.source;
+ let invocation = Invocation { name, arguments: Vec::new() };
+ // A field that fails to resolve is left out; the word is still emitted.
+ if let Some(value) = self.parse_integer_invocation(&invocation, source) {
+ let field = IntermediateField {
+ width: bit_field.width,
+ shift: bit_field.shift,
+ value,
+ };
+ fields.push(Tracked::from(field, bit_field.source.clone()));
+ }
+ }
+ let word = IntermediateWord {
+ value: word_template.value,
+ width: word_template.width,
+ fields,
+ };
+ let token = IntermediateToken::Word(word);
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ BlockToken::Block(blocks) => {
+ // Nested blocks are flattened into the output in order.
+ for block in blocks {
+ let mut tokens = self.parse_block_token(block, &block.source);
+ intermediate.append(&mut tokens);
+ }
+ }
+ BlockToken::Invocation(invocation) => {
+ if let Some(mut tokens) = self.parse_block_invocation(invocation, source) {
+ intermediate.append(&mut tokens);
+ }
+ }
+ }
+
+ return intermediate;
+ }
+
+ /// Converts an integer token into an intermediate integer tracked with
+ /// `source`.
+ ///
+ /// Literals are wrapped directly; expressions and invocations are delegated
+ /// to their own parsers, which may return `None`.
+ fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+     match integer {
+         IntegerToken::Invocation(invocation) => self.parse_integer_invocation(invocation, source),
+         IntegerToken::Expression(expression) => self.parse_expression(expression, source),
+         IntegerToken::IntegerLiteral(value) => {
+             let literal = IntermediateInteger::Integer(*value);
+             let tracked = Tracked::from(literal, source.clone());
+             Some(tracked)
+         }
+     }
+ }
+
+ /// Resolves an invocation that must produce an integer.
+ ///
+ /// Returns `None` if the invocation could not be resolved. If it resolved to
+ /// a block, records `ExpectedInteger` at `source` and returns `None`.
+ fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+     let resolved = self.parse_invocation(invocation, source)?;
+     if let IntermediateValue::Integer(integer) = resolved.value {
+         return Some(integer);
+     }
+     let error = IntermediateError::ExpectedInteger;
+     self.errors.push(Tracked::from(error, source.clone()));
+     None
+ }
+
+ /// Resolves an invocation that must produce a block.
+ ///
+ /// Returns `None` if the invocation could not be resolved. If it resolved to
+ /// an integer, records `ExpectedBlock` at `source` and returns `None`.
+ fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> {
+     let resolved = self.parse_invocation(invocation, source)?;
+     let IntermediateValue::Block(tokens) = resolved.value else {
+         let error = IntermediateError::ExpectedBlock;
+         self.errors.push(Tracked::from(error, source.clone()));
+         return None;
+     };
+     Some(tokens)
+ }
+
+ /// Resolves an invocation to an intermediate value.
+ ///
+ /// The name is looked up, in order, among the macro arguments of this
+ /// environment, the label names, and the macro definitions registered so
+ /// far. A name that is a known macro but not yet registered produces
+ /// `InvocationBeforeDefinition`; any other name is treated as unreachable.
+ /// Returns `None` after recording an error in `self.errors`.
+ fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> {
+ let received_count = invocation.arguments.len();
+ if let Some(argument) = self.arguments.get(&invocation.name) {
+ // Macro argument: takes no arguments itself and yields its bound value.
+ if received_count != 0 {
+ let error = IntermediateError::IncorrectArgumentCount(0, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ Some(argument.clone())
+ }
+ } else if let Some(label_name) = self.label_names.get(&invocation.name) {
+ // Label: takes no arguments and yields a label reference integer.
+ if received_count != 0 {
+ let error = IntermediateError::IncorrectArgumentCount(0, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ let name = self.tag_name(label_name);
+ let tracked = Tracked::from(name, label_name.source.clone());
+ let integer = IntermediateInteger::LabelReference(tracked);
+ let tracked = Tracked::from(integer, source.clone());
+ let value = IntermediateValue::Integer(tracked);
+ Some(Tracked::from(value, source.clone()))
+ }
+ } else if let Some(definition) = self.macro_definitions.get(&invocation.name) {
+ // Check that the correct number of arguments were provided.
+ let expected_count = definition.arguments.len();
+ if received_count != expected_count {
+ let error = IntermediateError::IncorrectArgumentCount(expected_count, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ // Gather and type-check the provided arguments.
+ let mut arguments = Vec::new();
+ for (i, argument) in invocation.arguments.iter().enumerate() {
+ let received_type = match &argument.value {
+ InvocationArgument::String(string) => {
+ // A string becomes a list holding one integer value per character.
+ let mut values = Vec::new();
+ for c in &string.chars {
+ let integer = IntermediateInteger::Integer(**c);
+ let tracked = Tracked::from(integer, c.source.clone());
+ values.push(IntermediateValue::Integer(tracked));
+ }
+ arguments.push(RepeatedArgument::List(values));
+ ArgumentType::Integer
+ }
+ InvocationArgument::IntegerToken(integer) => {
+ let tracked = self.parse_integer_token(&integer, &argument.source)?;
+ let value = IntermediateValue::Integer(tracked);
+ arguments.push(RepeatedArgument::Loop(value));
+ ArgumentType::Integer
+ }
+ InvocationArgument::BlockToken(block) => {
+ let tokens = self.parse_block_token(&block, &argument.source);
+ let value = IntermediateValue::Block(tokens);
+ arguments.push(RepeatedArgument::Loop(value));
+ ArgumentType::Block
+ }
+ InvocationArgument::Invocation(invocation) => {
+ let value = self.parse_invocation(&invocation, &argument.source)?;
+ let received_type = match &value.value {
+ IntermediateValue::Integer(_) => ArgumentType::Integer,
+ IntermediateValue::Block(_) => ArgumentType::Block,
+ };
+ arguments.push(RepeatedArgument::Loop(value.value));
+ received_type
+ }
+ };
+ // The expected type is derived as "the other variant"; it is only used
+ // when the received type does not match the definition.
+ let expected_type = match received_type {
+ ArgumentType::Integer => ArgumentType::Block,
+ ArgumentType::Block => ArgumentType::Integer,
+ };
+ if definition.arguments[i].variant != received_type {
+ let error = IntermediateError::IncorrectArgumentType(expected_type, received_type);
+ self.errors.push(Tracked::from(error, argument.source.clone()));
+ return None;
+ }
+ }
+ // Invoke the invocation multiple times.
+ // The repetition count is the largest argument length, or 1 with no arguments.
+ // NOTE(review): `RepeatedArgument::len` is defined elsewhere; presumably a
+ // `Loop` argument has length 1 - confirm.
+ let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1);
+ let mut values = Vec::new();
+ for i in 0..repetitions {
+ // Construct an argument map for this invocation.
+ let mut argument_map = IndexMap::new();
+ for (a, argument) in arguments.iter().enumerate() {
+ let name = definition.arguments[a].name.clone();
+ let source = invocation.arguments[a].source.clone();
+ let value = match argument {
+ RepeatedArgument::Loop(value) => {
+ // A looped argument supplies the same value on every repetition.
+ Tracked::from(value.clone(), source)
+ }
+ RepeatedArgument::List(list) => match list.get(i) {
+ Some(value) => {
+ Tracked::from(value.clone(), source)
+ }
+ None => {
+ // This list is shorter than the longest argument.
+ let error = IntermediateError::ListExhausted;
+ let source = invocation.arguments[a].source.clone();
+ self.errors.push(Tracked::from(error, source));
+ return None;
+ }
+ }
+ };
+ if argument_map.insert(name.clone(), value).is_some() {
+ unreachable!("Uncaught duplicate macro argument name '{name}'");
+ };
+ }
+ // Each repetition is evaluated in a fresh environment with its own ID.
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments: argument_map,
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ values.push(env.parse_macro_definition_body(&definition.body, source)?);
+ }
+ // A single result is returned as-is and may be an integer or a block.
+ if values.len() == 1 {
+ values.pop()
+ } else {
+ // Flatten all values into a list of block tokens.
+ let mut block = Vec::new();
+ for value in values {
+ match value.value {
+ IntermediateValue::Integer(_) => {
+ let error = IntermediateError::ExpectedBlock;
+ self.errors.push(Tracked::from(error, value.source));
+ return None;
+ }
+ IntermediateValue::Block(mut tokens) => {
+ block.append(&mut tokens);
+ }
+ }
+ }
+ Some(Tracked::from(IntermediateValue::Block(block), source.clone()))
+ }
+ }
+ } else if let Some(macro_name) = self.macro_names.get(&invocation.name) {
+ // The macro exists in the program but has not been registered yet.
+ let error = IntermediateError::InvocationBeforeDefinition;
+ let source = source.clone().wrap(macro_name.source.clone());
+ self.errors.push(Tracked::from(error, source));
+ None
+ } else {
+ unreachable!("Uncaught unresolved reference '{}'", invocation.name);
+ }
+ }
+
+ /// Converts an expression into an intermediate expression integer tracked
+ /// with `source`.
+ ///
+ /// Every token is visited even after a failure, so errors from all tokens
+ /// are recorded; `None` is returned if any integer or invocation token
+ /// failed to parse.
+ fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+     let mut tokens = Vec::new();
+     let mut failed = false;
+
+     for token in &expression.tokens {
+         let parsed = match &token.value {
+             ExpressionToken::Operator(operator) => {
+                 let operator = IntermediateExpressionToken::Operator(*operator);
+                 tokens.push(Tracked::from(operator, token.source.clone()));
+                 continue;
+             }
+             ExpressionToken::IntegerToken(integer) => {
+                 self.parse_integer_token(integer, &token.source)
+             }
+             ExpressionToken::Invocation(invocation) => {
+                 self.parse_integer_invocation(invocation, &token.source)
+             }
+         };
+         match parsed {
+             Some(integer) => {
+                 let value = IntermediateExpressionToken::Integer(integer.value);
+                 tokens.push(Tracked::from(value, integer.source));
+             }
+             None => failed = true,
+         }
+     }
+
+     if failed {
+         return None;
+     }
+     let integer = IntermediateInteger::Expression(IntermediateExpression { tokens });
+     Some(Tracked::from(integer, source.clone()))
+ }
+}
+
+
+/// Returns `$option` from the enclosing function when it is `Some`, and does
+/// nothing otherwise.
+///
+/// The argument is evaluated exactly once. Evaluating it a second time for the
+/// `return` would repeat every recursive search passed to this macro, doubling
+/// the work at each level of nesting.
+macro_rules! return_some {
+    ($option:expr) => {
+        if let Some(value) = $option { return Some(value); }
+    };
+}
+
+/// Returns the source of the first label reference found in `integer`, or
+/// `None` if it contains none.
+fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option<SourceSpan> {
+    if let IntermediateInteger::LabelReference(label) = integer {
+        return Some(label.source.clone());
+    }
+    if let IntermediateInteger::Expression(expr) = integer {
+        return expression_contains_label_reference(expr);
+    }
+    // Remaining case: a plain integer literal.
+    None
+}
+
+/// Returns the source of the first expression token that contains a label
+/// reference, wrapping the source of the reference itself, or `None` if no
+/// token contains one.
+fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option<SourceSpan> {
+    expression.tokens.iter().find_map(|token| {
+        // Operators cannot contain a label reference.
+        let IntermediateExpressionToken::Integer(integer) = &token.value else {
+            return None;
+        };
+        let child = integer_contains_label_reference(integer)?;
+        Some(token.source.clone().wrap(child))
+    })
+}
+
+/// Returns the source of the first label definition found in `block`, or
+/// `None` if there is none.
+///
+/// Looks at the block itself, at nested blocks, and at the arguments of
+/// invocations; every other kind of block token is ignored.
+fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option<SourceSpan> {
+    if let BlockToken::LabelDefinition(_) = block {
+        return Some(source.clone());
+    }
+    if let BlockToken::Invocation(invocation) = block {
+        return_some!(invocation_contains_label_definition(invocation));
+    }
+    if let BlockToken::Block(children) = block {
+        for child in children {
+            return_some!(block_contains_label_definition(child, &child.source));
+        }
+    }
+    None
+}
+
+/// Returns the source of the first label definition found among the block
+/// and invocation arguments of `invocation`, or `None` if there is none.
+fn invocation_contains_label_definition(invocation: &Invocation) -> Option<SourceSpan> {
+    for argument in &invocation.arguments {
+        if let InvocationArgument::BlockToken(block) = &argument.value {
+            return_some!(block_contains_label_definition(block, &argument.source));
+        } else if let InvocationArgument::Invocation(nested) = &argument.value {
+            return_some!(invocation_contains_label_definition(nested));
+        }
+    }
+    None
+}
+
+/// Evaluates an intermediate integer to a concrete value.
+///
+/// Literals evaluate to themselves and expressions are evaluated with
+/// `evaluate_expression`. A label reference is treated as unreachable: callers
+/// are expected to have rejected it beforehand.
+fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> {
+    match integer {
+        IntermediateInteger::Expression(expr) => evaluate_expression(expr, source),
+        IntermediateInteger::Integer(value) => Ok(*value),
+        IntermediateInteger::LabelReference(name) => {
+            unreachable!("Uncaught label reference '{name}' in condition predicate or pinned address value")
+        }
+    }
+}
+
+/// Evaluates an intermediate expression on an `ExpressionStack`.
+///
+/// Integer tokens are pushed (nested expressions are evaluated first) and
+/// operator tokens are applied to the stack. A failing operator is reported
+/// at that token's source; a failure to pull the final result is reported at
+/// `source`. A label reference is treated as unreachable.
+fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> {
+    let mut stack = ExpressionStack::new();
+
+    for token in &expression.tokens {
+        match &token.value {
+            IntermediateExpressionToken::Operator(operator) => {
+                stack.apply(*operator, &token.source).map_err(|stack_error| {
+                    let error = IntermediateError::StackError(stack_error);
+                    Tracked::from(error, token.source.clone())
+                })?;
+            }
+            IntermediateExpressionToken::Integer(IntermediateInteger::Integer(value)) => {
+                stack.push(*value);
+            }
+            IntermediateExpressionToken::Integer(IntermediateInteger::Expression(nested)) => {
+                let value = evaluate_expression(nested, &token.source)?;
+                stack.push(value);
+            }
+            IntermediateExpressionToken::Integer(IntermediateInteger::LabelReference(name)) => {
+                unreachable!("Uncaught label reference '{name}' in condition predicate");
+            }
+        }
+    }
+
+    stack.pull_result().map_err(|err| {
+        let error = Tracked::from(err, source.clone());
+        Tracked::from(IntermediateError::StackError(error), source.clone())
+    })
+}