diff options
Diffstat (limited to 'src/stages')
-rw-r--r-- | src/stages/bytecode.rs | 182 | ||||
-rw-r--r-- | src/stages/bytecode_tokens.rs | 78 | ||||
-rw-r--r-- | src/stages/intermediate.rs | 577 | ||||
-rw-r--r-- | src/stages/intermediate_tokens.rs | 149 | ||||
-rw-r--r-- | src/stages/mod.rs | 31 | ||||
-rw-r--r-- | src/stages/semantic.rs | 478 | ||||
-rw-r--r-- | src/stages/semantic_tokens.rs | 296 | ||||
-rw-r--r-- | src/stages/syntactic.rs | 323 | ||||
-rw-r--r-- | src/stages/syntactic_tokens.rs | 160 |
9 files changed, 2274 insertions, 0 deletions
diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..3618b26 --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,182 @@ +use crate::*; + +use std::collections::HashMap; + + +pub fn parse_bytecode(tokens: Vec<Tracked<IntermediateToken>>, width: Option<u32>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> { + BytecodeParser::new(width).parse(tokens) +} + + +pub struct BytecodeParser { + width: Option<u32>, + addresses: HashMap<String, Tracked<usize>>, + address: usize, + segment_address: usize, + segment_source: Option<SourceSpan>, + segments: Vec<Segment>, + words: Vec<Tracked<Word>>, + errors: Vec<Tracked<BytecodeError>>, +} + +impl BytecodeParser { + pub fn new(width: Option<u32>) -> Self { + Self { + width, + addresses: HashMap::new(), + address: 0, + segment_address: 0, + segment_source: None, + segments: Vec::new(), + words: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(mut self, tokens: Vec<Tracked<IntermediateToken>>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> { + // Calculate all label addresses ahead of time. + let mut address = 0; + for token in &tokens { + let source = &token.source; + match &token.value { + IntermediateToken::LabelDefinition(name) => { + let tracked = Tracked::from(address, source.clone()); + if let Some(_) = self.addresses.insert(name.clone(), tracked) { + unreachable!("Uncaught duplicate label definition '{name}'"); + } + } + IntermediateToken::Word(_) => { + address += 1; + } + IntermediateToken::PinnedAddress(pinned) => { + address = pinned.value; + } + } + } + for token in &tokens { + let source = &token.source; + match &token.value { + IntermediateToken::Word(word) => { + let word = self.evaluate_word(word, source); + // Check that the word width fits the provided width. + if let Some(width) = self.width { + if word.width != width { + let error = BytecodeError::IncorrectWidth(width, word.width); + self.errors.push(Tracked::from(error, source.clone())); + } + } + self.words.push(word); + self.address += 1; + } + IntermediateToken::PinnedAddress(address) => { + let current = self.address; + let pinned = address.value; + if current > pinned { + let error = BytecodeError::PinnedAddressBacktrack(pinned, current); + self.errors.push(Tracked::from(error, address.source.clone())); + } else { + let words = std::mem::take(&mut self.words); + if !words.is_empty() { + let address = self.segment_address; + let source = std::mem::take(&mut self.segment_source); + let segment = Segment { address, source, words }; + self.segments.push(segment); + } + self.segment_source = Some(address.source.clone()); + self.address = pinned; + self.segment_address = pinned; + } + } + IntermediateToken::LabelDefinition(_) => (), + } + } + // Finish final segment. + let words = std::mem::take(&mut self.words); + if !words.is_empty() { + let address = self.segment_address; + let source = std::mem::take(&mut self.segment_source); + let segment = Segment { address, source, words }; + self.segments.push(segment); + } + + match self.errors.is_empty() { + true => Ok(self.segments), + false => Err(self.errors), + } + } + + fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize { + let mut stack = ExpressionStack::new(); + for token in &expression.tokens { + let source = &token.source; + match &token.value { + IntermediateExpressionToken::Integer(integer) => match integer { + IntermediateInteger::Integer(value) => { + stack.push(*value); + } + IntermediateInteger::Expression(expression) => { + stack.push(self.evaluate_expression(expression, source)); + } + IntermediateInteger::LabelReference(name) => { + stack.push(self.evaluate_label_reference(name)); + } + } + IntermediateExpressionToken::Operator(operator) => { + if let Err(err) = stack.apply(*operator, source) { + let error = BytecodeError::StackError(err); + self.errors.push(Tracked::from(error, source.clone())) + } + } + } + } + match stack.pull_result() { + Ok(value) => value, + Err(err) => { + let error = BytecodeError::StackError(Tracked::from(err, source.clone())); + self.errors.push(Tracked::from(error, source.clone())); + 0 + } + } + } + + fn evaluate_label_reference(&mut self, name: &Tracked<String>) -> isize { + if let Some(address) = self.addresses.get(&name.to_string()) { + address.value as isize + } else { + unreachable!("Uncaught unresolved label reference '{name}'") + } + } + + fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked<Word> { + let mut word_value = word.value; + for field in &word.fields { + let field_source = &field.value.value.source; + let field_value = match &field.value.value.value { + IntermediateInteger::Expression(expression) => { + self.evaluate_expression(expression, source) + } + IntermediateInteger::LabelReference(name) => { + self.evaluate_label_reference(name) + } + IntermediateInteger::Integer(value) => { + *value + } + }; + let value_width = match field_value.cmp(&0) { + std::cmp::Ordering::Less => (-field_value).ilog2() + 1, + std::cmp::Ordering::Equal => 0, + std::cmp::Ordering::Greater => field_value.ilog2() + 1, + }; + if field.width < value_width { + let error = BytecodeError::ValueTooWide(field.width, value_width); + self.errors.push(Tracked::from(error, field_source.clone())); + } else { + let mask = 2_usize.pow(field.width as u32) - 1; + let clamped_value = (field_value as usize) & mask; + word_value |= (clamped_value << field.shift) as usize; + } + } + let word = Word { width: word.width, value: word_value }; + return Tracked::from(word, source.clone()); + } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..b54cb0e --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,78 @@ +use crate::*; + + +pub struct Segment { + pub address: usize, + /// Source of the address value. + pub source: Option<SourceSpan>, + pub words: Vec<Tracked<Word>>, +} + +pub struct Word { + pub value: usize, + pub width: u32, +} + +impl std::fmt::Display for Word { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + if self.width == 0 { + write!(f, "0") + } else { + for i in (0..self.width).rev() { + let is_first_bit = i+1 == self.width; + if !is_first_bit && (i+1) % 4 == 0 { + write!(f, "_")?; + } + match (self.value >> i) & 1 { + 0 => write!(f, "0")?, + _ => write!(f, "1")?, + } + } + Ok(()) + } + } +} + +pub enum BytecodeError { + /// expected, received + IncorrectWidth(u32, u32), + /// pinned, real + PinnedAddressBacktrack(usize, usize), + /// expected, received + ValueTooWide(u32, u32), + StackError(Tracked<StackError>), +} + + +pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { + for error in errors { + report_bytecode_error(error, source_code); + } +} + +fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + BytecodeError::IncorrectWidth(expected, received) => + &format!("Word is {received} bits wide, but was expected to have a fixed width of {expected} bits"), + BytecodeError::PinnedAddressBacktrack(pinned, real) => + &format!("Cannot pin to address {pinned} when address is already {real}"), + BytecodeError::StackError(stack_error) => { + report_stack_error(stack_error, source_code); return; }, + BytecodeError::ValueTooWide(expected, received) => + &format!("Field is {expected} bits wide, but received a value that is {received} bits wide"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_segment(segment: &Segment) { + println!("SEGMENT: 0x{:>04x}", segment.address); + // Find maximum width of all words in the segment. + let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0); + for word in &segment.words { + let string = word.to_string(); + println!(" {string:>w$}", w=width as usize); + } +} diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs new file mode 100644 index 0000000..6853f62 --- /dev/null +++ b/src/stages/intermediate.rs @@ -0,0 +1,577 @@ +use crate::*; + +use assembler::{DefinitionType, SourceLocation, SourcePosition, SymbolRole}; + +use indexmap::{IndexSet, IndexMap}; + + +static mut ID: usize = 0; +macro_rules! next_id { () => { unsafe { let id = ID; ID += 1; id }}; } + +pub fn parse_intermediate(semantic: Vec<Tracked<SemanticToken>>) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> { + IntermediateParser::new(semantic).parse() +} + + +struct IntermediateParser { + semantic: Vec<Tracked<SemanticToken>>, + label_names: IndexSet<Tracked<String>>, + macro_names: IndexSet<Tracked<String>>, + macro_definitions: IndexMap<String, MacroDefinition>, + intermediate: Vec<Tracked<IntermediateToken>>, + errors: Vec<Tracked<IntermediateError>>, +} + +impl IntermediateParser { + pub fn new(semantic: Vec<Tracked<SemanticToken>>) -> Self { + let mut label_names = IndexSet::new(); + let mut macro_names = IndexSet::new(); + for symbol in SymbolParser::new().parse(&semantic) { + match symbol.role { + SymbolRole::Definition(DefinitionType::MustPrecedeReference) => { + // Only consider macro definitions, not macro argument definitions. + if symbol.namespace.is_empty() { + if !macro_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { + unreachable!("Uncaught duplicate macro definition '{}'", symbol.name); + } + } + } + SymbolRole::Definition(DefinitionType::CanFollowReference) => { + if !label_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { + unreachable!("Uncaught duplicate label definition '{}'", symbol.name); + } + } + SymbolRole::Reference => (), + } + } + + Self { + semantic, + label_names, + macro_names, + macro_definitions: IndexMap::new(), + intermediate: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(mut self) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> { + for token in self.semantic { + let source = &token.source; + match token.value { + SemanticToken::MacroDefinition(definition) => { + // Invoke the body to see if it contains undefined macros. + let error_count = self.errors.len(); + let mut arguments = IndexMap::new(); + // Prepare dummy argument values. + let null = SourceSpan { + string: String::new(), + in_merged: SourceLocation { + path: None, + start: SourcePosition::ZERO, + end: SourcePosition::ZERO, + }, + in_source: None, + child: None, + }; + for argument in &definition.arguments { + let value = match argument.variant { + ArgumentType::Integer => { + let integer = IntermediateInteger::Integer(0); + let tracked = Tracked::from(integer, null.clone()); + IntermediateValue::Integer(tracked) + } + ArgumentType::Block => { + IntermediateValue::Block(Vec::new()) + } + }; + let tracked = Tracked::from(value, null.clone()); + arguments.insert(argument.name.clone(), tracked); + } + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments, + errors: &mut self.errors, + id: next_id!(), + }; + env.parse_macro_definition_body(&definition.body, source); + if self.errors.len() != error_count { + break; + } + + let name = definition.name.to_string(); + if self.macro_definitions.insert(name.clone(), definition).is_some() { + unreachable!("Uncaught duplicate macro definition '{}'", name); + } + } + SemanticToken::BlockToken(block_token) => { + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments: IndexMap::new(), + errors: &mut self.errors, + id: next_id!(), + }; + let mut tokens = env.parse_block_token(&block_token, source); + self.intermediate.append(&mut tokens); + } + } + } + match self.errors.is_empty() { + true => Ok(self.intermediate), + false => Err(self.errors), + } + } +} + + +struct Environment<'a> { + label_names: &'a IndexSet<Tracked<String>>, + macro_names: &'a IndexSet<Tracked<String>>, + macro_definitions: &'a IndexMap<String, MacroDefinition>, + arguments: IndexMap<String, Tracked<IntermediateValue>>, + errors: &'a mut Vec<Tracked<IntermediateError>>, + id: usize, +} + +impl<'a> Environment<'a> { + // Attach the invocation ID to every macro label name + fn tag_name(&self, name: &str) -> String { + match name.contains(':') { + true => format!("{name}:{}", self.id), + false => name.to_string(), + } + } + + fn parse_macro_definition_body(&mut self, body: &MacroDefinitionBody, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { + match &body { + MacroDefinitionBody::Integer(integer) => { + let token = self.parse_integer_token(&integer, &source)?; + let integer = IntermediateValue::Integer(token); + Some(Tracked::from(integer, source.clone())) + } + MacroDefinitionBody::Invocation(invocation) => { + self.parse_invocation(&invocation, &invocation.source) + } + MacroDefinitionBody::Block(blocks) => { + let mut tokens = Vec::new(); + for block in blocks { + tokens.append(&mut self.parse_block_token(block, &block.source)); + } + let value = IntermediateValue::Block(tokens); + Some(Tracked::from(value, source.clone())) + } + } + } + + fn parse_block_token(&mut self, block: &BlockToken, source: &SourceSpan) -> Vec<Tracked<IntermediateToken>> { + let mut intermediate = Vec::new(); + match block { + BlockToken::LabelDefinition(name) => { + let token = IntermediateToken::LabelDefinition(self.tag_name(name)); + intermediate.push(Tracked::from(token, source.clone())); + } + BlockToken::PinnedAddress(address) => { + if let Some(integer) = self.parse_integer_token(address, &address.source) { + if let Some(source) = integer_contains_label_reference(&integer) { + let error = IntermediateError::LabelReferenceInPinnedAddress; + let new_source = address.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + } else { + match evaluate_integer(&integer, source) { + Ok(value) => { + let value = usize::try_from(value).unwrap_or(0); + let tracked = Tracked::from(value, address.source.clone()); + let token = IntermediateToken::PinnedAddress(tracked); + intermediate.push(Tracked::from(token, source.clone())); + } + Err(error) => self.errors.push(error), + } + } + } + } + BlockToken::ConditionalBlock(cond) => { + let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source); + let mut body = self.parse_block_token(&cond.body, &cond.body.source); + if let Some(predicate) = predicate { + let mut found_error = false; + if let Some(source) = integer_contains_label_reference(&predicate) { + let error = IntermediateError::LabelReferenceInConditionPredicate; + let new_source = cond.predicate.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + found_error = true; + }; + if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) { + let error = IntermediateError::LabelDefinitionInConditionBody; + let new_source = cond.body.source.clone().wrap(source); + self.errors.push(Tracked::from(error, new_source)); + found_error = true; + } + if !found_error { + match evaluate_integer(&predicate, &cond.predicate.source) { + Ok(value) => if value != 0 { intermediate.append(&mut body) }, + Err(error) => self.errors.push(error), + } + } + } + } + BlockToken::WordTemplate(word_template) => { + let mut fields = Vec::new(); + for bit_field in &word_template.fields { + let name = bit_field.name.to_string(); + let source = &bit_field.source; + let invocation = Invocation { name, arguments: Vec::new() }; + if let Some(value) = self.parse_integer_invocation(&invocation, source) { + let field = IntermediateField { + width: bit_field.width, + shift: bit_field.shift, + value, + }; + fields.push(Tracked::from(field, bit_field.source.clone())); + } + } + let word = IntermediateWord { + value: word_template.value, + width: word_template.width, + fields, + }; + let token = IntermediateToken::Word(word); + intermediate.push(Tracked::from(token, source.clone())); + } + BlockToken::Block(blocks) => { + for block in blocks { + let mut tokens = self.parse_block_token(block, &block.source); + intermediate.append(&mut tokens); + } + } + BlockToken::Invocation(invocation) => { + if let Some(mut tokens) = self.parse_block_invocation(invocation, source) { + intermediate.append(&mut tokens); + } + } + } + + return intermediate; + } + + fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { + match integer { + IntegerToken::IntegerLiteral(value) => { + let integer = IntermediateInteger::Integer(*value); + Some(Tracked::from(integer, source.clone())) + } + IntegerToken::Expression(expression) => { + self.parse_expression(expression, source) + } + IntegerToken::Invocation(invocation) => { + self.parse_integer_invocation(invocation, source) + } + } + } + + fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { + match self.parse_invocation(invocation, source)?.value { + IntermediateValue::Integer(integer) => Some(integer), + IntermediateValue::Block(_) => { + let error = IntermediateError::ExpectedInteger; + self.errors.push(Tracked::from(error, source.clone())); + None + } + } + } + + fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> { + match self.parse_invocation(invocation, source)?.value { + IntermediateValue::Block(tokens) => Some(tokens), + IntermediateValue::Integer(_) => { + let error = IntermediateError::ExpectedBlock; + self.errors.push(Tracked::from(error, source.clone())); + None + } + } + } + + fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { + let received_count = invocation.arguments.len(); + if let Some(argument) = self.arguments.get(&invocation.name) { + if received_count != 0 { + let error = IntermediateError::IncorrectArgumentCount(0, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + Some(argument.clone()) + } + } else if let Some(label_name) = self.label_names.get(&invocation.name) { + if received_count != 0 { + let error = IntermediateError::IncorrectArgumentCount(0, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + let name = self.tag_name(label_name); + let tracked = Tracked::from(name, label_name.source.clone()); + let integer = IntermediateInteger::LabelReference(tracked); + let tracked = Tracked::from(integer, source.clone()); + let value = IntermediateValue::Integer(tracked); + Some(Tracked::from(value, source.clone())) + } + } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { + // Check that the correct number of arguments were provided. + let expected_count = definition.arguments.len(); + if received_count != expected_count { + let error = IntermediateError::IncorrectArgumentCount(expected_count, received_count); + self.errors.push(Tracked::from(error, source.clone())); + None + } else { + // Gather and type-check the provided arguments. + let mut arguments = Vec::new(); + for (i, argument) in invocation.arguments.iter().enumerate() { + let received_type = match &argument.value { + InvocationArgument::String(string) => { + let mut values = Vec::new(); + for c in &string.chars { + let integer = IntermediateInteger::Integer(**c); + let tracked = Tracked::from(integer, c.source.clone()); + values.push(IntermediateValue::Integer(tracked)); + } + arguments.push(RepeatedArgument::List(values)); + ArgumentType::Integer + } + InvocationArgument::IntegerToken(integer) => { + let tracked = self.parse_integer_token(&integer, &argument.source)?; + let value = IntermediateValue::Integer(tracked); + arguments.push(RepeatedArgument::Loop(value)); + ArgumentType::Integer + } + InvocationArgument::BlockToken(block) => { + let tokens = self.parse_block_token(&block, &argument.source); + let value = IntermediateValue::Block(tokens); + arguments.push(RepeatedArgument::Loop(value)); + ArgumentType::Block + } + InvocationArgument::Invocation(invocation) => { + let value = self.parse_invocation(&invocation, &argument.source)?; + let received_type = match &value.value { + IntermediateValue::Integer(_) => ArgumentType::Integer, + IntermediateValue::Block(_) => ArgumentType::Block, + }; + arguments.push(RepeatedArgument::Loop(value.value)); + received_type + } + }; + let expected_type = match received_type { + ArgumentType::Integer => ArgumentType::Block, + ArgumentType::Block => ArgumentType::Integer, + }; + if definition.arguments[i].variant != received_type { + let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); + self.errors.push(Tracked::from(error, argument.source.clone())); + return None; + } + } + // Invoke the invocation multiple times. + let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1); + let mut values = Vec::new(); + for i in 0..repetitions { + // Construct an argument map for this invocation. + let mut argument_map = IndexMap::new(); + for (a, argument) in arguments.iter().enumerate() { + let name = definition.arguments[a].name.clone(); + let source = invocation.arguments[a].source.clone(); + let value = match argument { + RepeatedArgument::Loop(value) => { + Tracked::from(value.clone(), source) + } + RepeatedArgument::List(list) => match list.get(i) { + Some(value) => { + Tracked::from(value.clone(), source) + } + None => { + let error = IntermediateError::ListExhausted; + let source = invocation.arguments[a].source.clone(); + self.errors.push(Tracked::from(error, source)); + return None; + } + } + }; + if argument_map.insert(name.clone(), value).is_some() { + unreachable!("Uncaught duplicate macro argument name '{name}'"); + }; + } + let mut env = Environment { + label_names: &self.label_names, + macro_names: &self.macro_names, + macro_definitions: &self.macro_definitions, + arguments: argument_map, + errors: &mut self.errors, + id: next_id!(), + }; + values.push(env.parse_macro_definition_body(&definition.body, source)?); + } + if values.len() == 1 { + values.pop() + } else { + // Flatten all values into a list of block tokens. + let mut block = Vec::new(); + for value in values { + match value.value { + IntermediateValue::Integer(_) => { + let error = IntermediateError::ExpectedBlock; + self.errors.push(Tracked::from(error, value.source)); + return None; + } + IntermediateValue::Block(mut tokens) => { + block.append(&mut tokens); + } + } + } + Some(Tracked::from(IntermediateValue::Block(block), source.clone())) + } + } + } else if let Some(macro_name) = self.macro_names.get(&invocation.name) { + let error = IntermediateError::InvocationBeforeDefinition; + let source = source.clone().wrap(macro_name.source.clone()); + self.errors.push(Tracked::from(error, source)); + None + } else { + unreachable!("Uncaught unresolved reference '{}'", invocation.name); + } + } + + fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { + let mut intermediate = Vec::new(); + let mut error = false; + + for token in &expression.tokens { + let source = &token.source; + match &token.value { + ExpressionToken::IntegerToken(integer) => { + let Some(integer) = self.parse_integer_token(integer, source) else { + error = true; continue; + }; + let token = IntermediateExpressionToken::Integer(integer.value); + intermediate.push(Tracked::from(token, integer.source)); + } + ExpressionToken::Operator(operator) => { + let token = IntermediateExpressionToken::Operator(*operator); + intermediate.push(Tracked::from(token, source.clone())); + } + ExpressionToken::Invocation(invocation) => { + let Some(integer) = self.parse_integer_invocation(invocation, source) else { + error = true; continue; + }; + let token = IntermediateExpressionToken::Integer(integer.value); + intermediate.push(Tracked::from(token, integer.source)); + } + } + } + + if error { return None; } + let expression = IntermediateExpression { tokens: intermediate }; + let integer = IntermediateInteger::Expression(expression); + Some(Tracked::from(integer, source.clone())) + } +} + + +macro_rules! return_some { + ($option:expr) => { + if $option.is_some() { return $option; } + }; +} + +fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option<SourceSpan> { + match integer { + IntermediateInteger::Integer(_) => None, + IntermediateInteger::LabelReference(label) => Some(label.source.clone()), + IntermediateInteger::Expression(expr) => expression_contains_label_reference(expr), + } +} + +fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option<SourceSpan> { + for token in &expression.tokens { + if let IntermediateExpressionToken::Integer(integer) = &token.value { + if let Some(child) = integer_contains_label_reference(&integer) { + return Some(token.source.clone().wrap(child)); + } + } + } + return None; +} + +fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option<SourceSpan> { + match &block { + BlockToken::LabelDefinition(_) => { + return Some(source.clone()); + } + BlockToken::Invocation(invocation) => { + return_some!(invocation_contains_label_definition(invocation)) + } + BlockToken::Block(blocks) => { + for block in blocks { + return_some!(block_contains_label_definition(block, &block.source)) + } + } + _ => (), + } + return None; +} + +fn invocation_contains_label_definition(invocation: &Invocation) -> Option<SourceSpan> { + for argument in &invocation.arguments { + match &argument.value { + InvocationArgument::BlockToken(block) => { + return_some!(block_contains_label_definition(&block, &argument.source)) + } + InvocationArgument::Invocation(invocation) => { + return_some!(invocation_contains_label_definition(&invocation)) + } + _ => (), + } + } + return None; +} + +fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { + match integer { + IntermediateInteger::Integer(value) => Ok(*value), + IntermediateInteger::LabelReference(name) => + unreachable!("Uncaught label reference '{name}' in condition predicate or pinned address value"), + IntermediateInteger::Expression(expr) => evaluate_expression(expr, source), + } +} + +fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { + let mut stack = ExpressionStack::new(); + for token in &expression.tokens { + let source = &token.source; + match &token.value { + IntermediateExpressionToken::Integer(integer) => match integer { + IntermediateInteger::Integer(value) => { + stack.push(*value); + } + IntermediateInteger::Expression(expression) => { + stack.push(evaluate_expression(&expression, source)?); + } + IntermediateInteger::LabelReference(name) => { + unreachable!("Uncaught label reference '{name}' in condition predicate"); + } + } + IntermediateExpressionToken::Operator(operator) => { + if let Err(stack_error) = stack.apply(*operator, source) { + let error = IntermediateError::StackError(stack_error); + return Err(Tracked::from(error, token.source.clone())); + } + } + } + } + match stack.pull_result() { + Ok(value) => Ok(value), + Err(err) => { + let error = Tracked::from(err, source.clone()); + Err(Tracked::from(IntermediateError::StackError(error), source.clone())) + } + } +} diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs new file mode 100644 index 0000000..a09581e --- /dev/null +++ b/src/stages/intermediate_tokens.rs @@ -0,0 +1,149 @@ +use crate::*; + + +#[derive(Clone)] +pub enum IntermediateToken { + Word(IntermediateWord), + PinnedAddress(Tracked<usize>), + LabelDefinition(String), +} + +#[derive(Clone)] +pub struct IntermediateWord { + pub value: usize, + /// Width of the word in bits. + pub width: u32, + pub fields: Vec<Tracked<IntermediateField>>, +} + +#[derive(Clone)] +pub struct IntermediateField { + pub value: Tracked<IntermediateInteger>, + /// Width of the field in bits. + pub width: u32, + /// Number of bits to the right of the field in the word. + pub shift: u32, +} + +#[derive(Clone)] +pub enum IntermediateInteger { + Integer(isize), + Expression(IntermediateExpression), + LabelReference(Tracked<String>), +} + +#[derive(Clone)] +pub struct IntermediateExpression { + pub tokens: Vec<Tracked<IntermediateExpressionToken>>, +} + +#[derive(Clone)] +pub enum IntermediateExpressionToken { + Integer(IntermediateInteger), + Operator(Operator), +} + +#[derive(Clone)] +pub enum IntermediateValue { + Integer(Tracked<IntermediateInteger>), + Block(Vec<Tracked<IntermediateToken>>), +} + +pub enum RepeatedArgument { + Loop(IntermediateValue), + List(Vec<IntermediateValue>), +} + +impl RepeatedArgument { + pub fn len(&self) -> usize { + match self { + Self::Loop(_) => 1, + Self::List(list) => list.len(), + } + } +} + +pub enum IntermediateError { + ExpectedInteger, + ExpectedBlock, + ListExhausted, + LabelReferenceInConditionPredicate, + LabelDefinitionInConditionBody, + LabelReferenceInPinnedAddress, + StackError(Tracked<StackError>), + InvocationBeforeDefinition, + /// expected, received + IncorrectArgumentCount(usize, usize), + /// expected, received + IncorrectArgumentType(ArgumentType, ArgumentType), +} + +pub fn report_intermediate_errors(errors: &[Tracked<IntermediateError>], source_code: &str) { + for error in errors { + report_intermediate_error(error, source_code); + } +} + +fn report_intermediate_error(error: &Tracked<IntermediateError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + IntermediateError::ExpectedInteger => + "An integer value was expected here", + IntermediateError::ExpectedBlock => + "A block value was expected here", + IntermediateError::ListExhausted => + "This string is shorter than another string passed to the same invocation", + IntermediateError::LabelReferenceInConditionPredicate => + "The predicate of a conditional block cannot contain a label reference", + IntermediateError::LabelDefinitionInConditionBody => + "The body of a conditional block cannot contain a label definition", + IntermediateError::LabelReferenceInPinnedAddress => + "The value of a pinned address cannot contain a label reference", + IntermediateError::StackError(stack_error) => { + report_stack_error(stack_error, source_code); return; }, + IntermediateError::InvocationBeforeDefinition => + &format!("Macro cannot be invoked before it has been defined"), + IntermediateError::IncorrectArgumentCount(expected, received) => + &format!("Expected {expected} arguments, but received {received} instead"), + IntermediateError::IncorrectArgumentType(expected, received) => + &format!("Expected {expected} value but received {received} value instead"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_intermediate_token(i: usize, token: &IntermediateToken) { + match token { + IntermediateToken::Word(word) => { + indent!(i, "Word({:>0w$b})", word.value, w = word.width as usize); + for field in &word.fields { + print_intermediate_integer(i+1, &field.value.value); + } + } + IntermediateToken::PinnedAddress(address) => + indent!(i, "PinnedAddress({address})"), + IntermediateToken::LabelDefinition(name) => + indent!(i, "LabelDefinition({name})"), + } +} + +fn print_intermediate_integer(i: usize, integer: &IntermediateInteger) { + match integer { + IntermediateInteger::Integer(value) => + indent!(i, "Integer({value})"), + IntermediateInteger::LabelReference(name) => + indent!(i, "LabelReference({name})"), + IntermediateInteger::Expression(expression) => { + indent!(i, "Expression"); + for token in &expression.tokens { + match &token.value { + IntermediateExpressionToken::Integer(integer) => + print_intermediate_integer(i+1, integer), + IntermediateExpressionToken::Operator(operator) => + indent!(i+1, "Operator({operator})"), + } + } + } + } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..e735f05 --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,31 @@ +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod intermediate; +mod intermediate_tokens; +mod bytecode; +mod bytecode_tokens; + +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use intermediate::*; +pub use intermediate_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! indent { + (0, $($tokens:tt)*) => {{ + println!($($tokens)*); + }}; + ($indent:expr, $($tokens:tt)*) => {{ + for _ in 0..$indent { print!(" "); } + println!($($tokens)*); + }}; +} + + diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..e225608 --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,478 @@ +use crate::*; + +use std::collections::VecDeque; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { + SemanticParser::from(syntactic, Namespace::None).parse() +} + +#[derive(Clone)] +enum Namespace { + Macro(String), + Label(String), + None, +} + + +struct SemanticParser { + namespace: Namespace, + syntactic: SyntacticTokenStream, + semantic: Vec<Tracked<SemanticToken>>, + errors: Vec<Tracked<SemanticError>>, +} + +impl SemanticParser { + pub fn from(syntactic: Vec<Tracked<SyntacticToken>>, namespace: Namespace) -> Self { + Self { + namespace, + syntactic: SyntacticTokenStream::from(syntactic), + semantic: Vec::new(), + errors: Vec::new(), + } + } + + fn pull_from(&mut self, mut other: SemanticParser) { + self.errors.append(&mut other.errors); + if let Namespace::Macro(_) = other.namespace { + () + } else { + self.namespace = other.namespace; + } + } + + fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { + match symbol { + ScopedSymbol::Global(name) => match &self.namespace { + Namespace::Macro(_) => { + let error = SemanticError::LabelInMacroDefinition; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + Namespace::Label(_) | Namespace::None => { + self.namespace = Namespace::Label(name.clone()); + Some(name) + } + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::SublabelWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { + match symbol { + ScopedSymbol::Global(name) => { + Some(name) + } + ScopedSymbol::Local(name) => match &self.namespace { + Namespace::Macro(macro_ns) => { + Some(format!("{macro_ns}:{name}")) + } + Namespace::Label(label_ns) => { + Some(format!("{label_ns}/{name}")) + } + Namespace::None => { + let error = SemanticError::LocalSymbolWithoutNamespace; + self.errors.push(Tracked::from(error, source.to_owned())); + None + } + } + } + } + + /// Parse the remaining syntactic tokens as a full program. + pub fn parse(mut self) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { + while let Some(token) = self.syntactic.pop() { + if let SyntacticToken::MacroDefinition(definition) = token.value { + let namespace = Namespace::Macro(definition.name.to_string()); + let mut parser = SemanticParser::from(definition.tokens, namespace); + let mut arguments = Vec::new(); + while let Some(argument) = parser.pull_argument_definition() { + arguments.push(argument); + } + let body = parser.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody); + self.pull_from(parser); + let definition = MacroDefinition { name: definition.name, arguments, body }; + let semantic = SemanticToken::MacroDefinition(definition); + self.semantic.push(Tracked::from(semantic, token.source)); + } else { + self.syntactic.unpop(token); + if let Some(token) = self.pull_block_token(SemanticLocation::Program) { + let semantic = SemanticToken::BlockToken(token.value); + self.semantic.push(Tracked::from(semantic, token.source)); + } + } + } + match self.errors.is_empty() { + true => Ok(self.semantic), + false => Err(self.errors), + } + } + + /// Parse the remaining syntactic tokens as a macro definition body. + fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_macro_definition_body_token() { + tokens.push(token); + } + } + if tokens.is_empty() { + MacroDefinitionBody::Block(Vec::new()) + } else if tokens.len() == 1 { + tokens.pop().unwrap() + } else { + let mut block_tokens = Vec::new(); + for token in tokens { + match token { + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedInteger(location); + let tracked = Tracked::from(error, integer.source); + self.errors.push(tracked); + } + MacroDefinitionBody::Block(mut tokens) => { + block_tokens.append(&mut tokens); + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + block_tokens.push(Tracked::from(token, invocation.source)); + } + } + } + MacroDefinitionBody::Block(block_tokens) + } + } + + /// Attempt to pull a MacroDefinitionBody token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + /// Each BodyToken is wrapped in a separate MacroDefinitionBody. + fn pull_macro_definition_body_token(&mut self) -> Option<MacroDefinitionBody> { + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::LabelDefinition(symbol) => { + let name = self.resolve_label_name(symbol, &source)?; + let token = BlockToken::LabelDefinition(name); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::MacroDefinition(_) => { + let error = SemanticError::MisplacedMacroDefinition; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::IntegerLiteral(value) => { + let token = IntegerToken::IntegerLiteral(value); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::StringLiteral(_) => { + let error = SemanticError::MisplacedStringLiteral; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::WordTemplate(word_template) => { + let token = BlockToken::WordTemplate(word_template); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let token = BlockToken::Block(tokens); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let token = IntegerToken::Expression(expression); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Integer(tracked)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let arguments = self.pull_all_invocation_arguments(); + // Extend invocation source span to cover all arguments. + let mut source = source; + if let Some(last) = arguments.last() { + source.in_merged.end = last.source.in_merged.end; + if let Some(last_in_source) = &last.source.in_source { + if let Some(in_source) = &mut source.in_source { + in_source.end = last_in_source.end.clone(); + } + } + } + let invocation = Invocation { name, arguments }; + let tracked = Tracked::from(invocation, source); + Some(MacroDefinitionBody::Invocation(tracked)) + } + SyntacticToken::Separator => { + let error = SemanticError::MisplacedSeparator; + self.errors.push(Tracked::from(error, source)); + None + } + SyntacticToken::Condition => { + let conditional = self.pull_conditional_block()?; + let token = BlockToken::ConditionalBlock(Box::new(conditional)); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + SyntacticToken::Pin => { + let integer = self.pull_integer_token(SemanticLocation::PinAddress)?; + let token = BlockToken::PinnedAddress(integer); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::Block(vec![tracked])) + } + } + } + + /// Attempt to pull an integer token from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_integer_token(&mut self, location: SemanticLocation) -> Option<Tracked<IntegerToken>> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Integer(integer) => { + Some(integer) + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to an integer invocation. + let token = IntegerToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + let token = tokens.pop().unwrap(); + let error = SemanticError::ExpectedInteger(location); + self.errors.push(Tracked::from(error, token.source)); + None + } + } + } + + /// Attempt to pull a BlockToken from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_block_token(&mut self, location: SemanticLocation) -> Option<Tracked<BlockToken>> { + match self.pull_macro_definition_body_token()? { + MacroDefinitionBody::Block(mut tokens) => { + assert_eq!(tokens.len(), 1); + tokens.pop() + } + MacroDefinitionBody::Invocation(invocation) => { + // Convert invocation to a block invocation. + let token = BlockToken::Invocation(invocation.value); + Some(Tracked::from(token, invocation.source)) + } + MacroDefinitionBody::Integer(integer) => { + let error = SemanticError::ExpectedBlock(location); + self.errors.push(Tracked::from(error, integer.source)); + None + } + } + } + + /// Parse the remaining syntactic tokens as the contents of a block. + fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_block_token(SemanticLocation::BlockLiteral) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as a list of integer tokens. + fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> { + let mut tokens = Vec::new(); + while !self.syntactic.is_empty() { + if let Some(token) = self.pull_integer_token(location) { + tokens.push(token); + } + } + tokens + } + + /// Parse the remaining syntactic tokens as the contents of an expression. + fn parse_expression(&mut self) -> Expression { + let mut tokens = Vec::new(); + for token in self.parse_integer_list(SemanticLocation::Expression) { + let source = token.source; + match token.value { + IntegerToken::IntegerLiteral(value) => { + let integer = Box::new(IntegerToken::IntegerLiteral(value)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Expression(expression) => { + let integer = Box::new(IntegerToken::Expression(expression)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + IntegerToken::Invocation(invocation) => { + // Parse the invocation as an operator instead. + if invocation.arguments.is_empty() { + if let Some(operator) = Operator::from_str(&invocation.name) { + let token = ExpressionToken::Operator(operator); + tokens.push(Tracked::from(token, source)); + continue; + } + } + // Parse the invocation as an invocation. + let integer = Box::new(IntegerToken::Invocation(invocation)); + let token = ExpressionToken::IntegerToken(integer); + tokens.push(Tracked::from(token, source)); + } + } + } + Expression { tokens } + } + + /// Attempt to pull a conditional block from the token stream. + /// Invalid values are noted and dropped, and a None is returned. + fn pull_conditional_block(&mut self) -> Option<ConditionalBlock> { + let predicate = self.pull_integer_token(SemanticLocation::ConditionPredicate)?; + let body = self.pull_block_token(SemanticLocation::ConditionBody)?; + Some(ConditionalBlock { predicate, body }) + } + + /// Attempt to pull an invocation argument from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. + fn pull_invocation_argument(&mut self) -> Option<Tracked<InvocationArgument>> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::StringLiteral(string_literal) => { + let argument = InvocationArgument::String(string_literal); + Some(Tracked::from(argument, source)) + } + SyntacticToken::IntegerLiteral(value) => { + let integer = IntegerToken::IntegerLiteral(value); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Expression(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let expression = parser.parse_expression(); + self.pull_from(parser); + let integer = IntegerToken::Expression(expression); + let argument = InvocationArgument::IntegerToken(integer); + Some(Tracked::from(argument, source)) + } + SyntacticToken::BlockLiteral(tokens) => { + let mut parser = SemanticParser::from(tokens, self.namespace.clone()); + let tokens = parser.parse_block(); + self.pull_from(parser); + let block = BlockToken::Block(tokens); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + SyntacticToken::Symbol(symbol) => { + let name = self.resolve_symbol_name(symbol, &source)?; + let invocation = Invocation { name, arguments: Vec::new() }; + let argument = InvocationArgument::Invocation(invocation); + Some(Tracked::from(argument, source)) + } + SyntacticToken::WordTemplate(word_template) => { + let block = BlockToken::WordTemplate(word_template); + let argument = InvocationArgument::BlockToken(block); + Some(Tracked::from(argument, source)) + } + _ => { + let error = SemanticError::InvalidInvocationArgument; + self.errors.push(Tracked::from(error, source)); + return None; + } + } + } + + fn pull_all_invocation_arguments(&mut self) -> Vec<Tracked<InvocationArgument>> { + let mut arguments = Vec::new(); + while let Some(argument) = self.pull_invocation_argument() { + arguments.push(argument); + } + return arguments; + } + + /// Attempt to pull an argument definition from the token stream. + /// Invalid values are not dropped, a None indicates that no arguments remain. + fn pull_argument_definition(&mut self) -> Option<Tracked<ArgumentDefinition>> { + self.syntactic.pop_if(is_separator)?; + let token = self.syntactic.pop()?; + let source = token.source; + match token.value { + SyntacticToken::Symbol(ScopedSymbol::Global(name)) => { + let variant = ArgumentType::Integer; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + SyntacticToken::BlockLiteral(mut tokens) => { + if tokens.len() == 1 { + let token = tokens.pop().unwrap(); + if let SyntacticToken::Symbol(ScopedSymbol::Global(name)) = token.value { + let variant = ArgumentType::Block; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } + } + } + _ => (), + }; + let error = SemanticError::InvalidArgumentDefinition; + self.errors.push(Tracked::from(error, source)); + return None; + } +} + + + +struct SyntacticTokenStream { + tokens: VecDeque<Tracked<SyntacticToken>>, +} + +impl SyntacticTokenStream { + pub fn from<T: Into<VecDeque<Tracked<SyntacticToken>>>>(tokens: T) -> Self { + Self { tokens: tokens.into() } + } + + pub fn pop(&mut self) -> Option<Tracked<SyntacticToken>> { + self.tokens.pop_front() + } + + pub fn pop_if(&mut self, predicate: fn(&Tracked<SyntacticToken>) -> bool) -> Option<Tracked<SyntacticToken>> { + match predicate(self.tokens.front()?) { + true => self.tokens.pop_front(), + false => None, + } + } + + pub fn unpop(&mut self, token: Tracked<SyntacticToken>) { + self.tokens.push_front(token); + } + + pub fn is_empty(&self) -> bool { + self.tokens.is_empty() + } +} + + +fn is_separator(token: &Tracked<SyntacticToken>) -> bool { + match token.value { + SyntacticToken::Separator => true, + _ => false, + } +} diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs new file mode 100644 index 0000000..dfbea1a --- /dev/null +++ b/src/stages/semantic_tokens.rs @@ -0,0 +1,296 @@ +use crate::*; + + +pub enum SemanticToken { + MacroDefinition(MacroDefinition), + BlockToken(BlockToken), +} + +pub struct MacroDefinition { + pub name: Tracked<String>, + pub arguments: Vec<Tracked<ArgumentDefinition>>, + pub body: MacroDefinitionBody, +} + +pub struct ArgumentDefinition { + pub name: String, + pub variant: ArgumentType, +} + +#[derive(PartialEq)] +pub enum ArgumentType { + Integer, + Block, +} + +impl std::fmt::Display for ArgumentType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match self { + ArgumentType::Integer => write!(f, "an integer"), + ArgumentType::Block => write!(f, "a block"), + } + } +} + +pub enum MacroDefinitionBody { + Integer(Tracked<IntegerToken>), + Block(Vec<Tracked<BlockToken>>), + Invocation(Tracked<Invocation>), +} + +pub struct ConditionalBlock { + pub predicate: Tracked<IntegerToken>, + pub body: Tracked<BlockToken>, +} + +pub enum IntegerToken { + IntegerLiteral(isize), + Expression(Expression), + Invocation(Invocation), +} + +pub struct Expression { + pub tokens: Vec<Tracked<ExpressionToken>>, +} + +pub enum ExpressionToken { + IntegerToken(Box<IntegerToken>), + Invocation(Invocation), + Operator(Operator), +} + +pub enum BlockToken { + LabelDefinition(String), + PinnedAddress(Tracked<IntegerToken>), + ConditionalBlock(Box<ConditionalBlock>), + WordTemplate(WordTemplate), + Block(Vec<Tracked<BlockToken>>), + Invocation(Invocation), +} + +pub struct Invocation { + pub name: String, + pub arguments: Vec<Tracked<InvocationArgument>>, +} + +pub enum InvocationArgument { + String(StringLiteral), + IntegerToken(IntegerToken), + BlockToken(BlockToken), + Invocation(Invocation), +} + +pub enum SemanticError { + MisplacedStringLiteral, + MisplacedListLiteral, + MisplacedSeparator, + MisplacedMacroDefinition, + + ExpectedInteger(SemanticLocation), + ExpectedBlock(SemanticLocation), + + InvalidArgumentDefinition, + InvalidInvocationArgument, + + LabelInMacroDefinition, + SublabelWithoutNamespace, + LocalSymbolWithoutNamespace, +} + +#[derive(Clone, Copy)] +pub enum SemanticLocation { + MacroDefinitionBody, + Expression, + ConditionPredicate, + ConditionBody, + Program, + BlockLiteral, + PinAddress, +} + +impl std::fmt::Display for SemanticLocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + let string = match self { + SemanticLocation::Expression => + "inside this expression", + SemanticLocation::ConditionPredicate => + "as the predicate of this conditional block", + SemanticLocation::ConditionBody => + "as the body of this conditional block", + SemanticLocation::Program => + "at the outermost level of the program", + SemanticLocation::BlockLiteral => + "inside this block literal", + SemanticLocation::MacroDefinitionBody => + "inside the body of this macro definition", + SemanticLocation::PinAddress => + "as the address of this pin", + }; + write!(f, "{string}") + } +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { + for error in errors { + report_semantic_error(error, source_code); + } +} + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SemanticError::MisplacedStringLiteral => + "A string literal can only be used as an invocation argument", + SemanticError::MisplacedListLiteral => + "A list literal can only be used as an invocation argument", + SemanticError::MisplacedSeparator => + "A separator can only be used to construct an argument list", + SemanticError::MisplacedMacroDefinition => + "A macro definition must be used at the outermost level of the program", + + SemanticError::ExpectedInteger(location) => + &format!("An integer value was expected {location}"), + SemanticError::ExpectedBlock(location) => + &format!("A block value was expected {location}"), + + SemanticError::InvalidArgumentDefinition => + "Argument definitions must be in the form 'name' or '{{name}}'", + SemanticError::InvalidInvocationArgument => + "This token cannot be used in an invocation argument", + + SemanticError::LabelInMacroDefinition => + &format!("Only sublabels can be defined inside macro definitions"), + SemanticError::SublabelWithoutNamespace => + &format!("Sublabel was not defined inside a macro definition or after a label"), + SemanticError::LocalSymbolWithoutNamespace => + &format!("Local symbol was not defined inside a macro definition or after a label"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken) { + match token { + SemanticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", definition.name); + for argument in &definition.arguments { + print_argument_definition(i+1, argument); + } + match &definition.body { + MacroDefinitionBody::Integer(integer) => { + print_integer_token(i+1, integer) + } + MacroDefinitionBody::Block(tokens) => { + print_block(i+1, tokens); + } + MacroDefinitionBody::Invocation(invocation) => { + print_invocation(i+1, invocation); + } + } + } + SemanticToken::BlockToken(block) => print_block_token(0, block), + } +} + +fn print_argument_definition(i: usize, argument: &ArgumentDefinition) { + match argument.variant { + ArgumentType::Integer => { + indent!(i, "Argument({}, integer)", argument.name) + } + ArgumentType::Block => { + indent!(i, "Argument({}, block)", argument.name) + } + } +} + +fn print_block_token(i: usize, block: &BlockToken) { + match block { + BlockToken::Invocation(invocation) => { + print_invocation(i, invocation) + } + BlockToken::LabelDefinition(name) => { + indent!(i, "LabelDefinition({name})") + } + BlockToken::Block(block) => { + print_block(i, block); + } + BlockToken::PinnedAddress(integer) => { + indent!(i, "PinnedAddress"); + print_integer_token(i+1, integer); + } + BlockToken::ConditionalBlock(condition) => { + indent!(i, "ConditionalBlock"); + indent!(i+1, "Predicate"); + print_integer_token(i+2, &condition.predicate); + indent!(i+1, "Body"); + print_block_token(i+2, &condition.body); + } + BlockToken::WordTemplate(word_template) => { + indent!(i, "WordTemplate({word_template})") + } + } +} + +fn print_block(i: usize, tokens: &[Tracked<BlockToken>]) { + indent!(i, "Block"); + for token in tokens { + print_block_token(i+1, token); + } +} + +fn print_invocation(i: usize, invocation: &Invocation) { + indent!(i, "Invocation({})", invocation.name); + for argument in &invocation.arguments { + print_invocation_argument(i+1, argument); + } +} + +fn print_invocation_argument(i: usize, argument: &InvocationArgument) { + match &argument { + InvocationArgument::String(string_literal) => { + indent!(i, "String({string_literal})") + } + InvocationArgument::IntegerToken(integer) => { + print_integer_token(i, integer) + } + InvocationArgument::BlockToken(block) => { + print_block_token(i, block) + } + InvocationArgument::Invocation(invocation) => { + print_invocation(i, invocation) + } + } +} + +fn print_integer_token(i: usize, integer: &IntegerToken) { + match integer { + IntegerToken::IntegerLiteral(value) => { + indent!(i, "IntegerValue({value})") + } + IntegerToken::Expression(expression) => { + print_expression(i, expression) + } + IntegerToken::Invocation(invocation) => { + print_invocation(i, invocation) + } + } +} + +fn print_expression(i: usize, expression: &Expression) { + indent!(i, "Expression"); + for token in &expression.tokens { + match &token.value { + ExpressionToken::IntegerToken(integer) => { + print_integer_token(i+1, &integer) + } + ExpressionToken::Invocation(invocation) => { + print_invocation(i+1, &invocation); + } + ExpressionToken::Operator(operator) => { + indent!(i+1, "Operator({operator})") + } + } + } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..2e7f959 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,323 @@ +use crate::*; + +use assembler::Tokeniser; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path)) +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']); + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + macro_rules! push_err { + ($error:expr) => {{ + push_err!($error, t.get_source()); + }}; + ($error:expr, $source:expr) => {{ + errors.push(Tracked::from($error, $source)); + continue; + }}; + } + + loop { + t.eat_whitespace(); + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let token = match c { + '"' => { + let source = t.get_source(); + t.mark_child(); + let is_any_close = |t: &mut Tokeniser| { + t.eat_char() == Some('"') + }; + if let Some(_) = t.track_until(is_any_close) { + let child = t.tokenise_child_span(); + SyntacticToken::StringLiteral(parse_string_literal(child)) + } else { + push_err!(SyntacticError::UnterminatedStringLiteral, source); + } + } + '\'' => { + let source = t.get_source(); + let is_any_close = |t: &mut Tokeniser| { + t.eat_char() == Some('\'') + }; + if let Some(string) = t.track_until(is_any_close) { + let mut chars: Vec<char> = string.chars().collect(); + if chars.len() == 1 { + let value = parse_char(chars.pop().unwrap()); + SyntacticToken::IntegerLiteral(value) + } else { + t.mark_end(); + push_err!(SyntacticError::ExpectedSingleCharacter, t.get_source()); + } + } else { + push_err!(SyntacticError::UnterminatedCharacterLiteral, source); + } + } + + '{' => { + let source = t.get_source(); + t.mark_child(); + let mut depth = 1; + let is_matching_close = |t: &mut Tokeniser| { + match t.eat_char() { + Some('{') => { depth += 1; false } + Some('}') => { depth -= 1; depth == 0 } + _ => false, + } + }; + if let Some(_) = t.track_until(is_matching_close) { + let child = t.tokenise_child_span(); + match parse_syntactic_from_tokeniser(child) { + Ok(tokens) => SyntacticToken::BlockLiteral(tokens), + Err(mut parse_errors) => { + errors.append(&mut parse_errors); + continue; + } + } + } else { + push_err!(SyntacticError::UnterminatedBlock, source); + } + } + '[' => { + let source = t.get_source(); + t.mark_child(); + let mut depth = 1; + let is_matching_close = |t: &mut Tokeniser| { + match t.eat_char() { + Some('[') => { depth += 1; false } + Some(']') => { depth -= 1; depth == 0 } + _ => false, + } + }; + if let Some(_) = t.track_until(is_matching_close) { + let child = t.tokenise_child_span(); + match parse_syntactic_from_tokeniser(child) { + Ok(tokens) => SyntacticToken::Expression(tokens), + Err(mut parse_errors) => { + errors.append(&mut parse_errors); + continue; + } + } + } else { + push_err!(SyntacticError::UnterminatedExpression, source); + } + } + '(' => { + let source = t.get_source(); + let mut depth = 1; + let is_matching_close = |t: &mut Tokeniser| { + match t.eat_char() { + Some('(') => { depth += 1; false } + Some(')') => { depth -= 1; depth == 0 } + _ => false, + } + }; + if let Some(string) = t.track_until(is_matching_close) { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + } + } + continue; + } else { + push_err!(SyntacticError::UnterminatedComment, source); + } + } + '%' => { + let name = t.eat_token(); + let source = t.get_source(); + t.mark_child(); + let is_any_close = |t: &mut Tokeniser| t.eat_char() == Some(';'); + if let Some(_) = t.track_until(is_any_close) { + let child = t.tokenise_child_span(); + match parse_syntactic_from_tokeniser(child) { + Ok(tokens) => { + let name = Tracked::from(name, source); + let def = SyntacticMacroDefinition { name, tokens }; + SyntacticToken::MacroDefinition(def) + } + Err(mut parse_errors) => { + errors.append(&mut parse_errors); + continue; + } + } + } else { + push_err!(SyntacticError::UnterminatedMacroDefinition(name), source); + } + } + + '}' => push_err!(SyntacticError::UnmatchedBlockTerminator), + ']' => push_err!(SyntacticError::UnmatchedExpressionTerminator), + ')' => push_err!(SyntacticError::UnmatchedCommentTerminator), + ';' => push_err!(SyntacticError::UnmatchedMacroTerminator), + + '@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())), + '&' => SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())), + '~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())), + ':' => SyntacticToken::Separator, + '|' => SyntacticToken::Pin, + '?' => SyntacticToken::Condition, + + '#' => { + t.mark_child(); + t.eat_token(); + let child = t.tokenise_child_span(); + match parse_word_template(child) { + Ok(word_template) => SyntacticToken::WordTemplate(word_template), + Err(mut parse_errors) => { + errors.append(&mut parse_errors); + continue; + } + } + }, + + c => { + let token = format!("{c}{}", t.eat_token()); + if let Some(hex_string) = token.strip_prefix("0x") { + match parse_integer_literal(hex_string, 16) { + Ok(value) => SyntacticToken::IntegerLiteral(value), + Err(_) => push_err!(SyntacticError::InvalidHexadecimalLiteral(token)), + } + } else if let Some(binary_string) = token.strip_prefix("0b") { + match parse_integer_literal(binary_string, 2) { + Ok(value) => SyntacticToken::IntegerLiteral(value), + Err(_) => push_err!(SyntacticError::InvalidBinaryLiteral(token)), + } + } else { + match parse_integer_literal(&token, 10) { + Ok(value) => SyntacticToken::IntegerLiteral(value), + Err(true) => push_err!(SyntacticError::InvalidDecimalLiteral(token)), + Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)), + } + } + } + }; + + t.mark_end(); + tokens.push(Tracked::from(token, t.get_source())) + } + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} + + +fn parse_integer_literal(token: &str, radix: u32) -> Result<isize, bool> { + match usize::from_str_radix(&token.replace('_', ""), radix) { + Ok(value) => match isize::try_from(value) { + Ok(value) => Ok(value), + Err(_) => Err(true), + } + Err(_) => Err(false), + } +} + + +fn parse_string_literal(mut t: Tokeniser) -> StringLiteral { + let mut string = String::new(); + let mut chars = Vec::new(); + + while let Some(c) = t.eat_char() { + string.push(c); + chars.push(Tracked::from(parse_char(c), t.get_source())); + t.mark_start(); + } + StringLiteral { string, chars } +} + +fn parse_char(c: char) -> isize { + c as u32 as isize +} + + +fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> { + let mut value = 0; // Value of the whole word template. + let mut value_width = 0; // Bit width of the whole word template. + let mut field_width = 0; // Width of the current bit field. + let mut field_name = '\0'; // Name of the current bit field. + let mut fields: Vec<Tracked<BitField>> = Vec::new(); + let mut errors: Vec<Tracked<SyntacticError>> = Vec::new(); + + macro_rules! push_field { + () => { + if fields.iter().any(|f| f.name == field_name) { + let error = SyntacticError::DuplicateFieldNameInWord(field_name); + errors.push(Tracked::from(error, t.get_source())); + } else { + let field = BitField { name: field_name, width: field_width, shift: 0}; + fields.push(Tracked::from(field, t.get_source())); + } + }; + } + + while let Some(c) = t.eat_char() { + // Ignore underscores. + if c == '_' { + t.mark.undo(); + continue; + } + + // Add a bit to the value; + value <<= 1; + value_width += 1; + for field in &mut fields { + field.shift += 1; + } + + // Extend the current field. + if c == field_name { + field_width += 1; + continue; + } + + // Commit the current field. + if field_width > 0 { + t.mark_end_prev(); + push_field!(); + field_width = 0; + field_name = '\0'; + } + + // Parse bit literals. + if c == '0' { + continue; + } + if c == '1' { + value |= 1; + continue; + } + + t.mark_start_prev(); + if c.is_alphabetic() { + field_name = c; + field_width = 1; + continue; + } else { + t.mark_end(); + let error = SyntacticError::InvalidCharacterInWord(c); + errors.push(Tracked::from(error, t.get_source())); + } + } + + // Commit the final field. + for field in &mut fields { + field.shift += 1; + } + if field_width > 0 { + t.mark_end(); + push_field!(); + } + + match errors.is_empty() { + true => Ok(WordTemplate { value, width: value_width, fields }), + false => Err(errors), + } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..eabf34b --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,160 @@ +use crate::*; + +pub enum SyntacticToken { + LabelDefinition(ScopedSymbol), + MacroDefinition(SyntacticMacroDefinition), + + IntegerLiteral(isize), + StringLiteral(StringLiteral), + WordTemplate(WordTemplate), + + BlockLiteral(Vec<Tracked<SyntacticToken>>), + Expression(Vec<Tracked<SyntacticToken>>), + + Symbol(ScopedSymbol), + + Separator, + Condition, + Pin, +} + +pub struct SyntacticMacroDefinition { + pub name: Tracked<String>, + pub tokens: Vec<Tracked<SyntacticToken>>, +} + +pub struct StringLiteral { + pub string: String, + pub chars: Vec<Tracked<isize>>, +} + +impl std::fmt::Display for StringLiteral { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + self.string.fmt(f) + } +} + +pub enum ScopedSymbol { + Local(String), + Global(String), +} + +impl std::fmt::Display for ScopedSymbol { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match self { + ScopedSymbol::Local(name) => write!(f, "~{name}"), + ScopedSymbol::Global(name) => write!(f, "{name}"), + } + } +} + + +pub enum SyntacticError { + UnterminatedBlock, + UnterminatedExpression, + UnterminatedComment, + UnterminatedCharacterLiteral, + UnterminatedStringLiteral, + UnterminatedMacroDefinition(String), + + UnmatchedBlockTerminator, + UnmatchedExpressionTerminator, + UnmatchedCommentTerminator, + UnmatchedMacroTerminator, + + ExpectedSingleCharacter, + + DuplicateFieldNameInWord(char), + InvalidCharacterInWord(char), + + InvalidDecimalLiteral(String), + InvalidHexadecimalLiteral(String), + InvalidBinaryLiteral(String), +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { + for error in errors { + report_syntactic_error(error, source_code); + } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + SyntacticError::UnterminatedBlock => + "Block was not terminated, add a '}}' character to terminate", + SyntacticError::UnterminatedExpression => + "Expression was not terminated, add a ']' character to terminate", + SyntacticError::UnterminatedComment => + "Comment was not terminated, add a ')' character to terminate", + SyntacticError::UnterminatedCharacterLiteral => + "Character was not terminated, add a ' character to terminate", + SyntacticError::UnterminatedStringLiteral => + "String was not terminated, add a '\"' character to terminate", + SyntacticError::UnterminatedMacroDefinition(name) => + &format!("The '{name}' macro definition was not terminated, add a ';' character to terminate"), + + SyntacticError::UnmatchedBlockTerminator => + "Attempted to terminate a block, but no block was in progress", + SyntacticError::UnmatchedExpressionTerminator => + "Attempted to terminate an expression, but no expression was in progress", + SyntacticError::UnmatchedCommentTerminator => + "Attempted to terminate a comment, but no comment was in progress", + SyntacticError::UnmatchedMacroTerminator => + "Attempted to terminate a macro definition, but no macro definition was in progress", + + SyntacticError::ExpectedSingleCharacter => + "A character literal must contain exactly one character", + + SyntacticError::DuplicateFieldNameInWord(name) => + &format!("The field '{name}' has already been used in this word"), + SyntacticError::InvalidCharacterInWord(c) => + &format!("The character '{c}' cannot be used in a word"), + + SyntacticError::InvalidDecimalLiteral(string) => + &format!("The string '{string}' is not a valid decimal literal"), + SyntacticError::InvalidHexadecimalLiteral(string) => + &format!("The string '{string}' is not a valid hexadecimal literal"), + SyntacticError::InvalidBinaryLiteral(string) => + &format!("The string '{string}' is not a valid binary literal"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { + match token { + SyntacticToken::LabelDefinition(symbol) => indent!(i, "LabelDefinition({symbol})"), + SyntacticToken::MacroDefinition(definition) => { + indent!(i, "MacroDefinition({})", definition.name); + for token in &definition.tokens { + print_syntactic_token(i+1, token); + } + } + + SyntacticToken::IntegerLiteral(value) => indent!(i, "IntegerLiteral({value})"), + SyntacticToken::StringLiteral(literal) => indent!(i, "StringLiteral({literal})"), + SyntacticToken::WordTemplate(template) => indent!(i, "WordTemplate({template})"), + + SyntacticToken::BlockLiteral(tokens) => { + indent!(i, "BlockLiteral"); + for token in tokens { + print_syntactic_token(i+1, token); + } + } + SyntacticToken::Expression(tokens) => { + indent!(i, "Expression"); + for token in tokens { + print_syntactic_token(i+1, token); + } + } + + SyntacticToken::Symbol(symbol) => indent!(i, "Symbol({symbol})"), + + SyntacticToken::Separator => indent!(i, "Separator"), + SyntacticToken::Condition => indent!(i, "Condition"), + SyntacticToken::Pin => indent!(i, "Pin"), + } +} |