summary | refs | log | tree | commit | diff
path: root/src/stages
diff options
context:
space:
mode:
Diffstat (limited to 'src/stages')
-rw-r--r-- src/stages/bytecode.rs 182
-rw-r--r-- src/stages/bytecode_tokens.rs 78
-rw-r--r-- src/stages/intermediate.rs 577
-rw-r--r-- src/stages/intermediate_tokens.rs 149
-rw-r--r-- src/stages/mod.rs 31
-rw-r--r-- src/stages/semantic.rs 478
-rw-r--r-- src/stages/semantic_tokens.rs 296
-rw-r--r-- src/stages/syntactic.rs 323
-rw-r--r-- src/stages/syntactic_tokens.rs 160
9 files changed, 2274 insertions, 0 deletions
diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs
new file mode 100644
index 0000000..3618b26
--- /dev/null
+++ b/src/stages/bytecode.rs
@@ -0,0 +1,182 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+/// Assemble intermediate tokens into addressed segments of words.
+///
+/// When `width` is `Some`, every word whose width differs from it is reported
+/// as an error. Returns the segments, or every error that was collected.
+pub fn parse_bytecode(tokens: Vec<Tracked<IntermediateToken>>, width: Option<u32>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> {
+    let parser = BytecodeParser::new(width);
+    parser.parse(tokens)
+}
+
+
+/// State used while converting intermediate tokens into segments of words.
+pub struct BytecodeParser {
+ /// Width in bits that every word is checked against, if one was provided.
+ width: Option<u32>,
+ /// Address of every label, keyed by label name.
+ addresses: HashMap<String, Tracked<usize>>,
+ /// Address of the next word to be emitted.
+ address: usize,
+ /// Address at which the segment currently being built starts.
+ segment_address: usize,
+ /// Source of the pinned address that started the current segment, if any.
+ segment_source: Option<SourceSpan>,
+ /// Segments completed so far.
+ segments: Vec<Segment>,
+ /// Words of the segment currently being built.
+ words: Vec<Tracked<Word>>,
+ /// Errors collected so far.
+ errors: Vec<Tracked<BytecodeError>>,
+}
+
+impl BytecodeParser {
+ /// Create a parser at address zero with no labels, words, segments or errors.
+ pub fn new(width: Option<u32>) -> Self {
+     let addresses = HashMap::new();
+     let (segments, words, errors) = (Vec::new(), Vec::new(), Vec::new());
+     Self {
+         width,
+         addresses,
+         address: 0,
+         segment_address: 0,
+         segment_source: None,
+         segments,
+         words,
+         errors,
+     }
+ }
+
+ /// Convert `tokens` into segments of evaluated words.
+ ///
+ /// Runs two passes over the tokens: the first records the address of every
+ /// label definition, the second evaluates each word and groups the words
+ /// into segments, starting a new segment at every pinned address.
+ ///
+ /// Returns the segments if no errors were recorded, otherwise every error.
+ pub fn parse(mut self, tokens: Vec<Tracked<IntermediateToken>>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> {
+     // Calculate all label addresses ahead of time, so that words can
+     // reference labels that are defined after them.
+     let mut address = 0;
+     for token in &tokens {
+         match &token.value {
+             IntermediateToken::LabelDefinition(name) => {
+                 let tracked = Tracked::from(address, token.source.clone());
+                 if self.addresses.insert(name.clone(), tracked).is_some() {
+                     unreachable!("Uncaught duplicate label definition '{name}'");
+                 }
+             }
+             IntermediateToken::Word(_) => {
+                 address += 1;
+             }
+             IntermediateToken::PinnedAddress(pinned) => {
+                 address = pinned.value;
+             }
+         }
+     }
+     // Evaluate every word and split the words into segments.
+     for token in &tokens {
+         let source = &token.source;
+         match &token.value {
+             IntermediateToken::Word(word) => {
+                 let word = self.evaluate_word(word, source);
+                 // Check that the word width fits the provided width.
+                 if let Some(width) = self.width {
+                     if word.width != width {
+                         let error = BytecodeError::IncorrectWidth(width, word.width);
+                         self.errors.push(Tracked::from(error, source.clone()));
+                     }
+                 }
+                 self.words.push(word);
+                 self.address += 1;
+             }
+             IntermediateToken::PinnedAddress(address) => {
+                 let current = self.address;
+                 let pinned = address.value;
+                 if current > pinned {
+                     // A pinned address cannot move the address backwards.
+                     let error = BytecodeError::PinnedAddressBacktrack(pinned, current);
+                     self.errors.push(Tracked::from(error, address.source.clone()));
+                 } else {
+                     self.finish_segment();
+                     self.segment_source = Some(address.source.clone());
+                     self.address = pinned;
+                     self.segment_address = pinned;
+                 }
+             }
+             IntermediateToken::LabelDefinition(_) => (),
+         }
+     }
+     // Finish final segment.
+     self.finish_segment();
+
+     if self.errors.is_empty() {
+         Ok(self.segments)
+     } else {
+         Err(self.errors)
+     }
+ }
+
+ /// Move the pending words into a new segment, unless there are none.
+ fn finish_segment(&mut self) {
+     if self.words.is_empty() {
+         return;
+     }
+     let segment = Segment {
+         address: self.segment_address,
+         source: self.segment_source.take(),
+         words: std::mem::take(&mut self.words),
+     };
+     self.segments.push(segment);
+ }
+
+ /// Evaluate `expression` on an expression stack and return the result.
+ ///
+ /// Operator failures and a failed final pull are recorded in `self.errors`;
+ /// when the final pull fails the returned value is 0. `source` is the
+ /// location used for an error raised by the final pull.
+ fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize {
+     let mut stack = ExpressionStack::new();
+     for token in &expression.tokens {
+         let token_source = &token.source;
+         match &token.value {
+             IntermediateExpressionToken::Integer(IntermediateInteger::Integer(value)) => {
+                 stack.push(*value);
+             }
+             IntermediateExpressionToken::Integer(IntermediateInteger::Expression(nested)) => {
+                 let value = self.evaluate_expression(nested, token_source);
+                 stack.push(value);
+             }
+             IntermediateExpressionToken::Integer(IntermediateInteger::LabelReference(name)) => {
+                 let value = self.evaluate_label_reference(name);
+                 stack.push(value);
+             }
+             IntermediateExpressionToken::Operator(operator) => {
+                 if let Err(err) = stack.apply(*operator, token_source) {
+                     let error = Tracked::from(BytecodeError::StackError(err), token_source.clone());
+                     self.errors.push(error)
+                 }
+             }
+         }
+     }
+     match stack.pull_result() {
+         Err(err) => {
+             let stack_error = Tracked::from(err, source.clone());
+             let error = BytecodeError::StackError(stack_error);
+             self.errors.push(Tracked::from(error, source.clone()));
+             0
+         }
+         Ok(value) => value,
+     }
+ }
+
+ /// Return the recorded address of the label called `name`.
+ ///
+ /// Panics if no address was recorded for the label.
+ fn evaluate_label_reference(&mut self, name: &Tracked<String>) -> isize {
+     match self.addresses.get(&name.to_string()) {
+         Some(address) => address.value as isize,
+         None => unreachable!("Uncaught unresolved label reference '{name}'"),
+     }
+ }
+
+ /// Evaluate every field of `word` and merge the results into its value.
+ ///
+ /// Each field value is evaluated, checked against the width of the field,
+ /// masked to that width and OR-ed into the word at the field's shift. A
+ /// value that needs more bits than the field provides is recorded as a
+ /// `ValueTooWide` error and left out of the word.
+ ///
+ /// Returns the finished word, tracked with `source`.
+ fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked<Word> {
+     let mut word_value = word.value;
+     for field in &word.fields {
+         let field_source = &field.value.value.source;
+         let field_value = match &field.value.value.value {
+             IntermediateInteger::Expression(expression) => {
+                 self.evaluate_expression(expression, source)
+             }
+             IntermediateInteger::LabelReference(name) => {
+                 self.evaluate_label_reference(name)
+             }
+             IntermediateInteger::Integer(value) => {
+                 *value
+             }
+         };
+         // Number of bits needed to hold the magnitude of the value. Using
+         // `unsigned_abs` avoids the overflow that negating `isize::MIN`
+         // would cause.
+         let value_width = match field_value.unsigned_abs() {
+             0 => 0,
+             magnitude => magnitude.ilog2() + 1,
+         };
+         if field.width < value_width {
+             let error = BytecodeError::ValueTooWide(field.width, value_width);
+             self.errors.push(Tracked::from(error, field_source.clone()));
+         } else {
+             // A field as wide as `usize` keeps every bit; computing
+             // `2^width - 1` directly would overflow in that case.
+             let mask = 1_usize.checked_shl(field.width).map_or(usize::MAX, |bit| bit - 1);
+             let clamped_value = (field_value as usize) & mask;
+             word_value |= clamped_value << field.shift;
+         }
+     }
+     let word = Word { width: word.width, value: word_value };
+     Tracked::from(word, source.clone())
+ }
+}
diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs
new file mode 100644
index 0000000..b54cb0e
--- /dev/null
+++ b/src/stages/bytecode_tokens.rs
@@ -0,0 +1,78 @@
+use crate::*;
+
+
+/// A run of words together with the address at which the run starts.
+pub struct Segment {
+ /// Address of the first word in the segment.
+ pub address: usize,
+ /// Source of the address value.
+ pub source: Option<SourceSpan>,
+ /// Words in the segment, in order.
+ pub words: Vec<Tracked<Word>>,
+}
+
+/// A single evaluated word.
+pub struct Word {
+    /// Bit pattern of the word.
+    pub value: usize,
+    /// Width of the word in bits.
+    pub width: u32,
+}
+
+impl std::fmt::Display for Word {
+    /// Write the word as binary digits, most significant bit first, with an
+    /// underscore between every group of four bits counted from the right.
+    /// A word of width zero is written as `0`.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        if self.width == 0 {
+            return write!(f, "0");
+        }
+        for i in (0..self.width).rev() {
+            let is_first_bit = i + 1 == self.width;
+            if !is_first_bit && (i + 1) % 4 == 0 {
+                write!(f, "_")?;
+            }
+            // Bit positions beyond the width of `usize` are always zero;
+            // shifting by them directly would overflow.
+            let bit = self.value.checked_shr(i).unwrap_or(0) & 1;
+            write!(f, "{bit}")?;
+        }
+        Ok(())
+    }
+}
+
+/// Errors that can be recorded while converting intermediate tokens to words.
+pub enum BytecodeError {
+ /// A word does not have the fixed width that was requested.
+ /// expected, received
+ IncorrectWidth(u32, u32),
+ /// A pinned address is lower than the address already reached.
+ /// pinned, real
+ PinnedAddressBacktrack(usize, usize),
+ /// A value needs more bits than the field it is placed in.
+ /// expected, received
+ ValueTooWide(u32, u32),
+ /// An expression could not be evaluated on the expression stack.
+ StackError(Tracked<StackError>),
+}
+
+
+/// Report each of `errors` in order, using `source_code` for context.
+pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) {
+    errors.iter().for_each(|error| report_bytecode_error(error, source_code));
+}
+
+/// Report a single bytecode error as an error-level source issue.
+///
+/// Stack errors are handed to `report_stack_error` instead.
+fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) {
+    let message = match &error.value {
+        BytecodeError::StackError(stack_error) => {
+            report_stack_error(stack_error, source_code);
+            return;
+        }
+        BytecodeError::IncorrectWidth(expected, received) => {
+            format!("Word is {received} bits wide, but was expected to have a fixed width of {expected} bits")
+        }
+        BytecodeError::PinnedAddressBacktrack(pinned, real) => {
+            format!("Cannot pin to address {pinned} when address is already {real}")
+        }
+        BytecodeError::ValueTooWide(expected, received) => {
+            format!("Field is {expected} bits wide, but received a value that is {received} bits wide")
+        }
+    };
+
+    let context = Context { source_code: &source_code, source: &error.source };
+    report_source_issue(LogLevel::Error, &context, &message);
+}
+
+
+/// Print the address of `segment` followed by one right-aligned line per word.
+pub fn print_segment(segment: &Segment) {
+    println!("SEGMENT: 0x{:>04x}", segment.address);
+    // Render every word once, then pad each to the widest rendering.
+    let rendered: Vec<String> = segment.words.iter().map(|word| word.to_string()).collect();
+    let width = rendered.iter().map(|string| string.chars().count()).max().unwrap_or(0);
+    for string in &rendered {
+        println!(" {string:>w$}", w=width);
+    }
+}
diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs
new file mode 100644
index 0000000..6853f62
--- /dev/null
+++ b/src/stages/intermediate.rs
@@ -0,0 +1,577 @@
+use crate::*;
+
+use assembler::{DefinitionType, SourceLocation, SourcePosition, SymbolRole};
+
+use indexmap::{IndexSet, IndexMap};
+
+
+/// Counter behind `next_id!`. Atomic, so that taking an ID needs no `unsafe`
+/// and cannot race.
+static ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
+/// Return the current value of the ID counter and advance the counter by one.
+macro_rules! next_id { () => { ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed) }; }
+
+/// Convert semantic tokens into intermediate tokens.
+///
+/// Returns the intermediate tokens, or every error that was collected.
+pub fn parse_intermediate(semantic: Vec<Tracked<SemanticToken>>) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> {
+    let parser = IntermediateParser::new(semantic);
+    parser.parse()
+}
+
+
+/// State used while converting semantic tokens into intermediate tokens.
+struct IntermediateParser {
+ /// Tokens to be converted.
+ semantic: Vec<Tracked<SemanticToken>>,
+ /// Name of every label defined anywhere in the program.
+ label_names: IndexSet<Tracked<String>>,
+ /// Name of every macro defined anywhere in the program.
+ macro_names: IndexSet<Tracked<String>>,
+ /// Macro definitions encountered so far, keyed by name.
+ macro_definitions: IndexMap<String, MacroDefinition>,
+ /// Intermediate tokens generated so far.
+ intermediate: Vec<Tracked<IntermediateToken>>,
+ /// Errors collected so far.
+ errors: Vec<Tracked<IntermediateError>>,
+}
+
+impl IntermediateParser {
+ /// Create a parser for `semantic`, collecting the names of every label and
+ /// macro defined in it up front.
+ ///
+ /// Panics if the same label or macro name is defined twice.
+ pub fn new(semantic: Vec<Tracked<SemanticToken>>) -> Self {
+     let mut label_names = IndexSet::new();
+     let mut macro_names = IndexSet::new();
+     for symbol in SymbolParser::new().parse(&semantic) {
+         match symbol.role {
+             SymbolRole::Reference => (),
+             SymbolRole::Definition(DefinitionType::CanFollowReference) => {
+                 let tracked = Tracked::from(symbol.name.clone(), symbol.source);
+                 if !label_names.insert(tracked) {
+                     unreachable!("Uncaught duplicate label definition '{}'", symbol.name);
+                 }
+             }
+             SymbolRole::Definition(DefinitionType::MustPrecedeReference) => {
+                 // Only consider macro definitions, not macro argument definitions.
+                 if symbol.namespace.is_empty() {
+                     let tracked = Tracked::from(symbol.name.clone(), symbol.source);
+                     if !macro_names.insert(tracked) {
+                         unreachable!("Uncaught duplicate macro definition '{}'", symbol.name);
+                     }
+                 }
+             }
+         }
+     }
+
+     let macro_definitions = IndexMap::new();
+     let (intermediate, errors) = (Vec::new(), Vec::new());
+     Self { semantic, label_names, macro_names, macro_definitions, intermediate, errors }
+ }
+
+ /// Convert every semantic token into intermediate tokens.
+ ///
+ /// Macro definitions are test-invoked with dummy arguments and then stored;
+ /// block tokens are expanded and appended to the output. Parsing stops at
+ /// the first macro definition whose test invocation records an error.
+ ///
+ /// Returns the intermediate tokens if no errors were recorded, otherwise
+ /// every error.
+ pub fn parse(mut self) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> {
+ for token in self.semantic {
+ let source = &token.source;
+ match token.value {
+ SemanticToken::MacroDefinition(definition) => {
+ // Invoke the body to see if it contains undefined macros.
+ let error_count = self.errors.len();
+ let mut arguments = IndexMap::new();
+ // Prepare dummy argument values.
+ let null = SourceSpan {
+ string: String::new(),
+ in_merged: SourceLocation {
+ path: None,
+ start: SourcePosition::ZERO,
+ end: SourcePosition::ZERO,
+ },
+ in_source: None,
+ child: None,
+ };
+ // Integer arguments get the value 0, block arguments an empty block.
+ for argument in &definition.arguments {
+ let value = match argument.variant {
+ ArgumentType::Integer => {
+ let integer = IntermediateInteger::Integer(0);
+ let tracked = Tracked::from(integer, null.clone());
+ IntermediateValue::Integer(tracked)
+ }
+ ArgumentType::Block => {
+ IntermediateValue::Block(Vec::new())
+ }
+ };
+ let tracked = Tracked::from(value, null.clone());
+ arguments.insert(argument.name.clone(), tracked);
+ }
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments,
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ // The value produced by the test invocation is discarded; only the
+ // errors it records are of interest.
+ env.parse_macro_definition_body(&definition.body, source);
+ if self.errors.len() != error_count {
+ break;
+ }
+
+ // Store the definition so that later tokens can invoke it.
+ let name = definition.name.to_string();
+ if self.macro_definitions.insert(name.clone(), definition).is_some() {
+ unreachable!("Uncaught duplicate macro definition '{}'", name);
+ }
+ }
+ SemanticToken::BlockToken(block_token) => {
+ // Expand the block token in an environment with no macro arguments.
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments: IndexMap::new(),
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ let mut tokens = env.parse_block_token(&block_token, source);
+ self.intermediate.append(&mut tokens);
+ }
+ }
+ }
+ match self.errors.is_empty() {
+ true => Ok(self.intermediate),
+ false => Err(self.errors),
+ }
+ }
+}
+
+
+/// Everything that is visible while expanding a single macro body or block.
+struct Environment<'a> {
+ /// Name of every label defined anywhere in the program.
+ label_names: &'a IndexSet<Tracked<String>>,
+ /// Name of every macro defined anywhere in the program.
+ macro_names: &'a IndexSet<Tracked<String>>,
+ /// Macro definitions that have been encountered so far.
+ macro_definitions: &'a IndexMap<String, MacroDefinition>,
+ /// Values of the macro arguments in this environment, keyed by argument name.
+ arguments: IndexMap<String, Tracked<IntermediateValue>>,
+ /// Shared list that errors are recorded in.
+ errors: &'a mut Vec<Tracked<IntermediateError>>,
+ /// ID of this environment, used by `tag_name` to tag label names.
+ id: usize,
+}
+
+impl<'a> Environment<'a> {
+ /// Tag a label name with the ID of this environment.
+ ///
+ /// Names containing a `:` have `:` and the ID appended; every other name is
+ /// returned unchanged.
+ fn tag_name(&self, name: &str) -> String {
+     if name.contains(':') {
+         format!("{name}:{}", self.id)
+     } else {
+         name.to_string()
+     }
+ }
+
+ /// Expand the body of a macro definition into a value.
+ ///
+ /// Integer and block bodies are tracked with `source`; an invocation body
+ /// returns whatever the invocation produces. Returns `None` if the body
+ /// could not be expanded.
+ fn parse_macro_definition_body(&mut self, body: &MacroDefinitionBody, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> {
+     let value = match body {
+         MacroDefinitionBody::Invocation(invocation) => {
+             return self.parse_invocation(invocation, &invocation.source);
+         }
+         MacroDefinitionBody::Integer(integer) => {
+             let token = self.parse_integer_token(integer, source)?;
+             IntermediateValue::Integer(token)
+         }
+         MacroDefinitionBody::Block(blocks) => {
+             let mut tokens = Vec::new();
+             for block in blocks {
+                 tokens.extend(self.parse_block_token(block, &block.source));
+             }
+             IntermediateValue::Block(tokens)
+         }
+     };
+     Some(Tracked::from(value, source.clone()))
+ }
+
+ /// Expand a block token into a list of intermediate tokens.
+ ///
+ /// Errors are recorded in `self.errors`; a token that fails to expand
+ /// contributes nothing to the returned list.
+ fn parse_block_token(&mut self, block: &BlockToken, source: &SourceSpan) -> Vec<Tracked<IntermediateToken>> {
+ let mut intermediate = Vec::new();
+ match block {
+ BlockToken::LabelDefinition(name) => {
+ let token = IntermediateToken::LabelDefinition(self.tag_name(name));
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ BlockToken::PinnedAddress(address) => {
+ if let Some(integer) = self.parse_integer_token(address, &address.source) {
+ // A pinned address has to be evaluated right away, so it cannot
+ // contain a label reference.
+ if let Some(source) = integer_contains_label_reference(&integer) {
+ let error = IntermediateError::LabelReferenceInPinnedAddress;
+ let new_source = address.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ } else {
+ match evaluate_integer(&integer, source) {
+ Ok(value) => {
+ // NOTE(review): a negative value falls back to address 0 here
+ // rather than being reported; confirm that this is intended.
+ let value = usize::try_from(value).unwrap_or(0);
+ let tracked = Tracked::from(value, address.source.clone());
+ let token = IntermediateToken::PinnedAddress(tracked);
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ Err(error) => self.errors.push(error),
+ }
+ }
+ }
+ }
+ BlockToken::ConditionalBlock(cond) => {
+ // The body is expanded before the predicate is checked, so errors
+ // in the body are recorded even if the body ends up being dropped.
+ let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source);
+ let mut body = self.parse_block_token(&cond.body, &cond.body.source);
+ if let Some(predicate) = predicate {
+ let mut found_error = false;
+ if let Some(source) = integer_contains_label_reference(&predicate) {
+ let error = IntermediateError::LabelReferenceInConditionPredicate;
+ let new_source = cond.predicate.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ found_error = true;
+ };
+ if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) {
+ let error = IntermediateError::LabelDefinitionInConditionBody;
+ let new_source = cond.body.source.clone().wrap(source);
+ self.errors.push(Tracked::from(error, new_source));
+ found_error = true;
+ }
+ if !found_error {
+ // Keep the body only if the predicate evaluates to a non-zero value.
+ match evaluate_integer(&predicate, &cond.predicate.source) {
+ Ok(value) => if value != 0 { intermediate.append(&mut body) },
+ Err(error) => self.errors.push(error),
+ }
+ }
+ }
+ }
+ BlockToken::WordTemplate(word_template) => {
+ let mut fields = Vec::new();
+ for bit_field in &word_template.fields {
+ // The value of each field is found by invoking the name of the
+ // field with no arguments.
+ let name = bit_field.name.to_string();
+ let source = &bit_field.source;
+ let invocation = Invocation { name, arguments: Vec::new() };
+ if let Some(value) = self.parse_integer_invocation(&invocation, source) {
+ let field = IntermediateField {
+ width: bit_field.width,
+ shift: bit_field.shift,
+ value,
+ };
+ fields.push(Tracked::from(field, bit_field.source.clone()));
+ }
+ }
+ let word = IntermediateWord {
+ value: word_template.value,
+ width: word_template.width,
+ fields,
+ };
+ let token = IntermediateToken::Word(word);
+ intermediate.push(Tracked::from(token, source.clone()));
+ }
+ BlockToken::Block(blocks) => {
+ for block in blocks {
+ let mut tokens = self.parse_block_token(block, &block.source);
+ intermediate.append(&mut tokens);
+ }
+ }
+ BlockToken::Invocation(invocation) => {
+ if let Some(mut tokens) = self.parse_block_invocation(invocation, source) {
+ intermediate.append(&mut tokens);
+ }
+ }
+ }
+
+ return intermediate;
+ }
+
+ /// Convert an integer token into an intermediate integer.
+ ///
+ /// Literals are wrapped directly; expressions and invocations are
+ /// expanded first. Returns `None` if the expansion fails.
+ fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+     match integer {
+         IntegerToken::Invocation(invocation) => self.parse_integer_invocation(invocation, source),
+         IntegerToken::Expression(expression) => self.parse_expression(expression, source),
+         IntegerToken::IntegerLiteral(value) => {
+             let literal = IntermediateInteger::Integer(*value);
+             Some(Tracked::from(literal, source.clone()))
+         }
+     }
+ }
+
+ /// Expand an invocation that has to produce an integer.
+ ///
+ /// Records `ExpectedInteger` and returns `None` if the invocation produces
+ /// a block instead.
+ fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+     let resolved = self.parse_invocation(invocation, source)?;
+     if let IntermediateValue::Integer(integer) = resolved.value {
+         return Some(integer);
+     }
+     let error = IntermediateError::ExpectedInteger;
+     self.errors.push(Tracked::from(error, source.clone()));
+     None
+ }
+
+ /// Expand an invocation that has to produce a block.
+ ///
+ /// Records `ExpectedBlock` and returns `None` if the invocation produces
+ /// an integer instead.
+ fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> {
+     let resolved = self.parse_invocation(invocation, source)?;
+     if let IntermediateValue::Block(tokens) = resolved.value {
+         return Some(tokens);
+     }
+     let error = IntermediateError::ExpectedBlock;
+     self.errors.push(Tracked::from(error, source.clone()));
+     None
+ }
+
+ /// Expand an invocation into the value that it produces.
+ ///
+ /// The name is looked up, in order, as a macro argument of this
+ /// environment, a label, a macro that has already been defined, and a
+ /// macro that is defined later in the program. Errors are recorded in
+ /// `self.errors`, in which case `None` is returned.
+ ///
+ /// Panics if the name matches none of the above.
+ fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> {
+ let received_count = invocation.arguments.len();
+ if let Some(argument) = self.arguments.get(&invocation.name) {
+ // Macro arguments take no arguments of their own.
+ if received_count != 0 {
+ let error = IntermediateError::IncorrectArgumentCount(0, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ Some(argument.clone())
+ }
+ } else if let Some(label_name) = self.label_names.get(&invocation.name) {
+ // Labels take no arguments either.
+ if received_count != 0 {
+ let error = IntermediateError::IncorrectArgumentCount(0, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ let name = self.tag_name(label_name);
+ let tracked = Tracked::from(name, label_name.source.clone());
+ let integer = IntermediateInteger::LabelReference(tracked);
+ let tracked = Tracked::from(integer, source.clone());
+ let value = IntermediateValue::Integer(tracked);
+ Some(Tracked::from(value, source.clone()))
+ }
+ } else if let Some(definition) = self.macro_definitions.get(&invocation.name) {
+ // Check that the correct number of arguments were provided.
+ let expected_count = definition.arguments.len();
+ if received_count != expected_count {
+ let error = IntermediateError::IncorrectArgumentCount(expected_count, received_count);
+ self.errors.push(Tracked::from(error, source.clone()));
+ None
+ } else {
+ // Gather and type-check the provided arguments.
+ let mut arguments = Vec::new();
+ for (i, argument) in invocation.arguments.iter().enumerate() {
+ let received_type = match &argument.value {
+ InvocationArgument::String(string) => {
+ // A string becomes a list holding one integer per character.
+ let mut values = Vec::new();
+ for c in &string.chars {
+ let integer = IntermediateInteger::Integer(**c);
+ let tracked = Tracked::from(integer, c.source.clone());
+ values.push(IntermediateValue::Integer(tracked));
+ }
+ arguments.push(RepeatedArgument::List(values));
+ ArgumentType::Integer
+ }
+ InvocationArgument::IntegerToken(integer) => {
+ let tracked = self.parse_integer_token(&integer, &argument.source)?;
+ let value = IntermediateValue::Integer(tracked);
+ arguments.push(RepeatedArgument::Loop(value));
+ ArgumentType::Integer
+ }
+ InvocationArgument::BlockToken(block) => {
+ let tokens = self.parse_block_token(&block, &argument.source);
+ let value = IntermediateValue::Block(tokens);
+ arguments.push(RepeatedArgument::Loop(value));
+ ArgumentType::Block
+ }
+ InvocationArgument::Invocation(invocation) => {
+ let value = self.parse_invocation(&invocation, &argument.source)?;
+ let received_type = match &value.value {
+ IntermediateValue::Integer(_) => ArgumentType::Integer,
+ IntermediateValue::Block(_) => ArgumentType::Block,
+ };
+ arguments.push(RepeatedArgument::Loop(value.value));
+ received_type
+ }
+ };
+ // There are only two argument types, so on a mismatch the expected
+ // type is whichever type was not received.
+ let expected_type = match received_type {
+ ArgumentType::Integer => ArgumentType::Block,
+ ArgumentType::Block => ArgumentType::Integer,
+ };
+ if definition.arguments[i].variant != received_type {
+ let error = IntermediateError::IncorrectArgumentType(expected_type, received_type);
+ self.errors.push(Tracked::from(error, argument.source.clone()));
+ return None;
+ }
+ }
+ // Invoke the invocation multiple times.
+ // The number of repetitions is the length of the longest argument.
+ let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1);
+ let mut values = Vec::new();
+ for i in 0..repetitions {
+ // Construct an argument map for this invocation.
+ let mut argument_map = IndexMap::new();
+ for (a, argument) in arguments.iter().enumerate() {
+ let name = definition.arguments[a].name.clone();
+ let source = invocation.arguments[a].source.clone();
+ let value = match argument {
+ // A loop argument supplies the same value on every repetition.
+ RepeatedArgument::Loop(value) => {
+ Tracked::from(value.clone(), source)
+ }
+ // A list argument supplies its i-th value, and is an error if it
+ // is shorter than the number of repetitions.
+ RepeatedArgument::List(list) => match list.get(i) {
+ Some(value) => {
+ Tracked::from(value.clone(), source)
+ }
+ None => {
+ let error = IntermediateError::ListExhausted;
+ let source = invocation.arguments[a].source.clone();
+ self.errors.push(Tracked::from(error, source));
+ return None;
+ }
+ }
+ };
+ if argument_map.insert(name.clone(), value).is_some() {
+ unreachable!("Uncaught duplicate macro argument name '{name}'");
+ };
+ }
+ // Every repetition is expanded in a fresh environment with its own ID.
+ let mut env = Environment {
+ label_names: &self.label_names,
+ macro_names: &self.macro_names,
+ macro_definitions: &self.macro_definitions,
+ arguments: argument_map,
+ errors: &mut self.errors,
+ id: next_id!(),
+ };
+ values.push(env.parse_macro_definition_body(&definition.body, source)?);
+ }
+ if values.len() == 1 {
+ values.pop()
+ } else {
+ // Flatten all values into a list of block tokens.
+ let mut block = Vec::new();
+ for value in values {
+ match value.value {
+ IntermediateValue::Integer(_) => {
+ let error = IntermediateError::ExpectedBlock;
+ self.errors.push(Tracked::from(error, value.source));
+ return None;
+ }
+ IntermediateValue::Block(mut tokens) => {
+ block.append(&mut tokens);
+ }
+ }
+ }
+ Some(Tracked::from(IntermediateValue::Block(block), source.clone()))
+ }
+ }
+ } else if let Some(macro_name) = self.macro_names.get(&invocation.name) {
+ // The macro exists, but its definition has not been reached yet.
+ let error = IntermediateError::InvocationBeforeDefinition;
+ let source = source.clone().wrap(macro_name.source.clone());
+ self.errors.push(Tracked::from(error, source));
+ None
+ } else {
+ unreachable!("Uncaught unresolved reference '{}'", invocation.name);
+ }
+ }
+
+ /// Convert an expression into an intermediate expression.
+ ///
+ /// Every token is converted even after one of them has failed, so that
+ /// all errors are recorded. Returns `None` if any token failed.
+ fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> {
+     let mut tokens = Vec::new();
+     let mut failed = false;
+
+     for token in &expression.tokens {
+         let token_source = &token.source;
+         let parsed = match &token.value {
+             ExpressionToken::Operator(operator) => {
+                 let operator = IntermediateExpressionToken::Operator(*operator);
+                 tokens.push(Tracked::from(operator, token_source.clone()));
+                 continue;
+             }
+             ExpressionToken::IntegerToken(integer) => {
+                 self.parse_integer_token(integer, token_source)
+             }
+             ExpressionToken::Invocation(invocation) => {
+                 self.parse_integer_invocation(invocation, token_source)
+             }
+         };
+         match parsed {
+             Some(integer) => {
+                 let value = IntermediateExpressionToken::Integer(integer.value);
+                 tokens.push(Tracked::from(value, integer.source));
+             }
+             None => failed = true,
+         }
+     }
+
+     if failed {
+         return None;
+     }
+     let integer = IntermediateInteger::Expression(IntermediateExpression { tokens });
+     Some(Tracked::from(integer, source.clone()))
+ }
+}
+
+
+/// Return from the enclosing function with the value of `$option` if it is
+/// `Some`, and carry on otherwise.
+///
+/// `$option` is evaluated exactly once, so an expensive or recursive
+/// expression is not run a second time when it yields a value.
+macro_rules! return_some {
+    ($option:expr) => {
+        if let Some(value) = $option { return Some(value); }
+    };
+}
+
+/// Find a label reference inside `integer`, returning its source if there is one.
+fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option<SourceSpan> {
+    match integer {
+        IntermediateInteger::LabelReference(label) => Some(label.source.clone()),
+        IntermediateInteger::Expression(expr) => expression_contains_label_reference(expr),
+        IntermediateInteger::Integer(_) => None,
+    }
+}
+
+/// Find the first label reference inside `expression`.
+///
+/// The returned source is the source of the token holding the reference,
+/// wrapped around the source of the reference itself.
+fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option<SourceSpan> {
+    expression.tokens.iter().find_map(|token| match &token.value {
+        IntermediateExpressionToken::Integer(integer) => {
+            integer_contains_label_reference(integer)
+                .map(|child| token.source.clone().wrap(child))
+        }
+        IntermediateExpressionToken::Operator(_) => None,
+    })
+}
+
+/// Find the first label definition inside `block`, returning its source.
+///
+/// Looks through nested blocks and through the arguments of invocations.
+fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option<SourceSpan> {
+    match block {
+        BlockToken::LabelDefinition(_) => Some(source.clone()),
+        BlockToken::Invocation(invocation) => invocation_contains_label_definition(invocation),
+        BlockToken::Block(blocks) => blocks
+            .iter()
+            .find_map(|block| block_contains_label_definition(block, &block.source)),
+        _ => None,
+    }
+}
+
+/// Find the first label definition inside the arguments of `invocation`,
+/// returning its source.
+fn invocation_contains_label_definition(invocation: &Invocation) -> Option<SourceSpan> {
+    invocation.arguments.iter().find_map(|argument| match &argument.value {
+        InvocationArgument::BlockToken(block) => {
+            block_contains_label_definition(block, &argument.source)
+        }
+        InvocationArgument::Invocation(nested) => {
+            invocation_contains_label_definition(nested)
+        }
+        _ => None,
+    })
+}
+
+/// Evaluate an integer that contains no label references.
+///
+/// Panics if `integer` is a label reference.
+fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> {
+    match integer {
+        IntermediateInteger::Expression(expr) => evaluate_expression(expr, source),
+        IntermediateInteger::Integer(value) => Ok(*value),
+        IntermediateInteger::LabelReference(name) => {
+            unreachable!("Uncaught label reference '{name}' in condition predicate or pinned address value")
+        }
+    }
+}
+
+/// Evaluate an expression that contains no label references.
+///
+/// Stops at the first operator that fails. `source` is the location used
+/// for an error raised when pulling the final result from the stack.
+///
+/// Panics if the expression contains a label reference.
+fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> {
+    let mut stack = ExpressionStack::new();
+    for token in &expression.tokens {
+        let token_source = &token.source;
+        match &token.value {
+            IntermediateExpressionToken::Operator(operator) => {
+                stack.apply(*operator, token_source).map_err(|stack_error| {
+                    let error = IntermediateError::StackError(stack_error);
+                    Tracked::from(error, token.source.clone())
+                })?;
+            }
+            IntermediateExpressionToken::Integer(IntermediateInteger::Integer(value)) => {
+                stack.push(*value);
+            }
+            IntermediateExpressionToken::Integer(IntermediateInteger::Expression(nested)) => {
+                let value = evaluate_expression(nested, token_source)?;
+                stack.push(value);
+            }
+            IntermediateExpressionToken::Integer(IntermediateInteger::LabelReference(name)) => {
+                unreachable!("Uncaught label reference '{name}' in condition predicate");
+            }
+        }
+    }
+    stack.pull_result().map_err(|err| {
+        let error = Tracked::from(err, source.clone());
+        Tracked::from(IntermediateError::StackError(error), source.clone())
+    })
+}
diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs
new file mode 100644
index 0000000..a09581e
--- /dev/null
+++ b/src/stages/intermediate_tokens.rs
@@ -0,0 +1,149 @@
+use crate::*;
+
+
+/// A token produced by the intermediate stage.
+#[derive(Clone)]
+pub enum IntermediateToken {
+ /// A word whose fields have not been evaluated yet.
+ Word(IntermediateWord),
+ /// An address that the following words are pinned to.
+ PinnedAddress(Tracked<usize>),
+ /// The definition of a label with the given name.
+ LabelDefinition(String),
+}
+
+/// A word together with the fields that still have to be evaluated.
+#[derive(Clone)]
+pub struct IntermediateWord {
+ /// Bits of the word before any field values are merged in.
+ pub value: usize,
+ /// Width of the word in bits.
+ pub width: u32,
+ /// Fields whose values are merged into the word.
+ pub fields: Vec<Tracked<IntermediateField>>,
+}
+
+/// A field of a word, holding a value that has not been evaluated yet.
+#[derive(Clone)]
+pub struct IntermediateField {
+ /// Value to be placed in the field.
+ pub value: Tracked<IntermediateInteger>,
+ /// Width of the field in bits.
+ pub width: u32,
+ /// Number of bits to the right of the field in the word.
+ pub shift: u32,
+}
+
+/// An integer value that may still need to be evaluated.
+#[derive(Clone)]
+pub enum IntermediateInteger {
+ /// A known value.
+ Integer(isize),
+ /// An expression to be evaluated.
+ Expression(IntermediateExpression),
+ /// A reference to the label with the given name.
+ LabelReference(Tracked<String>),
+}
+
+/// A sequence of integers and operators, evaluated in order on a stack.
+#[derive(Clone)]
+pub struct IntermediateExpression {
+ /// Tokens of the expression, in evaluation order.
+ pub tokens: Vec<Tracked<IntermediateExpressionToken>>,
+}
+
+/// A single token of an intermediate expression.
+#[derive(Clone)]
+pub enum IntermediateExpressionToken {
+ /// A value to be pushed onto the stack.
+ Integer(IntermediateInteger),
+ /// An operator to be applied to the stack.
+ Operator(Operator),
+}
+
+/// A value produced by an invocation or passed as a macro argument.
+#[derive(Clone)]
+pub enum IntermediateValue {
+ /// An integer value.
+ Integer(Tracked<IntermediateInteger>),
+ /// A block of intermediate tokens.
+ Block(Vec<Tracked<IntermediateToken>>),
+}
+
+/// An argument of an invocation that may be repeated.
+pub enum RepeatedArgument {
+ /// A single value, counted as having length 1.
+ Loop(IntermediateValue),
+ /// A list of values.
+ List(Vec<IntermediateValue>),
+}
+
+impl RepeatedArgument {
+    /// Number of values held: 1 for `Loop`, the length of the list for `List`.
+    pub fn len(&self) -> usize {
+        if let Self::List(list) = self {
+            list.len()
+        } else {
+            1
+        }
+    }
+}
+
+/// Errors that can be recorded by the intermediate stage.
+pub enum IntermediateError {
+ /// A block was produced where an integer was needed.
+ ExpectedInteger,
+ /// An integer was produced where a block was needed.
+ ExpectedBlock,
+ /// A string argument is shorter than another string passed to the same invocation.
+ ListExhausted,
+ /// The predicate of a conditional block contains a label reference.
+ LabelReferenceInConditionPredicate,
+ /// The body of a conditional block contains a label definition.
+ LabelDefinitionInConditionBody,
+ /// The value of a pinned address contains a label reference.
+ LabelReferenceInPinnedAddress,
+ /// An expression could not be evaluated on the expression stack.
+ StackError(Tracked<StackError>),
+ /// A macro was invoked before its definition.
+ InvocationBeforeDefinition,
+ /// expected, received
+ IncorrectArgumentCount(usize, usize),
+ /// expected, received
+ IncorrectArgumentType(ArgumentType, ArgumentType),
+}
+
+/// Report each of `errors` in order, using `source_code` for context.
+pub fn report_intermediate_errors(errors: &[Tracked<IntermediateError>], source_code: &str) {
+    errors.iter().for_each(|error| report_intermediate_error(error, source_code));
+}
+
+/// Report a single intermediate error as an error-level source issue.
+///
+/// Stack errors are handed to `report_stack_error` instead.
+fn report_intermediate_error(error: &Tracked<IntermediateError>, source_code: &str) {
+    let context = Context { source_code: &source_code, source: &error.source };
+    let message = match &error.value {
+        IntermediateError::ExpectedInteger =>
+            "An integer value was expected here",
+        IntermediateError::ExpectedBlock =>
+            "A block value was expected here",
+        IntermediateError::ListExhausted =>
+            "This string is shorter than another string passed to the same invocation",
+        IntermediateError::LabelReferenceInConditionPredicate =>
+            "The predicate of a conditional block cannot contain a label reference",
+        IntermediateError::LabelDefinitionInConditionBody =>
+            "The body of a conditional block cannot contain a label definition",
+        IntermediateError::LabelReferenceInPinnedAddress =>
+            "The value of a pinned address cannot contain a label reference",
+        IntermediateError::StackError(stack_error) => {
+            report_stack_error(stack_error, source_code); return; },
+        // A plain literal: there is nothing to interpolate, so no `format!`.
+        IntermediateError::InvocationBeforeDefinition =>
+            "Macro cannot be invoked before it has been defined",
+        IntermediateError::IncorrectArgumentCount(expected, received) =>
+            &format!("Expected {expected} arguments, but received {received} instead"),
+        IntermediateError::IncorrectArgumentType(expected, received) =>
+            &format!("Expected {expected} value but received {received} value instead"),
+    };
+
+    report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Print `token` at indentation level `i`, with the field values of a word
+/// printed one level deeper.
+pub fn print_intermediate_token(i: usize, token: &IntermediateToken) {
+    match token {
+        IntermediateToken::LabelDefinition(name) => {
+            indent!(i, "LabelDefinition({name})")
+        }
+        IntermediateToken::PinnedAddress(address) => {
+            indent!(i, "PinnedAddress({address})")
+        }
+        IntermediateToken::Word(word) => {
+            indent!(i, "Word({:>0w$b})", word.value, w = word.width as usize);
+            word.fields
+                .iter()
+                .for_each(|field| print_intermediate_integer(i+1, &field.value.value));
+        }
+    }
+}
+
+/// Print `integer` at indentation level `i`, with the tokens of an
+/// expression printed one level deeper.
+fn print_intermediate_integer(i: usize, integer: &IntermediateInteger) {
+    match integer {
+        IntermediateInteger::Expression(expression) => {
+            indent!(i, "Expression");
+            let inner = i+1;
+            for token in &expression.tokens {
+                match &token.value {
+                    IntermediateExpressionToken::Operator(operator) => {
+                        indent!(inner, "Operator({operator})")
+                    }
+                    IntermediateExpressionToken::Integer(integer) => {
+                        print_intermediate_integer(inner, integer)
+                    }
+                }
+            }
+        }
+        IntermediateInteger::LabelReference(name) => {
+            indent!(i, "LabelReference({name})")
+        }
+        IntermediateInteger::Integer(value) => {
+            indent!(i, "Integer({value})")
+        }
+    }
+}
diff --git a/src/stages/mod.rs b/src/stages/mod.rs
new file mode 100644
index 0000000..e735f05
--- /dev/null
+++ b/src/stages/mod.rs
@@ -0,0 +1,31 @@
+mod syntactic;
+mod syntactic_tokens;
+mod semantic;
+mod semantic_tokens;
+mod intermediate;
+mod intermediate_tokens;
+mod bytecode;
+mod bytecode_tokens;
+
+pub use syntactic::*;
+pub use syntactic_tokens::*;
+pub use semantic::*;
+pub use semantic_tokens::*;
+pub use intermediate::*;
+pub use intermediate_tokens::*;
+pub use bytecode::*;
+pub use bytecode_tokens::*;
+
+
+/// Print a formatted line to standard output, preceded by one unit of
+/// indentation per level.
+///
+/// The first argument is the indentation level; the remaining arguments are
+/// passed to `println!` unchanged.
+#[macro_export]
+macro_rules! indent {
+    (0, $($tokens:tt)*) => {{
+        println!($($tokens)*);
+    }};
+    ($level:expr, $($tokens:tt)*) => {{
+        (0..$level).for_each(|_| print!(" "));
+        println!($($tokens)*);
+    }};
+}
+
+
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs
new file mode 100644
index 0000000..e225608
--- /dev/null
+++ b/src/stages/semantic.rs
@@ -0,0 +1,478 @@
+use crate::*;
+
+use std::collections::VecDeque;
+
+
+/// Parse a list of syntactic tokens into semantic tokens, starting outside of
+/// any namespace. Returns every recorded error if parsing fails.
+pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> {
+    let parser = SemanticParser::from(syntactic, Namespace::None);
+    parser.parse()
+}
+
+/// The scope against which local symbols and sublabels are resolved.
+#[derive(Clone)]
+enum Namespace {
+ /// Inside the macro definition with this name; local names resolve to `macro:name`.
+ Macro(String),
+ /// After the global label with this name; local names resolve to `label/name`.
+ Label(String),
+ /// No enclosing scope; resolving a local name records an error.
+ None,
+}
+
+
+/// Converts a stream of syntactic tokens into semantic tokens, collecting
+/// errors as it goes.
+struct SemanticParser {
+ /// Scope used to resolve local symbols and sublabels.
+ namespace: Namespace,
+ /// Syntactic tokens that have not yet been consumed.
+ syntactic: SyntacticTokenStream,
+ /// Semantic tokens produced so far by `parse`.
+ semantic: Vec<Tracked<SemanticToken>>,
+ /// Errors recorded so far, including those pulled from child parsers.
+ errors: Vec<Tracked<SemanticError>>,
+}
+
+impl SemanticParser {
+ /// Create a parser over the given syntactic tokens, resolving local names
+ /// against the given namespace.
+ pub fn from(syntactic: Vec<Tracked<SyntacticToken>>, namespace: Namespace) -> Self {
+     let syntactic = SyntacticTokenStream::from(syntactic);
+     Self { namespace, syntactic, semantic: Vec::new(), errors: Vec::new() }
+ }
+
+ /// Absorb the results of a finished child parser.
+ ///
+ /// All errors are taken from the child. The namespace of the child is also
+ /// adopted, unless the child was parsing inside a macro definition.
+ fn pull_from(&mut self, mut other: SemanticParser) {
+     self.errors.append(&mut other.errors);
+     if !matches!(other.namespace, Namespace::Macro(_)) {
+         self.namespace = other.namespace;
+     }
+ }
+
+ /// Resolve the name of a label definition in the current namespace.
+ ///
+ /// A global label becomes the new label namespace, but is rejected inside a
+ /// macro definition. A local label is prefixed with the enclosing macro name
+ /// (`macro:name`) or label name (`label/name`), and is rejected outside of
+ /// any namespace. On rejection an error is recorded and None is returned.
+ fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> {
+     let error = match symbol {
+         ScopedSymbol::Global(global) => {
+             if let Namespace::Macro(_) = self.namespace {
+                 SemanticError::LabelInMacroDefinition
+             } else {
+                 self.namespace = Namespace::Label(global.clone());
+                 return Some(global);
+             }
+         }
+         ScopedSymbol::Local(local) => match &self.namespace {
+             Namespace::Macro(scope) => return Some(format!("{scope}:{local}")),
+             Namespace::Label(scope) => return Some(format!("{scope}/{local}")),
+             Namespace::None => SemanticError::SublabelWithoutNamespace,
+         }
+     };
+     self.errors.push(Tracked::from(error, source.to_owned()));
+     None
+ }
+
+ /// Resolve a symbol reference to its full name in the current namespace.
+ ///
+ /// Global symbols are returned unchanged. Local symbols are prefixed with the
+ /// enclosing macro name (`macro:name`) or label name (`label/name`); outside
+ /// of any namespace an error is recorded and None is returned.
+ fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> {
+     let local = match symbol {
+         ScopedSymbol::Global(global) => return Some(global),
+         ScopedSymbol::Local(local) => local,
+     };
+     let resolved = match &self.namespace {
+         Namespace::Macro(scope) => format!("{scope}:{local}"),
+         Namespace::Label(scope) => format!("{scope}/{local}"),
+         Namespace::None => {
+             let error = SemanticError::LocalSymbolWithoutNamespace;
+             self.errors.push(Tracked::from(error, source.to_owned()));
+             return None;
+         }
+     };
+     Some(resolved)
+ }
+
+ /// Parse the remaining syntactic tokens as a full program.
+ ///
+ /// Each macro definition is parsed by a child parser in the namespace of
+ /// that macro; every other token is parsed as a block token. Returns the
+ /// semantic tokens if no errors were recorded, otherwise every error.
+ pub fn parse(mut self) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> {
+     while let Some(token) = self.syntactic.pop() {
+         if let SyntacticToken::MacroDefinition(syntactic_definition) = token.value {
+             let SyntacticMacroDefinition { name, tokens } = syntactic_definition;
+             let mut child = SemanticParser::from(tokens, Namespace::Macro(name.to_string()));
+             // Argument definitions come first, everything after is the body.
+             let arguments: Vec<_> = std::iter::from_fn(|| child.pull_argument_definition()).collect();
+             let body = child.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody);
+             self.pull_from(child);
+             let definition = SemanticToken::MacroDefinition(MacroDefinition { name, arguments, body });
+             self.semantic.push(Tracked::from(definition, token.source));
+         } else {
+             // Not a macro definition, so return the token and parse it as a block token.
+             self.syntactic.unpop(token);
+             if let Some(block) = self.pull_block_token(SemanticLocation::Program) {
+                 let semantic = SemanticToken::BlockToken(block.value);
+                 self.semantic.push(Tracked::from(semantic, block.source));
+             }
+         }
+     }
+     if self.errors.is_empty() {
+         Ok(self.semantic)
+     } else {
+         Err(self.errors)
+     }
+ }
+
+ /// Parse the remaining syntactic tokens as a macro definition body.
+ ///
+ /// An empty body becomes an empty block, and a body with a single token is
+ /// returned as that token. A body with several tokens is merged into one
+ /// block, with invocations converted to block invocations. An integer cannot
+ /// be part of a block, so each integer in such a body is dropped and
+ /// reported as `ExpectedBlock(location)`.
+ fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody {
+     let mut tokens = Vec::new();
+     while !self.syntactic.is_empty() {
+         if let Some(token) = self.pull_macro_definition_body_token() {
+             tokens.push(token);
+         }
+     }
+     // A lone token keeps its own kind, and no tokens means an empty block.
+     if tokens.len() <= 1 {
+         return tokens.pop().unwrap_or_else(|| MacroDefinitionBody::Block(Vec::new()));
+     }
+     let mut block_tokens = Vec::new();
+     for token in tokens {
+         match token {
+             MacroDefinitionBody::Integer(integer) => {
+                 // The body is a block, so the integer is the unexpected value.
+                 let error = SemanticError::ExpectedBlock(location);
+                 self.errors.push(Tracked::from(error, integer.source));
+             }
+             MacroDefinitionBody::Block(mut tokens) => {
+                 block_tokens.append(&mut tokens);
+             }
+             MacroDefinitionBody::Invocation(invocation) => {
+                 // Convert invocation to a block invocation.
+                 let token = BlockToken::Invocation(invocation.value);
+                 block_tokens.push(Tracked::from(token, invocation.source));
+             }
+         }
+     }
+     MacroDefinitionBody::Block(block_tokens)
+ }
+
+ /// Attempt to pull a MacroDefinitionBody token from the token stream.
+ ///
+ /// Returns None once the stream is exhausted. A token that is not valid here
+ /// is consumed, an error is recorded for it, and None is returned. Every
+ /// block token is wrapped in its own single-element MacroDefinitionBody.
+ fn pull_macro_definition_body_token(&mut self) -> Option<MacroDefinitionBody> {
+     // Wrap a block token as a single-element block body.
+     let block_body = |token: BlockToken, source: SourceSpan| {
+         MacroDefinitionBody::Block(vec![Tracked::from(token, source)])
+     };
+     // Wrap an integer token as an integer body.
+     let integer_body = |token: IntegerToken, source: SourceSpan| {
+         MacroDefinitionBody::Integer(Tracked::from(token, source))
+     };
+
+     let token = self.syntactic.pop()?;
+     let source = token.source;
+     // Valid tokens return from inside the match; misplaced tokens fall
+     // through with the error to record.
+     let misplaced = match token.value {
+         SyntacticToken::LabelDefinition(symbol) => {
+             let name = self.resolve_label_name(symbol, &source)?;
+             return Some(block_body(BlockToken::LabelDefinition(name), source));
+         }
+         SyntacticToken::WordTemplate(word_template) => {
+             return Some(block_body(BlockToken::WordTemplate(word_template), source));
+         }
+         SyntacticToken::BlockLiteral(tokens) => {
+             let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+             let block = parser.parse_block();
+             self.pull_from(parser);
+             return Some(block_body(BlockToken::Block(block), source));
+         }
+         SyntacticToken::Condition => {
+             let conditional = self.pull_conditional_block()?;
+             let token = BlockToken::ConditionalBlock(Box::new(conditional));
+             return Some(block_body(token, source));
+         }
+         SyntacticToken::Pin => {
+             let address = self.pull_integer_token(SemanticLocation::PinAddress)?;
+             return Some(block_body(BlockToken::PinnedAddress(address), source));
+         }
+         SyntacticToken::IntegerLiteral(value) => {
+             return Some(integer_body(IntegerToken::IntegerLiteral(value), source));
+         }
+         SyntacticToken::Expression(tokens) => {
+             let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+             let expression = parser.parse_expression();
+             self.pull_from(parser);
+             return Some(integer_body(IntegerToken::Expression(expression), source));
+         }
+         SyntacticToken::Symbol(symbol) => {
+             let name = self.resolve_symbol_name(symbol, &source)?;
+             let arguments = self.pull_all_invocation_arguments();
+             // Extend invocation source span to cover all arguments.
+             let mut source = source;
+             if let Some(last) = arguments.last() {
+                 source.in_merged.end = last.source.in_merged.end;
+                 if let (Some(last_in_source), Some(in_source)) = (&last.source.in_source, &mut source.in_source) {
+                     in_source.end = last_in_source.end.clone();
+                 }
+             }
+             let invocation = Tracked::from(Invocation { name, arguments }, source);
+             return Some(MacroDefinitionBody::Invocation(invocation));
+         }
+         SyntacticToken::MacroDefinition(_) => SemanticError::MisplacedMacroDefinition,
+         SyntacticToken::StringLiteral(_) => SemanticError::MisplacedStringLiteral,
+         SyntacticToken::Separator => SemanticError::MisplacedSeparator,
+     };
+     self.errors.push(Tracked::from(misplaced, source));
+     None
+ }
+
+ /// Attempt to pull an integer token from the token stream.
+ ///
+ /// An invocation is converted to an integer invocation. A block token is
+ /// dropped and reported as `ExpectedInteger(location)`, and None is returned.
+ fn pull_integer_token(&mut self, location: SemanticLocation) -> Option<Tracked<IntegerToken>> {
+     let body = self.pull_macro_definition_body_token()?;
+     let mut rejected = match body {
+         MacroDefinitionBody::Integer(integer) => return Some(integer),
+         MacroDefinitionBody::Invocation(invocation) => {
+             let integer = IntegerToken::Invocation(invocation.value);
+             return Some(Tracked::from(integer, invocation.source));
+         }
+         MacroDefinitionBody::Block(tokens) => tokens,
+     };
+     // Each pulled block token arrives wrapped in its own block body.
+     assert_eq!(rejected.len(), 1);
+     let token = rejected.pop().unwrap();
+     let error = SemanticError::ExpectedInteger(location);
+     self.errors.push(Tracked::from(error, token.source));
+     None
+ }
+
+ /// Attempt to pull a BlockToken from the token stream.
+ ///
+ /// An invocation is converted to a block invocation. An integer token is
+ /// dropped and reported as `ExpectedBlock(location)`, and None is returned.
+ fn pull_block_token(&mut self, location: SemanticLocation) -> Option<Tracked<BlockToken>> {
+     let body = self.pull_macro_definition_body_token()?;
+     let rejected = match body {
+         MacroDefinitionBody::Block(mut tokens) => {
+             // Each pulled block token arrives wrapped in its own block body.
+             assert_eq!(tokens.len(), 1);
+             return tokens.pop();
+         }
+         MacroDefinitionBody::Invocation(invocation) => {
+             let block = BlockToken::Invocation(invocation.value);
+             return Some(Tracked::from(block, invocation.source));
+         }
+         MacroDefinitionBody::Integer(integer) => integer,
+     };
+     let error = SemanticError::ExpectedBlock(location);
+     self.errors.push(Tracked::from(error, rejected.source));
+     None
+ }
+
+ /// Parse the remaining syntactic tokens as the contents of a block.
+ /// Tokens that are not block tokens are reported and left out.
+ fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> {
+     let mut block = Vec::new();
+     loop {
+         if self.syntactic.is_empty() {
+             break block;
+         }
+         block.extend(self.pull_block_token(SemanticLocation::BlockLiteral));
+     }
+ }
+
+ /// Parse the remaining syntactic tokens as a list of integer tokens.
+ /// Tokens that are not integer tokens are reported and left out.
+ fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> {
+     let mut integers = Vec::new();
+     loop {
+         if self.syntactic.is_empty() {
+             break integers;
+         }
+         integers.extend(self.pull_integer_token(location));
+     }
+ }
+
+ /// Parse the remaining syntactic tokens as the contents of an expression.
+ ///
+ /// An invocation without arguments whose name is recognised by
+ /// `Operator::from_str` becomes an operator; every other integer token is
+ /// kept as an integer token.
+ fn parse_expression(&mut self) -> Expression {
+     let tokens: Vec<Tracked<ExpressionToken>> = self
+         .parse_integer_list(SemanticLocation::Expression)
+         .into_iter()
+         .map(|token| {
+             let source = token.source;
+             let expression_token = match token.value {
+                 IntegerToken::Invocation(invocation) => {
+                     // Only an invocation without arguments can be an operator.
+                     let operator = if invocation.arguments.is_empty() {
+                         Operator::from_str(&invocation.name)
+                     } else {
+                         None
+                     };
+                     match operator {
+                         Some(operator) => ExpressionToken::Operator(operator),
+                         None => {
+                             let integer = Box::new(IntegerToken::Invocation(invocation));
+                             ExpressionToken::IntegerToken(integer)
+                         }
+                     }
+                 }
+                 other => ExpressionToken::IntegerToken(Box::new(other)),
+             };
+             Tracked::from(expression_token, source)
+         })
+         .collect();
+     Expression { tokens }
+ }
+
+ /// Attempt to pull a conditional block from the token stream.
+ ///
+ /// The predicate is pulled as an integer token and then the body as a block
+ /// token. None is returned as soon as either of them cannot be pulled.
+ fn pull_conditional_block(&mut self) -> Option<ConditionalBlock> {
+     let Some(predicate) = self.pull_integer_token(SemanticLocation::ConditionPredicate) else {
+         return None;
+     };
+     let Some(body) = self.pull_block_token(SemanticLocation::ConditionBody) else {
+         return None;
+     };
+     Some(ConditionalBlock { predicate, body })
+ }
+
+ /// Attempt to pull an invocation argument from the token stream.
+ ///
+ /// An argument is a separator followed by a single token. None is returned
+ /// if the next token is not a separator, if nothing follows the separator,
+ /// or if the token cannot be used as an argument (an error is recorded).
+ fn pull_invocation_argument(&mut self) -> Option<Tracked<InvocationArgument>> {
+     self.syntactic.pop_if(is_separator)?;
+     let token = self.syntactic.pop()?;
+     let source = token.source;
+     let argument = match token.value {
+         SyntacticToken::StringLiteral(string_literal) => {
+             InvocationArgument::String(string_literal)
+         }
+         SyntacticToken::IntegerLiteral(value) => {
+             InvocationArgument::IntegerToken(IntegerToken::IntegerLiteral(value))
+         }
+         SyntacticToken::Expression(tokens) => {
+             let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+             let expression = parser.parse_expression();
+             self.pull_from(parser);
+             InvocationArgument::IntegerToken(IntegerToken::Expression(expression))
+         }
+         SyntacticToken::BlockLiteral(tokens) => {
+             let mut parser = SemanticParser::from(tokens, self.namespace.clone());
+             let block = parser.parse_block();
+             self.pull_from(parser);
+             InvocationArgument::BlockToken(BlockToken::Block(block))
+         }
+         SyntacticToken::Symbol(symbol) => {
+             // A symbol used as an argument is an invocation without arguments.
+             let name = self.resolve_symbol_name(symbol, &source)?;
+             InvocationArgument::Invocation(Invocation { name, arguments: Vec::new() })
+         }
+         SyntacticToken::WordTemplate(word_template) => {
+             InvocationArgument::BlockToken(BlockToken::WordTemplate(word_template))
+         }
+         _ => {
+             let error = SemanticError::InvalidInvocationArgument;
+             self.errors.push(Tracked::from(error, source));
+             return None;
+         }
+     };
+     Some(Tracked::from(argument, source))
+ }
+
+ /// Pull invocation arguments from the token stream until
+ /// `pull_invocation_argument` returns None.
+ fn pull_all_invocation_arguments(&mut self) -> Vec<Tracked<InvocationArgument>> {
+     std::iter::from_fn(|| self.pull_invocation_argument()).collect()
+ }
+
+ /// Attempt to pull an argument definition from the token stream.
+ ///
+ /// A definition is a separator followed by a single token. None is returned
+ /// if the next token is not a separator, if nothing follows the separator,
+ /// or if the token is not a valid definition (an error is recorded).
+ fn pull_argument_definition(&mut self) -> Option<Tracked<ArgumentDefinition>> {
+     self.syntactic.pop_if(is_separator)?;
+     let token = self.syntactic.pop()?;
+     let source = token.source;
+     // A bare global symbol names an integer argument, and a block literal
+     // holding exactly one global symbol names a block argument.
+     let parsed = match token.value {
+         SyntacticToken::Symbol(ScopedSymbol::Global(name)) => {
+             Some((name, ArgumentType::Integer))
+         }
+         SyntacticToken::BlockLiteral(mut tokens) => {
+             let only = if tokens.len() == 1 { tokens.pop() } else { None };
+             match only.map(|inner| inner.value) {
+                 Some(SyntacticToken::Symbol(ScopedSymbol::Global(name))) => {
+                     Some((name, ArgumentType::Block))
+                 }
+                 _ => None,
+             }
+         }
+         _ => None,
+     };
+     match parsed {
+         Some((name, variant)) => {
+             let definition = ArgumentDefinition { name, variant };
+             Some(Tracked::from(definition, source))
+         }
+         None => {
+             let error = SemanticError::InvalidArgumentDefinition;
+             self.errors.push(Tracked::from(error, source));
+             None
+         }
+     }
+ }
+}
+
+
+
+/// A queue of syntactic tokens, consumed from the front.
+struct SyntacticTokenStream {
+ /// Tokens that have not yet been consumed, next token first.
+ tokens: VecDeque<Tracked<SyntacticToken>>,
+}
+
+impl SyntacticTokenStream {
+    /// Create a stream from anything that converts into a queue of tokens.
+    pub fn from<T: Into<VecDeque<Tracked<SyntacticToken>>>>(tokens: T) -> Self {
+        let tokens = tokens.into();
+        Self { tokens }
+    }
+
+    /// Remove and return the next token, or None if the stream is empty.
+    pub fn pop(&mut self) -> Option<Tracked<SyntacticToken>> {
+        self.tokens.pop_front()
+    }
+
+    /// Remove and return the next token only if it satisfies the predicate.
+    /// Returns None if the stream is empty or the predicate is not satisfied.
+    pub fn pop_if(&mut self, predicate: fn(&Tracked<SyntacticToken>) -> bool) -> Option<Tracked<SyntacticToken>> {
+        let next = self.tokens.front()?;
+        if predicate(next) {
+            self.tokens.pop_front()
+        } else {
+            None
+        }
+    }
+
+    /// Put a token back at the front of the stream.
+    pub fn unpop(&mut self, token: Tracked<SyntacticToken>) {
+        self.tokens.push_front(token);
+    }
+
+    /// Returns true if no tokens remain.
+    pub fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
+    }
+}
+
+
+/// Returns true if the token is a separator.
+fn is_separator(token: &Tracked<SyntacticToken>) -> bool {
+    matches!(token.value, SyntacticToken::Separator)
+}
diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs
new file mode 100644
index 0000000..dfbea1a
--- /dev/null
+++ b/src/stages/semantic_tokens.rs
@@ -0,0 +1,296 @@
+use crate::*;
+
+
+/// A token at the outermost level of a parsed program.
+pub enum SemanticToken {
+ /// The definition of a macro.
+ MacroDefinition(MacroDefinition),
+ /// Any other token, parsed as a block token.
+ BlockToken(BlockToken),
+}
+
+/// A macro definition: a name, a list of argument definitions, and a body.
+pub struct MacroDefinition {
+ pub name: Tracked<String>,
+ pub arguments: Vec<Tracked<ArgumentDefinition>>,
+ pub body: MacroDefinitionBody,
+}
+
+/// The name and type of a single argument of a macro definition.
+pub struct ArgumentDefinition {
+ pub name: String,
+ pub variant: ArgumentType,
+}
+
+/// The type of value that a macro argument accepts.
+#[derive(PartialEq)]
+pub enum ArgumentType {
+ /// Declared as a bare name.
+ Integer,
+ /// Declared as a name wrapped in a block literal.
+ Block,
+}
+
+/// Formats the argument type as a noun phrase with its article.
+impl std::fmt::Display for ArgumentType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let description = match self {
+            ArgumentType::Integer => "an integer",
+            ArgumentType::Block => "a block",
+        };
+        f.write_str(description)
+    }
+}
+
+/// The body of a macro definition, also used as the result of pulling a
+/// single body token from the token stream.
+pub enum MacroDefinitionBody {
+ /// An integer literal or an expression.
+ Integer(Tracked<IntegerToken>),
+ /// A list of block tokens.
+ Block(Vec<Tracked<BlockToken>>),
+ /// An invocation, which can be converted to either an integer or a block invocation.
+ Invocation(Tracked<Invocation>),
+}
+
+/// A conditional block: an integer predicate followed by a block token body.
+pub struct ConditionalBlock {
+ pub predicate: Tracked<IntegerToken>,
+ pub body: Tracked<BlockToken>,
+}
+
+/// A token used where an integer value is expected.
+pub enum IntegerToken {
+ IntegerLiteral(isize),
+ Expression(Expression),
+ Invocation(Invocation),
+}
+
+/// The tokens of an expression, in source order.
+pub struct Expression {
+ pub tokens: Vec<Tracked<ExpressionToken>>,
+}
+
+/// A single token inside an expression.
+pub enum ExpressionToken {
+ IntegerToken(Box<IntegerToken>),
+ Invocation(Invocation),
+ Operator(Operator),
+}
+
+/// A token used where a block value is expected.
+pub enum BlockToken {
+ /// A label definition, holding the fully resolved label name.
+ LabelDefinition(String),
+ /// A pin, holding the integer token that follows it.
+ PinnedAddress(Tracked<IntegerToken>),
+ ConditionalBlock(Box<ConditionalBlock>),
+ WordTemplate(WordTemplate),
+ /// A nested block literal.
+ Block(Vec<Tracked<BlockToken>>),
+ Invocation(Invocation),
+}
+
+/// An invocation: the fully resolved symbol name and its arguments.
+pub struct Invocation {
+ pub name: String,
+ pub arguments: Vec<Tracked<InvocationArgument>>,
+}
+
+/// A single argument passed to an invocation.
+pub enum InvocationArgument {
+ String(StringLiteral),
+ IntegerToken(IntegerToken),
+ BlockToken(BlockToken),
+ /// A symbol passed as an argument, parsed as an invocation without arguments.
+ Invocation(Invocation),
+}
+
+/// An error found while parsing syntactic tokens into semantic tokens.
+pub enum SemanticError {
+ // A token was used somewhere that it is not allowed.
+ MisplacedStringLiteral,
+ MisplacedListLiteral,
+ MisplacedSeparator,
+ MisplacedMacroDefinition,
+
+ // A value of the wrong type was found at the given location.
+ ExpectedInteger(SemanticLocation),
+ ExpectedBlock(SemanticLocation),
+
+ InvalidArgumentDefinition,
+ InvalidInvocationArgument,
+
+ // A label or symbol could not be resolved in the current namespace.
+ LabelInMacroDefinition,
+ SublabelWithoutNamespace,
+ LocalSymbolWithoutNamespace,
+}
+
+/// The place in the program where a value of a particular type was expected,
+/// used to build the message of an error.
+#[derive(Clone, Copy)]
+pub enum SemanticLocation {
+ MacroDefinitionBody,
+ Expression,
+ ConditionPredicate,
+ ConditionBody,
+ Program,
+ BlockLiteral,
+ PinAddress,
+}
+
+impl std::fmt::Display for SemanticLocation {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ let string = match self {
+ SemanticLocation::Expression =>
+ "inside this expression",
+ SemanticLocation::ConditionPredicate =>
+ "as the predicate of this conditional block",
+ SemanticLocation::ConditionBody =>
+ "as the body of this conditional block",
+ SemanticLocation::Program =>
+ "at the outermost level of the program",
+ SemanticLocation::BlockLiteral =>
+ "inside this block literal",
+ SemanticLocation::MacroDefinitionBody =>
+ "inside the body of this macro definition",
+ SemanticLocation::PinAddress =>
+ "as the address of this pin",
+ };
+ write!(f, "{string}")
+ }
+}
+
+
+pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) {
+ for error in errors {
+ report_semantic_error(error, source_code);
+ }
+}
+
+fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SemanticError::MisplacedStringLiteral =>
+ "A string literal can only be used as an invocation argument",
+ SemanticError::MisplacedListLiteral =>
+ "A list literal can only be used as an invocation argument",
+ SemanticError::MisplacedSeparator =>
+ "A separator can only be used to construct an argument list",
+ SemanticError::MisplacedMacroDefinition =>
+ "A macro definition must be used at the outermost level of the program",
+
+ SemanticError::ExpectedInteger(location) =>
+ &format!("An integer value was expected {location}"),
+ SemanticError::ExpectedBlock(location) =>
+ &format!("A block value was expected {location}"),
+
+ SemanticError::InvalidArgumentDefinition =>
+ "Argument definitions must be in the form 'name' or '{{name}}'",
+ SemanticError::InvalidInvocationArgument =>
+ "This token cannot be used in an invocation argument",
+
+ SemanticError::LabelInMacroDefinition =>
+ &format!("Only sublabels can be defined inside macro definitions"),
+ SemanticError::SublabelWithoutNamespace =>
+ &format!("Sublabel was not defined inside a macro definition or after a label"),
+ SemanticError::LocalSymbolWithoutNamespace =>
+ &format!("Local symbol was not defined inside a macro definition or after a label"),
+ };
+
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Print a debug listing of a semantic token, indented by `i` levels.
+///
+/// A macro definition is printed as a header line followed by its argument
+/// definitions and its body, each one level deeper.
+pub fn print_semantic_token(i: usize, token: &SemanticToken) {
+    match token {
+        SemanticToken::MacroDefinition(definition) => {
+            indent!(i, "MacroDefinition({})", definition.name);
+            for argument in &definition.arguments {
+                print_argument_definition(i+1, argument);
+            }
+            match &definition.body {
+                MacroDefinitionBody::Integer(integer) => print_integer_token(i+1, integer),
+                MacroDefinitionBody::Block(tokens) => print_block(i+1, tokens),
+                MacroDefinitionBody::Invocation(invocation) => print_invocation(i+1, invocation),
+            }
+        }
+        // Use the requested indentation instead of always printing at level 0.
+        SemanticToken::BlockToken(block) => print_block_token(i, block),
+    }
+}
+
+/// Print the name and type of an argument definition, indented by `i` levels.
+fn print_argument_definition(i: usize, argument: &ArgumentDefinition) {
+    let kind = match argument.variant {
+        ArgumentType::Integer => "integer",
+        ArgumentType::Block => "block",
+    };
+    indent!(i, "Argument({}, {kind})", argument.name);
+}
+
+/// Print a debug listing of a block token, indented by `i` levels.
+/// Nested tokens are printed one level deeper than their parent.
+fn print_block_token(i: usize, block: &BlockToken) {
+    match block {
+        BlockToken::LabelDefinition(name) => {
+            indent!(i, "LabelDefinition({name})");
+        }
+        BlockToken::PinnedAddress(address) => {
+            indent!(i, "PinnedAddress");
+            print_integer_token(i+1, address);
+        }
+        BlockToken::ConditionalBlock(conditional) => {
+            indent!(i, "ConditionalBlock");
+            indent!(i+1, "Predicate");
+            print_integer_token(i+2, &conditional.predicate);
+            indent!(i+1, "Body");
+            print_block_token(i+2, &conditional.body);
+        }
+        BlockToken::WordTemplate(word_template) => {
+            indent!(i, "WordTemplate({word_template})");
+        }
+        BlockToken::Block(tokens) => print_block(i, tokens),
+        BlockToken::Invocation(invocation) => print_invocation(i, invocation),
+    }
+}
+
+/// Print a block header at `i` levels of indentation, followed by each token
+/// of the block one level deeper.
+fn print_block(i: usize, tokens: &[Tracked<BlockToken>]) {
+    indent!(i, "Block");
+    tokens.iter().for_each(|token| print_block_token(i+1, token));
+}
+
+/// Print the name of an invocation at `i` levels of indentation, followed by
+/// each of its arguments one level deeper.
+fn print_invocation(i: usize, invocation: &Invocation) {
+    let Invocation { name, arguments } = invocation;
+    indent!(i, "Invocation({})", name);
+    for argument in arguments {
+        print_invocation_argument(i+1, argument);
+    }
+}
+
+/// Print a debug listing of an invocation argument, indented by `i` levels.
+fn print_invocation_argument(i: usize, argument: &InvocationArgument) {
+    match argument {
+        InvocationArgument::Invocation(invocation) => print_invocation(i, invocation),
+        InvocationArgument::BlockToken(block) => print_block_token(i, block),
+        InvocationArgument::IntegerToken(integer) => print_integer_token(i, integer),
+        InvocationArgument::String(string_literal) => {
+            indent!(i, "String({string_literal})");
+        }
+    }
+}
+
+/// Print a debug listing of an integer token, indented by `i` levels.
+fn print_integer_token(i: usize, integer: &IntegerToken) {
+    match integer {
+        IntegerToken::Invocation(invocation) => print_invocation(i, invocation),
+        IntegerToken::Expression(expression) => print_expression(i, expression),
+        IntegerToken::IntegerLiteral(value) => {
+            indent!(i, "IntegerValue({value})");
+        }
+    }
+}
+
+/// Print an expression header at `i` levels of indentation, followed by each
+/// token of the expression one level deeper.
+fn print_expression(i: usize, expression: &Expression) {
+    indent!(i, "Expression");
+    for token in &expression.tokens {
+        match &token.value {
+            ExpressionToken::Operator(operator) => {
+                indent!(i+1, "Operator({operator})");
+            }
+            ExpressionToken::Invocation(invocation) => print_invocation(i+1, invocation),
+            ExpressionToken::IntegerToken(integer) => print_integer_token(i+1, integer),
+        }
+    }
+}
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..2e7f959
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,323 @@
+use crate::*;
+
+use assembler::Tokeniser;
+
+
+/// Parse source code into syntactic tokens, passing the source code and the
+/// optional path to a new tokeniser. Returns every recorded error if parsing fails.
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let tokeniser = Tokeniser::new(source_code, path);
+    parse_syntactic_from_tokeniser(tokeniser)
+}
+
+/// Parse every token from the tokeniser into a list of syntactic tokens.
+///
+/// Block literals, expressions, and macro definitions are parsed recursively
+/// from a child tokeniser. Parsing continues after an error so that as many
+/// errors as possible are collected; if any error was recorded then the list
+/// of errors is returned instead of the tokens.
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']);
+ let mut tokens = Vec::new();
+ let mut errors = Vec::new();
+
+ // Record an error and skip to the next token. The source defaults to the
+ // current source span of the tokeniser.
+ macro_rules! push_err {
+ ($error:expr) => {{
+ push_err!($error, t.get_source());
+ }};
+ ($error:expr, $source:expr) => {{
+ errors.push(Tracked::from($error, $source));
+ continue;
+ }};
+ }
+
+ loop {
+ t.eat_whitespace();
+ t.mark_start();
+ let Some(c) = t.eat_char() else { break };
+ let token = match c {
+ // String literal, running to the next double quote.
+ '"' => {
+ let source = t.get_source();
+ t.mark_child();
+ let is_any_close = |t: &mut Tokeniser| {
+ t.eat_char() == Some('"')
+ };
+ if let Some(_) = t.track_until(is_any_close) {
+ let child = t.tokenise_child_span();
+ SyntacticToken::StringLiteral(parse_string_literal(child))
+ } else {
+ push_err!(SyntacticError::UnterminatedStringLiteral, source);
+ }
+ }
+ // Character literal, which must hold exactly one character and
+ // becomes an integer literal.
+ '\'' => {
+ let source = t.get_source();
+ let is_any_close = |t: &mut Tokeniser| {
+ t.eat_char() == Some('\'')
+ };
+ if let Some(string) = t.track_until(is_any_close) {
+ let mut chars: Vec<char> = string.chars().collect();
+ if chars.len() == 1 {
+ let value = parse_char(chars.pop().unwrap());
+ SyntacticToken::IntegerLiteral(value)
+ } else {
+ t.mark_end();
+ push_err!(SyntacticError::ExpectedSingleCharacter, t.get_source());
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedCharacterLiteral, source);
+ }
+ }
+
+ // Block literal, running to the matching close brace.
+ '{' => {
+ let source = t.get_source();
+ t.mark_child();
+ // Track the nesting depth so that nested blocks are skipped over.
+ let mut depth = 1;
+ let is_matching_close = |t: &mut Tokeniser| {
+ match t.eat_char() {
+ Some('{') => { depth += 1; false }
+ Some('}') => { depth -= 1; depth == 0 }
+ _ => false,
+ }
+ };
+ if let Some(_) = t.track_until(is_matching_close) {
+ let child = t.tokenise_child_span();
+ match parse_syntactic_from_tokeniser(child) {
+ Ok(tokens) => SyntacticToken::BlockLiteral(tokens),
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedBlock, source);
+ }
+ }
+ // Expression, running to the matching close bracket.
+ '[' => {
+ let source = t.get_source();
+ t.mark_child();
+ let mut depth = 1;
+ let is_matching_close = |t: &mut Tokeniser| {
+ match t.eat_char() {
+ Some('[') => { depth += 1; false }
+ Some(']') => { depth -= 1; depth == 0 }
+ _ => false,
+ }
+ };
+ if let Some(_) = t.track_until(is_matching_close) {
+ let child = t.tokenise_child_span();
+ match parse_syntactic_from_tokeniser(child) {
+ Ok(tokens) => SyntacticToken::Expression(tokens),
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedExpression, source);
+ }
+ }
+ // Comment, running to the matching close parenthesis. A comment
+ // produces no token.
+ '(' => {
+ let source = t.get_source();
+ let mut depth = 1;
+ let is_matching_close = |t: &mut Tokeniser| {
+ match t.eat_char() {
+ Some('(') => { depth += 1; false }
+ Some(')') => { depth -= 1; depth == 0 }
+ _ => false,
+ }
+ };
+ if let Some(string) = t.track_until(is_matching_close) {
+ // Check if the comment fills the entire line.
+ if t.start.position.column == 0 && t.end_of_line() {
+ // A full-line comment starting with ": " sets the embedded
+ // path of the tokeniser, taking effect from the next line.
+ if let Some(path) = string.strip_prefix(": ") {
+ t.embedded_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start.position.line + 1;
+ }
+ }
+ continue;
+ } else {
+ push_err!(SyntacticError::UnterminatedComment, source);
+ }
+ }
+ // Macro definition, with the name directly following the '%' and
+ // the contents running to the next ';'.
+ '%' => {
+ let name = t.eat_token();
+ let source = t.get_source();
+ t.mark_child();
+ let is_any_close = |t: &mut Tokeniser| t.eat_char() == Some(';');
+ if let Some(_) = t.track_until(is_any_close) {
+ let child = t.tokenise_child_span();
+ match parse_syntactic_from_tokeniser(child) {
+ Ok(tokens) => {
+ let name = Tracked::from(name, source);
+ let def = SyntacticMacroDefinition { name, tokens };
+ SyntacticToken::MacroDefinition(def)
+ }
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ } else {
+ push_err!(SyntacticError::UnterminatedMacroDefinition(name), source);
+ }
+ }
+
+ // A terminator found here has no matching opening character.
+ '}' => push_err!(SyntacticError::UnmatchedBlockTerminator),
+ ']' => push_err!(SyntacticError::UnmatchedExpressionTerminator),
+ ')' => push_err!(SyntacticError::UnmatchedCommentTerminator),
+ ';' => push_err!(SyntacticError::UnmatchedMacroTerminator),
+
+ '@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())),
+ '&' => SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())),
+ '~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())),
+ ':' => SyntacticToken::Separator,
+ '|' => SyntacticToken::Pin,
+ '?' => SyntacticToken::Condition,
+
+ // Word template, parsed from the token following the '#'.
+ '#' => {
+ t.mark_child();
+ t.eat_token();
+ let child = t.tokenise_child_span();
+ match parse_word_template(child) {
+ Ok(word_template) => SyntacticToken::WordTemplate(word_template),
+ Err(mut parse_errors) => {
+ errors.append(&mut parse_errors);
+ continue;
+ }
+ }
+ },
+
+ // Anything else is an integer literal if it parses as one,
+ // otherwise it is a global symbol.
+ c => {
+ let token = format!("{c}{}", t.eat_token());
+ if let Some(hex_string) = token.strip_prefix("0x") {
+ match parse_integer_literal(hex_string, 16) {
+ Ok(value) => SyntacticToken::IntegerLiteral(value),
+ Err(_) => push_err!(SyntacticError::InvalidHexadecimalLiteral(token)),
+ }
+ } else if let Some(binary_string) = token.strip_prefix("0b") {
+ match parse_integer_literal(binary_string, 2) {
+ Ok(value) => SyntacticToken::IntegerLiteral(value),
+ Err(_) => push_err!(SyntacticError::InvalidBinaryLiteral(token)),
+ }
+ } else {
+ match parse_integer_literal(&token, 10) {
+ Ok(value) => SyntacticToken::IntegerLiteral(value),
+ Err(true) => push_err!(SyntacticError::InvalidDecimalLiteral(token)),
+ Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)),
+ }
+ }
+ }
+ };
+
+ t.mark_end();
+ tokens.push(Tracked::from(token, t.get_source()))
+ }
+ match errors.is_empty() {
+ true => Ok(tokens),
+ false => Err(errors),
+ }
+}
+
+
+/// Parse an integer literal in the given radix, ignoring any underscores.
+///
+/// Returns `Err(true)` if the token is a number that is too large to be
+/// represented (it overflows `usize` or `isize`), and `Err(false)` if the
+/// token is not a number at all.
+fn parse_integer_literal(token: &str, radix: u32) -> Result<isize, bool> {
+    let digits = token.replace('_', "");
+    match usize::from_str_radix(&digits, radix) {
+        Ok(value) => isize::try_from(value).map_err(|_| true),
+        // A number too large for usize is still a number, not a symbol.
+        Err(error) => Err(matches!(error.kind(), std::num::IntErrorKind::PosOverflow)),
+    }
+}
+
+
+/// Parse the contents of a string literal from a child tokeniser.
+///
+/// Each character is recorded both as part of the string and as a separate
+/// integer value tracked with the tokeniser's source span for that character.
+fn parse_string_literal(mut t: Tokeniser) -> StringLiteral {
+    let mut chars = Vec::new();
+    let mut string = String::new();
+
+    loop {
+        let Some(c) = t.eat_char() else { break };
+        chars.push(Tracked::from(parse_char(c), t.get_source()));
+        string.push(c);
+        t.mark_start();
+    }
+    StringLiteral { string, chars }
+}
+
+/// Convert a character to the integer value of its Unicode scalar value.
+fn parse_char(c: char) -> isize {
+    let scalar = u32::from(c);
+    scalar as isize
+}
+
+
+/// Parse a word template from a child tokeniser.
+///
+/// Each character other than an underscore contributes one bit to the word.
+/// The characters '0' and '1' set the bit directly, and a run of the same
+/// alphabetic character forms a named bit field. Any other character is an
+/// error, as is a field name that is used by more than one run. If any error
+/// was recorded then the list of errors is returned instead of the template.
+fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> {
+ let mut value = 0; // Value of the whole word template.
+ let mut value_width = 0; // Bit width of the whole word template.
+ let mut field_width = 0; // Width of the current bit field.
+ let mut field_name = '\0'; // Name of the current bit field.
+ let mut fields: Vec<Tracked<BitField>> = Vec::new();
+ let mut errors: Vec<Tracked<SyntacticError>> = Vec::new();
+
+ // Push the current field with a shift of zero, or record an error if a
+ // field with the same name has already been pushed.
+ macro_rules! push_field {
+ () => {
+ if fields.iter().any(|f| f.name == field_name) {
+ let error = SyntacticError::DuplicateFieldNameInWord(field_name);
+ errors.push(Tracked::from(error, t.get_source()));
+ } else {
+ let field = BitField { name: field_name, width: field_width, shift: 0};
+ fields.push(Tracked::from(field, t.get_source()));
+ }
+ };
+ }
+
+ while let Some(c) = t.eat_char() {
+ // Ignore underscores.
+ if c == '_' {
+ t.mark.undo();
+ continue;
+ }
+
+ // Add a bit to the value;
+ value <<= 1;
+ value_width += 1;
+ // Every field pushed so far moves up by one bit.
+ for field in &mut fields {
+ field.shift += 1;
+ }
+
+ // Extend the current field.
+ if c == field_name {
+ field_width += 1;
+ continue;
+ }
+
+ // Commit the current field.
+ if field_width > 0 {
+ t.mark_end_prev();
+ push_field!();
+ field_width = 0;
+ field_name = '\0';
+ }
+
+ // Parse bit literals.
+ if c == '0' {
+ continue;
+ }
+ if c == '1' {
+ value |= 1;
+ continue;
+ }
+
+ // Start a new field, or reject the character.
+ t.mark_start_prev();
+ if c.is_alphabetic() {
+ field_name = c;
+ field_width = 1;
+ continue;
+ } else {
+ t.mark_end();
+ let error = SyntacticError::InvalidCharacterInWord(c);
+ errors.push(Tracked::from(error, t.get_source()));
+ }
+ }
+
+ // Commit the final field.
+ // A field pushed inside the loop is pushed after the shift for the
+ // character that ended it, so it is one shift short. Make up that shift
+ // here, before the final field is pushed with a shift of zero.
+ for field in &mut fields {
+ field.shift += 1;
+ }
+ if field_width > 0 {
+ t.mark_end();
+ push_field!();
+ }
+
+ match errors.is_empty() {
+ true => Ok(WordTemplate { value, width: value_width, fields }),
+ false => Err(errors),
+ }
+}
diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs
new file mode 100644
index 0000000..eabf34b
--- /dev/null
+++ b/src/stages/syntactic_tokens.rs
@@ -0,0 +1,160 @@
+use crate::*;
+
+/// A token of the syntactic stage.
+///
+/// Tokens can nest: macro definitions, block literals and expressions each
+/// hold a list of further tracked tokens.
+pub enum SyntacticToken {
+ // Definitions.
+ LabelDefinition(ScopedSymbol),
+ MacroDefinition(SyntacticMacroDefinition),
+
+ // Literal values.
+ IntegerLiteral(isize),
+ StringLiteral(StringLiteral),
+ WordTemplate(WordTemplate),
+
+ // Containers holding nested tokens.
+ BlockLiteral(Vec<Tracked<SyntacticToken>>),
+ Expression(Vec<Tracked<SyntacticToken>>),
+
+ // A symbol appearing outside of a label definition.
+ Symbol(ScopedSymbol),
+
+ // Tokens that carry no data.
+ Separator,
+ Condition,
+ Pin,
+}
+
+/// The payload of `SyntacticToken::MacroDefinition`.
+pub struct SyntacticMacroDefinition {
+ /// Name of the macro, tracked with its source location.
+ pub name: Tracked<String>,
+ /// Tokens held by the definition; `print_syntactic_token` prints these
+ /// nested beneath the definition.
+ pub tokens: Vec<Tracked<SyntacticToken>>,
+}
+
+/// The payload of `SyntacticToken::StringLiteral`.
+pub struct StringLiteral {
+ /// The text of the literal; this is what the `Display` implementation shows.
+ pub string: String,
+ /// NOTE(review): presumably one tracked integer value per character of
+ /// the literal; confirm against the code that constructs this.
+ pub chars: Vec<Tracked<isize>>,
+}
+
+impl std::fmt::Display for StringLiteral {
+    /// Formats the literal by delegating to the formatting of its `string` field,
+    /// so any formatter options are passed straight through.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self { string, .. } = self;
+        string.fmt(f)
+    }
+}
+
+/// A symbol name together with its scope.
+pub enum ScopedSymbol {
+ /// A local symbol; displayed with a leading '~'.
+ Local(String),
+ /// A global symbol; displayed as the bare name.
+ Global(String),
+}
+
+impl std::fmt::Display for ScopedSymbol {
+    /// Writes a local symbol as '~' followed by its name, and a global symbol
+    /// as its bare name.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Local(name) => write!(f, "~{name}"),
+            Self::Global(name) => write!(f, "{name}"),
+        }
+    }
+}
+
+
+/// Errors reported by `report_syntactic_error`.
+pub enum SyntacticError {
+ /// A block was not terminated.
+ UnterminatedBlock,
+ /// An expression was not terminated with ']'.
+ UnterminatedExpression,
+ /// A comment was not terminated with ')'.
+ UnterminatedComment,
+ /// A character literal was not terminated with a ' character.
+ UnterminatedCharacterLiteral,
+ /// A string literal was not terminated with '"'.
+ UnterminatedStringLiteral,
+ /// The named macro definition was not terminated with ';'.
+ UnterminatedMacroDefinition(String),
+
+ /// A block terminator was found while no block was in progress.
+ UnmatchedBlockTerminator,
+ /// An expression terminator was found while no expression was in progress.
+ UnmatchedExpressionTerminator,
+ /// A comment terminator was found while no comment was in progress.
+ UnmatchedCommentTerminator,
+ /// A macro terminator was found while no macro definition was in progress.
+ UnmatchedMacroTerminator,
+
+ /// A character literal did not contain exactly one character.
+ ExpectedSingleCharacter,
+
+ /// The given field name was used more than once in a word.
+ DuplicateFieldNameInWord(char),
+ /// The given character cannot be used in a word.
+ InvalidCharacterInWord(char),
+
+ /// The given string is not a valid decimal literal.
+ InvalidDecimalLiteral(String),
+ /// The given string is not a valid hexadecimal literal.
+ InvalidHexadecimalLiteral(String),
+ /// The given string is not a valid binary literal.
+ InvalidBinaryLiteral(String),
+}
+
+
+pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) {
+ for error in errors {
+ report_syntactic_error(error, source_code);
+ }
+}
+
+fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SyntacticError::UnterminatedBlock =>
+ "Block was not terminated, add a '}}' character to terminate",
+ SyntacticError::UnterminatedExpression =>
+ "Expression was not terminated, add a ']' character to terminate",
+ SyntacticError::UnterminatedComment =>
+ "Comment was not terminated, add a ')' character to terminate",
+ SyntacticError::UnterminatedCharacterLiteral =>
+ "Character was not terminated, add a ' character to terminate",
+ SyntacticError::UnterminatedStringLiteral =>
+ "String was not terminated, add a '\"' character to terminate",
+ SyntacticError::UnterminatedMacroDefinition(name) =>
+ &format!("The '{name}' macro definition was not terminated, add a ';' character to terminate"),
+
+ SyntacticError::UnmatchedBlockTerminator =>
+ "Attempted to terminate a block, but no block was in progress",
+ SyntacticError::UnmatchedExpressionTerminator =>
+ "Attempted to terminate an expression, but no expression was in progress",
+ SyntacticError::UnmatchedCommentTerminator =>
+ "Attempted to terminate a comment, but no comment was in progress",
+ SyntacticError::UnmatchedMacroTerminator =>
+ "Attempted to terminate a macro definition, but no macro definition was in progress",
+
+ SyntacticError::ExpectedSingleCharacter =>
+ "A character literal must contain exactly one character",
+
+ SyntacticError::DuplicateFieldNameInWord(name) =>
+ &format!("The field '{name}' has already been used in this word"),
+ SyntacticError::InvalidCharacterInWord(c) =>
+ &format!("The character '{c}' cannot be used in a word"),
+
+ SyntacticError::InvalidDecimalLiteral(string) =>
+ &format!("The string '{string}' is not a valid decimal literal"),
+ SyntacticError::InvalidHexadecimalLiteral(string) =>
+ &format!("The string '{string}' is not a valid hexadecimal literal"),
+ SyntacticError::InvalidBinaryLiteral(string) =>
+ &format!("The string '{string}' is not a valid binary literal"),
+ };
+
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Prints `token` via `indent!` at level `i`, then prints any tokens nested
+/// inside it at level `i + 1`.
+pub fn print_syntactic_token(i: usize, token: &SyntacticToken) {
+    /// Prints each of `children` one level below `parent_level`.
+    fn print_children(parent_level: usize, children: &[Tracked<SyntacticToken>]) {
+        for child in children {
+            print_syntactic_token(parent_level + 1, child);
+        }
+    }
+
+    match token {
+        // Tokens that hold nested tokens: print a header, then the children.
+        SyntacticToken::MacroDefinition(definition) => {
+            indent!(i, "MacroDefinition({})", definition.name);
+            print_children(i, &definition.tokens);
+        }
+        SyntacticToken::BlockLiteral(tokens) => {
+            indent!(i, "BlockLiteral");
+            print_children(i, tokens);
+        }
+        SyntacticToken::Expression(tokens) => {
+            indent!(i, "Expression");
+            print_children(i, tokens);
+        }
+
+        // Tokens that print as a single line.
+        SyntacticToken::LabelDefinition(symbol) => indent!(i, "LabelDefinition({symbol})"),
+        SyntacticToken::IntegerLiteral(value) => indent!(i, "IntegerLiteral({value})"),
+        SyntacticToken::StringLiteral(literal) => indent!(i, "StringLiteral({literal})"),
+        SyntacticToken::WordTemplate(template) => indent!(i, "WordTemplate({template})"),
+        SyntacticToken::Symbol(symbol) => indent!(i, "Symbol({symbol})"),
+        SyntacticToken::Separator => indent!(i, "Separator"),
+        SyntacticToken::Condition => indent!(i, "Condition"),
+        SyntacticToken::Pin => indent!(i, "Pin"),
+    }
+}