summary refs log tree commit diff
path: root/src/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'src/parsers')
-rw-r--r--  src/parsers/assembler.rs                                                      | 282
-rw-r--r--  src/parsers/bytecode.rs                                                       | 161
-rw-r--r--  src/parsers/expression.rs (renamed from src/parsers/constant_expression.rs)   |  26
-rw-r--r--  src/parsers/mod.rs                                                            |  14
-rw-r--r--  src/parsers/packed_binary_literal.rs                                          |  43
-rw-r--r--  src/parsers/semantic.rs                                                       | 339
-rw-r--r--  src/parsers/syntactic.rs                                                      | 197
7 files changed, 783 insertions(+), 279 deletions(-)
diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs
new file mode 100644
index 0000000..eb180e3
--- /dev/null
+++ b/src/parsers/assembler.rs
@@ -0,0 +1,282 @@
+use crate::*;
+use AssemblerErrorVariant as ErrVar;
+
+use indexmap::IndexMap;
+
+
+static mut ID: usize = 0;
+macro_rules! new_id {
+ () => { unsafe {
+ let id = ID;
+ ID += 1;
+ id
+ }};
+}
+
+
+impl SemanticProgram {
+ pub fn assemble(&self) -> Vec<AssembledToken> {
+ let environment = Environment {
+ macro_definitions: &self.macro_definitions,
+ label_definitions: &self.label_definitions,
+ arguments: &IndexMap::new(),
+ id: new_id!(),
+ };
+ let mut assembled_tokens = Vec::new();
+ for token in &self.body {
+ let tokens = environment.reify_semantic_token(token);
+ assembled_tokens.extend(tokens);
+ }
+ return assembled_tokens;
+ }
+}
+
+
+pub struct Environment<'a> {
+ pub macro_definitions: &'a IndexMap<String, MacroDefinition>,
+ pub label_definitions: &'a IndexMap<String, LabelDefinition>,
+ pub arguments: &'a IndexMap<String, Argument>,
+ pub id: usize,
+}
+
+impl<'a> Environment<'a> {
+ // This is only ever called for the highest level body tokens, never for invocations.
+ fn reify_semantic_token(&self, token: &SemanticToken) -> Vec<AssembledToken> {
+ let mut assembled_tokens = Vec::new();
+ match token {
+ SemanticToken::Word(pbl) => {
+ let word = self.reify_packed_binary_literal(pbl);
+ assembled_tokens.push(AssembledToken::Word(word));
+ }
+ SemanticToken::Invocation(invocation) => {
+ match self.reify_invocation(invocation) {
+ Ok(argument) => match argument {
+ Argument::Block(block) => assembled_tokens.extend(block),
+ Argument::Integer(_) => {
+ let variant = AssemblerErrorVariant::NotABlock;
+ let source = invocation.source.clone();
+ let error = AssemblerError { source, variant };
+ assembled_tokens.push(AssembledToken::Error(error))
+ }
+ }
+ Err(error) => assembled_tokens.push(AssembledToken::Error(error)),
+ }
+ }
+ SemanticToken::LabelDefinition(definition) => {
+ assembled_tokens.push(AssembledToken::LabelDefinition(definition.clone()));
+ }
+ SemanticToken::PinnedAddress(address) => {
+ assembled_tokens.push(AssembledToken::PinnedAddress(address.clone()));
+ }
+ SemanticToken::Error(_) => (),
+ }
+ return assembled_tokens;
+ }
+
+ fn reify_packed_binary_literal(&self, pbl: &PackedBinaryLiteral) -> AssembledWord {
+ let mut assembled_fields = Vec::new();
+ let mut errors = Vec::new();
+ for field in &pbl.fields {
+ let name = field.name.to_string();
+ match self.reify_integer_reference(&name, &field.source) {
+ Ok(value) => assembled_fields.push(
+ AssembledField {
+ source: field.source.clone(),
+ value,
+ bits: field.bits,
+ shift: field.shift,
+ }
+ ),
+ Err(error) => errors.push(error),
+ };
+ }
+ let source = pbl.source.clone();
+ let value = pbl.value;
+ let bits = pbl.bits;
+ AssembledWord { source, bits, fields: assembled_fields, value, errors }
+ }
+
+ fn reify_integer_reference(&self, name: &str, source: &SourceSpan) -> Result<IntegerArgument, AssemblerError> {
+ match self.reify_reference(name, source)? {
+ Argument::Integer(integer) => Ok(integer),
+ Argument::Block(_) => Err(
+ AssemblerError {
+ source: source.clone(),
+ variant: ErrVar::NotAnInteger,
+ }
+ ),
+ }
+ }
+
+ fn reify_reference(&self, name: &str, source: &SourceSpan) -> Result<Argument, AssemblerError> {
+ let source = source.clone();
+ if let Some(argument) = self.arguments.get(name) {
+ Ok(argument.clone())
+ } else if let Some(definition) = self.macro_definitions.get(name) {
+ self.reify_value(&definition.value)
+ } else if let Some(label) = self.label_definitions.get(name) {
+ let name = Tracked::from(self.tag_label_name(&label.name), &source);
+ Ok(Argument::Integer(IntegerArgument::LabelReference(name)))
+ } else {
+ let variant = ErrVar::DefinitionNotFound(name.to_string());
+ Err(AssemblerError { source, variant })
+ }
+ }
+
+ fn tag_label_name(&self, name: &str) -> String {
+ match name.contains(':') {
+ true => format!("{name}:{}", self.id),
+ false => name.to_string(),
+ }
+ }
+
+ fn reify_value(&self, value: &Value) -> Result<Argument, AssemblerError> {
+ match value {
+ Value::Integer(integer) => {
+ let value = match &integer {
+ Integer::Literal(integer) => {
+ IntegerArgument::Integer(integer.clone())
+ }
+ Integer::Expression(expr) => {
+ let expr = self.reify_constant_expression(expr)?;
+ IntegerArgument::Expression(expr)
+ }
+ Integer::LabelReference(name) => {
+ let name = Tracked::from(self.tag_label_name(name), &name.source);
+ IntegerArgument::LabelReference(name)
+ }
+ };
+ Ok(Argument::Integer(value))
+ }
+ Value::Block(block) => {
+ let mut assembled_tokens = Vec::new();
+ for token in block {
+ match &token {
+ SemanticToken::Word(pbl) => {
+ let word = self.reify_packed_binary_literal(pbl);
+ assembled_tokens.push(AssembledToken::Word(word));
+ }
+ SemanticToken::Invocation(invocation) => {
+ match self.reify_invocation(invocation)? {
+ Argument::Block(block) => assembled_tokens.extend(block),
+ Argument::Integer(_) => {
+ let source = invocation.source.clone();
+ let variant = AssemblerErrorVariant::IntegerInBlock;
+ return Err(AssemblerError { source, variant});
+ }
+ }
+ }
+ SemanticToken::LabelDefinition(definition) => {
+ let mut definition = definition.clone();
+ definition.name.push_str(&format!(":{}", self.id));
+ let token = AssembledToken::LabelDefinition(definition);
+ assembled_tokens.push(token);
+ }
+ SemanticToken::PinnedAddress(address) => {
+ let token = AssembledToken::PinnedAddress(address.to_owned());
+ assembled_tokens.push(token);
+ }
+ SemanticToken::Error(_) => (),
+ }
+ }
+ Ok(Argument::Block(assembled_tokens))
+ }
+ Value::Invocation(invocation) => {
+ self.reify_invocation(invocation)
+ }
+ }
+ }
+
+ fn reify_invocation(&self, invocation: &Invocation) -> Result<Argument, AssemblerError> {
+ macro_rules! err {
+ ($variant:expr) => { Err(AssemblerError {
+ source: invocation.source.clone(), variant: $variant
+ }) };
+ }
+ if let Some(argument) = self.arguments.get(&invocation.name) {
+ let expected = 0;
+ let received = invocation.arguments.len();
+ if received != expected {
+ return err!(ErrVar::IncorrectArgumentCount(expected, received));
+ }
+ Ok(argument.clone())
+ } else if let Some(definition) = self.macro_definitions.get(&invocation.name) {
+ // Check that the correct number of arguments were provided.
+ let received = invocation.arguments.len();
+ let expected = definition.arguments.len();
+ if received != expected {
+ return err!(ErrVar::IncorrectArgumentCount(expected, received));
+ }
+ let mut arguments = IndexMap::new();
+ for (i, argument) in invocation.arguments.iter().enumerate() {
+ // Check that the correct types of arguments were provided.
+ let arg_invocation = self.reify_value(&argument.value)?;
+ let arg_invocation_type = match &arg_invocation {
+ Argument::Integer(_) => ArgumentVariant::Integer,
+ Argument::Block(_) => ArgumentVariant::Block,
+ };
+ let arg_definition_type = definition.arguments[i].variant;
+ if arg_invocation_type != arg_definition_type {
+ let variant = ErrVar::IncorrectArgumentType(
+ arg_definition_type, arg_invocation_type
+ );
+ return Err(AssemblerError { source: argument.source.clone(), variant });
+ }
+ let name = definition.arguments[i].name.clone();
+ arguments.insert(name, arg_invocation);
+ }
+ let environment = Environment {
+ macro_definitions: &self.macro_definitions,
+ label_definitions: &self.label_definitions,
+ arguments: &arguments,
+ id: new_id!(),
+ };
+ environment.reify_value(&definition.value)
+ } else if let Some(label) = self.label_definitions.get(&invocation.name) {
+ let expected = 0;
+ let received = invocation.arguments.len();
+ if received != expected {
+ return err!(ErrVar::IncorrectArgumentCount(expected, received));
+ }
+ let name = Tracked::from(self.tag_label_name(&label.name), &label.source);
+ Ok(Argument::Integer(IntegerArgument::LabelReference(name)))
+ } else {
+ err!(ErrVar::DefinitionNotFound(invocation.name.to_string()))
+ }
+ }
+
+ fn reify_constant_expression(&self, expr: &Expression) -> Result<AssembledExpression, AssemblerError> {
+ use ExpressionTokenVariant as ExprVar;
+
+ let mut assembled_tokens = Vec::new();
+ for token in &expr.tokens {
+ let assembled_token = match &token.variant {
+ ExprVar::Literal(value) => {
+ let source = token.source.clone();
+ let integer = TrackedInteger { source, value: *value };
+ AssembledExpressionToken::Integer(integer)
+ }
+ ExprVar::Operator(operator) => {
+ AssembledExpressionToken::Operator(*operator)
+ }
+ ExprVar::Invocation(name) => {
+ match self.reify_integer_reference(&name, &token.source)? {
+ IntegerArgument::LabelReference(name) => {
+ AssembledExpressionToken::LabelReference(name)
+ }
+ IntegerArgument::Integer(integer) => {
+ AssembledExpressionToken::Integer(integer)
+ }
+ IntegerArgument::Expression(expr) => {
+ AssembledExpressionToken::Expression(Box::new(expr))
+ },
+ }
+ }
+ ExprVar::Error(_) => continue,
+ };
+ assembled_tokens.push(assembled_token);
+ }
+ Ok(AssembledExpression { source: expr.source.clone(), tokens: assembled_tokens })
+ }
+}
+
diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs
new file mode 100644
index 0000000..ec19d9f
--- /dev/null
+++ b/src/parsers/bytecode.rs
@@ -0,0 +1,161 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+pub struct BytecodeGenerator<'a> {
+ tokens: &'a [AssembledToken],
+ addresses: HashMap<String, Tracked<usize>>,
+ words: Vec<Word>,
+ errors: Vec<BytecodeError>,
+}
+
+impl<'a> BytecodeGenerator<'a> {
+ pub fn new(tokens: &'a [AssembledToken]) -> Self {
+ Self {
+ tokens,
+ addresses: HashMap::new(),
+ words: Vec::new(),
+ errors: Vec::new(),
+ }
+ }
+
+ pub fn generate(mut self) -> Bytecode {
+ self.calculate_addresses();
+ for token in self.tokens {
+ match token {
+ AssembledToken::Word(assembled_word) => {
+ let mut value = assembled_word.value;
+ for field in &assembled_word.fields {
+ let (field_value, source) = match &field.value {
+ IntegerArgument::Expression(expr) =>
+ (self.resolve_expression(expr), expr.source.clone()),
+ IntegerArgument::LabelReference(name) =>
+ (self.resolve_label_reference(name), name.source.clone()),
+ IntegerArgument::Integer(integer) =>
+ (integer.value, integer.source.clone()),
+ };
+ let bitcount = match field_value {
+ 0 => 0,
+ _ => (field_value.ilog2() + 1) as usize,
+ };
+ if field.bits < bitcount {
+ let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount);
+ self.errors.push(BytecodeError { source, variant });
+ } else {
+ value |= (field_value << field.shift) as usize;
+ }
+ }
+ self.words.push(Word { bits: assembled_word.bits, value });
+ }
+ AssembledToken::PinnedAddress(pinned) => {
+ if self.words.len() > pinned.address {
+ let variant = BytecodeErrorVariant::PinnedAddressBacktrack(
+ pinned.address, self.words.len());
+ let source = pinned.source.clone();
+ self.errors.push(BytecodeError { source, variant });
+ } else {
+ self.words.resize(pinned.address, Word { bits: 0, value: 0});
+ }
+ }
+ AssembledToken::LabelDefinition(_) => (),
+ AssembledToken::Error(_) => (),
+ }
+ }
+
+ return Bytecode {
+ words: self.words,
+ errors: self.errors,
+ }
+ }
+
+ fn calculate_addresses(&mut self) {
+ let mut i = 0;
+ for token in self.tokens {
+ match token {
+ AssembledToken::LabelDefinition(definition) => {
+ let address = Tracked::from(i, &definition.source);
+ if let Some(_) = self.addresses.insert(definition.name.clone(), address) {
+ let name = definition.name.clone();
+ let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name);
+ let source = definition.source.clone();
+ self.errors.push(BytecodeError { source, variant });
+ }
+ }
+ AssembledToken::Word(_) => {
+ i += 1;
+ }
+ AssembledToken::PinnedAddress(pinned) => {
+ i = pinned.address;
+ }
+ AssembledToken::Error(_) => (),
+ }
+ }
+ }
+
+ fn resolve_expression(&mut self, expr: &AssembledExpression) -> isize {
+ let mut stack = Vec::new();
+ macro_rules! push {
+ ($value:expr) => { stack.push($value) };
+ }
+ macro_rules! pop {
+ ($name:ident) => { let $name = match stack.pop() {
+ Some(value) => value,
+ None => {
+ let variant = BytecodeErrorVariant::StackUnderflow;
+ self.errors.push(BytecodeError { source: expr.source.clone(), variant });
+ return 0;
+ },
+ }; };
+ }
+ macro_rules! truth {
+ ($bool:expr) => { match $bool { true => 1, false => 0 } };
+ }
+
+ for token in &expr.tokens {
+ match &token {
+ AssembledExpressionToken::Integer(value) => {
+ push!(value.value)
+ }
+ AssembledExpressionToken::LabelReference(name) => {
+ push!(self.resolve_label_reference(name))
+ }
+ AssembledExpressionToken::Expression(expr) => {
+ push!(self.resolve_expression(expr))
+ }
+ AssembledExpressionToken::Operator(operator) => match operator {
+ Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) },
+ Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) },
+ Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) },
+ Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) },
+ Operator::Add => { pop!(b); pop!(a); push!(a + b) },
+ Operator::Subtract => { pop!(b); pop!(a); push!(a - b) },
+ Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) },
+ Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) },
+ Operator::And => { pop!(b); pop!(a); push!(a & b) },
+ Operator::Or => { pop!(b); pop!(a); push!(a | b) },
+ Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) },
+ Operator::Not => { pop!(a); push!(!a) },
+ }
+ }
+ }
+
+ let variant = match stack.len() {
+ 0 => BytecodeErrorVariant::NoReturnValue,
+ 1 => return stack[0],
+ _ => BytecodeErrorVariant::MultipleReturnValues,
+ };
+ self.errors.push(BytecodeError { source: expr.source.clone(), variant});
+ 0
+ }
+
+ fn resolve_label_reference(&mut self, name: &Tracked<String>) -> isize {
+ if let Some(address) = self.addresses.get(&name.value) {
+ address.value as isize
+ } else {
+ let variant = BytecodeErrorVariant::DefinitionNotFound(name.value.clone());
+ self.errors.push(BytecodeError { source: name.source.clone(), variant });
+ 0
+ }
+ }
+}
diff --git a/src/parsers/constant_expression.rs b/src/parsers/expression.rs
index 78dc697..f902858 100644
--- a/src/parsers/constant_expression.rs
+++ b/src/parsers/expression.rs
@@ -1,17 +1,15 @@
use crate::*;
-pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantExpression {
- use ConstantExpressionTokenVariant as TokenVar;
- use ConstantExpressionParseError as ParseError;
+pub fn parse_constant_expression(mut t: Tokeniser, source: SourceSpan) -> Expression {
+ use ExpressionTokenVariant as TokenVar;
+ use ExpressionParseError as ParseError;
let mut tokens = Vec::new();
- let mut t = Tokeniser::new_child(string, parent);
- t.position.to_next_char(); // skip opening delimiter
loop {
- t.drop_whitespace();
- t.mark_start_position();
+ t.eat_whitespace();
+ t.mark_start();
let token = t.eat_token();
if token.is_empty() {
break;
@@ -19,7 +17,7 @@ pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantEx
let variant = match token.as_str() {
"=" => TokenVar::Operator(Operator::Equal),
- "!" => TokenVar::Operator(Operator::NotEqual),
+ "!=" => TokenVar::Operator(Operator::NotEqual),
"<" => TokenVar::Operator(Operator::LessThan),
">" => TokenVar::Operator(Operator::GreaterThan),
"+" => TokenVar::Operator(Operator::Add),
@@ -32,21 +30,21 @@ pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantEx
"~" => TokenVar::Operator(Operator::Not),
_ => if let Some(stripped) = token.strip_prefix("0x") {
match usize::from_str_radix(stripped, 16) {
- Ok(value) => TokenVar::IntegerLiteral(value),
+ Ok(value) => TokenVar::Literal(value as isize),
Err(_) => TokenVar::Error(
ParseError::InvalidHexadecimalLiteral(stripped.to_string())),
}
} else {
match usize::from_str_radix(&token, 10) {
- Ok(value) => TokenVar::IntegerLiteral(value),
- Err(_) => TokenVar::SymbolReference(token.to_string()),
+ Ok(value) => TokenVar::Literal(value as isize),
+ Err(_) => TokenVar::Invocation(token.to_string()),
}
}
};
- let source = t.mark_end_position();
- tokens.push(ConstantExpressionToken { source, variant });
+ let source = t.get_source();
+ tokens.push(ExpressionToken { source, variant });
}
- return ConstantExpression { tokens };
+ return Expression { source, tokens };
}
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs
index 91765a9..da2c23a 100644
--- a/src/parsers/mod.rs
+++ b/src/parsers/mod.rs
@@ -1,11 +1,15 @@
-mod constant_expression;
-pub use constant_expression::*;
-
+mod expression;
mod packed_binary_literal;
+
+pub use expression::*;
pub use packed_binary_literal::*;
mod syntactic;
-pub use syntactic::*;
-
mod semantic;
+mod assembler;
+mod bytecode;
+
+pub use syntactic::*;
pub use semantic::*;
+pub use assembler::*;
+pub use bytecode::*;
diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs
index 9704fc4..18f8da7 100644
--- a/src/parsers/packed_binary_literal.rs
+++ b/src/parsers/packed_binary_literal.rs
@@ -1,53 +1,54 @@
use crate::*;
-pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBinaryLiteral {
+/// t is a Tokeniser over the characters of the PBL, excluding the leading hash.
+pub fn parse_packed_binary_literal(mut t: Tokeniser, source: SourceSpan) -> PackedBinaryLiteral {
use PackedBinaryLiteralParseError as ParseError;
use PackedBinaryLiteralParseErrorVariant as ParseErrorVar;
let mut value = 0;
let mut bits = 0;
+ let mut field_bits = 0;
let mut name = '\0';
let mut fields: Vec<BitField> = Vec::new();
let mut errors: Vec<ParseError> = Vec::new();
macro_rules! push_field {
- ($source:expr) => {
+ () => {
if fields.iter().any(|f| f.name == name) {
let variant = ParseErrorVar::DuplicateFieldName(name);
- errors.push(ParseError { source: $source, variant });
+ errors.push(ParseError { source: t.get_source(), variant });
} else {
- fields.push(BitField { name, source: $source, bits, shift: 0 });
+ fields.push(BitField { name, source: t.get_source(), bits: field_bits, shift: 0 });
}
};
}
- let mut t = Tokeniser::new_child(string, parent);
- t.position.to_next_char(); // skip opening hash character
-
while let Some(c) = t.eat_char() {
// Ignore underscores.
if c == '_' {
- t.prev_position = t.prev_prev_position;
+ t.mark.undo();
continue;
}
+ // Add a bit to the value.
value <<= 1;
+ bits += 1;
for field in &mut fields {
field.shift += 1;
}
// Extend the current field.
if c == name {
- bits += 1;
+ field_bits += 1;
continue;
}
// Commit the current field.
- if bits > 0 {
- push_field!(t.mark_prev_end_position());
- bits = 0;
+ if field_bits > 0 {
+ t.mark_end_prev();
+ push_field!();
+ field_bits = 0;
name = '\0';
}
@@ -60,21 +61,25 @@ pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBi
continue;
}
- t.mark_prev_start_position();
+ t.mark_start_prev();
if c.is_alphabetic() {
name = c;
- bits = 1;
+ field_bits = 1;
continue;
} else {
- let source = t.mark_end_position();
- errors.push(ParseError { source, variant: ParseErrorVar::InvalidCharacter(c) });
+ let source = t.get_source();
+ let variant = ParseErrorVar::InvalidCharacter(c);
+ errors.push(ParseError { source, variant });
}
}
// Commit the final field.
- if bits > 0 {
- push_field!(t.mark_end_position());
+ for field in &mut fields {
+ field.shift += 1;
+ }
+ if field_bits > 0 {
+ push_field!();
}
- PackedBinaryLiteral { value, fields, errors }
+ PackedBinaryLiteral { source, bits, value, fields, errors }
}
diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs
index 7ef4a4a..a58fb5f 100644
--- a/src/parsers/semantic.rs
+++ b/src/parsers/semantic.rs
@@ -1,136 +1,106 @@
use crate::*;
-
-use syntactic as syn;
-use syn::TokenVariant as SynVar;
-use semantic::*;
+use SyntacticTokenVariant as SynVar;
use std::collections::VecDeque;
+use indexmap::IndexMap;
+
macro_rules! fn_is_syn_variant {
($name:ident, $variant:ty) => { paste::paste! {
- fn [< is_ $name >](token: &syn::Token) -> bool {
+ fn [< is_ $name >](token: &SyntacticToken) -> bool {
match token.variant { $variant => true, _ => false, }
} } }; }
-fn_is_syn_variant!(block_open, syn::TokenVariant::BlockOpen);
-fn_is_syn_variant!(block_close, syn::TokenVariant::BlockClose);
-fn_is_syn_variant!(separator, syn::TokenVariant::Separator);
-fn_is_syn_variant!(terminator, syn::TokenVariant::MacroDefinitionTerminator);
-
-
-pub struct Tokens {
- tokens: VecDeque<syn::Token>,
-}
-
-impl Tokens {
- pub fn new<T: Into<VecDeque<syn::Token>>>(tokens: T) -> Self {
- Self { tokens: tokens.into() }
- }
-
- pub fn pop(&mut self) -> Option<syn::Token> {
- self.tokens.pop_front()
- }
+fn_is_syn_variant!(block_open, SyntacticTokenVariant::BlockOpen);
+fn_is_syn_variant!(block_close, SyntacticTokenVariant::BlockClose);
+fn_is_syn_variant!(separator, SyntacticTokenVariant::Separator);
+fn_is_syn_variant!(terminator, SyntacticTokenVariant::MacroDefinitionTerminator);
- pub fn pop_if(&mut self, predicate: fn(&syn::Token) -> bool) -> Option<syn::Token> {
- match predicate(self.tokens.front()?) {
- true => self.tokens.pop_front(),
- false => None,
- }
- }
- pub fn unpop(&mut self, token: syn::Token) {
- self.tokens.push_front(token);
- }
-
- /// Pull tokens until the predicate returns true, otherwise return Err.
- pub fn pull_until(&mut self, mut predicate: impl FnMut(&syn::Token) -> bool) -> Result<Self, ()> {
- let mut output = VecDeque::new();
- while let Some(token) = self.tokens.pop_front() {
- match predicate(&token) {
- true => return Ok(Self::new(output)),
- false => output.push_back(token),
- };
- }
- return Err(());
- }
-
- pub fn take(&mut self) -> Self {
- Self { tokens: std::mem::take(&mut self.tokens) }
- }
-
- pub fn len(&self) -> usize {
- self.tokens.len()
- }
-}
-
-
-pub struct ProgramParser {
+pub struct SemanticParser {
tokens: Tokens,
- definitions: Vec<Definition>,
- invocations: Vec<Invocation>,
- errors: Vec<ParseError>,
+ macro_definitions: IndexMap<String, MacroDefinition>,
+ label_definitions: IndexMap<String, LabelDefinition>,
+ body: Vec<SemanticToken>,
}
-impl ProgramParser {
- pub fn new(syntactic_tokens: Vec<syn::Token>) -> Self {
+impl SemanticParser {
+ pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self {
+ // Gather all labels ahead of time.
+ let mut label_definitions = IndexMap::new();
+ for token in &syntactic_tokens {
+ if let SyntacticTokenVariant::LabelDefinition(name) = &token.variant {
+ let definition = LabelDefinition {
+ source: token.source.clone(),
+ name: name.clone(),
+ };
+ let None = label_definitions.insert(name.to_string(), definition) else {
+ unreachable!("Duplicate definition for label {name:?}");
+ };
+ }
+ }
Self {
tokens: Tokens::new(syntactic_tokens),
- definitions: Vec::new(),
- invocations: Vec::new(),
- errors: Vec::new(),
+ macro_definitions: IndexMap::new(),
+ label_definitions,
+ body: Vec::new(),
}
}
- pub fn parse(mut self) -> Program {
+ pub fn parse(mut self) -> SemanticProgram {
while let Some(syn) = self.tokens.pop() {
match syn.variant {
SynVar::MacroDefinition(name) => {
- // Collect all tokens up to the next definition terminator.
let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else {
- let variant = ParseErrorVariant::UnterminatedMacroDefinition(name);
- self.errors.push(ParseError { source: syn.source, variant});
+ let variant = SemanticParseErrorVariant::UnterminatedMacroDefinition(name);
+ let error = SemanticParseError { source: syn.source, variant };
+ self.body.push(SemanticToken::Error(error));
break;
};
- // Parse macro definition arguments.
- match DefinitionParser::new(name, syn.source, definition_tokens).parse() {
- Ok(definition) => self.definitions.push(definition),
- Err(errors) => self.errors.extend(errors),
+ let definition = MacroDefinitionParser::new(syn.source, definition_tokens).parse();
+ let None = self.macro_definitions.insert(name.clone(), definition) else {
+ unreachable!("Duplicate definition for macro {name}");
};
}
- SynVar::Comment(_) => (),
+ SynVar::LabelDefinition(name) => {
+ let label_definition = LabelDefinition { source: syn.source, name };
+ self.body.push(SemanticToken::LabelDefinition(label_definition));
+ }
+ SynVar::PinnedAddress(address) => {
+ let pinned_address = PinnedAddress { source: syn.source, address };
+ self.body.push(SemanticToken::PinnedAddress(pinned_address));
+ }
SynVar::Symbol(name) => {
- let parser = InvocationParser::new(name, &mut self.tokens);
- self.invocations.push(parser.parse());
+ let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse();
+ self.body.push(SemanticToken::Invocation(invocation));
}
_ => {
- let variant = ParseErrorVariant::InvalidToken;
- self.errors.push(ParseError { source: syn.source, variant});
- break;
+ let variant = SemanticParseErrorVariant::InvalidToken;
+ let error = SemanticParseError { source: syn.source, variant };
+ self.body.push(SemanticToken::Error(error));
}
}
}
- Program {
- definitions: self.definitions,
- invocations: self.invocations,
- errors: self.errors,
+ SemanticProgram {
+ macro_definitions: self.macro_definitions,
+ label_definitions: self.label_definitions,
+ body: self.body,
}
}
}
-pub struct DefinitionParser {
- name: String,
+pub struct MacroDefinitionParser {
source: SourceSpan,
tokens: Tokens,
arguments: Vec<ArgumentDefinition>,
- errors: Vec<ParseError>,
+ errors: Vec<SemanticParseError>,
}
-impl DefinitionParser {
- pub fn new(name: String, source: SourceSpan, tokens: Tokens) -> Self {
+impl MacroDefinitionParser {
+ pub fn new(source: SourceSpan, tokens: Tokens) -> Self {
Self {
- name,
tokens,
source,
arguments: Vec::new(),
@@ -138,20 +108,15 @@ impl DefinitionParser {
}
}
- pub fn parse(mut self) -> Result<Definition, Vec<ParseError>> {
+ pub fn parse(mut self) -> MacroDefinition {
while let Some(definition) = self.parse_argument_definition() {
self.arguments.push(definition)
}
- if self.errors.is_empty() {
- let variant = self.parse_body();
- Ok(Definition {
- name: self.name,
- source: self.source,
- arguments: self.arguments,
- variant,
- })
- } else {
- Err(self.errors)
+ MacroDefinition {
+ value: self.parse_body(),
+ source: self.source,
+ arguments: self.arguments,
+ errors: self.errors,
}
}
@@ -172,47 +137,45 @@ impl DefinitionParser {
let token = token?;
let source = token.source;
if let SynVar::Symbol(name) = token.variant {
- let variant = ArgumentDefinitionVariant::Integer;
+ let variant = match is_block {
+ true => ArgumentVariant::Block,
+ false => ArgumentVariant::Integer,
+ };
Some(ArgumentDefinition { name, source, variant })
} else {
- let name = self.name.clone();
- let variant = ParseErrorVariant::InvalidArgumentDefinition(name);
- self.errors.push(ParseError { source, variant});
+ let variant = SemanticParseErrorVariant::InvalidToken;
+ self.errors.push(SemanticParseError { source, variant});
None
}
}
- fn parse_body(&mut self) -> DefinitionVariant {
- // Attempt to parse an IntegerDefinition.
+ fn parse_body(&mut self) -> Value {
+ // Attempt to parse an Integer.
if self.tokens.len() == 1 {
let token = self.tokens.pop().unwrap();
match token.variant {
- SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => {
- return DefinitionVariant::Integer(IntegerDefinition {
- source: token.source,
- variant: IntegerDefinitionVariant::Literal(value),
- });
+ SynVar::IntegerLiteral(value) => {
+ let integer = TrackedInteger { source: token.source, value };
+ return Value::Integer(Integer::Literal(integer));
}
- SynVar::ConstantExpression(expr) => {
- return DefinitionVariant::Integer(IntegerDefinition {
- source: token.source,
- variant: IntegerDefinitionVariant::Constant(expr),
- });
- }
- SynVar::Symbol(name) => {
- return DefinitionVariant::Reference(ReferenceDefinition {
- source: token.source,
- name,
- });
+ SynVar::Expression(expr) => {
+ return Value::Integer(Integer::Expression(expr));
}
_ => (),
}
self.tokens.unpop(token);
}
-
- // Parse the remaining tokens as a BlockDefinition.
- let block = BlockParser::new(self.tokens.take()).parse();
- return DefinitionVariant::Block(block);
+ // Parse a Block.
+ let mut block = BlockParser::new(self.tokens.take()).parse();
+ // If the block contains a single invocation, unwrap it.
+ if block.len() == 1 {
+ match block.pop() {
+ Some(SemanticToken::Invocation(invocation)) => return Value::Invocation(invocation),
+ Some(other) => block.push(other),
+ None => (),
+ };
+ }
+ return Value::Block(block);
}
}
@@ -220,52 +183,52 @@ impl DefinitionParser {
/// Parse an entire block, excluding delimiters.
pub struct BlockParser {
tokens: Tokens,
- block_tokens: Vec<BlockToken>,
- errors: Vec<ParseError>,
+ semantic_tokens: Vec<SemanticToken>,
}
impl BlockParser {
pub fn new(tokens: Tokens) -> Self {
- Self { tokens, block_tokens: Vec::new(), errors: Vec::new() }
+ Self { tokens, semantic_tokens: Vec::new() }
}
- pub fn parse(mut self) -> BlockDefinition {
+ pub fn parse(mut self) -> Vec<SemanticToken> {
while let Some(token) = self.tokens.pop() {
let source = token.source;
match token.variant {
SynVar::Symbol(name) => {
- let parser = InvocationParser::new(name, &mut self.tokens);
- let invocation = parser.parse();
- let variant = BlockTokenVariant::Invocation(invocation);
- let block_token = BlockToken { source, variant };
- self.block_tokens.push(block_token);
+ let invocation = InvocationParser::new(name, source, &mut self.tokens).parse();
+ self.semantic_tokens.push(SemanticToken::Invocation(invocation));
}
SynVar::PackedBinaryLiteral(pbl) => {
- let variant = BlockTokenVariant::Word(pbl);
- let block_token = BlockToken { source, variant };
- self.block_tokens.push(block_token);
+ self.semantic_tokens.push(SemanticToken::Word(pbl));
+ }
+ SynVar::LabelDefinition(name) => {
+ let label_definition = LabelDefinition { source, name };
+ self.semantic_tokens.push(SemanticToken::LabelDefinition(label_definition));
}
_ => {
- let variant = ParseErrorVariant::InvalidToken;
- self.errors.push(ParseError { source, variant })
+ let variant = SemanticParseErrorVariant::InvalidToken;
+ let error = SemanticParseError { source, variant };
+ self.semantic_tokens.push(SemanticToken::Error(error));
}
}
}
- BlockDefinition { tokens: self.block_tokens, errors: self.errors }
+ return self.semantic_tokens;
}
}
struct InvocationParser<'a> {
name: String,
+ source: SourceSpan,
tokens: &'a mut Tokens,
- arguments: Vec<DefinitionVariant>,
- errors: Vec<ParseError>,
+ arguments: Vec<ArgumentInvocation>,
+ errors: Vec<SemanticParseError>,
}
impl<'a> InvocationParser<'a> {
- pub fn new(name: String, tokens: &'a mut Tokens) -> Self {
- Self { name, tokens, arguments: Vec::new(), errors: Vec::new() }
+ pub fn new(name: String, source: SourceSpan, tokens: &'a mut Tokens) -> Self {
+ Self { name, source, tokens, arguments: Vec::new(), errors: Vec::new() }
}
pub fn parse(mut self) -> Invocation {
@@ -274,33 +237,34 @@ impl<'a> InvocationParser<'a> {
}
Invocation {
name: self.name,
+ source: self.source,
arguments: self.arguments,
errors: self.errors,
}
}
- fn parse_invocation_argument(&mut self) -> Option<DefinitionVariant> {
+ fn parse_invocation_argument(&mut self) -> Option<ArgumentInvocation> {
// Only continue if the first token is a separator.
self.tokens.pop_if(is_separator)?;
if let Some(block_open) = self.tokens.pop_if(is_block_open) {
let source = block_open.source;
let mut depth = 1;
- let is_matching_block_close = |token: &syntactic::Token| {
+ let is_matching_block_close = |token: &SyntacticToken| {
match token.variant {
- syntactic::TokenVariant::BlockOpen => {
+ SyntacticTokenVariant::BlockOpen => {
depth += 1; false }
- syntactic::TokenVariant::BlockClose => {
+ SyntacticTokenVariant::BlockClose => {
depth -= 1; depth == 0 }
_ => false,
}
};
if let Ok(block_tokens) = self.tokens.pull_until(is_matching_block_close) {
let block = BlockParser::new(block_tokens).parse();
- Some(DefinitionVariant::Block(block))
+ Some(ArgumentInvocation { source, value: Value::Block(block) })
} else {
- let variant = ParseErrorVariant::UnterminatedBlockDefinition;
- self.errors.push(ParseError { source, variant });
+ let variant = SemanticParseErrorVariant::UnterminatedBlock;
+ self.errors.push(SemanticParseError { source, variant });
None
}
} else {
@@ -308,25 +272,74 @@ impl<'a> InvocationParser<'a> {
let source = token.source;
match token.variant {
SynVar::Symbol(name) => {
- let reference = ReferenceDefinition { source, name };
- Some(DefinitionVariant::Reference(reference))
+ let arguments = Vec::new();
+ let errors = Vec::new();
+ let invocation = Invocation { source: source.clone(), name, arguments, errors };
+ let value = Value::Invocation(invocation);
+ Some(ArgumentInvocation { source, value })
}
- SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => {
- let variant = IntegerDefinitionVariant::Literal(value);
- let integer = IntegerDefinition { source, variant };
- Some(DefinitionVariant::Integer(integer))
+ SynVar::IntegerLiteral(value) => {
+ let integer = TrackedInteger { source: source.clone(), value };
+ let value = Value::Integer(Integer::Literal(integer));
+ Some(ArgumentInvocation { source, value })
}
- SynVar::ConstantExpression(expr) => {
- let variant = IntegerDefinitionVariant::Constant(expr);
- let integer = IntegerDefinition { source, variant };
- Some(DefinitionVariant::Integer(integer))
+ SynVar::Expression(expr) => {
+ let value = Value::Integer(Integer::Expression(expr));
+ Some(ArgumentInvocation { source, value })
}
_ => {
- let variant = ParseErrorVariant::InvalidToken;
- self.errors.push(ParseError { source, variant });
+ let variant = SemanticParseErrorVariant::InvalidToken;
+ self.errors.push(SemanticParseError { source, variant });
None
}
}
}
}
}
+
+
+pub struct Tokens {
+ tokens: VecDeque<SyntacticToken>,
+}
+
+impl Tokens {
+ pub fn new<T: Into<VecDeque<SyntacticToken>>>(tokens: T) -> Self {
+ Self { tokens: tokens.into() }
+ }
+
+ pub fn pop(&mut self) -> Option<SyntacticToken> {
+ self.tokens.pop_front()
+ }
+
+ pub fn pop_if(&mut self, predicate: fn(&SyntacticToken) -> bool) -> Option<SyntacticToken> {
+ match predicate(self.tokens.front()?) {
+ true => self.tokens.pop_front(),
+ false => None,
+ }
+ }
+
+ pub fn unpop(&mut self, token: SyntacticToken) {
+ self.tokens.push_front(token);
+ }
+
+    /// Pull tokens until the predicate matches one, returning the preceding tokens; the matching token is discarded. Returns Err(()) if no token ever matches.
+ pub fn pull_until(&mut self, mut predicate: impl FnMut(&SyntacticToken) -> bool) -> Result<Self, ()> {
+ let mut output = VecDeque::new();
+ while let Some(token) = self.tokens.pop_front() {
+ match predicate(&token) {
+ true => return Ok(Self::new(output)),
+ false => output.push_back(token),
+ };
+ }
+ return Err(());
+ }
+
+ pub fn take(&mut self) -> Self {
+ Self { tokens: std::mem::take(&mut self.tokens) }
+ }
+
+ pub fn len(&self) -> usize {
+ self.tokens.len()
+ }
+}
+
diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs
index 909dbaa..37f8e6c 100644
--- a/src/parsers/syntactic.rs
+++ b/src/parsers/syntactic.rs
@@ -1,106 +1,147 @@
use crate::*;
-use syntactic::*;
pub struct SyntacticParser {
tokeniser: Tokeniser,
- /// The name of the most recently parsed label.
- label_name: String,
+ tokens: Vec<SyntacticToken>,
/// The name of the macro being parsed.
macro_name: Option<String>,
+    /// The name of the most recently parsed label, used to expand sublabel and scoped symbol names.
+ label_name: String,
}
impl SyntacticParser {
- pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
let mut tokeniser = Tokeniser::new(source_code, path);
- tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']);
+ tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']);
Self {
tokeniser,
- label_name: String::new(),
+ tokens: Vec::new(),
macro_name: None,
+ label_name: String::new(),
}
}
-}
-
-impl Iterator for SyntacticParser {
- type Item = Token;
-
- /// Sequentially parse tokens from the source code.
- fn next(&mut self) -> Option<Token> {
+ pub fn parse(mut self) -> Vec<SyntacticToken> {
+ use SyntacticTokenVariant as SynVar;
+ use SyntacticParseError as SynErr;
let t = &mut self.tokeniser;
- t.drop_whitespace();
- t.mark_start_position();
- let variant = match t.eat_char()? {
- '@' => {
- self.label_name = t.eat_token();
- TokenVariant::LabelDefinition(self.label_name.clone())
- }
- '&' => {
- let token = t.eat_token();
- TokenVariant::LabelDefinition(format!("{}/{token}", self.label_name))
- }
- '%' => {
- let macro_name = t.eat_token();
- self.macro_name = Some(macro_name.clone());
- TokenVariant::MacroDefinition(macro_name)
- }
- ';' => {
- self.macro_name = None;
- TokenVariant::MacroDefinitionTerminator
- }
- '[' => match t.eat_to_delimiter(']') {
- Some(string) => {
- let constant = ConstantExpression::from_str(&string, t);
- TokenVariant::ConstantExpression(constant)
+ loop {
+ t.eat_whitespace();
+ t.mark_start();
+ let Some(c) = t.eat_char() else { break };
+ let variant = match c {
+ ':' => SynVar::Separator,
+ '{' => SynVar::BlockOpen,
+ '}' => SynVar::BlockClose,
+ '@' => match &self.macro_name {
+ Some(_) => {
+ t.eat_token();
+ SynVar::Error(SynErr::LabelInMacroDefinition)
+ }
+ None => {
+ self.label_name = t.eat_token();
+ SynVar::LabelDefinition(self.label_name.clone())
+ }
}
- None => TokenVariant::Error(ParseError::UnterminatedConstantExpression),
- }
- '{' => TokenVariant::BlockOpen,
- '}' => TokenVariant::BlockClose,
- '(' => match t.eat_to_delimiter(')') {
- Some(string) => TokenVariant::Comment(string),
- None => TokenVariant::Error(ParseError::UnterminatedComment),
- }
- '#' => {
- let token = t.eat_token();
- let pbl = PackedBinaryLiteral::from_str(&token, t);
- TokenVariant::PackedBinaryLiteral(pbl)
- },
- '~' => {
- let token = t.eat_token();
- TokenVariant::Symbol(format!("{}/{token}", self.label_name))
- }
- ':' => TokenVariant::Separator,
- c => {
- let token = format!("{c}{}", t.eat_token());
- if let Some(hex_string) = token.strip_prefix("0x") {
- match usize::from_str_radix(hex_string, 16) {
- Ok(hex) => TokenVariant::HexadecimalLiteral(hex),
- Err(_) => TokenVariant::Error(ParseError::InvalidHexadecimalLiteral(token)),
+ '&' => match &self.macro_name {
+ Some(macro_name) => {
+ let label_name = format!("{macro_name}:{}", t.eat_token());
+ SynVar::LabelDefinition(label_name)
}
- } else {
- match usize::from_str_radix(&token, 10) {
- Ok(value) => TokenVariant::DecimalLiteral(value),
- Err(_) => TokenVariant::Symbol(token),
+ None => {
+ let label_name = &self.label_name;
+ let sublabel_name = format!("{label_name}/{}", t.eat_token());
+ SynVar::LabelDefinition(sublabel_name)
}
}
- }
- };
-
- // Parse source path comments.
- if let TokenVariant::Comment(comment) = &variant {
- // Check if the comment fills the entire line.
- if t.start_position.column == 0 && t.end_of_line() {
- if let Some(path) = comment.strip_prefix(": ") {
- t.embedded_path = Some(PathBuf::from(path.trim()));
- t.embedded_first_line = t.start_position.line + 1;
+ '%' => {
+ let macro_name = t.eat_token();
+ self.macro_name = Some(macro_name.clone());
+ SynVar::MacroDefinition(macro_name)
+ }
+ ';' => {
+ self.macro_name = None;
+ SynVar::MacroDefinitionTerminator
}
- }
+ '[' => {
+ t.mark_child();
+ match t.eat_to_delimiter(']') {
+ Some(_) => {
+ let child = t.subtokenise();
+ t.mark_end();
+ let expr = parse_constant_expression(child, t.get_source());
+ SynVar::Expression(expr)
+ }
+ None => SynVar::Error(SynErr::UnterminatedExpression),
+ }
+ }
+ '(' => match t.eat_to_delimiter(')') {
+ Some(string) => {
+ // Check if the comment fills the entire line.
+ if t.start.position.column == 0 && t.end_of_line() {
+ if let Some(path) = string.strip_prefix(": ") {
+ t.embedded_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start.position.line + 1;
+ }
+ }
+ continue;
+ },
+ None => SynVar::Error(SynErr::UnterminatedComment),
+ }
+ '|' => {
+ let token = t.eat_token();
+ if let Some(hex_string) = token.strip_prefix("0x") {
+ match usize::from_str_radix(hex_string, 16) {
+ Ok(addr) => SynVar::PinnedAddress(addr),
+ Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
+ }
+ } else {
+ match usize::from_str_radix(&token, 10) {
+ Ok(addr) => SynVar::PinnedAddress(addr),
+ Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)),
+ }
+ }
+ }
+ '#' => {
+ t.mark_child();
+ t.eat_token();
+ let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source());
+ SynVar::PackedBinaryLiteral(pbl)
+ },
+ '~' => match &self.macro_name {
+ Some(macro_name) => {
+ let symbol_name = format!("{macro_name}:{}", t.eat_token());
+ SynVar::Symbol(symbol_name)
+ }
+ None => {
+ let label_name = &self.label_name;
+ let symbol_name = format!("{label_name}/{}", t.eat_token());
+ SynVar::Symbol(symbol_name)
+ }
+ }
+ c => {
+ let token = format!("{c}{}", t.eat_token());
+ if let Some(hex_string) = token.strip_prefix("0x") {
+ match usize::from_str_radix(hex_string, 16) {
+ Ok(value) => SynVar::IntegerLiteral(value as isize),
+ Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
+ }
+ } else {
+ match usize::from_str_radix(&token, 10) {
+ Ok(value) => SynVar::IntegerLiteral(value as isize),
+ Err(_) => SynVar::Symbol(token),
+ }
+ }
+ }
+ };
+
+ t.mark_end();
+ let source = t.get_source();
+ self.tokens.push(SyntacticToken { source, variant });
}
- let source = t.mark_end_position();
- Some( Token { source, variant } )
+ return self.tokens;
}
}