Diffstat (limited to 'src/parsers')
-rw-r--r-- | src/parsers/assembler.rs             | 290
-rw-r--r-- | src/parsers/bytecode.rs              | 191
-rw-r--r-- | src/parsers/expression.rs            |  52
-rw-r--r-- | src/parsers/mod.rs                   |  15
-rw-r--r-- | src/parsers/packed_binary_literal.rs |  85
-rw-r--r-- | src/parsers/semantic.rs              | 352
-rw-r--r-- | src/parsers/syntactic.rs             | 172
7 files changed, 0 insertions, 1157 deletions
diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs
deleted file mode 100644
index 61e1a84..0000000
--- a/src/parsers/assembler.rs
+++ /dev/null
@@ -1,290 +0,0 @@
-use crate::*;
-use AssemblerErrorVariant as ErrVar;
-
-use indexmap::IndexMap;
-
-
-static mut ID: usize = 0;
-macro_rules! new_id {
-    () => { unsafe {
-        let id = ID;
-        ID += 1;
-        id
-    }};
-}
-
-
-impl SemanticProgram {
-    pub fn assemble(&self) -> Vec<AssembledToken> {
-        let environment = Environment {
-            macro_definitions: &self.macro_definitions,
-            label_definitions: &self.label_definitions,
-            arguments: &IndexMap::new(),
-            id: new_id!(),
-        };
-        let mut assembled_tokens = Vec::new();
-        for token in &self.body {
-            let tokens = environment.reify_semantic_token(token);
-            assembled_tokens.extend(tokens);
-        }
-        return assembled_tokens;
-    }
-}
-
-
-pub struct Environment<'a> {
-    pub macro_definitions: &'a IndexMap<String, MacroDefinition>,
-    pub label_definitions: &'a IndexMap<String, LabelDefinition>,
-    pub arguments: &'a IndexMap<String, Argument>,
-    pub id: usize,
-}
-
-impl<'a> Environment<'a> {
-    // This is only ever called for the highest level body tokens, never for invocations.
-    fn reify_semantic_token(&self, token: &SemanticToken) -> Vec<AssembledToken> {
-        let mut assembled_tokens = Vec::new();
-        match token {
-            SemanticToken::Word(pbl) => {
-                let word = self.reify_packed_binary_literal(pbl);
-                assembled_tokens.push(AssembledToken::Word(word));
-            }
-            SemanticToken::Invocation(invocation) => {
-                match self.reify_invocation(invocation) {
-                    Ok(argument) => match argument {
-                        Argument::Block(block) => assembled_tokens.extend(block),
-                        Argument::Integer(_) => {
-                            let variant = AssemblerErrorVariant::NotABlock;
-                            let source = invocation.source.clone();
-                            let error = AssemblerError { source, variant };
-                            assembled_tokens.push(AssembledToken::Error(error))
-                        }
-                    }
-                    Err(error) => assembled_tokens.push(AssembledToken::Error(error)),
-                }
-            }
-            SemanticToken::LabelDefinition(definition) => {
-                assembled_tokens.push(AssembledToken::LabelDefinition(definition.clone()));
-            }
-            SemanticToken::PinnedAddress(address) => {
-                assembled_tokens.push(AssembledToken::PinnedAddress(address.clone()));
-            }
-            SemanticToken::Error(_) => (),
-        }
-        return assembled_tokens;
-    }
-
-    fn reify_packed_binary_literal(&self, pbl: &PackedBinaryLiteral) -> AssembledWord {
-        let mut assembled_fields = Vec::new();
-        let mut errors = Vec::new();
-        for field in &pbl.fields {
-            let name = field.name.to_string();
-            match self.reify_integer_reference(&name, &field.source) {
-                Ok(value) => assembled_fields.push(
-                    AssembledField {
-                        source: field.source.clone(),
-                        value,
-                        bits: field.bits,
-                        shift: field.shift,
-                    }
-                ),
-                Err(error) => errors.push(error),
-            };
-        }
-        let source = pbl.source.clone();
-        let value = pbl.value;
-        let bits = pbl.bits;
-        AssembledWord { source, bits, fields: assembled_fields, value, errors }
-    }
-
-    fn reify_integer_reference(&self, name: &str, source: &SourceSpan) -> Result<IntegerArgument, AssemblerError> {
-        match self.reify_reference(name, source)? {
-            Argument::Integer(integer) => Ok(integer),
-            Argument::Block(_) => Err(
-                AssemblerError {
-                    source: source.clone(),
-                    variant: ErrVar::NotAnInteger,
-                }
-            ),
-        }
-    }
-
-    fn reify_reference(&self, name: &str, source: &SourceSpan) -> Result<Argument, AssemblerError> {
-        let source = source.clone();
-        if let Some(argument) = self.arguments.get(name) {
-            Ok(argument.clone())
-        } else if let Some(definition) = self.macro_definitions.get(name) {
-            self.reify_value(&definition.value)
-        } else if let Some(label) = self.label_definitions.get(name) {
-            let name = Tracked::from(self.tag_label_name(&label.name), source);
-            Ok(Argument::Integer(IntegerArgument::LabelReference(name)))
-        } else {
-            let variant = ErrVar::DefinitionNotFound(name.to_string());
-            Err(AssemblerError { source, variant })
-        }
-    }
-
-    fn tag_label_name(&self, name: &str) -> String {
-        match name.contains(':') {
-            true => format!("{name}:{}", self.id),
-            false => name.to_string(),
-        }
-    }
-
-    fn reify_value(&self, value: &Value) -> Result<Argument, AssemblerError> {
-        match value {
-            Value::Integer(integer) => {
-                let value = match &integer {
-                    Integer::Literal(integer) => {
-                        IntegerArgument::Integer(integer.clone())
-                    }
-                    Integer::Expression(expr) => {
-                        let expr = self.reify_constant_expression(expr)?;
-                        IntegerArgument::Expression(expr)
-                    }
-                    Integer::LabelReference(name) => {
-                        let name = Tracked::from(self.tag_label_name(name), name.source.clone());
-                        IntegerArgument::LabelReference(name)
-                    }
-                    Integer::String(string) => {
-                        IntegerArgument::String(string.clone())
-                    }
-                };
-                Ok(Argument::Integer(value))
-            }
-            Value::Block(block) => {
-                let mut assembled_tokens = Vec::new();
-                for token in block {
-                    match &token {
-                        SemanticToken::Word(pbl) => {
-                            let word = self.reify_packed_binary_literal(pbl);
-                            assembled_tokens.push(AssembledToken::Word(word));
-                        }
-                        SemanticToken::Invocation(invocation) => {
-                            match self.reify_invocation(invocation)? {
-                                Argument::Block(block) => assembled_tokens.extend(block),
-                                Argument::Integer(_) => {
-                                    let source = invocation.source.clone();
-                                    let variant = AssemblerErrorVariant::IntegerInBlock;
-                                    return Err(AssemblerError { source, variant});
-                                }
-                            }
-                        }
-                        SemanticToken::LabelDefinition(definition) => {
-                            let mut definition = definition.clone();
-                            definition.name.push_str(&format!(":{}", self.id));
-                            let token = AssembledToken::LabelDefinition(definition);
-                            assembled_tokens.push(token);
-                        }
-                        SemanticToken::PinnedAddress(address) => {
-                            let token = AssembledToken::PinnedAddress(address.to_owned());
-                            assembled_tokens.push(token);
-                        }
-                        SemanticToken::Error(_) => (),
-                    }
-                }
-                Ok(Argument::Block(assembled_tokens))
-            }
-            Value::Invocation(invocation) => {
-                self.reify_invocation(invocation)
-            }
-        }
-    }
-
-    fn reify_invocation(&self, invocation: &Invocation) -> Result<Argument, AssemblerError> {
-        macro_rules! err {
-            ($variant:expr) => { Err(AssemblerError {
-                source: invocation.source.clone(), variant: $variant
-            }) };
-        }
-        if let Some(argument) = self.arguments.get(&invocation.name) {
-            let expected = 0;
-            let received = invocation.arguments.len();
-            if received != expected {
-                return err!(ErrVar::IncorrectArgumentCount(expected, received));
-            }
-            Ok(argument.clone())
-        } else if let Some(definition) = self.macro_definitions.get(&invocation.name) {
-            // Check that the correct number of arguments were provided.
-            let received = invocation.arguments.len();
-            let expected = definition.arguments.len();
-            if received != expected {
-                return err!(ErrVar::IncorrectArgumentCount(expected, received));
-            }
-            let mut arguments = IndexMap::new();
-            for (i, argument) in invocation.arguments.iter().enumerate() {
-                // Check that the correct types of arguments were provided.
-                let arg_invocation = self.reify_value(&argument.value)?;
-                let arg_invocation_type = match &arg_invocation {
-                    Argument::Integer(_) => ArgumentVariant::Integer,
-                    Argument::Block(_) => ArgumentVariant::Block,
-                };
-                let arg_definition_type = definition.arguments[i].variant;
-                if arg_invocation_type != arg_definition_type {
-                    let variant = ErrVar::IncorrectArgumentType(
-                        arg_definition_type, arg_invocation_type
-                    );
-                    return Err(AssemblerError { source: argument.source.clone(), variant });
-                }
-                let name = definition.arguments[i].name.clone();
-                arguments.insert(name, arg_invocation);
-            }
-            let environment = Environment {
-                macro_definitions: &self.macro_definitions,
-                label_definitions: &self.label_definitions,
-                arguments: &arguments,
-                id: new_id!(),
-            };
-            environment.reify_value(&definition.value)
-        } else if let Some(label) = self.label_definitions.get(&invocation.name) {
-            let expected = 0;
-            let received = invocation.arguments.len();
-            if received != expected {
-                return err!(ErrVar::IncorrectArgumentCount(expected, received));
-            }
-            let name = Tracked::from(self.tag_label_name(&label.name), label.source.clone());
-            Ok(Argument::Integer(IntegerArgument::LabelReference(name)))
-        } else {
-            err!(ErrVar::DefinitionNotFound(invocation.name.to_string()))
-        }
-    }
-
-    fn reify_constant_expression(&self, expr: &Expression) -> Result<AssembledExpression, AssemblerError> {
-        use ExpressionTokenVariant as ExprVar;
-
-        let mut assembled_tokens = Vec::new();
-        for token in &expr.tokens {
-            let assembled_token = match &token.variant {
-                ExprVar::Literal(value) => {
-                    let source = token.source.clone();
-                    let integer = TrackedInteger { source, value: *value };
-                    AssembledExpressionToken::Integer(integer)
-                }
-                ExprVar::Operator(operator) => {
-                    AssembledExpressionToken::Operator(*operator)
-                }
-                ExprVar::Invocation(name) => {
-                    match self.reify_integer_reference(&name, &token.source)? {
-                        IntegerArgument::LabelReference(name) => {
-                            AssembledExpressionToken::LabelReference(name)
-                        }
-                        IntegerArgument::Integer(integer) => {
-                            AssembledExpressionToken::Integer(integer)
-                        }
-                        IntegerArgument::Expression(expr) => {
-                            AssembledExpressionToken::Expression(Box::new(expr))
-                        },
-                        IntegerArgument::String(string) => {
-                            let source = string.source.clone();
-                            let variant = AssemblerErrorVariant::StringInExpression;
-                            return Err(AssemblerError { source, variant })
-                        }
-                    }
-                }
-                ExprVar::Error(_) => continue,
-            };
-            assembled_tokens.push(assembled_token);
-        }
-        Ok(AssembledExpression { source: expr.source.clone(), tokens: assembled_tokens })
-    }
-}
-
diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs
deleted file mode 100644
index ed16e22..0000000
--- a/src/parsers/bytecode.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-use crate::*;
-
-use std::collections::HashMap;
-
-
-pub struct BytecodeGenerator<'a> {
-    tokens: &'a [AssembledToken],
-    addresses: HashMap<String, Tracked<usize>>,
-    words: Vec<Word>,
-    errors: Vec<BytecodeError>,
-}
-
-impl<'a> BytecodeGenerator<'a> {
-    pub fn new(tokens: &'a [AssembledToken]) -> Self {
-        Self {
-            tokens,
-            addresses: HashMap::new(),
-            words: Vec::new(),
-            errors: Vec::new(),
-        }
-    }
-
-    pub fn generate(mut self) -> Bytecode {
-        self.calculate_addresses();
-        for token in self.tokens {
-            match token {
-                AssembledToken::Word(assembled_word) => {
-                    self.assemble_word(assembled_word);
-                }
-                AssembledToken::PinnedAddress(pinned) => {
-                    if self.words.len() > pinned.address {
-                        let variant = BytecodeErrorVariant::PinnedAddressBacktrack(
-                            pinned.address, self.words.len());
-                        let source = pinned.source.clone();
-                        self.errors.push(BytecodeError { source, variant });
-                    } else {
-                        self.words.resize(pinned.address, Word { bits: 0, value: 0});
-                    }
-                }
-                AssembledToken::LabelDefinition(_) => (),
-                AssembledToken::Error(_) => (),
-            }
-        }
-
-        return Bytecode {
-            words: self.words,
-            errors: self.errors,
-        }
-    }
-
-    fn calculate_addresses(&mut self) {
-        let mut i = 0;
-        for token in self.tokens {
-            match token {
-                AssembledToken::LabelDefinition(definition) => {
-                    let address = Tracked::from(i, definition.source.clone());
-                    if let Some(_) = self.addresses.insert(definition.name.clone(), address) {
-                        let name = definition.name.clone();
-                        let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name);
-                        let source = definition.source.clone();
-                        self.errors.push(BytecodeError { source, variant });
-                    }
-                }
-                AssembledToken::Word(word) => {
-                    i += word.count();
-                }
-                AssembledToken::PinnedAddress(pinned) => {
-                    i = pinned.address;
-                }
-                AssembledToken::Error(_) => (),
-            }
-        }
-    }
-
-    fn resolve_expression(&mut self, expr: &AssembledExpression) -> isize {
-        let mut stack = Vec::new();
-        macro_rules! push {
-            ($value:expr) => { stack.push($value) };
-        }
-        macro_rules! pop {
-            ($name:ident) => { let $name = match stack.pop() {
-                Some(value) => value,
-                None => {
-                    let variant = BytecodeErrorVariant::StackUnderflow;
-                    self.errors.push(BytecodeError { source: expr.source.clone(), variant });
-                    return 0;
-                },
-            }; };
-        }
-        macro_rules! truth {
-            ($bool:expr) => { match $bool { true => 1, false => 0 } };
-        }
-
-        for token in &expr.tokens {
-            match &token {
-                AssembledExpressionToken::Integer(value) => {
-                    push!(value.value)
-                }
-                AssembledExpressionToken::LabelReference(name) => {
-                    push!(self.resolve_label_reference(name))
-                }
-                AssembledExpressionToken::Expression(expr) => {
-                    push!(self.resolve_expression(expr))
-                }
-                AssembledExpressionToken::Operator(operator) => match operator {
-                    Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) },
-                    Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) },
-                    Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) },
-                    Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) },
-                    Operator::LessThanEqual => { pop!(b); pop!(a); push!(truth!(a <= b)) },
-                    Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) },
-                    Operator::Add => { pop!(b); pop!(a); push!(a + b) },
-                    Operator::Subtract => { pop!(b); pop!(a); push!(a - b) },
-                    Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) },
-                    Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) },
-                    Operator::And => { pop!(b); pop!(a); push!(a & b) },
-                    Operator::Or => { pop!(b); pop!(a); push!(a | b) },
-                    Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) },
-                    Operator::Not => { pop!(a); push!(!a) },
-                }
-            }
-        }
-
-        let variant = match stack.len() {
-            0 => BytecodeErrorVariant::NoReturnValue,
-            1 => return stack[0],
-            _ => BytecodeErrorVariant::MultipleReturnValues,
-        };
-        self.errors.push(BytecodeError { source: expr.source.clone(), variant});
-        0
-    }
-
-    fn resolve_label_reference(&mut self, name: &Tracked<String>) -> isize {
-        if let Some(address) = self.addresses.get(&name.value) {
-            address.value as isize
-        } else {
-            let variant = BytecodeErrorVariant::DefinitionNotFound(name.value.clone());
-            self.errors.push(BytecodeError { source: name.source.clone(), variant });
-            0
-        }
-    }
-
-    fn assemble_word(&mut self, assembled_word: &AssembledWord) {
-        let mut field_values = Vec::new();
-        for field in &assembled_word.fields {
-            match &field.value {
-                IntegerArgument::Expression(expr) => {
-                    let source = expr.source.clone();
-                    let value = self.resolve_expression(expr);
-                    field_values.push(vec![Tracked::from(value, source)])
-                }
-                IntegerArgument::LabelReference(name) => {
-                    let source = name.source.clone();
-                    let value = self.resolve_label_reference(name);
-                    field_values.push(vec![Tracked::from(value, source)])
-                }
-                IntegerArgument::Integer(integer) => {
-                    let source = integer.source.clone();
-                    let value = integer.value;
-                    field_values.push(vec![Tracked::from(value, source)])
-                }
-                IntegerArgument::String(string) => {
-                    let values = string.chars.iter()
-                        .map(|c| Tracked::from(c.value as isize, c.source.clone()))
-                        .collect();
-                    field_values.push(values);
-                }
-            };
-        }
-        for i in 0..assembled_word.count() {
-            let mut value = assembled_word.value;
-            for (f, field) in assembled_word.fields.iter().enumerate() {
-                let (field_value, source) = match field_values[f].get(i) {
-                    Some(tracked) => (tracked.value, Some(tracked.source.clone())),
-                    None => (0, None),
-                };
-                let bitcount = match field_value {
-                    0 => 0,
-                    _ => (field_value.ilog2() + 1) as usize,
-                };
-                if field.bits < bitcount {
-                    let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount);
-                    self.errors.push(BytecodeError { source: source.unwrap(), variant });
-                } else {
-                    value |= (field_value << field.shift) as usize;
-                }
-            }
-            self.words.push(Word { bits: assembled_word.bits, value });
-        }
-    }
-}
diff --git a/src/parsers/expression.rs b/src/parsers/expression.rs
deleted file mode 100644
index e938881..0000000
--- a/src/parsers/expression.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-use crate::*;
-
-
-pub fn parse_constant_expression(mut t: Tokeniser, source: SourceSpan) -> Expression {
-    use ExpressionTokenVariant as TokenVar;
-    use ExpressionParseError as ParseError;
-
-    let mut tokens = Vec::new();
-
-    loop {
-        t.eat_whitespace();
-        t.mark_start();
-        let token = t.eat_token();
-        if token.is_empty() {
-            break;
-        }
-
-        let variant = match token.as_str() {
-            "=" => TokenVar::Operator(Operator::Equal),
-            "!=" => TokenVar::Operator(Operator::NotEqual),
-            "<" => TokenVar::Operator(Operator::LessThan),
-            ">" => TokenVar::Operator(Operator::GreaterThan),
-            "<=" => TokenVar::Operator(Operator::LessThanEqual),
-            ">=" => TokenVar::Operator(Operator::GreaterThanEqual),
-            "+" => TokenVar::Operator(Operator::Add),
-            "-" => TokenVar::Operator(Operator::Subtract),
-            "<<" => TokenVar::Operator(Operator::LeftShift),
-            ">>" => TokenVar::Operator(Operator::RightShift),
-            "&" => TokenVar::Operator(Operator::And),
-            "|" => TokenVar::Operator(Operator::Or),
-            "^" => TokenVar::Operator(Operator::Xor),
-            "~" => TokenVar::Operator(Operator::Not),
-            _ => if let Some(stripped) = token.strip_prefix("0x") {
-                match usize::from_str_radix(stripped, 16) {
-                    Ok(value) => TokenVar::Literal(value as isize),
-                    Err(_) => TokenVar::Error(
-                        ParseError::InvalidHexadecimalLiteral(stripped.to_string())),
-                }
-            } else {
-                match usize::from_str_radix(&token, 10) {
-                    Ok(value) => TokenVar::Literal(value as isize),
-                    Err(_) => TokenVar::Invocation(token.to_string()),
-                }
-            }
-        };
-
-        let source = t.get_source();
-        tokens.push(ExpressionToken { source, variant });
-    }
-
-    return Expression { source, tokens };
-}
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs
deleted file mode 100644
index da2c23a..0000000
--- a/src/parsers/mod.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-mod expression;
-mod packed_binary_literal;
-
-pub use expression::*;
-pub use packed_binary_literal::*;
-
-mod syntactic;
-mod semantic;
-mod assembler;
-mod bytecode;
-
-pub use syntactic::*;
-pub use semantic::*;
-pub use assembler::*;
-pub use bytecode::*;
diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs
deleted file mode 100644
index 18f8da7..0000000
--- a/src/parsers/packed_binary_literal.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-use crate::*;
-
-
-/// t is a Tokeniser over the characters of the PBL, excluding the leading hash.
-pub fn parse_packed_binary_literal(mut t: Tokeniser, source: SourceSpan) -> PackedBinaryLiteral {
-    use PackedBinaryLiteralParseError as ParseError;
-    use PackedBinaryLiteralParseErrorVariant as ParseErrorVar;
-
-    let mut value = 0;
-    let mut bits = 0;
-    let mut field_bits = 0;
-    let mut name = '\0';
-    let mut fields: Vec<BitField> = Vec::new();
-    let mut errors: Vec<ParseError> = Vec::new();
-
-    macro_rules! push_field {
-        () => {
-            if fields.iter().any(|f| f.name == name) {
-                let variant = ParseErrorVar::DuplicateFieldName(name);
-                errors.push(ParseError { source: t.get_source(), variant });
-            } else {
-                fields.push(BitField { name, source: t.get_source(), bits: field_bits, shift: 0 });
-            }
-        };
-    }
-
-    while let Some(c) = t.eat_char() {
-        // Ignore underscores.
-        if c == '_' {
-            t.mark.undo();
-            continue;
-        }
-
-        // Add a bit to the value;
-        value <<= 1;
-        bits += 1;
-        for field in &mut fields {
-            field.shift += 1;
-        }
-
-        // Extend the current field.
-        if c == name {
-            field_bits += 1;
-            continue;
-        }
-
-        // Commit the current field.
-        if field_bits > 0 {
-            t.mark_end_prev();
-            push_field!();
-            field_bits = 0;
-            name = '\0';
-        }
-
-        // Parse bit literals.
-        if c == '0' {
-            continue;
-        }
-        if c == '1' {
-            value |= 1;
-            continue;
-        }
-
-        t.mark_start_prev();
-        if c.is_alphabetic() {
-            name = c;
-            field_bits = 1;
-            continue;
-        } else {
-            let source = t.get_source();
-            let variant = ParseErrorVar::InvalidCharacter(c);
-            errors.push(ParseError { source, variant });
-        }
-    }
-
-    // Commit the final field.
-    for field in &mut fields {
-        field.shift += 1;
-    }
-    if field_bits > 0 {
-        push_field!();
-    }
-
-    PackedBinaryLiteral { source, bits, value, fields, errors }
-}
diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs
deleted file mode 100644
index 00cfc80..0000000
--- a/src/parsers/semantic.rs
+++ /dev/null
@@ -1,352 +0,0 @@
-use crate::*;
-use SyntacticTokenVariant as SynVar;
-
-use std::collections::VecDeque;
-
-use indexmap::IndexMap;
-
-
-macro_rules! fn_is_syn_variant {
-    ($name:ident, $variant:ty) => { paste::paste! {
-        fn [< is_ $name >](token: &SyntacticToken) -> bool {
-            match token.variant { $variant => true, _ => false, }
-        } } }; }
-fn_is_syn_variant!(block_open, SyntacticTokenVariant::BlockOpen);
-fn_is_syn_variant!(block_close, SyntacticTokenVariant::BlockClose);
-fn_is_syn_variant!(separator, SyntacticTokenVariant::Separator);
-fn_is_syn_variant!(terminator, SyntacticTokenVariant::MacroDefinitionTerminator);
-
-
-pub struct SemanticParser {
-    tokens: Tokens,
-    macro_definitions: IndexMap<String, MacroDefinition>,
-    label_definitions: IndexMap<String, LabelDefinition>,
-    body: Vec<SemanticToken>,
-}
-
-impl SemanticParser {
-    pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self {
-        // Gather all labels ahead of time.
-        let mut label_definitions = IndexMap::new();
-        for token in &syntactic_tokens {
-            if let SyntacticTokenVariant::LabelDefinition(name) = &token.variant {
-                let definition = LabelDefinition {
-                    source: token.source.clone(),
-                    name: name.clone(),
-                };
-                let None = label_definitions.insert(name.to_string(), definition) else {
-                    unreachable!("Duplicate definition for label {name:?}");
-                };
-            }
-        }
-        Self {
-            tokens: Tokens::new(syntactic_tokens),
-            macro_definitions: IndexMap::new(),
-            label_definitions,
-            body: Vec::new(),
-        }
-    }
-
-    pub fn parse(mut self) -> SemanticProgram {
-        while let Some(syn) = self.tokens.pop() {
-            match syn.variant {
-                SynVar::MacroDefinition(name) => {
-                    let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else {
-                        let variant = SemanticParseErrorVariant::UnterminatedMacroDefinition(name);
-                        let error = SemanticParseError { source: syn.source, variant };
-                        self.body.push(SemanticToken::Error(error));
-                        break;
-                    };
-                    let definition = MacroDefinitionParser::new(syn.source, definition_tokens).parse();
-                    let None = self.macro_definitions.insert(name.clone(), definition) else {
-                        unreachable!("Duplicate definition for macro {name}");
-                    };
-                }
-                SynVar::LabelDefinition(name) => {
-                    let label_definition = LabelDefinition { source: syn.source, name };
-                    self.body.push(SemanticToken::LabelDefinition(label_definition));
-                }
-                SynVar::PinnedAddress(address) => {
-                    let pinned_address = PinnedAddress { source: syn.source, address };
-                    self.body.push(SemanticToken::PinnedAddress(pinned_address));
-                }
-                SynVar::Symbol(name) => {
-                    let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse();
-                    self.body.push(SemanticToken::Invocation(invocation));
-                }
-                SynVar::PackedBinaryLiteral(pbl) => {
-                    self.body.push(SemanticToken::Word(pbl));
-                }
-                _ => {
-                    let variant = SemanticParseErrorVariant::InvalidToken;
-                    let error = SemanticParseError { source: syn.source, variant };
-                    self.body.push(SemanticToken::Error(error));
-                }
-            }
-        }
-
-        SemanticProgram {
-            macro_definitions: self.macro_definitions,
-            label_definitions: self.label_definitions,
-            body: self.body,
-        }
-    }
-}
-
-
-pub struct MacroDefinitionParser {
-    source: SourceSpan,
-    tokens: Tokens,
-    arguments: Vec<ArgumentDefinition>,
-    errors: Vec<SemanticParseError>,
-}
-
-impl MacroDefinitionParser {
-    pub fn new(source: SourceSpan, tokens: Tokens) -> Self {
-        Self {
-            tokens,
-            source,
-            arguments: Vec::new(),
-            errors: Vec::new(),
-        }
-    }
-
-    pub fn parse(mut self) -> MacroDefinition {
-        while let Some(definition) = self.parse_argument_definition() {
-            self.arguments.push(definition)
-        }
-        MacroDefinition {
-            value: self.parse_body(),
-            source: self.source,
-            arguments: self.arguments,
-            errors: self.errors,
-        }
-    }
-
-    fn parse_argument_definition(&mut self) -> Option<ArgumentDefinition> {
-        // Only continue if the first token is a separator.
-        self.tokens.pop_if(is_separator)?;
-
-        // Pop argument tokens.
-        let is_block = match self.tokens.pop_if(is_block_open) {
-            Some(_) => true,
-            None => false,
-        };
-        let token = self.tokens.pop();
-        if is_block {
-            self.tokens.pop_if(is_block_close);
-        }
-        // Parse argument token.
-        let token = token?;
-        let source = token.source;
-        if let SynVar::Symbol(name) = token.variant {
-            let variant = match is_block {
-                true => ArgumentVariant::Block,
-                false => ArgumentVariant::Integer,
-            };
-            Some(ArgumentDefinition { name, source, variant })
-        } else {
-            let variant = SemanticParseErrorVariant::InvalidToken;
-            self.errors.push(SemanticParseError { source, variant});
-            None
-        }
-    }
-
-    fn parse_body(&mut self) -> Value {
-        // Attempt to parse an Integer.
-        if self.tokens.len() == 1 {
-            let token = self.tokens.pop().unwrap();
-            match token.variant {
-                SynVar::IntegerLiteral(value) => {
-                    let integer = TrackedInteger { source: token.source, value };
-                    return Value::Integer(Integer::Literal(integer));
-                }
-                SynVar::Expression(expr) => {
-                    return Value::Integer(Integer::Expression(expr));
-                }
-                _ => (),
-            }
-            self.tokens.unpop(token);
-        }
-        // Parse a Block.
-        let mut block = BlockParser::new(self.tokens.take()).parse();
-        // If the block contains a single invocation, unwrap it.
-        if block.len() == 1 {
-            match block.pop() {
-                Some(SemanticToken::Invocation(invocation)) => return Value::Invocation(invocation),
-                Some(other) => block.push(other),
-                None => (),
-            };
-        }
-        return Value::Block(block);
-    }
-}
-
-
-/// Parse an entire block, excluding delimiters.
-pub struct BlockParser {
-    tokens: Tokens,
-    semantic_tokens: Vec<SemanticToken>,
-}
-
-impl BlockParser {
-    pub fn new(tokens: Tokens) -> Self {
-        Self { tokens, semantic_tokens: Vec::new() }
-    }
-
-    pub fn parse(mut self) -> Vec<SemanticToken> {
-        while let Some(token) = self.tokens.pop() {
-            let source = token.source;
-            match token.variant {
-                SynVar::Symbol(name) => {
-                    let invocation = InvocationParser::new(name, source, &mut self.tokens).parse();
-                    self.semantic_tokens.push(SemanticToken::Invocation(invocation));
-                }
-                SynVar::PackedBinaryLiteral(pbl) => {
-                    self.semantic_tokens.push(SemanticToken::Word(pbl));
-                }
-                SynVar::LabelDefinition(name) => {
-                    let label_definition = LabelDefinition { source, name };
-                    self.semantic_tokens.push(SemanticToken::LabelDefinition(label_definition));
-                }
-                _ => {
-                    let variant = SemanticParseErrorVariant::InvalidToken;
-                    let error = SemanticParseError { source, variant };
-                    self.semantic_tokens.push(SemanticToken::Error(error));
-                }
-            }
-        }
-        return self.semantic_tokens;
-    }
-}
-
-
-struct InvocationParser<'a> {
-    name: String,
-    source: SourceSpan,
-    tokens: &'a mut Tokens,
-    arguments: Vec<ArgumentInvocation>,
-    errors: Vec<SemanticParseError>,
-}
-
-impl<'a> InvocationParser<'a> {
-    pub fn new(name: String, source: SourceSpan, tokens: &'a mut Tokens) -> Self {
-        Self { name, source, tokens, arguments: Vec::new(), errors: Vec::new() }
-    }
-
-    pub fn parse(mut self) -> Invocation {
-        while let Some(argument) = self.parse_invocation_argument() {
-            self.arguments.push(argument);
-        }
-        Invocation {
-            name: self.name,
-            source: self.source,
-            arguments: self.arguments,
-            errors: self.errors,
-        }
-    }
-
-    fn parse_invocation_argument(&mut self) -> Option<ArgumentInvocation> {
-        // Only continue if the first token is a separator.
-        self.tokens.pop_if(is_separator)?;
-
-        if let Some(block_open) = self.tokens.pop_if(is_block_open) {
-            let source = block_open.source;
-            let mut depth = 1;
-            let is_matching_block_close = |token: &SyntacticToken| {
-                match token.variant {
-                    SyntacticTokenVariant::BlockOpen => {
-                        depth += 1; false }
-                    SyntacticTokenVariant::BlockClose => {
-                        depth -= 1; depth == 0 }
-                    _ => false,
-                }
-            };
-            if let Ok(block_tokens) = self.tokens.pull_until(is_matching_block_close) {
-                let block = BlockParser::new(block_tokens).parse();
-                Some(ArgumentInvocation { source, value: Value::Block(block) })
-            } else {
-                let variant = SemanticParseErrorVariant::UnterminatedBlock;
-                self.errors.push(SemanticParseError { source, variant });
-                None
-            }
-        } else {
-            let token = self.tokens.pop()?;
-            let source = token.source;
-            match token.variant {
-                SynVar::Symbol(name) => {
-                    let arguments = Vec::new();
-                    let errors = Vec::new();
-                    let invocation = Invocation { source: source.clone(), name, arguments, errors };
-                    let value = Value::Invocation(invocation);
-                    Some(ArgumentInvocation { source, value })
-                }
-                SynVar::IntegerLiteral(value) => {
-                    let integer = TrackedInteger { source: source.clone(), value };
-                    let value = Value::Integer(Integer::Literal(integer));
-                    Some(ArgumentInvocation { source, value })
-                }
-                SynVar::String(string) => {
-                    let value = Value::Integer(Integer::String(string));
-                    Some(ArgumentInvocation { source, value })
-                }
-                SynVar::Expression(expr) => {
-                    let value = Value::Integer(Integer::Expression(expr));
-                    Some(ArgumentInvocation { source, value })
-                }
-                _ => {
-                    let variant = SemanticParseErrorVariant::InvalidToken;
-                    self.errors.push(SemanticParseError { source, variant });
-                    None
-                }
-            }
-        }
-    }
-}
-
-
-pub struct Tokens {
-    tokens: VecDeque<SyntacticToken>,
-}
-
-impl Tokens {
-    pub fn new<T: Into<VecDeque<SyntacticToken>>>(tokens: T) -> Self {
-        Self { tokens: tokens.into() }
-    }
-
-    pub fn pop(&mut self) -> Option<SyntacticToken> {
-        self.tokens.pop_front()
-    }
-
-    pub fn pop_if(&mut self, predicate: fn(&SyntacticToken) -> bool) -> Option<SyntacticToken> {
-        match predicate(self.tokens.front()?) {
-            true => self.tokens.pop_front(),
-            false => None,
-        }
-    }
-
-    pub fn unpop(&mut self, token: SyntacticToken) {
-        self.tokens.push_front(token);
-    }
-
-    /// Pull tokens until the predicate returns true, otherwise return Err.
-    pub fn pull_until(&mut self, mut predicate: impl FnMut(&SyntacticToken) -> bool) -> Result<Self, ()> {
-        let mut output = VecDeque::new();
-        while let Some(token) = self.tokens.pop_front() {
-            match predicate(&token) {
-                true => return Ok(Self::new(output)),
-                false => output.push_back(token),
-            };
-        }
-        return Err(());
-    }
-
-    pub fn take(&mut self) -> Self {
-        Self { tokens: std::mem::take(&mut self.tokens) }
-    }
-
-    pub fn len(&self) -> usize {
-        self.tokens.len()
-    }
-}
-
diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs
deleted file mode 100644
index f3fcec1..0000000
--- a/src/parsers/syntactic.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-use crate::*;
-
-
-pub struct SyntacticParser {
-    tokeniser: Tokeniser,
-    tokens: Vec<SyntacticToken>,
-    /// The name of the macro being parsed.
-    macro_name: Option<String>,
-    /// The name of the most recent label.
-    label_name: String,
-}
-
-impl SyntacticParser {
-    pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
-        let mut tokeniser = Tokeniser::new(source_code, path);
-        tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']);
-        Self {
-            tokeniser,
-            tokens: Vec::new(),
-            macro_name: None,
-            label_name: String::new(),
-        }
-    }
-
-    pub fn parse(mut self) -> Vec<SyntacticToken> {
-        use SyntacticTokenVariant as SynVar;
-        use SyntacticParseError as SynErr;
-        let t = &mut self.tokeniser;
-
-        loop {
-            t.eat_whitespace();
-            t.mark_start();
-            let Some(c) = t.eat_char() else { break };
-            let variant = match c {
-                ':' => SynVar::Separator,
-                '{' => SynVar::BlockOpen,
-                '}' => SynVar::BlockClose,
-                '@' => match &self.macro_name {
-                    Some(_) => {
-                        t.eat_token();
-                        SynVar::Error(SynErr::LabelInMacroDefinition)
-                    }
-                    None => {
-                        self.label_name = t.eat_token();
-                        SynVar::LabelDefinition(self.label_name.clone())
-                    }
-                }
-                '&' => match &self.macro_name {
-                    Some(macro_name) => {
-                        let label_name = format!("{macro_name}:{}", t.eat_token());
-                        SynVar::LabelDefinition(label_name)
-                    }
-                    None => {
-                        let label_name = &self.label_name;
-                        let sublabel_name = format!("{label_name}/{}", t.eat_token());
-                        SynVar::LabelDefinition(sublabel_name)
-                    }
-                }
-                '%' => {
-                    let macro_name = t.eat_token();
-                    self.macro_name = Some(macro_name.clone());
-                    SynVar::MacroDefinition(macro_name)
-                }
-                ';' => {
-                    self.macro_name = None;
-                    SynVar::MacroDefinitionTerminator
-                }
-                '[' => {
-                    t.mark_child();
-                    match t.eat_to_delimiter(']') {
-                        Some(_) => {
-                            let child = t.subtokenise();
-                            t.mark_end();
-                            let expr = parse_constant_expression(child, t.get_source());
-                            SynVar::Expression(expr)
-                        }
-                        None => SynVar::Error(SynErr::UnterminatedExpression),
-                    }
-                }
-                '"' => {
-                    t.mark_child();
-                    match t.eat_to_delimiter('"') {
-                        Some(string) => {
-                            let child = t.subtokenise();
-                            t.mark_end();
-                            let chars = parse_tracked_chars(child);
-                            let tracked_string = TrackedString {
-                                source: t.get_source(), string, chars,
-                            };
-                            SynVar::String(tracked_string)
-                        }
-                        None => SynVar::Error(SynErr::UnterminatedString),
-                    }
-                }
-                '(' => match t.eat_to_delimiter(')') {
-                    Some(string) => {
-                        // Check if the comment fills the entire line.
-                        if t.start.position.column == 0 && t.end_of_line() {
-                            if let Some(path) = string.strip_prefix(": ") {
-                                t.embedded_path = Some(PathBuf::from(path.trim()));
-                                t.embedded_first_line = t.start.position.line + 1;
-                            }
-                        }
-                        continue;
-                    },
-                    None => SynVar::Error(SynErr::UnterminatedComment),
-                }
-                '|' => {
-                    let token = t.eat_token();
-                    if let Some(hex_string) = token.strip_prefix("0x") {
-                        match usize::from_str_radix(hex_string, 16) {
-                            Ok(addr) => SynVar::PinnedAddress(addr),
-                            Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
-                        }
-                    } else {
-                        match usize::from_str_radix(&token, 10) {
-                            Ok(addr) => SynVar::PinnedAddress(addr),
-                            Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)),
-                        }
-                    }
-                }
-                '#' => {
-                    t.mark_child();
-                    t.eat_token();
-                    let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source());
-                    SynVar::PackedBinaryLiteral(pbl)
-                },
-                '~' => match &self.macro_name {
-                    Some(macro_name) => {
-                        let symbol_name = format!("{macro_name}:{}", t.eat_token());
-                        SynVar::Symbol(symbol_name)
-                    }
-                    None => {
-                        let label_name = &self.label_name;
-                        let symbol_name = format!("{label_name}/{}", t.eat_token());
-                        SynVar::Symbol(symbol_name)
-                    }
-                }
-                c => {
-                    let token = format!("{c}{}", t.eat_token());
-                    if let Some(hex_string) = token.strip_prefix("0x") {
-                        match usize::from_str_radix(hex_string, 16) {
-                            Ok(value) => SynVar::IntegerLiteral(value as isize),
-                            Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
-                        }
-                    } else {
-                        match usize::from_str_radix(&token, 10) {
-                            Ok(value) => SynVar::IntegerLiteral(value as isize),
-                            Err(_) => SynVar::Symbol(token),
-                        }
-                    }
-                }
-            };
-
-            t.mark_end();
-            let source = t.get_source();
-            self.tokens.push(SyntacticToken { source, variant });
-        }
-
-        return self.tokens;
-    }
-}
-
-
-fn parse_tracked_chars(mut t: Tokeniser) -> Vec<Tracked<char>> {
-    let mut output = Vec::new();
-    while let Some(c) = t.eat_char() {
-        output.push(Tracked::from(c, t.get_source()));
-        t.mark_start();
-    }
-    return output;
-}
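Note: the four deleted stages formed a single pipeline. A minimal sketch of how they chained together, reconstructed only from the signatures visible in this diff (the `source` and `path` inputs are hypothetical, and the surrounding crate types such as Tokeniser and SemanticProgram are assumed from context):

    // Sketch only: pipeline order inferred from the deleted signatures above.
    let syntactic = SyntacticParser::new(&source, Some(path)).parse(); // Vec<SyntacticToken>
    let program   = SemanticParser::new(syntactic).parse();           // SemanticProgram
    let assembled = program.assemble();                               // Vec<AssembledToken>
    let bytecode  = BytecodeGenerator::new(&assembled).generate();    // Bytecode { words, errors }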