diff options
Diffstat (limited to 'src/parsers')
-rw-r--r-- | src/parsers/constant_expression.rs | 52 | ||||
-rw-r--r-- | src/parsers/mod.rs | 11 | ||||
-rw-r--r-- | src/parsers/packed_binary_literal.rs | 80 | ||||
-rw-r--r-- | src/parsers/semantic.rs | 21 | ||||
-rw-r--r-- | src/parsers/syntactic.rs | 108 |
5 files changed, 272 insertions, 0 deletions
diff --git a/src/parsers/constant_expression.rs b/src/parsers/constant_expression.rs new file mode 100644 index 0000000..78dc697 --- /dev/null +++ b/src/parsers/constant_expression.rs @@ -0,0 +1,52 @@ +use crate::*; + + +pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantExpression { + use ConstantExpressionTokenVariant as TokenVar; + use ConstantExpressionParseError as ParseError; + + let mut tokens = Vec::new(); + let mut t = Tokeniser::new_child(string, parent); + t.position.to_next_char(); // skip opening delimiter + + loop { + t.drop_whitespace(); + t.mark_start_position(); + let token = t.eat_token(); + if token.is_empty() { + break; + } + + let variant = match token.as_str() { + "=" => TokenVar::Operator(Operator::Equal), + "!" => TokenVar::Operator(Operator::NotEqual), + "<" => TokenVar::Operator(Operator::LessThan), + ">" => TokenVar::Operator(Operator::GreaterThan), + "+" => TokenVar::Operator(Operator::Add), + "-" => TokenVar::Operator(Operator::Subtract), + "<<" => TokenVar::Operator(Operator::LeftShift), + ">>" => TokenVar::Operator(Operator::RightShift), + "&" => TokenVar::Operator(Operator::And), + "|" => TokenVar::Operator(Operator::Or), + "^" => TokenVar::Operator(Operator::Xor), + "~" => TokenVar::Operator(Operator::Not), + _ => if let Some(stripped) = token.strip_prefix("0x") { + match usize::from_str_radix(stripped, 16) { + Ok(value) => TokenVar::IntegerLiteral(value), + Err(_) => TokenVar::Error( + ParseError::InvalidHexadecimalLiteral(stripped.to_string())), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(value) => TokenVar::IntegerLiteral(value), + Err(_) => TokenVar::SymbolReference(token.to_string()), + } + } + }; + + let source = t.mark_end_position(); + tokens.push(ConstantExpressionToken { source, variant }); + } + + return ConstantExpression { tokens }; +} diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs new file mode 100644 index 0000000..91765a9 --- /dev/null +++ b/src/parsers/mod.rs @@ -0,0 +1,11 @@ +mod constant_expression; +pub use constant_expression::*; + +mod packed_binary_literal; +pub use packed_binary_literal::*; + +mod syntactic; +pub use syntactic::*; + +mod semantic; +pub use semantic::*; diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs new file mode 100644 index 0000000..9704fc4 --- /dev/null +++ b/src/parsers/packed_binary_literal.rs @@ -0,0 +1,80 @@ +use crate::*; + + +pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBinaryLiteral { + use PackedBinaryLiteralParseError as ParseError; + use PackedBinaryLiteralParseErrorVariant as ParseErrorVar; + + let mut value = 0; + let mut bits = 0; + let mut name = '\0'; + let mut fields: Vec<BitField> = Vec::new(); + let mut errors: Vec<ParseError> = Vec::new(); + + macro_rules! push_field { + ($source:expr) => { + if fields.iter().any(|f| f.name == name) { + let variant = ParseErrorVar::DuplicateFieldName(name); + errors.push(ParseError { source: $source, variant }); + } else { + fields.push(BitField { name, source: $source, bits, shift: 0 }); + } + }; + } + + let mut t = Tokeniser::new_child(string, parent); + t.position.to_next_char(); // skip opening hash character + + while let Some(c) = t.eat_char() { + // Ignore underscores. + if c == '_' { + t.prev_position = t.prev_prev_position; + continue; + } + + // Add a bit to the value; + value <<= 1; + for field in &mut fields { + field.shift += 1; + } + + // Extend the current field. + if c == name { + bits += 1; + continue; + } + + // Commit the current field. + if bits > 0 { + push_field!(t.mark_prev_end_position()); + bits = 0; + name = '\0'; + } + + // Parse bit literals. + if c == '0' { + continue; + } + if c == '1' { + value |= 1; + continue; + } + + t.mark_prev_start_position(); + if c.is_alphabetic() { + name = c; + bits = 1; + continue; + } else { + let source = t.mark_end_position(); + errors.push(ParseError { source, variant: ParseErrorVar::InvalidCharacter(c) }); + } + } + + // Commit the final field. + if bits > 0 { + push_field!(t.mark_end_position()); + } + + PackedBinaryLiteral { value, fields, errors } +} diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs new file mode 100644 index 0000000..94ed70c --- /dev/null +++ b/src/parsers/semantic.rs @@ -0,0 +1,21 @@ +use crate::*; + + +pub struct SemanticParser { + pub syntactic_tokens: Vec<SyntacticToken>, + pub semantic_tokens: Vec<SemanticToken>, +} + + +impl SemanticParser { + pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self { + Self { + syntactic_tokens, + semantic_tokens: Vec::new(), + } + } + + pub fn parse(&mut self) { + todo!() + } +} diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs new file mode 100644 index 0000000..443e47e --- /dev/null +++ b/src/parsers/syntactic.rs @@ -0,0 +1,108 @@ +use crate::*; + + +pub struct SyntacticParser { + tokeniser: Tokeniser, + /// The name of the most recently parsed label. + label_name: String, + /// The name of the macro being parsed. + macro_name: Option<String>, +} + +impl SyntacticParser { + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + let mut tokeniser = Tokeniser::new(source_code, path); + tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']); + Self { + tokeniser, + label_name: String::new(), + macro_name: None, + } + } +} + + +impl Iterator for SyntacticParser { + type Item = SyntacticToken; + + /// Sequentially parse tokens from the source code. + fn next(&mut self) -> Option<SyntacticToken> { + use SyntacticTokenVariant as SynVar; + use SyntacticParseError as SynErr; + let t = &mut self.tokeniser; + + t.drop_whitespace(); + t.mark_start_position(); + + let variant = match t.eat_char()? { + '@' => { + self.label_name = t.eat_token(); + SynVar::LabelDefinition(self.label_name.clone()) + } + '&' => { + let token = t.eat_token(); + SynVar::LabelDefinition(format!("{}/{token}", self.label_name)) + } + '%' => { + let macro_name = t.eat_token(); + self.macro_name = Some(macro_name.clone()); + SynVar::MacroDefinition(macro_name) + } + ';' => { + self.macro_name = None; + SynVar::MacroDefinitionTerminator + } + '[' => match t.eat_to_delimiter(']') { + Some(string) => { + let constant = ConstantExpression::from_str(&string, t); + SynVar::ConstantExpression(constant) + } + None => SynVar::Error(SynErr::UnterminatedConstantExpression), + } + '{' => SynVar::BlockOpen, + '}' => SynVar::BlockClose, + '(' => match t.eat_to_delimiter(')') { + Some(string) => SynVar::Comment(string), + None => SynVar::Error(SynErr::UnterminatedComment), + } + '#' => { + let token = t.eat_token(); + let pbl = PackedBinaryLiteral::from_str(&token, t); + SynVar::PackedBinaryLiteral(pbl) + }, + '~' => { + let token = t.eat_token(); + SynVar::Symbol(format!("{}/{token}", self.label_name)) + } + ':' => SynVar::Separator, + c => { + let token = format!("{c}{}", t.eat_token()); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(hex) => SynVar::HexadecimalLiteral(hex), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(value) => SynVar::DecimalLiteral(value), + Err(_) => SynVar::Symbol(token), + } + } + } + }; + + // Parse source path comments. + if let SynVar::Comment(comment) = &variant { + // Check if the comment fills the entire line. + if t.start_position.column == 0 && t.end_of_line() { + if let Some(path) = comment.strip_prefix(": ") { + t.source_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start_position.line + 1; + } + } + } + + let source = t.mark_end_position(); + Some( SyntacticToken { source, variant } ) + } +} |