summaryrefslogtreecommitdiff
path: root/src/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'src/parsers')
-rw-r--r--src/parsers/constant_expression.rs52
-rw-r--r--src/parsers/mod.rs11
-rw-r--r--src/parsers/packed_binary_literal.rs80
-rw-r--r--src/parsers/semantic.rs21
-rw-r--r--src/parsers/syntactic.rs108
5 files changed, 272 insertions, 0 deletions
diff --git a/src/parsers/constant_expression.rs b/src/parsers/constant_expression.rs
new file mode 100644
index 0000000..78dc697
--- /dev/null
+++ b/src/parsers/constant_expression.rs
@@ -0,0 +1,52 @@
+use crate::*;
+
+
+pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantExpression {
+ use ConstantExpressionTokenVariant as TokenVar;
+ use ConstantExpressionParseError as ParseError;
+
+ let mut tokens = Vec::new();
+ let mut t = Tokeniser::new_child(string, parent);
+ t.position.to_next_char(); // skip opening delimiter
+
+ loop {
+ t.drop_whitespace();
+ t.mark_start_position();
+ let token = t.eat_token();
+ if token.is_empty() {
+ break;
+ }
+
+ let variant = match token.as_str() {
+ "=" => TokenVar::Operator(Operator::Equal),
+ "!" => TokenVar::Operator(Operator::NotEqual),
+ "<" => TokenVar::Operator(Operator::LessThan),
+ ">" => TokenVar::Operator(Operator::GreaterThan),
+ "+" => TokenVar::Operator(Operator::Add),
+ "-" => TokenVar::Operator(Operator::Subtract),
+ "<<" => TokenVar::Operator(Operator::LeftShift),
+ ">>" => TokenVar::Operator(Operator::RightShift),
+ "&" => TokenVar::Operator(Operator::And),
+ "|" => TokenVar::Operator(Operator::Or),
+ "^" => TokenVar::Operator(Operator::Xor),
+ "~" => TokenVar::Operator(Operator::Not),
+ _ => if let Some(stripped) = token.strip_prefix("0x") {
+ match usize::from_str_radix(stripped, 16) {
+ Ok(value) => TokenVar::IntegerLiteral(value),
+ Err(_) => TokenVar::Error(
+ ParseError::InvalidHexadecimalLiteral(stripped.to_string())),
+ }
+ } else {
+ match usize::from_str_radix(&token, 10) {
+ Ok(value) => TokenVar::IntegerLiteral(value),
+ Err(_) => TokenVar::SymbolReference(token.to_string()),
+ }
+ }
+ };
+
+ let source = t.mark_end_position();
+ tokens.push(ConstantExpressionToken { source, variant });
+ }
+
+ return ConstantExpression { tokens };
+}
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs
new file mode 100644
index 0000000..91765a9
--- /dev/null
+++ b/src/parsers/mod.rs
@@ -0,0 +1,11 @@
+mod constant_expression;
+pub use constant_expression::*;
+
+mod packed_binary_literal;
+pub use packed_binary_literal::*;
+
+mod syntactic;
+pub use syntactic::*;
+
+mod semantic;
+pub use semantic::*;
diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs
new file mode 100644
index 0000000..9704fc4
--- /dev/null
+++ b/src/parsers/packed_binary_literal.rs
@@ -0,0 +1,80 @@
+use crate::*;
+
+
/// Parse a packed binary literal (e.g. `#10kk_kkii`) into a value with
/// named bit fields.
///
/// Each `0`/`1` character contributes one literal bit to `value`,
/// underscores are ignored, and a run of the same alphabetic character
/// defines a bit field whose width is the run length. Duplicate field
/// names and invalid characters are collected into `errors` rather than
/// aborting the parse.
pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBinaryLiteral {
    use PackedBinaryLiteralParseError as ParseError;
    use PackedBinaryLiteralParseErrorVariant as ParseErrorVar;

    // Accumulated literal bit pattern.
    let mut value = 0;
    // Width of the field currently being scanned (0 = no open field).
    let mut bits = 0;
    // Name of the field currently being scanned; '\0' is the "no field"
    // sentinel, which can never match a source character.
    let mut name = '\0';
    let mut fields: Vec<BitField> = Vec::new();
    let mut errors: Vec<ParseError> = Vec::new();

    // Commit the currently-open field, rejecting duplicate names.
    // A macro (rather than a closure) so it can mutably borrow the
    // surrounding locals at each call site.
    macro_rules! push_field {
        ($source:expr) => {
            if fields.iter().any(|f| f.name == name) {
                let variant = ParseErrorVar::DuplicateFieldName(name);
                errors.push(ParseError { source: $source, variant });
            } else {
                // shift is 0 here; it is incremented as later bits arrive.
                fields.push(BitField { name, source: $source, bits, shift: 0 });
            }
        };
    }

    let mut t = Tokeniser::new_child(string, parent);
    t.position.to_next_char(); // skip opening hash character

    while let Some(c) = t.eat_char() {
        // Ignore underscores.
        if c == '_' {
            // Rewind the previous-character mark so the underscore is not
            // included in any reported source span.
            // NOTE(review): relies on Tokeniser position internals — confirm
            // prev_prev_position is maintained for exactly this purpose.
            t.prev_position = t.prev_prev_position;
            continue;
        }

        // Add a bit to the value;
        // every committed field moves one position further from the LSB.
        value <<= 1;
        for field in &mut fields {
            field.shift += 1;
        }

        // Extend the current field.
        if c == name {
            bits += 1;
            continue;
        }

        // Commit the current field.
        if bits > 0 {
            push_field!(t.mark_prev_end_position());
            bits = 0;
            name = '\0';
        }

        // Parse bit literals.
        if c == '0' {
            continue;
        }
        if c == '1' {
            value |= 1;
            continue;
        }

        // Start a new field (or report an invalid character). Note that an
        // invalid character still occupies a shifted-in zero bit above —
        // presumably deliberate so later bits keep their positions; confirm.
        t.mark_prev_start_position();
        if c.is_alphabetic() {
            name = c;
            bits = 1;
            continue;
        } else {
            let source = t.mark_end_position();
            errors.push(ParseError { source, variant: ParseErrorVar::InvalidCharacter(c) });
        }
    }

    // Commit the final field.
    if bits > 0 {
        push_field!(t.mark_end_position());
    }

    PackedBinaryLiteral { value, fields, errors }
}
diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs
new file mode 100644
index 0000000..94ed70c
--- /dev/null
+++ b/src/parsers/semantic.rs
@@ -0,0 +1,21 @@
+use crate::*;
+
+
+pub struct SemanticParser {
+ pub syntactic_tokens: Vec<SyntacticToken>,
+ pub semantic_tokens: Vec<SemanticToken>,
+}
+
+
+impl SemanticParser {
+ pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self {
+ Self {
+ syntactic_tokens,
+ semantic_tokens: Vec::new(),
+ }
+ }
+
+ pub fn parse(&mut self) {
+ todo!()
+ }
+}
diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs
new file mode 100644
index 0000000..443e47e
--- /dev/null
+++ b/src/parsers/syntactic.rs
@@ -0,0 +1,108 @@
+use crate::*;
+
+
+pub struct SyntacticParser {
+ tokeniser: Tokeniser,
+ /// The name of the most recently parsed label.
+ label_name: String,
+ /// The name of the macro being parsed.
+ macro_name: Option<String>,
+}
+
+impl SyntacticParser {
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ let mut tokeniser = Tokeniser::new(source_code, path);
+ tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']);
+ Self {
+ tokeniser,
+ label_name: String::new(),
+ macro_name: None,
+ }
+ }
+}
+
+
impl Iterator for SyntacticParser {
    type Item = SyntacticToken;

    /// Sequentially parse tokens from the source code.
    ///
    /// Returns `None` at end of input. Each yielded token carries the
    /// source span between the marked start and end positions.
    fn next(&mut self) -> Option<SyntacticToken> {
        use SyntacticTokenVariant as SynVar;
        use SyntacticParseError as SynErr;
        let t = &mut self.tokeniser;

        t.drop_whitespace();
        t.mark_start_position();

        let variant = match t.eat_char()? {
            // Label definition; remembered so sublabels can be qualified.
            '@' => {
                self.label_name = t.eat_token();
                SynVar::LabelDefinition(self.label_name.clone())
            }
            // Sublabel definition, qualified as "parent/child".
            '&' => {
                let token = t.eat_token();
                SynVar::LabelDefinition(format!("{}/{token}", self.label_name))
            }
            // Macro definition; the name is held until the terminator.
            '%' => {
                let macro_name = t.eat_token();
                self.macro_name = Some(macro_name.clone());
                SynVar::MacroDefinition(macro_name)
            }
            ';' => {
                self.macro_name = None;
                SynVar::MacroDefinitionTerminator
            }
            // Constant expression delimited by square brackets.
            '[' => match t.eat_to_delimiter(']') {
                Some(string) => {
                    let constant = ConstantExpression::from_str(&string, t);
                    SynVar::ConstantExpression(constant)
                }
                None => SynVar::Error(SynErr::UnterminatedConstantExpression),
            }
            '{' => SynVar::BlockOpen,
            '}' => SynVar::BlockClose,
            // Comment delimited by parentheses.
            '(' => match t.eat_to_delimiter(')') {
                Some(string) => SynVar::Comment(string),
                None => SynVar::Error(SynErr::UnterminatedComment),
            }
            // Packed binary literal, e.g. #10kk_kkii.
            '#' => {
                let token = t.eat_token();
                let pbl = PackedBinaryLiteral::from_str(&token, t);
                SynVar::PackedBinaryLiteral(pbl)
            },
            // Sublabel reference, qualified against the current label.
            '~' => {
                let token = t.eat_token();
                SynVar::Symbol(format!("{}/{token}", self.label_name))
            }
            ':' => SynVar::Separator,
            // Anything else: hexadecimal literal ("0x…"), decimal literal,
            // or a plain symbol as a fallback.
            c => {
                let token = format!("{c}{}", t.eat_token());
                if let Some(hex_string) = token.strip_prefix("0x") {
                    match usize::from_str_radix(hex_string, 16) {
                        Ok(hex) => SynVar::HexadecimalLiteral(hex),
                        Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)),
                    }
                } else {
                    match usize::from_str_radix(&token, 10) {
                        Ok(value) => SynVar::DecimalLiteral(value),
                        Err(_) => SynVar::Symbol(token),
                    }
                }
            }
        };

        // Parse source path comments.
        // NOTE(review): a whole-line comment of the form "(: path)" appears
        // to mark the original path of embedded source, redirecting
        // subsequent error reporting — confirm against the Tokeniser's use
        // of source_path / embedded_first_line.
        if let SynVar::Comment(comment) = &variant {
            // Check if the comment fills the entire line.
            if t.start_position.column == 0 && t.end_of_line() {
                if let Some(path) = comment.strip_prefix(": ") {
                    t.source_path = Some(PathBuf::from(path.trim()));
                    t.embedded_first_line = t.start_position.line + 1;
                }
            }
        }

        let source = t.mark_end_position();
        Some( SyntacticToken { source, variant } )
    }
}