author     Ben Bridle <ben@derelict.engineering>  2025-02-04 18:18:31 +1300
committer  Ben Bridle <ben@derelict.engineering>  2025-02-04 18:18:40 +1300
commit     f69a8f8c312ded212446082682bcabba8e3a9c9f (patch)
tree       887195a8b90235f6a9c584374567b45fffac15b4 /src/translators/syntactic_parser.rs
parent     cf1af202e01cdcbac437ac96f21c4437bf27bb0d (diff)
download   bedrock-asm-f69a8f8c312ded212446082682bcabba8e3a9c9f.zip
Use source code tokeniser from assembler crate
Work-in-progress commit while functionality is moved over to the assembler crate. This commit doesn't compile.
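
For orientation, the sketch below reconstructs the Tokeniser interface this commit relies on, inferred purely from its call sites in the diff that follows. The real type lives in the assembler crate, so everything here (field types, signatures, the Position and SourceSpan stubs) is an assumption except for the names the diff itself uses.

use std::path::PathBuf;

/// Stub for the crate's existing position type (line/column pair).
#[derive(Clone, Copy, Default)]
pub struct Position {
    pub line: usize,
    pub column: usize,
}

/// Stub for the crate's existing source-location type.
pub struct SourceSpan;

pub struct Tokeniser {
    /// Position where the current token began (set by mark_start_position).
    pub start_position: Position,
    /// Path recorded from a full-line `(: path )` comment, if one was seen.
    pub source_path: Option<PathBuf>,
    /// Line in the merged file on which the embedded source file begins.
    pub embedded_first_line: usize,
    // ...plus the character queue and delimiter/terminator sets.
}

impl Tokeniser {
    pub fn new<P: Into<PathBuf>>(_source_code: &str, _path: Option<P>) -> Self {
        unimplemented!()
    }
    /// Register characters that end the current token and stand alone as tokens.
    pub fn add_delimiters(&mut self, _delimiters: &[char]) { unimplemented!() }
    /// Register characters that end the current token but are kept as its
    /// final character (the old eat_token treated ':' this way).
    pub fn add_terminators(&mut self, _terminators: &[char]) { unimplemented!() }
    pub fn drop_whitespace(&mut self) { unimplemented!() }
    /// Record the current position as the start of the next token.
    pub fn mark_start_position(&mut self) { unimplemented!() }
    pub fn eat_char(&mut self) -> Option<char> { unimplemented!() }
    pub fn eat_token(&mut self) -> String { unimplemented!() }
    /// Consume and return everything before `delim`, consuming the delimiter
    /// too; None if the source ends before it is found.
    pub fn eat_to_delimiter(&mut self, _delim: char) -> Option<String> { unimplemented!() }
    /// True if only whitespace remains before the next newline.
    pub fn end_of_line(&self) -> bool { unimplemented!() }
    /// Build the span for the token read since mark_start_position.
    pub fn get_source_span(&mut self) -> SourceSpan { unimplemented!() }
}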
Diffstat (limited to 'src/translators/syntactic_parser.rs')
-rw-r--r--  src/translators/syntactic_parser.rs  178
1 file changed, 24 insertions(+), 154 deletions(-)
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
index 7279daf..8f0850b 100644
--- a/src/translators/syntactic_parser.rs
+++ b/src/translators/syntactic_parser.rs
@@ -5,117 +5,18 @@ use std::path::PathBuf;
/// Translate raw source code characters into syntactic tokens.
pub struct SyntacticParser {
- /// Path of file from which the source was read.
- path: Option<PathBuf>,
- /// Path of the original source file.
- source_path: Option<PathBuf>,
- /// Position of the next character to be read.
- position: Position,
- /// Previous value of the position field.
- prev_position: Position,
- /// Line where the embedded source file begins.
- source_line_start: usize,
- /// Characters waiting to be parsed, in reverse order.
- chars: Vec<char>,
- /// The token currently being parsed.
- token_source_string: String,
+ tokeniser: Tokeniser,
/// The name of the most recently parsed label.
label: String,
}
impl SyntacticParser {
- /// Parse source code.
pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
- Self {
- path: path.map(|p| p.into()),
- source_path: None,
- position: Position { line: 0, column: 0 },
- prev_position: Position { line: 0, column: 0 },
- source_line_start: 0,
- chars: source_code.chars().rev().collect(),
- token_source_string: String::new(),
- label: String::new(),
- }
- }
-
- /// Return the next character, keeping it on the queue.
- fn peek_char(&self) -> Option<char> {
- self.chars.last().copied()
- }
-
- /// Return the next character, removing it from the queue.
- fn eat_char(&mut self) -> Option<char> {
- let option = self.chars.pop();
- if let Some(c) = option {
- self.prev_position = self.position;
- self.position.advance(c);
- self.token_source_string.push(c);
- }
- return option;
- }
-
- /// Remove the next character from the queue.
- fn drop_char(&mut self) {
- if let Some(c) = self.chars.pop() {
- self.prev_position = self.position;
- self.position.advance(c);
- }
- }
-
- /// Remove leading whitespace.
- fn drop_whitespace(&mut self) {
- while let Some(c) = self.peek_char() {
- match c.is_whitespace() {
- true => self.drop_char(),
- false => break,
- }
- }
- }
-
- /// Remove a full token from the queue.
- fn eat_token(&mut self) -> String {
- const DELIMITERS: [char; 13] =
- ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
- let mut token = String::new();
- while let Some(peek) = self.peek_char() {
- if peek.is_whitespace() || DELIMITERS.contains(&peek) {
- break;
- }
- let c = self.eat_char().unwrap();
- token.push(c);
- if c == ':' {
- break;
- }
- }
- token
- }
-
- /// Return all characters until the delimiter, removing all returned
- /// characters and the delimiter from the queue. Returns None if end
- /// of source is reached before delimiter is found.
- fn eat_to_delim(&mut self, delim: char) -> Option<String> {
- let mut token = String::new();
- while let Some(c) = self.eat_char() {
- self.token_source_string.push(c);
- match c == delim {
- true => return Some(token),
- false => token.push(c),
- }
- }
- return None;
- }
-
- fn is_line_empty(&self) -> bool {
- for c in self.chars.iter().rev() {
- if *c == '\n' {
- return true;
- }
- if !c.is_whitespace() {
- return false
- }
- }
- return false;
+ let mut tokeniser = Tokeniser::new(source_code, path);
+ tokeniser.add_delimiters(&['@','&','%',';','[',']','{','}','(','"','\'','#','~']);
+ tokeniser.add_terminators(&[':']);
+ Self { tokeniser, label: String::new() }
}
}
@@ -127,35 +28,35 @@ impl Iterator for SyntacticParser {
fn next(&mut self) -> Option<SyntacticToken> {
use SyntacticTokenVariant as SynVar;
use SyntacticParseError as SynErr;
+ let t = &mut self.tokeniser;
- self.drop_whitespace();
- let start = self.position;
+ t.drop_whitespace();
+ t.mark_start_position();
- let variant = match self.eat_char()? {
+ let variant = match t.eat_char()? {
'@' => {
- self.label = self.eat_token();
+ self.label = t.eat_token();
SynVar::LabelDefinition(self.label.clone())
}
'&' => {
- let token = self.eat_token();
- let sublabel = format!("{}/{token}", self.label);
- SynVar::LabelDefinition(sublabel)
+ let token = t.eat_token();
+ SynVar::LabelDefinition(format!("{}/{token}", self.label))
}
- '%' => SynVar::MacroDefinition(self.eat_token()),
+ '%' => SynVar::MacroDefinition(t.eat_token()),
';' => SynVar::MacroDefinitionTerminator,
'[' => SynVar::MarkOpen,
']' => SynVar::MarkClose,
'{' => SynVar::BlockOpen,
'}' => SynVar::BlockClose,
- '(' => match self.eat_to_delim(')') {
+ '(' => match t.eat_to_delimiter(')') {
Some(string) => SynVar::Comment(string),
None => SynVar::Error(SynErr::UnterminatedComment),
}
- '\'' => match self.eat_to_delim('\'') {
+ '\'' => match t.eat_to_delimiter('\'') {
Some(string) => SynVar::String(string.as_bytes().to_vec()),
None => SynVar::Error(SynErr::UnterminatedRawString),
}
- '"' => match self.eat_to_delim('"') {
+ '"' => match t.eat_to_delimiter('"') {
Some(string) => {
let mut bytes = string.as_bytes().to_vec();
bytes.push(0x00);
@@ -164,20 +65,20 @@ impl Iterator for SyntacticParser {
None => SynVar::Error(SynErr::UnterminatedNullString),
}
'#' => {
- let token = self.eat_token();
+ let token = t.eat_token();
match token.parse::<Value>() {
Ok(value) => SynVar::Padding(value),
Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
}
},
'~' => {
- let token = self.eat_token();
+ let token = t.eat_token();
let symbol = format!("{}/{token}", self.label);
SynVar::Symbol(symbol)
}
':' => SynVar::Symbol(String::from(':')),
c => {
- let token = format!("{c}{}", self.eat_token());
+ let token = format!("{c}{}", t.eat_token());
match token.parse::<Value>() {
Ok(value) => SynVar::Literal(value),
Err(_) => match token.parse::<Instruction>() {
@@ -190,47 +91,16 @@ impl Iterator for SyntacticParser {
// Parse source path comments.
if let SynVar::Comment(comment) = &variant {
- // Check that the comment fills the entire line.
- if start.column == 0 && self.is_line_empty() {
+ // Check if the comment fills the entire line.
+ if t.start_position.column == 0 && t.end_of_line() {
if let Some(path) = comment.strip_prefix(": ") {
- self.source_path = Some(PathBuf::from(path.trim()));
- self.source_line_start = start.line + 1;
+ t.source_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start_position.line + 1;
}
}
}
- // Find location in current merged file.
- let in_merged = SourceLocation {
- path: self.path.to_owned(),
- start,
- end: self.prev_position,
- };
-
- // Find location in original source file.
- let in_source = if start.line >= self.source_line_start {
- match &self.source_path {
- Some(path) => {
- let offset = self.source_line_start;
- Some( SourceLocation {
- path: Some(path.to_owned()),
- start: Position {
- line: in_merged.start.line.saturating_sub(offset),
- column: in_merged.start.column,
- },
- end: Position {
- line: in_merged.end.line.saturating_sub(offset),
- column: in_merged.end.column,
- }
- })
- }
- None => None,
- }
- } else {
- None
- };
-
- let string = std::mem::take(&mut self.token_source_string);
- let source = SourceSpan { string, in_merged, in_source };
+ let source = t.get_source_span();
Some( SyntacticToken { source, variant } )
}
}
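
Usage is unchanged by the refactor: the parser is still built from source text and driven as an iterator of SyntacticTokens. A minimal sketch (the source string and the main.brc path are invented for illustration):

let source = "@start #0010 ( a comment )";
let parser = SyntacticParser::from_source_code(source, Some("main.brc"));
for token in parser {
    match token.variant {
        SyntacticTokenVariant::LabelDefinition(name) => println!("label: {name}"),
        SyntacticTokenVariant::Comment(text) => println!("comment:{text}"),
        _ => {}
    }
}

Note the special comment handling retained in next(): a comment of the form (: path ) that starts at column 0 and fills its own line records the path of an embedded source file, letting get_source_span report each token's location in both the merged file and the original source file.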