diff options
Diffstat (limited to 'src/translators/syntactic_parser.rs')
-rw-r--r-- | src/translators/syntactic_parser.rs | 178 |
1 files changed, 24 insertions, 154 deletions
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs index 7279daf..8f0850b 100644 --- a/src/translators/syntactic_parser.rs +++ b/src/translators/syntactic_parser.rs @@ -5,117 +5,18 @@ use std::path::PathBuf; /// Translate raw source code characters into syntactic tokens. pub struct SyntacticParser { - /// Path of file from which the source was read. - path: Option<PathBuf>, - /// Path of the original source file. - source_path: Option<PathBuf>, - /// Position of the next character to be read. - position: Position, - /// Previous value of the position field. - prev_position: Position, - /// Line where the embedded source file begins. - source_line_start: usize, - /// Characters waiting to be parsed, in reverse order. - chars: Vec<char>, - /// The token currently being parsed. - token_source_string: String, + tokeniser: Tokeniser, /// The name of the most recently parsed label. label: String, } impl SyntacticParser { - /// Parse source code. pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { - Self { - path: path.map(|p| p.into()), - source_path: None, - position: Position { line: 0, column: 0 }, - prev_position: Position { line: 0, column: 0 }, - source_line_start: 0, - chars: source_code.chars().rev().collect(), - token_source_string: String::new(), - label: String::new(), - } - } - - /// Return the next character, keeping it on the queue. - fn peek_char(&self) -> Option<char> { - self.chars.last().copied() - } - - /// Return the next character, removing it from the queue. - fn eat_char(&mut self) -> Option<char> { - let option = self.chars.pop(); - if let Some(c) = option { - self.prev_position = self.position; - self.position.advance(c); - self.token_source_string.push(c); - } - return option; - } - - /// Remove the next character from the queue. - fn drop_char(&mut self) { - if let Some(c) = self.chars.pop() { - self.prev_position = self.position; - self.position.advance(c); - } - } - - /// Remove leading whitespace. - fn drop_whitespace(&mut self) { - while let Some(c) = self.peek_char() { - match c.is_whitespace() { - true => self.drop_char(), - false => break, - } - } - } - - /// Remove a full token from the queue. - fn eat_token(&mut self) -> String { - const DELIMITERS: [char; 13] = - ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; - let mut token = String::new(); - while let Some(peek) = self.peek_char() { - if peek.is_whitespace() || DELIMITERS.contains(&peek) { - break; - } - let c = self.eat_char().unwrap(); - token.push(c); - if c == ':' { - break; - } - } - token - } - - /// Return all characters until the delimiter, removing all returned - /// characters and the delimiter from the queue. Returns None if end - /// of source is reached before delimiter is found. - fn eat_to_delim(&mut self, delim: char) -> Option<String> { - let mut token = String::new(); - while let Some(c) = self.eat_char() { - self.token_source_string.push(c); - match c == delim { - true => return Some(token), - false => token.push(c), - } - } - return None; - } - - fn is_line_empty(&self) -> bool { - for c in self.chars.iter().rev() { - if *c == '\n' { - return true; - } - if !c.is_whitespace() { - return false - } - } - return false; + let mut tokeniser = Tokeniser::new(source_code, path); + tokeniser.add_delimiters(&['@','&','%',';','[',']','{','}','(','"','\'','#','~']); + tokeniser.add_terminators(&[':']); + Self { tokeniser, label: String::new() } } } @@ -127,35 +28,35 @@ impl Iterator for SyntacticParser { fn next(&mut self) -> Option<SyntacticToken> { use SyntacticTokenVariant as SynVar; use SyntacticParseError as SynErr; + let t = &mut self.tokeniser; - self.drop_whitespace(); - let start = self.position; + t.drop_whitespace(); + t.mark_start_position(); - let variant = match self.eat_char()? { + let variant = match t.eat_char()? { '@' => { - self.label = self.eat_token(); + self.label = t.eat_token(); SynVar::LabelDefinition(self.label.clone()) } '&' => { - let token = self.eat_token(); - let sublabel = format!("{}/{token}", self.label); - SynVar::LabelDefinition(sublabel) + let token = t.eat_token(); + SynVar::LabelDefinition(format!("{}/{token}", self.label)) } - '%' => SynVar::MacroDefinition(self.eat_token()), + '%' => SynVar::MacroDefinition(t.eat_token()), ';' => SynVar::MacroDefinitionTerminator, '[' => SynVar::MarkOpen, ']' => SynVar::MarkClose, '{' => SynVar::BlockOpen, '}' => SynVar::BlockClose, - '(' => match self.eat_to_delim(')') { + '(' => match t.eat_to_delimiter(')') { Some(string) => SynVar::Comment(string), None => SynVar::Error(SynErr::UnterminatedComment), } - '\'' => match self.eat_to_delim('\'') { + '\'' => match t.eat_to_delimiter('\'') { Some(string) => SynVar::String(string.as_bytes().to_vec()), None => SynVar::Error(SynErr::UnterminatedRawString), } - '"' => match self.eat_to_delim('"') { + '"' => match t.eat_to_delimiter('"') { Some(string) => { let mut bytes = string.as_bytes().to_vec(); bytes.push(0x00); @@ -164,20 +65,20 @@ impl Iterator for SyntacticParser { None => SynVar::Error(SynErr::UnterminatedNullString), } '#' => { - let token = self.eat_token(); + let token = t.eat_token(); match token.parse::<Value>() { Ok(value) => SynVar::Padding(value), Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), } }, '~' => { - let token = self.eat_token(); + let token = t.eat_token(); let symbol = format!("{}/{token}", self.label); SynVar::Symbol(symbol) } ':' => SynVar::Symbol(String::from(':')), c => { - let token = format!("{c}{}", self.eat_token()); + let token = format!("{c}{}", t.eat_token()); match token.parse::<Value>() { Ok(value) => SynVar::Literal(value), Err(_) => match token.parse::<Instruction>() { @@ -190,47 +91,16 @@ impl Iterator for SyntacticParser { // Parse source path comments. if let SynVar::Comment(comment) = &variant { - // Check that the comment fills the entire line. - if start.column == 0 && self.is_line_empty() { + // Check if the comment fills the entire line. + if t.start_position.column == 0 && t.end_of_line() { if let Some(path) = comment.strip_prefix(": ") { - self.source_path = Some(PathBuf::from(path.trim())); - self.source_line_start = start.line + 1; + t.source_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start_position.line + 1; } } } - // Find location in current merged file. - let in_merged = SourceLocation { - path: self.path.to_owned(), - start, - end: self.prev_position, - }; - - // Find location in original source file. - let in_source = if start.line >= self.source_line_start { - match &self.source_path { - Some(path) => { - let offset = self.source_line_start; - Some( SourceLocation { - path: Some(path.to_owned()), - start: Position { - line: in_merged.start.line.saturating_sub(offset), - column: in_merged.start.column, - }, - end: Position { - line: in_merged.end.line.saturating_sub(offset), - column: in_merged.end.column, - } - }) - } - None => None, - } - } else { - None - }; - - let string = std::mem::take(&mut self.token_source_string); - let source = SourceSpan { string, in_merged, in_source }; + let source = t.get_source_span(); Some( SyntacticToken { source, variant } ) } } |