diff options
Diffstat (limited to 'src/translators/syntactic_parser.rs')
-rw-r--r-- | src/translators/syntactic_parser.rs | 247 |
1 file changed, 247 insertions, 0 deletions
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs new file mode 100644 index 0000000..7279daf --- /dev/null +++ b/src/translators/syntactic_parser.rs @@ -0,0 +1,247 @@ +use crate::*; + +use std::path::PathBuf; + + +/// Translate raw source code characters into syntactic tokens. +pub struct SyntacticParser { + /// Path of file from which the source was read. + path: Option<PathBuf>, + /// Path of the original source file. + source_path: Option<PathBuf>, + /// Position of the next character to be read. + position: Position, + /// Previous value of the position field. + prev_position: Position, + /// Line where the embedded source file begins. + source_line_start: usize, + /// Characters waiting to be parsed, in reverse order. + chars: Vec<char>, + /// The token currently being parsed. + token_source_string: String, + /// The name of the most recently parsed label. + label: String, +} + + +impl SyntacticParser { + /// Parse source code. + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + Self { + path: path.map(|p| p.into()), + source_path: None, + position: Position { line: 0, column: 0 }, + prev_position: Position { line: 0, column: 0 }, + source_line_start: 0, + chars: source_code.chars().rev().collect(), + token_source_string: String::new(), + label: String::new(), + } + } + + /// Return the next character, keeping it on the queue. + fn peek_char(&self) -> Option<char> { + self.chars.last().copied() + } + + /// Return the next character, removing it from the queue. + fn eat_char(&mut self) -> Option<char> { + let option = self.chars.pop(); + if let Some(c) = option { + self.prev_position = self.position; + self.position.advance(c); + self.token_source_string.push(c); + } + return option; + } + + /// Remove the next character from the queue. 
+ fn drop_char(&mut self) { + if let Some(c) = self.chars.pop() { + self.prev_position = self.position; + self.position.advance(c); + } + } + + /// Remove leading whitespace. + fn drop_whitespace(&mut self) { + while let Some(c) = self.peek_char() { + match c.is_whitespace() { + true => self.drop_char(), + false => break, + } + } + } + + /// Remove a full token from the queue. + fn eat_token(&mut self) -> String { + const DELIMITERS: [char; 13] = + ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; + let mut token = String::new(); + while let Some(peek) = self.peek_char() { + if peek.is_whitespace() || DELIMITERS.contains(&peek) { + break; + } + let c = self.eat_char().unwrap(); + token.push(c); + if c == ':' { + break; + } + } + token + } + + /// Return all characters until the delimiter, removing all returned + /// characters and the delimiter from the queue. Returns None if end + /// of source is reached before delimiter is found. + fn eat_to_delim(&mut self, delim: char) -> Option<String> { + let mut token = String::new(); + while let Some(c) = self.eat_char() { + self.token_source_string.push(c); + match c == delim { + true => return Some(token), + false => token.push(c), + } + } + return None; + } + + fn is_line_empty(&self) -> bool { + for c in self.chars.iter().rev() { + if *c == '\n' { + return true; + } + if !c.is_whitespace() { + return false + } + } + return false; + } +} + + +impl Iterator for SyntacticParser { + type Item = SyntacticToken; + + /// Sequentially parse tokens from the source code. + fn next(&mut self) -> Option<SyntacticToken> { + use SyntacticTokenVariant as SynVar; + use SyntacticParseError as SynErr; + + self.drop_whitespace(); + let start = self.position; + + let variant = match self.eat_char()? 
{ + '@' => { + self.label = self.eat_token(); + SynVar::LabelDefinition(self.label.clone()) + } + '&' => { + let token = self.eat_token(); + let sublabel = format!("{}/{token}", self.label); + SynVar::LabelDefinition(sublabel) + } + '%' => SynVar::MacroDefinition(self.eat_token()), + ';' => SynVar::MacroDefinitionTerminator, + '[' => SynVar::MarkOpen, + ']' => SynVar::MarkClose, + '{' => SynVar::BlockOpen, + '}' => SynVar::BlockClose, + '(' => match self.eat_to_delim(')') { + Some(string) => SynVar::Comment(string), + None => SynVar::Error(SynErr::UnterminatedComment), + } + '\'' => match self.eat_to_delim('\'') { + Some(string) => SynVar::String(string.as_bytes().to_vec()), + None => SynVar::Error(SynErr::UnterminatedRawString), + } + '"' => match self.eat_to_delim('"') { + Some(string) => { + let mut bytes = string.as_bytes().to_vec(); + bytes.push(0x00); + SynVar::String(bytes) + } + None => SynVar::Error(SynErr::UnterminatedNullString), + } + '#' => { + let token = self.eat_token(); + match token.parse::<Value>() { + Ok(value) => SynVar::Padding(value), + Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), + } + }, + '~' => { + let token = self.eat_token(); + let symbol = format!("{}/{token}", self.label); + SynVar::Symbol(symbol) + } + ':' => SynVar::Symbol(String::from(':')), + c => { + let token = format!("{c}{}", self.eat_token()); + match token.parse::<Value>() { + Ok(value) => SynVar::Literal(value), + Err(_) => match token.parse::<Instruction>() { + Ok(instruction) => SynVar::Instruction(instruction), + Err(_) => SynVar::Symbol(token), + } + } + } + }; + + // Parse source path comments. + if let SynVar::Comment(comment) = &variant { + // Check that the comment fills the entire line. + if start.column == 0 && self.is_line_empty() { + if let Some(path) = comment.strip_prefix(": ") { + self.source_path = Some(PathBuf::from(path.trim())); + self.source_line_start = start.line + 1; + } + } + } + + // Find location in current merged file. 
+ let in_merged = SourceLocation { + path: self.path.to_owned(), + start, + end: self.prev_position, + }; + + // Find location in original source file. + let in_source = if start.line >= self.source_line_start { + match &self.source_path { + Some(path) => { + let offset = self.source_line_start; + Some( SourceLocation { + path: Some(path.to_owned()), + start: Position { + line: in_merged.start.line.saturating_sub(offset), + column: in_merged.start.column, + }, + end: Position { + line: in_merged.end.line.saturating_sub(offset), + column: in_merged.end.column, + } + }) + } + None => None, + } + } else { + None + }; + + let string = std::mem::take(&mut self.token_source_string); + let source = SourceSpan { string, in_merged, in_source }; + Some( SyntacticToken { source, variant } ) + } +} + + +#[derive(Debug)] +pub enum ParseError { + InvalidExtension, + NotFound, + NotReadable, + IsADirectory, + InvalidUtf8, + Unknown, +} |