diff options
Diffstat (limited to 'src/translators/syntactic_parser.rs')
| -rw-r--r-- | src/translators/syntactic_parser.rs | 247 |
1 file changed, 0 insertions(+), 247 deletions(-)
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs deleted file mode 100644 index 7279daf..0000000 --- a/src/translators/syntactic_parser.rs +++ /dev/null @@ -1,247 +0,0 @@ -use crate::*; - -use std::path::PathBuf; - - -/// Translate raw source code characters into syntactic tokens. -pub struct SyntacticParser { - /// Path of file from which the source was read. - path: Option<PathBuf>, - /// Path of the original source file. - source_path: Option<PathBuf>, - /// Position of the next character to be read. - position: Position, - /// Previous value of the position field. - prev_position: Position, - /// Line where the embedded source file begins. - source_line_start: usize, - /// Characters waiting to be parsed, in reverse order. - chars: Vec<char>, - /// The token currently being parsed. - token_source_string: String, - /// The name of the most recently parsed label. - label: String, -} - - -impl SyntacticParser { - /// Parse source code. - pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { - Self { - path: path.map(|p| p.into()), - source_path: None, - position: Position { line: 0, column: 0 }, - prev_position: Position { line: 0, column: 0 }, - source_line_start: 0, - chars: source_code.chars().rev().collect(), - token_source_string: String::new(), - label: String::new(), - } - } - - /// Return the next character, keeping it on the queue. - fn peek_char(&self) -> Option<char> { - self.chars.last().copied() - } - - /// Return the next character, removing it from the queue. - fn eat_char(&mut self) -> Option<char> { - let option = self.chars.pop(); - if let Some(c) = option { - self.prev_position = self.position; - self.position.advance(c); - self.token_source_string.push(c); - } - return option; - } - - /// Remove the next character from the queue. 
- fn drop_char(&mut self) { - if let Some(c) = self.chars.pop() { - self.prev_position = self.position; - self.position.advance(c); - } - } - - /// Remove leading whitespace. - fn drop_whitespace(&mut self) { - while let Some(c) = self.peek_char() { - match c.is_whitespace() { - true => self.drop_char(), - false => break, - } - } - } - - /// Remove a full token from the queue. - fn eat_token(&mut self) -> String { - const DELIMITERS: [char; 13] = - ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; - let mut token = String::new(); - while let Some(peek) = self.peek_char() { - if peek.is_whitespace() || DELIMITERS.contains(&peek) { - break; - } - let c = self.eat_char().unwrap(); - token.push(c); - if c == ':' { - break; - } - } - token - } - - /// Return all characters until the delimiter, removing all returned - /// characters and the delimiter from the queue. Returns None if end - /// of source is reached before delimiter is found. - fn eat_to_delim(&mut self, delim: char) -> Option<String> { - let mut token = String::new(); - while let Some(c) = self.eat_char() { - self.token_source_string.push(c); - match c == delim { - true => return Some(token), - false => token.push(c), - } - } - return None; - } - - fn is_line_empty(&self) -> bool { - for c in self.chars.iter().rev() { - if *c == '\n' { - return true; - } - if !c.is_whitespace() { - return false - } - } - return false; - } -} - - -impl Iterator for SyntacticParser { - type Item = SyntacticToken; - - /// Sequentially parse tokens from the source code. - fn next(&mut self) -> Option<SyntacticToken> { - use SyntacticTokenVariant as SynVar; - use SyntacticParseError as SynErr; - - self.drop_whitespace(); - let start = self.position; - - let variant = match self.eat_char()? 
{ - '@' => { - self.label = self.eat_token(); - SynVar::LabelDefinition(self.label.clone()) - } - '&' => { - let token = self.eat_token(); - let sublabel = format!("{}/{token}", self.label); - SynVar::LabelDefinition(sublabel) - } - '%' => SynVar::MacroDefinition(self.eat_token()), - ';' => SynVar::MacroDefinitionTerminator, - '[' => SynVar::MarkOpen, - ']' => SynVar::MarkClose, - '{' => SynVar::BlockOpen, - '}' => SynVar::BlockClose, - '(' => match self.eat_to_delim(')') { - Some(string) => SynVar::Comment(string), - None => SynVar::Error(SynErr::UnterminatedComment), - } - '\'' => match self.eat_to_delim('\'') { - Some(string) => SynVar::String(string.as_bytes().to_vec()), - None => SynVar::Error(SynErr::UnterminatedRawString), - } - '"' => match self.eat_to_delim('"') { - Some(string) => { - let mut bytes = string.as_bytes().to_vec(); - bytes.push(0x00); - SynVar::String(bytes) - } - None => SynVar::Error(SynErr::UnterminatedNullString), - } - '#' => { - let token = self.eat_token(); - match token.parse::<Value>() { - Ok(value) => SynVar::Padding(value), - Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), - } - }, - '~' => { - let token = self.eat_token(); - let symbol = format!("{}/{token}", self.label); - SynVar::Symbol(symbol) - } - ':' => SynVar::Symbol(String::from(':')), - c => { - let token = format!("{c}{}", self.eat_token()); - match token.parse::<Value>() { - Ok(value) => SynVar::Literal(value), - Err(_) => match token.parse::<Instruction>() { - Ok(instruction) => SynVar::Instruction(instruction), - Err(_) => SynVar::Symbol(token), - } - } - } - }; - - // Parse source path comments. - if let SynVar::Comment(comment) = &variant { - // Check that the comment fills the entire line. - if start.column == 0 && self.is_line_empty() { - if let Some(path) = comment.strip_prefix(": ") { - self.source_path = Some(PathBuf::from(path.trim())); - self.source_line_start = start.line + 1; - } - } - } - - // Find location in current merged file. 
- let in_merged = SourceLocation { - path: self.path.to_owned(), - start, - end: self.prev_position, - }; - - // Find location in original source file. - let in_source = if start.line >= self.source_line_start { - match &self.source_path { - Some(path) => { - let offset = self.source_line_start; - Some( SourceLocation { - path: Some(path.to_owned()), - start: Position { - line: in_merged.start.line.saturating_sub(offset), - column: in_merged.start.column, - }, - end: Position { - line: in_merged.end.line.saturating_sub(offset), - column: in_merged.end.column, - } - }) - } - None => None, - } - } else { - None - }; - - let string = std::mem::take(&mut self.token_source_string); - let source = SourceSpan { string, in_merged, in_source }; - Some( SyntacticToken { source, variant } ) - } -} - - -#[derive(Debug)] -pub enum ParseError { - InvalidExtension, - NotFound, - NotReadable, - IsADirectory, - InvalidUtf8, - Unknown, -} |
