use crate::*; use std::path::PathBuf; /// Translate raw source code characters into syntactic tokens. pub struct SyntacticParser { /// Path of file from which the source was read. path: Option, /// Path of the original source file. source_path: Option, /// Position of the next character to be read. position: Position, /// Previous value of the position field. prev_position: Position, /// Line where the embedded source file begins. source_line_start: usize, /// Characters waiting to be parsed, in reverse order. chars: Vec, /// The token currently being parsed. token_source_string: String, /// The name of the most recently parsed label. label: String, } impl SyntacticParser { /// Parse source code. pub fn from_source_code>(source_code: &str, path: Option

) -> Self { Self { path: path.map(|p| p.into()), source_path: None, position: Position { line: 0, column: 0 }, prev_position: Position { line: 0, column: 0 }, source_line_start: 0, chars: source_code.chars().rev().collect(), token_source_string: String::new(), label: String::new(), } } /// Return the next character, keeping it on the queue. fn peek_char(&self) -> Option { self.chars.last().copied() } /// Return the next character, removing it from the queue. fn eat_char(&mut self) -> Option { let option = self.chars.pop(); if let Some(c) = option { self.prev_position = self.position; self.position.advance(c); self.token_source_string.push(c); } return option; } /// Remove the next character from the queue. fn drop_char(&mut self) { if let Some(c) = self.chars.pop() { self.prev_position = self.position; self.position.advance(c); } } /// Remove leading whitespace. fn drop_whitespace(&mut self) { while let Some(c) = self.peek_char() { match c.is_whitespace() { true => self.drop_char(), false => break, } } } /// Remove a full token from the queue. fn eat_token(&mut self) -> String { const DELIMITERS: [char; 13] = ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; let mut token = String::new(); while let Some(peek) = self.peek_char() { if peek.is_whitespace() || DELIMITERS.contains(&peek) { break; } let c = self.eat_char().unwrap(); token.push(c); if c == ':' { break; } } token } /// Return all characters until the delimiter, removing all returned /// characters and the delimiter from the queue. Returns None if end /// of source is reached before delimiter is found. fn eat_to_delim(&mut self, delim: char) -> Option { let mut token = String::new(); while let Some(c) = self.eat_char() { self.token_source_string.push(c); match c == delim { true => return Some(token), false => token.push(c), } } return None; } fn is_line_empty(&self) -> bool { for c in self.chars.iter().rev() { if *c == '\n' { return true; } if !c.is_whitespace() { return false } } return false; } } impl Iterator for SyntacticParser { type Item = SyntacticToken; /// Sequentially parse tokens from the source code. fn next(&mut self) -> Option { use SyntacticTokenVariant as SynVar; use SyntacticParseError as SynErr; self.drop_whitespace(); let start = self.position; let variant = match self.eat_char()? { '@' => { self.label = self.eat_token(); SynVar::LabelDefinition(self.label.clone()) } '&' => { let token = self.eat_token(); let sublabel = format!("{}/{token}", self.label); SynVar::LabelDefinition(sublabel) } '%' => SynVar::MacroDefinition(self.eat_token()), ';' => SynVar::MacroDefinitionTerminator, '[' => SynVar::MarkOpen, ']' => SynVar::MarkClose, '{' => SynVar::BlockOpen, '}' => SynVar::BlockClose, '(' => match self.eat_to_delim(')') { Some(string) => SynVar::Comment(string), None => SynVar::Error(SynErr::UnterminatedComment), } '\'' => match self.eat_to_delim('\'') { Some(string) => SynVar::String(string.as_bytes().to_vec()), None => SynVar::Error(SynErr::UnterminatedRawString), } '"' => match self.eat_to_delim('"') { Some(string) => { let mut bytes = string.as_bytes().to_vec(); bytes.push(0x00); SynVar::String(bytes) } None => SynVar::Error(SynErr::UnterminatedNullString), } '#' => { let token = self.eat_token(); match token.parse::() { Ok(value) => SynVar::Padding(value), Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), } }, '~' => { let token = self.eat_token(); let symbol = format!("{}/{token}", self.label); SynVar::Symbol(symbol) } ':' => SynVar::Symbol(String::from(':')), c => { let token = format!("{c}{}", self.eat_token()); match token.parse::() { Ok(value) => SynVar::Literal(value), Err(_) => match token.parse::() { Ok(instruction) => SynVar::Instruction(instruction), Err(_) => SynVar::Symbol(token), } } } }; // Parse source path comments. if let SynVar::Comment(comment) = &variant { // Check that the comment fills the entire line. if start.column == 0 && self.is_line_empty() { if let Some(path) = comment.strip_prefix(": ") { self.source_path = Some(PathBuf::from(path.trim())); self.source_line_start = start.line + 1; } } } // Find location in current merged file. let in_merged = SourceLocation { path: self.path.to_owned(), start, end: self.prev_position, }; // Find location in original source file. let in_source = if start.line >= self.source_line_start { match &self.source_path { Some(path) => { let offset = self.source_line_start; Some( SourceLocation { path: Some(path.to_owned()), start: Position { line: in_merged.start.line.saturating_sub(offset), column: in_merged.start.column, }, end: Position { line: in_merged.end.line.saturating_sub(offset), column: in_merged.end.column, } }) } None => None, } } else { None }; let string = std::mem::take(&mut self.token_source_string); let source = SourceSpan { string, in_merged, in_source }; Some( SyntacticToken { source, variant } ) } } #[derive(Debug)] pub enum ParseError { InvalidExtension, NotFound, NotReadable, IsADirectory, InvalidUtf8, Unknown, }