summaryrefslogtreecommitdiff
path: root/src/translators/syntactic_parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/translators/syntactic_parser.rs')
-rw-r--r--src/translators/syntactic_parser.rs247
1 file changed, 247 insertions, 0 deletions
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
new file mode 100644
index 0000000..7279daf
--- /dev/null
+++ b/src/translators/syntactic_parser.rs
@@ -0,0 +1,247 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
/// Translate raw source code characters into syntactic tokens.
pub struct SyntacticParser {
    /// Path of the (merged) file from which the source was read, attached
    /// to every emitted token's merged-file location.
    path: Option<PathBuf>,
    /// Path of the original source file, as declared by a full-line
    /// `(: path)` comment inside the merged source.
    source_path: Option<PathBuf>,
    /// Position of the next character to be read.
    position: Position,
    /// Previous value of the position field; after eating a character this
    /// is the position of that character.
    prev_position: Position,
    /// Line where the embedded source file begins (the line after the
    /// most recent source-path comment).
    source_line_start: usize,
    /// Characters waiting to be parsed, stored in reverse order so that
    /// `Vec::pop` yields them in source order.
    chars: Vec<char>,
    /// Source text consumed so far for the token currently being parsed.
    token_source_string: String,
    /// The name of the most recently parsed label, used to expand
    /// sublabel definitions and local symbol references.
    label: String,
}
+
+
+impl SyntacticParser {
+ /// Parse source code.
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ Self {
+ path: path.map(|p| p.into()),
+ source_path: None,
+ position: Position { line: 0, column: 0 },
+ prev_position: Position { line: 0, column: 0 },
+ source_line_start: 0,
+ chars: source_code.chars().rev().collect(),
+ token_source_string: String::new(),
+ label: String::new(),
+ }
+ }
+
+ /// Return the next character, keeping it on the queue.
+ fn peek_char(&self) -> Option<char> {
+ self.chars.last().copied()
+ }
+
+ /// Return the next character, removing it from the queue.
+ fn eat_char(&mut self) -> Option<char> {
+ let option = self.chars.pop();
+ if let Some(c) = option {
+ self.prev_position = self.position;
+ self.position.advance(c);
+ self.token_source_string.push(c);
+ }
+ return option;
+ }
+
+ /// Remove the next character from the queue.
+ fn drop_char(&mut self) {
+ if let Some(c) = self.chars.pop() {
+ self.prev_position = self.position;
+ self.position.advance(c);
+ }
+ }
+
+ /// Remove leading whitespace.
+ fn drop_whitespace(&mut self) {
+ while let Some(c) = self.peek_char() {
+ match c.is_whitespace() {
+ true => self.drop_char(),
+ false => break,
+ }
+ }
+ }
+
+ /// Remove a full token from the queue.
+ fn eat_token(&mut self) -> String {
+ const DELIMITERS: [char; 13] =
+ ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
+ let mut token = String::new();
+ while let Some(peek) = self.peek_char() {
+ if peek.is_whitespace() || DELIMITERS.contains(&peek) {
+ break;
+ }
+ let c = self.eat_char().unwrap();
+ token.push(c);
+ if c == ':' {
+ break;
+ }
+ }
+ token
+ }
+
+ /// Return all characters until the delimiter, removing all returned
+ /// characters and the delimiter from the queue. Returns None if end
+ /// of source is reached before delimiter is found.
+ fn eat_to_delim(&mut self, delim: char) -> Option<String> {
+ let mut token = String::new();
+ while let Some(c) = self.eat_char() {
+ self.token_source_string.push(c);
+ match c == delim {
+ true => return Some(token),
+ false => token.push(c),
+ }
+ }
+ return None;
+ }
+
+ fn is_line_empty(&self) -> bool {
+ for c in self.chars.iter().rev() {
+ if *c == '\n' {
+ return true;
+ }
+ if !c.is_whitespace() {
+ return false
+ }
+ }
+ return false;
+ }
+}
+
+
impl Iterator for SyntacticParser {
    type Item = SyntacticToken;

    /// Sequentially parse tokens from the source code.
    /// Returns None when the end of the source is reached.
    fn next(&mut self) -> Option<SyntacticToken> {
        use SyntacticTokenVariant as SynVar;
        use SyntacticParseError as SynErr;

        // Skip to the first character of the next token and remember
        // where it starts.
        self.drop_whitespace();
        let start = self.position;

        // The first character decides the token class; delimiters start
        // tokens of their own, anything else is read as a whole word.
        let variant = match self.eat_char()? {
            '@' => {
                // Remember the label so that later '&' and '~' tokens
                // can be expanded relative to it.
                self.label = self.eat_token();
                SynVar::LabelDefinition(self.label.clone())
            }
            '&' => {
                // Sublabel definition, scoped beneath the most recent
                // '@' label as "label/token".
                let token = self.eat_token();
                let sublabel = format!("{}/{token}", self.label);
                SynVar::LabelDefinition(sublabel)
            }
            '%' => SynVar::MacroDefinition(self.eat_token()),
            ';' => SynVar::MacroDefinitionTerminator,
            '[' => SynVar::MarkOpen,
            ']' => SynVar::MarkClose,
            '{' => SynVar::BlockOpen,
            '}' => SynVar::BlockClose,
            '(' => match self.eat_to_delim(')') {
                Some(string) => SynVar::Comment(string),
                None => SynVar::Error(SynErr::UnterminatedComment),
            }
            // Raw string: bytes used exactly as written.
            '\'' => match self.eat_to_delim('\'') {
                Some(string) => SynVar::String(string.as_bytes().to_vec()),
                None => SynVar::Error(SynErr::UnterminatedRawString),
            }
            // Null-terminated string: a 0x00 byte is appended.
            '"' => match self.eat_to_delim('"') {
                Some(string) => {
                    let mut bytes = string.as_bytes().to_vec();
                    bytes.push(0x00);
                    SynVar::String(bytes)
                }
                None => SynVar::Error(SynErr::UnterminatedNullString),
            }
            '#' => {
                // Padding token: the following word must parse as a Value.
                let token = self.eat_token();
                match token.parse::<Value>() {
                    Ok(value) => SynVar::Padding(value),
                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
                }
            },
            '~' => {
                // Local symbol reference, expanded relative to the most
                // recent '@' label as "label/token".
                let token = self.eat_token();
                let symbol = format!("{}/{token}", self.label);
                SynVar::Symbol(symbol)
            }
            ':' => SynVar::Symbol(String::from(':')),
            c => {
                // Non-delimiter word: try a literal value first, then an
                // instruction, falling back to a plain symbol.
                let token = format!("{c}{}", self.eat_token());
                match token.parse::<Value>() {
                    Ok(value) => SynVar::Literal(value),
                    Err(_) => match token.parse::<Instruction>() {
                        Ok(instruction) => SynVar::Instruction(instruction),
                        Err(_) => SynVar::Symbol(token),
                    }
                }
            }
        };

        // Parse source path comments of the form `(: path)`, which record
        // the original file that subsequent merged lines came from.
        if let SynVar::Comment(comment) = &variant {
            // Check that the comment fills the entire line.
            if start.column == 0 && self.is_line_empty() {
                if let Some(path) = comment.strip_prefix(": ") {
                    self.source_path = Some(PathBuf::from(path.trim()));
                    // Lines after this one belong to the named source file.
                    self.source_line_start = start.line + 1;
                }
            }
        }

        // Find location in current merged file. After the last eat_char,
        // prev_position is the position of the token's final character.
        let in_merged = SourceLocation {
            path: self.path.to_owned(),
            start,
            end: self.prev_position,
        };

        // Find location in original source file by offsetting line numbers
        // so they are relative to the start of the embedded source.
        // NOTE(review): the source-path comment itself sets source_line_start
        // past its own line, so that comment token gets in_source = None —
        // presumably intentional; confirm against consumers of SourceSpan.
        let in_source = if start.line >= self.source_line_start {
            match &self.source_path {
                Some(path) => {
                    let offset = self.source_line_start;
                    Some( SourceLocation {
                        path: Some(path.to_owned()),
                        start: Position {
                            line: in_merged.start.line.saturating_sub(offset),
                            column: in_merged.start.column,
                        },
                        end: Position {
                            line: in_merged.end.line.saturating_sub(offset),
                            column: in_merged.end.column,
                        }
                    })
                }
                None => None,
            }
        } else {
            None
        };

        // Hand back the accumulated source text and reset it for the
        // next token.
        let string = std::mem::take(&mut self.token_source_string);
        let source = SourceSpan { string, in_merged, in_source };
        Some( SyntacticToken { source, variant } )
    }
}
+
+
/// Errors that can occur while loading a source file for parsing.
#[derive(Debug)]
pub enum ParseError {
    /// The file's extension is not one that is accepted.
    InvalidExtension,
    /// The file does not exist.
    NotFound,
    /// The file exists but could not be read.
    NotReadable,
    /// The path refers to a directory rather than a file.
    IsADirectory,
    /// The file's contents are not valid UTF-8 text.
    InvalidUtf8,
    /// An unclassified error occurred.
    Unknown,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Human-readable messages for user-facing diagnostics.
        let description = match self {
            Self::InvalidExtension => "invalid file extension",
            Self::NotFound => "file not found",
            Self::NotReadable => "file is not readable",
            Self::IsADirectory => "path is a directory, not a file",
            Self::InvalidUtf8 => "file does not contain valid UTF-8 text",
            Self::Unknown => "unknown error",
        };
        write!(f, "{description}")
    }
}

// Marker impl so ParseError can be boxed/propagated as a standard error.
impl std::error::Error for ParseError {}