summaryrefslogtreecommitdiff
path: root/src/translators/syntactic_parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/translators/syntactic_parser.rs')
-rw-r--r--src/translators/syntactic_parser.rs247
1 files changed, 0 insertions, 247 deletions
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
deleted file mode 100644
index 7279daf..0000000
--- a/src/translators/syntactic_parser.rs
+++ /dev/null
@@ -1,247 +0,0 @@
-use crate::*;
-
-use std::path::PathBuf;
-
-
/// Translate raw source code characters into syntactic tokens.
///
/// Implements `Iterator<Item = SyntacticToken>`; tokens are produced
/// lazily as the character queue is consumed.
pub struct SyntacticParser {
    /// Path of file from which the source was read.
    /// `None` when the source did not come from a file.
    path: Option<PathBuf>,
    /// Path of the original source file, as declared by a full-line
    /// `(: path)` comment inside the (merged) source text.
    source_path: Option<PathBuf>,
    /// Position of the next character to be read.
    position: Position,
    /// Previous value of the position field (i.e. the position of the
    /// most recently consumed character).
    prev_position: Position,
    /// Line where the embedded source file begins (set when a source
    /// path comment is parsed).
    source_line_start: usize,
    /// Characters waiting to be parsed, in reverse order so that
    /// `pop()`/`last()` operate on the logical front of the stream.
    chars: Vec<char>,
    /// Accumulated source text of the token currently being parsed.
    token_source_string: String,
    /// The name of the most recently parsed label, used to qualify
    /// sublabels and symbol references.
    label: String,
}
-
-
-impl SyntacticParser {
- /// Parse source code.
- pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
- Self {
- path: path.map(|p| p.into()),
- source_path: None,
- position: Position { line: 0, column: 0 },
- prev_position: Position { line: 0, column: 0 },
- source_line_start: 0,
- chars: source_code.chars().rev().collect(),
- token_source_string: String::new(),
- label: String::new(),
- }
- }
-
- /// Return the next character, keeping it on the queue.
- fn peek_char(&self) -> Option<char> {
- self.chars.last().copied()
- }
-
- /// Return the next character, removing it from the queue.
- fn eat_char(&mut self) -> Option<char> {
- let option = self.chars.pop();
- if let Some(c) = option {
- self.prev_position = self.position;
- self.position.advance(c);
- self.token_source_string.push(c);
- }
- return option;
- }
-
- /// Remove the next character from the queue.
- fn drop_char(&mut self) {
- if let Some(c) = self.chars.pop() {
- self.prev_position = self.position;
- self.position.advance(c);
- }
- }
-
- /// Remove leading whitespace.
- fn drop_whitespace(&mut self) {
- while let Some(c) = self.peek_char() {
- match c.is_whitespace() {
- true => self.drop_char(),
- false => break,
- }
- }
- }
-
- /// Remove a full token from the queue.
- fn eat_token(&mut self) -> String {
- const DELIMITERS: [char; 13] =
- ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
- let mut token = String::new();
- while let Some(peek) = self.peek_char() {
- if peek.is_whitespace() || DELIMITERS.contains(&peek) {
- break;
- }
- let c = self.eat_char().unwrap();
- token.push(c);
- if c == ':' {
- break;
- }
- }
- token
- }
-
- /// Return all characters until the delimiter, removing all returned
- /// characters and the delimiter from the queue. Returns None if end
- /// of source is reached before delimiter is found.
- fn eat_to_delim(&mut self, delim: char) -> Option<String> {
- let mut token = String::new();
- while let Some(c) = self.eat_char() {
- self.token_source_string.push(c);
- match c == delim {
- true => return Some(token),
- false => token.push(c),
- }
- }
- return None;
- }
-
- fn is_line_empty(&self) -> bool {
- for c in self.chars.iter().rev() {
- if *c == '\n' {
- return true;
- }
- if !c.is_whitespace() {
- return false
- }
- }
- return false;
- }
-}
-
-
impl Iterator for SyntacticParser {
    type Item = SyntacticToken;

    /// Sequentially parse tokens from the source code.
    ///
    /// Returns `None` at end of source. Unterminated comments/strings
    /// and unparsable padding values are reported in-band as
    /// `SynVar::Error` tokens rather than ending iteration.
    fn next(&mut self) -> Option<SyntacticToken> {
        use SyntacticTokenVariant as SynVar;
        use SyntacticParseError as SynErr;

        self.drop_whitespace();
        // Position of the token's first character.
        let start = self.position;

        // The first character decides the token variant; multi-character
        // tokens consume the rest of their text from the queue.
        let variant = match self.eat_char()? {
            // '@name' defines a top-level label and becomes the scope
            // for subsequent '&' sublabels and '~' symbol references.
            '@' => {
                self.label = self.eat_token();
                SynVar::LabelDefinition(self.label.clone())
            }
            // '&name' defines a sublabel qualified by the current label.
            '&' => {
                let token = self.eat_token();
                let sublabel = format!("{}/{token}", self.label);
                SynVar::LabelDefinition(sublabel)
            }
            '%' => SynVar::MacroDefinition(self.eat_token()),
            ';' => SynVar::MacroDefinitionTerminator,
            '[' => SynVar::MarkOpen,
            ']' => SynVar::MarkClose,
            '{' => SynVar::BlockOpen,
            '}' => SynVar::BlockClose,
            // '(...)' comment; missing ')' before EOF is an error token.
            '(' => match self.eat_to_delim(')') {
                Some(string) => SynVar::Comment(string),
                None => SynVar::Error(SynErr::UnterminatedComment),
            }
            // '...' raw string: bytes emitted without a terminator.
            '\'' => match self.eat_to_delim('\'') {
                Some(string) => SynVar::String(string.as_bytes().to_vec()),
                None => SynVar::Error(SynErr::UnterminatedRawString),
            }
            // "..." null-terminated string: a 0x00 byte is appended.
            '"' => match self.eat_to_delim('"') {
                Some(string) => {
                    let mut bytes = string.as_bytes().to_vec();
                    bytes.push(0x00);
                    SynVar::String(bytes)
                }
                None => SynVar::Error(SynErr::UnterminatedNullString),
            }
            // '#value' pads the output; the value must parse as a Value.
            '#' => {
                let token = self.eat_token();
                match token.parse::<Value>() {
                    Ok(value) => SynVar::Padding(value),
                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
                }
            },
            // '~name' references a symbol scoped under the current label.
            '~' => {
                let token = self.eat_token();
                let symbol = format!("{}/{token}", self.label);
                SynVar::Symbol(symbol)
            }
            ':' => SynVar::Symbol(String::from(':')),
            // Anything else: try literal, then instruction, else symbol.
            c => {
                let token = format!("{c}{}", self.eat_token());
                match token.parse::<Value>() {
                    Ok(value) => SynVar::Literal(value),
                    Err(_) => match token.parse::<Instruction>() {
                        Ok(instruction) => SynVar::Instruction(instruction),
                        Err(_) => SynVar::Symbol(token),
                    }
                }
            }
        };

        // Parse source path comments: a comment of the form '(: path)'
        // that occupies an entire line records where the embedded
        // original file begins, for source-location mapping below.
        if let SynVar::Comment(comment) = &variant {
            // Check that the comment fills the entire line.
            if start.column == 0 && self.is_line_empty() {
                if let Some(path) = comment.strip_prefix(": ") {
                    self.source_path = Some(PathBuf::from(path.trim()));
                    // Mapped lines start on the line after the comment.
                    self.source_line_start = start.line + 1;
                }
            }
        }

        // Find location in current merged file.
        let in_merged = SourceLocation {
            path: self.path.to_owned(),
            start,
            end: self.prev_position,
        };

        // Find location in original source file: only meaningful for
        // tokens at or after the recorded start of the embedded file,
        // and only when a source path comment has been seen.
        let in_source = if start.line >= self.source_line_start {
            match &self.source_path {
                Some(path) => {
                    let offset = self.source_line_start;
                    Some( SourceLocation {
                        path: Some(path.to_owned()),
                        start: Position {
                            line: in_merged.start.line.saturating_sub(offset),
                            column: in_merged.start.column,
                        },
                        end: Position {
                            line: in_merged.end.line.saturating_sub(offset),
                            column: in_merged.end.column,
                        }
                    })
                }
                None => None,
            }
        } else {
            None
        };

        // take() resets the accumulator for the next token.
        let string = std::mem::take(&mut self.token_source_string);
        let source = SourceSpan { string, in_merged, in_source };
        Some( SyntacticToken { source, variant } )
    }
}
-
-
/// Errors produced when reading a source file from disk.
/// NOTE(review): no constructor of these variants is visible in this
/// chunk — variant meanings below are inferred from their names and
/// should be confirmed against the caller.
#[derive(Debug)]
pub enum ParseError {
    /// The file's extension was not a recognised source extension.
    InvalidExtension,
    /// The file does not exist.
    NotFound,
    /// The file exists but could not be read (e.g. permissions).
    NotReadable,
    /// The path refers to a directory, not a file.
    IsADirectory,
    /// The file's contents were not valid UTF-8.
    InvalidUtf8,
    /// Any other I/O failure.
    Unknown,
}