use crate::*;
use std::path::PathBuf;
/// Translate raw source code characters into syntactic tokens.
///
/// Implements [`Iterator`], yielding one [`SyntacticToken`] per call to
/// `next` until the character queue is exhausted.
pub struct SyntacticParser {
    /// Path of file from which the source was read.
    path: Option<PathBuf>,
    /// Path of the original source file, as declared by a full-line
    /// `(: path)` comment embedded in the source.
    source_path: Option<PathBuf>,
    /// Position of the next character to be read.
    position: Position,
    /// Previous value of the position field; after a character has been
    /// eaten, this is that character's position (the end of the token).
    prev_position: Position,
    /// Line where the embedded source file begins (the line after the
    /// source-path comment).
    source_line_start: usize,
    /// Characters waiting to be parsed, in reverse order so that
    /// `Vec::pop` yields them in source order.
    chars: Vec<char>,
    /// Accumulated source text of the token currently being parsed;
    /// reset with `mem::take` each time a token is emitted.
    token_source_string: String,
    /// The name of the most recently parsed label, used to expand
    /// sublabel definitions (`&`) and scoped symbols (`~`).
    label: String,
}
impl SyntacticParser {
    /// Construct a parser over the given source code.
    ///
    /// `path` is recorded as the origin of the (possibly merged) source
    /// and is attached to the locations of every token produced.
    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
        Self {
            path: path.map(|p| p.into()),
            source_path: None,
            position: Position { line: 0, column: 0 },
            prev_position: Position { line: 0, column: 0 },
            source_line_start: 0,
            // Stored reversed so that `Vec::pop` yields characters in source order.
            chars: source_code.chars().rev().collect(),
            token_source_string: String::new(),
            label: String::new(),
        }
    }
    /// Return the next character, keeping it on the queue.
    fn peek_char(&self) -> Option<char> {
        self.chars.last().copied()
    }
    /// Return the next character, removing it from the queue.
    ///
    /// Advances the position and records the character as part of the
    /// source string of the token currently being parsed.
    fn eat_char(&mut self) -> Option<char> {
        let option = self.chars.pop();
        if let Some(c) = option {
            self.prev_position = self.position;
            self.position.advance(c);
            self.token_source_string.push(c);
        }
        option
    }
    /// Remove the next character from the queue, advancing the position
    /// but without recording the character in the current token's source
    /// string (used for inter-token whitespace).
    fn drop_char(&mut self) {
        if let Some(c) = self.chars.pop() {
            self.prev_position = self.position;
            self.position.advance(c);
        }
    }
    /// Remove leading whitespace.
    fn drop_whitespace(&mut self) {
        while let Some(c) = self.peek_char() {
            if !c.is_whitespace() {
                break;
            }
            self.drop_char();
        }
    }
    /// Remove a full token from the queue.
    ///
    /// A token ends at whitespace or a delimiter character (both left on
    /// the queue), or immediately after a ':' (which is consumed as part
    /// of the token).
    fn eat_token(&mut self) -> String {
        const DELIMITERS: [char; 13] =
            ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
        let mut token = String::new();
        while let Some(peek) = self.peek_char() {
            if peek.is_whitespace() || DELIMITERS.contains(&peek) {
                break;
            }
            // Safe to unwrap: peek_char just returned Some.
            let c = self.eat_char().unwrap();
            token.push(c);
            if c == ':' {
                break;
            }
        }
        token
    }
    /// Return all characters until the delimiter, removing all returned
    /// characters and the delimiter from the queue. Returns None if end
    /// of source is reached before delimiter is found.
    fn eat_to_delim(&mut self, delim: char) -> Option<String> {
        let mut token = String::new();
        while let Some(c) = self.eat_char() {
            // `eat_char` already records `c` in `token_source_string`;
            // a previous revision pushed it again here, duplicating every
            // character of the token's recorded source text.
            if c == delim {
                return Some(token);
            }
            token.push(c);
        }
        None
    }
    /// Return true if only whitespace remains on the current line (a
    /// newline is found before any non-whitespace character).
    ///
    /// NOTE(review): returns false when the source ends before a newline,
    /// even if only whitespace remains — confirm this is intended.
    fn is_line_empty(&self) -> bool {
        // `chars` is stored reversed, so iterate in reverse to scan in
        // source order.
        for c in self.chars.iter().rev() {
            if *c == '\n' {
                return true;
            }
            if !c.is_whitespace() {
                return false;
            }
        }
        false
    }
}
impl Iterator for SyntacticParser {
    type Item = SyntacticToken;
    /// Sequentially parse tokens from the source code.
    ///
    /// Returns None once the character queue is exhausted. Each token
    /// carries the source text it was parsed from, its location in the
    /// merged file, and — once a source-path comment has been seen — its
    /// location in the original source file.
    fn next(&mut self) -> Option<SyntacticToken> {
        use SyntacticTokenVariant as SynVar;
        use SyntacticParseError as SynErr;
        // Whitespace between tokens is dropped without being recorded in
        // the token source string.
        self.drop_whitespace();
        let start = self.position;
        // The first character decides the token variant; `?` ends
        // iteration at end of source.
        let variant = match self.eat_char()? {
            '@' => {
                // Label definition; remembered so that sublabels ('&')
                // and scoped symbols ('~') can be expanded against it.
                self.label = self.eat_token();
                SynVar::LabelDefinition(self.label.clone())
            }
            '&' => {
                // Sublabel definition, scoped under the most recent label.
                let token = self.eat_token();
                let sublabel = format!("{}/{token}", self.label);
                SynVar::LabelDefinition(sublabel)
            }
            '%' => SynVar::MacroDefinition(self.eat_token()),
            ';' => SynVar::MacroDefinitionTerminator,
            '[' => SynVar::MarkOpen,
            ']' => SynVar::MarkClose,
            '{' => SynVar::BlockOpen,
            '}' => SynVar::BlockClose,
            // Comment: everything up to the closing parenthesis.
            '(' => match self.eat_to_delim(')') {
                Some(string) => SynVar::Comment(string),
                None => SynVar::Error(SynErr::UnterminatedComment),
            }
            // Raw string: bytes exactly as written.
            '\'' => match self.eat_to_delim('\'') {
                Some(string) => SynVar::String(string.as_bytes().to_vec()),
                None => SynVar::Error(SynErr::UnterminatedRawString),
            }
            // Null-terminated string: a 0x00 byte is appended.
            '"' => match self.eat_to_delim('"') {
                Some(string) => {
                    let mut bytes = string.as_bytes().to_vec();
                    bytes.push(0x00);
                    SynVar::String(bytes)
                }
                None => SynVar::Error(SynErr::UnterminatedNullString),
            }
            '#' => {
                // Padding directive; the token must parse as a Value.
                let token = self.eat_token();
                match token.parse::<Value>() {
                    Ok(value) => SynVar::Padding(value),
                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
                }
            },
            '~' => {
                // Symbol scoped under the most recent label.
                let token = self.eat_token();
                let symbol = format!("{}/{token}", self.label);
                SynVar::Symbol(symbol)
            }
            ':' => SynVar::Symbol(String::from(':')),
            c => {
                // Anything else: try a literal value first, then an
                // instruction, falling back to a plain symbol.
                let token = format!("{c}{}", self.eat_token());
                match token.parse::<Value>() {
                    Ok(value) => SynVar::Literal(value),
                    Err(_) => match token.parse::<Instruction>() {
                        Ok(instruction) => SynVar::Instruction(instruction),
                        Err(_) => SynVar::Symbol(token),
                    }
                }
            }
        };
        // Parse source path comments: a comment of the form "(: path)"
        // that occupies an entire line records the original source file
        // path for subsequent tokens.
        if let SynVar::Comment(comment) = &variant {
            // Check that the comment fills the entire line.
            if start.column == 0 && self.is_line_empty() {
                if let Some(path) = comment.strip_prefix(": ") {
                    self.source_path = Some(PathBuf::from(path.trim()));
                    // Tokens from the next line onward belong to the
                    // named source file.
                    self.source_line_start = start.line + 1;
                }
            }
        }
        // Find location in current merged file. `prev_position` is the
        // position of the last character eaten, i.e. the token's end.
        let in_merged = SourceLocation {
            path: self.path.to_owned(),
            start,
            end: self.prev_position,
        };
        // Find location in original source file by offsetting line
        // numbers against the line where the embedded source began.
        let in_source = if start.line >= self.source_line_start {
            match &self.source_path {
                Some(path) => {
                    let offset = self.source_line_start;
                    Some( SourceLocation {
                        path: Some(path.to_owned()),
                        start: Position {
                            line: in_merged.start.line.saturating_sub(offset),
                            column: in_merged.start.column,
                        },
                        end: Position {
                            line: in_merged.end.line.saturating_sub(offset),
                            column: in_merged.end.column,
                        }
                    })
                }
                None => None,
            }
        } else {
            None
        };
        // Hand the accumulated source text to the token and reset the
        // accumulator for the next token.
        let string = std::mem::take(&mut self.token_source_string);
        let source = SourceSpan { string, in_merged, in_source };
        Some( SyntacticToken { source, variant } )
    }
}
/// Errors that can occur when loading a source file for parsing.
// NOTE(review): variants are documented from their names only — the code
// that constructs them is not visible in this file; verify against callers.
#[derive(Debug)]
pub enum ParseError {
    /// The file's extension was not a recognised source extension.
    InvalidExtension,
    /// The file could not be found.
    NotFound,
    /// The file exists but could not be read.
    NotReadable,
    /// The path refers to a directory, not a file.
    IsADirectory,
    /// The file's contents were not valid UTF-8.
    InvalidUtf8,
    /// Any other failure.
    Unknown,
}