summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/lib.rs 4
-rw-r--r-- src/locators/source.rs 2
-rw-r--r-- src/tokeniser.rs 167
3 files changed, 172 insertions, 1 deletions
diff --git a/src/lib.rs b/src/lib.rs
index fba7e5d..2ebe010 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,5 @@
mod locators;
-
pub use locators::*;
+
+mod tokeniser;
+pub use tokeniser::*;
diff --git a/src/locators/source.rs b/src/locators/source.rs
index 9fd1b2b..2cf1ef9 100644
--- a/src/locators/source.rs
+++ b/src/locators/source.rs
@@ -49,6 +49,8 @@ pub struct Position {
}
impl Position {
+ pub const ZERO: Self = Self { line: 0, column: 0 };
+
pub fn to_next_char(&mut self) {
self.column += 1;
}
diff --git a/src/tokeniser.rs b/src/tokeniser.rs
new file mode 100644
index 0000000..eeab6e6
--- /dev/null
+++ b/src/tokeniser.rs
@@ -0,0 +1,167 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+/// Character-level tokeniser that records source positions as it consumes text.
+///
+/// The unread input lives in `chars` back-to-front, so the next character to
+/// be read is the last element of that vector.
+pub struct Tokeniser {
+    /// Unread characters, stored in reverse order (next character last).
+    pub chars: Vec<char>,
+    /// Path of the complete source file, if one was supplied.
+    pub source_path: Option<PathBuf>,
+    /// Original path of the embedded source file, if any.
+    pub embedded_path: Option<PathBuf>,
+    /// Line number at which the embedded source file begins.
+    pub embedded_first_line: usize,
+    /// Position of the character that will be consumed next.
+    pub position: Position,
+    /// Position of the character that was consumed most recently.
+    pub prev_position: Position,
+    /// Position of the first character of the token currently being read.
+    pub start_position: Position,
+    /// Source text consumed so far for the current token.
+    pub consumed: String,
+    /// Characters that begin a new token.
+    pub delimiters: Vec<char>,
+    /// Characters that end the token they appear in.
+    pub terminators: Vec<char>,
+}
+
+
+impl Tokeniser {
+    /// Create a tokeniser over `source_code`, optionally recording the path of
+    /// the file the text came from.
+    ///
+    /// All positions start at `Position::ZERO`, no embedded file is set, and
+    /// the delimiter and terminator lists start out empty.
+    pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+        Self {
+            // Stored reversed so the next character is always the last element.
+            chars: source_code.chars().rev().collect(),
+            source_path: path.map(Into::into),
+            embedded_path: None,
+            embedded_first_line: 0,
+            position: Position::ZERO,
+            prev_position: Position::ZERO,
+            start_position: Position::ZERO,
+            consumed: String::new(),
+            delimiters: Vec::new(),
+            terminators: Vec::new(),
+        }
+    }
+
+    /// Append `delimiters` to the set of characters that start a new token.
+    pub fn add_delimiters(&mut self, delimiters: &[char]) {
+        self.delimiters.extend_from_slice(delimiters);
+    }
+
+    /// Append `terminators` to the set of characters that end a token.
+    pub fn add_terminators(&mut self, terminators: &[char]) {
+        self.terminators.extend_from_slice(terminators);
+    }
+
+    /// Return the next character without consuming it, or `None` at end of
+    /// input.
+    pub fn peek_char(&self) -> Option<char> {
+        self.chars.last().copied()
+    }
+
+    /// Pop the next character and update `prev_position` and `position`,
+    /// without touching `consumed`.
+    fn pop_char(&mut self) -> Option<char> {
+        let c = self.chars.pop()?;
+        self.prev_position = self.position;
+        self.position.advance(c);
+        Some(c)
+    }
+
+    /// Consume and return the next character, appending it to `consumed`.
+    ///
+    /// Returns `None` (and changes nothing) at end of input.
+    pub fn eat_char(&mut self) -> Option<char> {
+        let c = self.pop_char()?;
+        self.consumed.push(c);
+        Some(c)
+    }
+
+    /// Remove the next character without recording it in `consumed`.
+    ///
+    /// Positions are still advanced. Does nothing at end of input.
+    pub fn drop_char(&mut self) {
+        self.pop_char();
+    }
+
+    /// Remove characters until the next one is not whitespace or the input
+    /// is exhausted.
+    pub fn drop_whitespace(&mut self) {
+        while matches!(self.peek_char(), Some(c) if c.is_whitespace()) {
+            self.drop_char();
+        }
+    }
+
+    /// Consume and return a full token.
+    ///
+    /// Reading stops *before* whitespace or a delimiter (which is left in the
+    /// input), or *after* a terminator (which is included in the token). If
+    /// the very next character is whitespace or a delimiter, nothing is
+    /// consumed and an empty string is returned.
+    pub fn eat_token(&mut self) -> String {
+        let mut token = String::new();
+        while let Some(c) = self.peek_char() {
+            if c.is_whitespace() || self.delimiters.contains(&c) {
+                break;
+            }
+            // Consumes exactly the character that was just peeked.
+            self.eat_char();
+            token.push(c);
+            if self.terminators.contains(&c) {
+                break;
+            }
+        }
+        token
+    }
+
+    /// Consume characters up to and including `delim`, returning the text
+    /// that preceded it (the delimiter itself is not part of the result).
+    ///
+    /// Returns `None` if the end of the source is reached before `delim` is
+    /// found; the characters read up to that point remain consumed.
+    pub fn eat_to_delimiter(&mut self, delim: char) -> Option<String> {
+        let mut token = String::new();
+        // `eat_char` already records each character in `consumed`; pushing it
+        // again here would duplicate the text reported by `get_source_span`.
+        while let Some(c) = self.eat_char() {
+            if c == delim {
+                return Some(token);
+            }
+            token.push(c);
+        }
+        None
+    }
+
+    /// Return true if everything left on the current line is whitespace.
+    ///
+    /// Also true when the input ends before a newline or any non-whitespace
+    /// character is seen.
+    pub fn end_of_line(&self) -> bool {
+        self.chars
+            .iter()
+            .rev()
+            .take_while(|&&c| c != '\n')
+            .all(|c| c.is_whitespace())
+    }
+
+    /// Record the current position as the start of the token being read.
+    pub fn mark_start_position(&mut self) {
+        self.start_position = self.position;
+    }
+
+    /// Build the span for the text consumed since the last call.
+    ///
+    /// The span runs from `start_position` to `prev_position`. `consumed` is
+    /// moved into the returned span and left empty. When an embedded path is
+    /// set and the span starts at or after `embedded_first_line`, a second
+    /// location relative to the embedded file is included, with line numbers
+    /// shifted down by `embedded_first_line`.
+    pub fn get_source_span(&mut self) -> SourceSpan {
+        let in_merged = SourceLocation {
+            path: self.source_path.clone(),
+            start: self.start_position,
+            end: self.prev_position,
+        };
+        let offset = self.embedded_first_line;
+        let in_source = match &self.embedded_path {
+            Some(embedded_path) if in_merged.start.line >= offset => Some(SourceLocation {
+                path: Some(embedded_path.clone()),
+                start: Position {
+                    line: in_merged.start.line.saturating_sub(offset),
+                    column: in_merged.start.column,
+                },
+                end: Position {
+                    line: in_merged.end.line.saturating_sub(offset),
+                    column: in_merged.end.column,
+                },
+            }),
+            _ => None,
+        };
+
+        let string = std::mem::take(&mut self.consumed);
+        SourceSpan { string, in_merged, in_source }
+    }
+}