summaryrefslogtreecommitdiff
path: root/src/tokeniser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokeniser.rs')
-rw-r--r--src/tokeniser.rs260
1 files changed, 0 insertions, 260 deletions
diff --git a/src/tokeniser.rs b/src/tokeniser.rs
deleted file mode 100644
index 0350afe..0000000
--- a/src/tokeniser.rs
+++ /dev/null
@@ -1,260 +0,0 @@
-use crate::*;
-
-use std::path::PathBuf;
-
-
/// Break a character stream down into individual tokens.
///
/// Characters are never removed from `chars`; progress is tracked purely
/// by the marks below, so earlier input can always be revisited by
/// copying an older mark.
pub struct Tokeniser {
    /// All characters to be parsed, characters are never removed.
    pub chars: Vec<char>,
    /// Path of the whole source file.
    pub source_path: Option<PathBuf>,
    /// Original path of the embedded source file.
    pub embedded_path: Option<PathBuf>,
    /// Line where the embedded source file begins.
    pub embedded_first_line: usize,
    /// Mark tracking the next character to parse.
    pub mark: TokeniserMark,
    /// Mark tracking the most recent character of the current token.
    pub prev: TokeniserMark,
    /// Position of the first character of the current token.
    pub start: TokeniserMark,
    /// Position after the final character of the current token.
    pub end: TokeniserMark,
    /// Position to begin subtokenisation from.
    pub child: TokeniserMark,
    /// List of characters that start a new token
    /// (they end the current token without being consumed).
    pub delimiters: Vec<char>,
    /// List of characters that terminate a token
    /// (they are consumed as the token's final character).
    pub terminators: Vec<char>,
}
-
-impl Tokeniser {
- pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
- Self {
- chars: source_code.chars().collect(),
- source_path: path.map(|p| p.into()),
- embedded_path: None,
- embedded_first_line: 0,
- mark: TokeniserMark::ZERO,
- prev: TokeniserMark::ZERO,
- start: TokeniserMark::ZERO,
- end: TokeniserMark::ZERO,
- child: TokeniserMark::ZERO,
- delimiters: Vec::new(),
- terminators: Vec::new(),
- }
- }
-
- /// Create a tokeniser from child to end.
- pub fn tokenise_child_span(&mut self) -> Self {
- let mut start = self.child;
- start.i = 0;
- Self {
- chars: self.get_chars(&self.child, &self.end),
- source_path: self.source_path.clone(),
- embedded_path: self.embedded_path.clone(),
- embedded_first_line: self.embedded_first_line.clone(),
- mark: start,
- prev: start,
- start: start,
- end: start,
- child: start,
- delimiters: Vec::new(),
- terminators: Vec::new(),
- }
- }
-
- pub fn add_delimiters(&mut self, delimiters: &[char]) {
- self.delimiters.extend_from_slice(delimiters);
- }
-
- pub fn add_terminators(&mut self, terminators: &[char]) {
- self.terminators.extend_from_slice(terminators);
- }
-
- pub fn get_chars(&self, start: &TokeniserMark, end: &TokeniserMark) -> Vec<char> {
- self.chars[start.i..end.i].iter().map(char::to_owned).collect()
- }
-
- /// Return the next character without consuming it.
- pub fn peek_char(&self) -> Option<char> {
- self.chars.get(self.mark.i).copied()
- }
-
- /// Consume and return the next character.
- pub fn eat_char(&mut self) -> Option<char> {
- let option = self.peek_char();
- if let Some(c) = option {
- self.prev = self.mark;
- self.mark.advance(c);
- self.mark_end();
- }
- return option;
- }
-
- /// Consume next characters if they match a pattern.
- pub fn eat_if(&mut self, pattern: &str) -> Option<String> {
- // Check that next characters match the pattern.
- for (i, c) in pattern.chars().enumerate() {
- if let Some(d) = self.chars.get(self.mark.i + i) {
- if c == *d {
- continue;
- }
- }
- return None;
- }
- // Consume the next characters.
- self.prev = self.mark;
- for c in pattern.chars() {
- self.mark.advance(c);
- self.mark_end();
- }
- return Some(pattern.to_string());
- }
-
- /// Consume whitespace.
- pub fn eat_whitespace(&mut self) {
- while let Some(c) = self.peek_char() {
- match c.is_whitespace() {
- true => self.eat_char(),
- false => break,
- };
- }
- }
-
- /// Remove a full token from the queue.
- pub fn eat_token(&mut self) -> String {
- let mut token = String::new();
- while let Some(peek) = self.peek_char() {
- if peek.is_whitespace() || self.delimiters.contains(&peek) {
- break;
- }
- let c = self.eat_char().unwrap();
- token.push(c);
- if self.terminators.contains(&c) {
- break;
- }
- }
- return token;
- }
-
- /// Return all characters found until the predicate returns true.
- /// Returns None if end of source is reached before delimiter is found.
- pub fn track_until(&mut self, mut predicate: impl FnMut(&mut Self) -> bool) -> Option<String> {
- let start = self.mark;
- let mut end = self.mark;
- while !predicate(self) {
- self.peek_char()?;
- end = self.mark;
- }
- self.end = self.prev;
- return Some(self.get_chars(&start, &end).iter().collect());
- }
-
- /// Returns true if the remainder of the line is whitespace.
- pub fn end_of_line(&self) -> bool {
- for c in &self.chars[self.mark.i..] {
- if *c == '\n' {
- return true;
- }
- if !c.is_whitespace() {
- return false
- }
- }
- return true;
- }
-
- /// Mark the next character to be consumed as the start character.
- pub fn mark_start(&mut self) {
- self.start = self.mark;
- }
-
- /// Mark the most recently consumed character as the start character.
- pub fn mark_start_prev(&mut self) {
- self.start = self.prev;
- }
-
- /// Mark the next character as the character following the end character.
- pub fn mark_end(&mut self) {
- self.end = self.mark;
- }
-
- /// Mark the next character as the character following the end character.
- pub fn mark_end_prev(&mut self) {
- self.end = self.prev;
- }
-
- /// Mark the next character to be consumed as the start of the child.
- pub fn mark_child(&mut self) {
- self.child = self.mark;
- }
-
- /// Return the SourceSpan between the start and end marks.
- pub fn get_source(&mut self) -> SourceSpan {
- let in_merged = SourceLocation {
- path: self.source_path.to_owned(),
- start: self.start.position,
- end: self.end.prev_position,
- };
- let in_source = if self.start.position.line >= self.embedded_first_line {
- if let Some(embedded_path) = &self.embedded_path {
- let offset = self.embedded_first_line;
- Some(
- SourceLocation {
- path: Some(embedded_path.to_owned()),
- start: SourcePosition {
- line: in_merged.start.line.saturating_sub(offset),
- column: in_merged.start.column,
- },
- end: SourcePosition {
- line: in_merged.end.line.saturating_sub(offset),
- column: in_merged.end.column,
- }
- }
- )
- } else {
- None
- }
- } else {
- None
- };
-
- let string = self.get_chars(&self.start, &self.end).iter().collect();
- SourceSpan { string, in_merged, in_source, child: None }
- }
-}
-
-
/// A cursor into the tokeniser's character buffer, pairing a character
/// index with a human-readable source position, plus a short history of
/// previous positions so the latest step can be undone.
#[derive(Clone, Copy)]
pub struct TokeniserMark {
    /// Position of the next character to be consumed.
    pub position: SourcePosition,
    /// Index of the next character to be consumed.
    pub i: usize,
    /// Position of the most recently consumed character.
    pub prev_position: SourcePosition,
    /// Position from two steps back; restored by `undo`.
    pub prev_prev_position: SourcePosition,
}
-
-impl TokeniserMark {
- pub const ZERO: Self = Self {
- position: SourcePosition::ZERO,
- i: 0,
- prev_position: SourcePosition::ZERO,
- prev_prev_position: SourcePosition::ZERO,
- };
-
- /// Advance to the next character.
- pub fn advance(&mut self, c: char) {
- self.prev_prev_position = self.prev_position;
- self.prev_position = self.position;
- self.position.advance(c);
- self.i += 1;
- }
-
- /// Ignore the most recently consumed character.
- pub fn undo(&mut self) {
- self.prev_position = self.prev_prev_position;
- }
-}