summaryrefslogtreecommitdiff
path: root/src/processors
diff options
context:
space:
mode:
authorBen Bridle <ben@derelict.engineering>2025-03-18 11:50:19 +1300
committerBen Bridle <ben@derelict.engineering>2025-03-18 11:50:29 +1300
commitf8a694267d3981b0437c05fc248406116ea9ec06 (patch)
treee7c0426f4278481490e269be7ccb0a710c22ebae /src/processors
parent87cdf5e88fd1d0aaddb91e216c47344effd63ed3 (diff)
downloadassembler-f8a694267d3981b0437c05fc248406116ea9ec06.zip
Large restructure
Files were moved to be better organised, error messages were changed to be more general, and a Compiler type was added to the library.
Diffstat (limited to 'src/processors')
-rw-r--r--src/processors/compiler.rs134
-rw-r--r--src/processors/mod.rs7
-rw-r--r--src/processors/resolver.rs344
-rw-r--r--src/processors/tokeniser.rs260
4 files changed, 745 insertions, 0 deletions
diff --git a/src/processors/compiler.rs b/src/processors/compiler.rs
new file mode 100644
index 0000000..626dfd3
--- /dev/null
+++ b/src/processors/compiler.rs
@@ -0,0 +1,134 @@
+use crate::*;
+
+use log::{info, warn};
+use vagabond::*;
+
+
+/// Compiles multiple source code files into one.
+pub struct Compiler {
+ pub source_path: Option<PathBuf>,
+ pub resolver: Resolver,
+ pub parse_symbols: ParseFn,
+ pub push_code: PushFn,
+}
+
+impl Compiler {
+ pub fn new(parse_symbols: ParseFn, push_code: PushFn) -> Self {
+ let resolver = Resolver::new();
+ Self { source_path: None, resolver, parse_symbols, push_code }
+ }
+
+ pub fn root_from_string<P: AsRef<Path>>(&mut self, source_code: String, path: P) {
+ let source_unit = SourceUnit::from_string(source_code, &path, self.parse_symbols);
+ self.source_path = Some(path.as_ref().to_path_buf());
+ self.resolver.include_source_unit(source_unit, None);
+ }
+
+ pub fn root_from_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), FileError> {
+ let source_unit = SourceUnit::from_path(&path, None, self.parse_symbols)?;
+ self.source_path = Some(path.as_ref().to_path_buf());
+ self.resolver.include_source_unit(source_unit, None);
+ return Ok(());
+ }
+
+ /// Find library files descending from the parent directory.
+ pub fn include_libs_from_parent(&mut self, ext: &str) {
+ if let Some(path) = &self.source_path {
+ if let Some(parent_path) = path.parent() {
+ let parent_path = parent_path.to_owned();
+ self.include_libs_from_path(&parent_path, ext);
+ }
+ }
+ }
+
+ /// Find library files at or descending from a path.
+ pub fn include_libs_from_path(&mut self, path: &Path, ext: &str) {
+ let libraries = gather_from_path(path, Some(ext), self.parse_symbols);
+ self.resolver.add_library_source_units(libraries);
+ self.resolver.resolve();
+ }
+
+ /// Find library files from a PATH-style environment variable.
+ pub fn include_libs_from_path_variable(&mut self, name: &str, ext: &str) {
+ let libraries = gather_from_path_variable(name, Some(ext), self.parse_symbols);
+ self.resolver.add_library_source_units(libraries);
+ self.resolver.resolve();
+ }
+
+ pub fn error(&self) -> Option<ResolverError> {
+ self.resolver.error()
+ }
+
+ pub fn hierarchy(&self) -> SourceHierarchy {
+ self.resolver.hierarchy()
+ }
+
+ pub fn symbols(&self) -> SourceSymbols {
+ self.resolver.symbols()
+ }
+
+ pub fn unused(&self) -> UnusedSymbols {
+ self.resolver.unused()
+ }
+
+ pub fn get_compiled_source(&mut self) -> Result<String, MergeError> {
+ self.resolver.calculate_hierarchy();
+ self.resolver.get_merged_source_code(self.push_code)
+ }
+}
+
+
+/// Gather all source units with a given extension using a PATH-style environment variable.
+pub fn gather_from_path_variable(variable: &str, extension: Option<&str>, parse: ParseFn) -> Vec<SourceUnit> {
+ let mut source_units = Vec::new();
+ if let Ok(string) = std::env::var(variable) {
+ for path in string.split(":").map(PathBuf::from) {
+ info!("Found path {path:?} in environment variable {variable:?}");
+ source_units.extend(gather_from_path(&path, extension, parse));
+ }
+ };
+ return source_units;
+}
+
+/// Gather source units with a given extension at or descending from a path.
+pub fn gather_from_path(path: &Path, extension: Option<&str>, parse: ParseFn) -> Vec<SourceUnit> {
+ let mut source_units = Vec::new();
+ let check_optional_file = |file: &Option<SourceFile>| -> bool {
+ match file {
+ Some(file) => match file.symbols {
+ Some(_) => { info!("Found source file at {:?}", file.path); true }
+ None => { warn!("Could not parse source file at {:?}", file.path); false }
+ }
+ None => true,
+ }
+ };
+ let mut gather_source_unit = |path: &Path| {
+ if let Ok(unit) = SourceUnit::from_path(&path, extension, parse) {
+ if unit.main.symbols.is_some() {
+ info!("Found source file at {:?}", unit.main.path);
+ let head_good = check_optional_file(&unit.head);
+ let tail_good = check_optional_file(&unit.tail);
+ if head_good && tail_good {
+ source_units.push(unit);
+ }
+ } else {
+ warn!("Could not parse source file at {path:?}");
+ check_optional_file(&unit.head);
+ check_optional_file(&unit.tail);
+ }
+ }
+ };
+ if let Ok(entry) = Entry::from_path(path) {
+ if EntryType::File == entry.entry_type {
+ gather_source_unit(&entry.path)
+ } else if EntryType::Directory == entry.entry_type {
+ info!("Traversing directory {path:?} for source files");
+ if let Ok(entries) = traverse_directory(entry.path) {
+ for entry in entries {
+ gather_source_unit(&entry.path)
+ }
+ }
+ }
+ };
+ return source_units;
+}
diff --git a/src/processors/mod.rs b/src/processors/mod.rs
new file mode 100644
index 0000000..a0ce56f
--- /dev/null
+++ b/src/processors/mod.rs
@@ -0,0 +1,7 @@
+mod compiler;
+mod resolver;
+mod tokeniser;
+
+pub use compiler::*;
+pub use resolver::*;
+pub use tokeniser::*;
diff --git a/src/processors/resolver.rs b/src/processors/resolver.rs
new file mode 100644
index 0000000..6c1d0f1
--- /dev/null
+++ b/src/processors/resolver.rs
@@ -0,0 +1,344 @@
+use crate::*;
+
+use log::{info, error};
+
+use std::collections::HashSet;
+
+
+/// Resolve undeclared symbols in a source unit with definitions from other units.
+pub struct Resolver {
+ /// Definitions from all included source units.
+ pub definitions: Vec<TrackedDefinition>,
+ /// All resolved references in all included source units.
+ pub resolved: Vec<TrackedReference>,
+ /// All unresolved references in all included source units.
+ pub unresolved: Vec<TrackedSymbol>,
+ /// All redefined (duplicate) definitions in all included source units.
+ pub redefinitions: Vec<TrackedRedefinition>,
+ /// All included source units.
+ pub source_units: Vec<HeirarchicalSourceUnit>,
+ /// The 'source_units' indices of the root source units.
+ pub root_unit_ids: Vec<usize>,
+ /// Source units that can be included later to resolve symbols.
+ pub library_source_units: Vec<SourceUnit>,
+}
+
+
+impl Resolver {
+ pub fn new() -> Self {
+ Self {
+ definitions: Vec::new(),
+ resolved: Vec::new(),
+ unresolved: Vec::new(),
+ redefinitions: Vec::new(),
+ source_units: Vec::new(),
+ root_unit_ids: Vec::new(),
+ library_source_units: Vec::new(),
+ }
+ }
+
+ pub fn include_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) {
+ let source_id = self.source_units.len();
+ use std::mem::take;
+
+ info!("Including source unit at {:?}", source_unit.main.path);
+ if let Some(symbols) = &mut source_unit.main.symbols {
+ self.include_symbols(take(symbols), source_id, SourceRole::Main);
+ }
+ if let Some(head) = &mut source_unit.head {
+ if let Some(symbols) = &mut head.symbols {
+ self.include_symbols(take(symbols), source_id, SourceRole::Head);
+ }
+ }
+ if let Some(tail) = &mut source_unit.tail {
+ if let Some(symbols) = &mut tail.symbols {
+ self.include_symbols(take(symbols), source_id, SourceRole::Tail);
+ }
+ }
+
+ match parent_id {
+ Some(parent_id) => match self.source_units.get_mut(parent_id) {
+ Some(parent) => { parent.child_ids.insert(source_id); }
+ None => { error!("Could not find parent (#{parent_id}) of source unit #{source_id}"); }
+ }
+ None => self.root_unit_ids.push(source_id),
+ }
+ self.source_units.push(
+ HeirarchicalSourceUnit {
+ source_unit,
+ child_ids: HashSet::new(),
+ parent_ids: HashSet::new(),
+ }
+ );
+ }
+
+ fn include_symbols(&mut self, symbols: Vec<Symbol>, source_id: usize, source_role: SourceRole) {
+ for symbol in symbols {
+ match symbol.role {
+ SymbolRole::Definition(_) => {
+ // Check if the symbol has already been defined.
+ let equal = |d: &TrackedDefinition| { &d.tracked.symbol == &symbol };
+ if let Some(original) = self.definitions.iter().position(equal) {
+ let tracked = TrackedSymbol { symbol, source_id, source_role };
+ let definition = original;
+ self.redefinitions.push(TrackedRedefinition { tracked, definition });
+ } else {
+ // Resolve all unresolved references that match this symbol.
+ let defines = |r: &mut TrackedSymbol| symbol.defines(&r.symbol);
+ let definition = self.definitions.len();
+ let mut references = Vec::new();
+ for tracked in self.unresolved.extract_if(.., defines) {
+ references.push(self.resolved.len());
+ self.resolved.push(TrackedReference { tracked, definition });
+ }
+ let tracked = TrackedSymbol { symbol, source_id, source_role };
+ self.definitions.push(TrackedDefinition { tracked, references });
+ }
+ }
+ SymbolRole::Reference => {
+ let tracked = TrackedSymbol { symbol, source_id, source_role };
+ let defines = |d: &TrackedDefinition| d.tracked.symbol.defines(&tracked.symbol);
+ match self.definitions.iter().position(defines) {
+ Some(index) => {
+ let definition = &mut self.definitions[index];
+ definition.references.push(self.resolved.len());
+ self.resolved.push(TrackedReference { tracked, definition: index })
+ }
+ None => self.unresolved.push(tracked),
+ }
+ }
+ }
+ }
+ }
+
+ /// Add a set of source units that might contain definitions for unresolved symbols.
+ pub fn add_library_source_units(&mut self, source_units: Vec<SourceUnit>) {
+ for source_unit in source_units {
+ // Discard source units that have already been included.
+ let in_included = self.source_units.iter().any(|s| s.source_unit == source_unit);
+ let in_library = self.library_source_units.iter().any(|s| *s == source_unit);
+ if !in_included && !in_library {
+ self.library_source_units.push(source_unit);
+ }
+ }
+ }
+
+ /// Attempt to resolve unresolved references with library source units.
+ pub fn resolve(&mut self) {
+ // Repeatedly test if each library source unit resolves an unresolved
+ // symbol, breaking the loop when no new resolutions are found.
+ 'outer: loop {
+ for (i, source_unit) in self.library_source_units.iter().enumerate() {
+ if let Some(id) = self.unit_resolved_by_unit(&source_unit) {
+ let source_unit = self.library_source_units.remove(i);
+ self.include_source_unit(source_unit, Some(id));
+ continue 'outer;
+ }
+ }
+ break;
+ }
+ }
+
+ /// Returns true if all references in all included source units have been
+ /// resolved.
+ pub fn error(&self) -> Option<ResolverError> {
+ match self.unresolved.is_empty() && self.redefinitions.is_empty() {
+ true => None,
+ false => Some(ResolverError { resolver: self })
+ }
+ }
+
+ /// Return a type that can print all symbol definitions and references.
+ pub fn symbols(&self) -> SourceSymbols {
+ SourceSymbols { resolver: self }
+ }
+
+ /// Return a type that can print all unused symbol definitions.
+ pub fn unused(&self) -> UnusedSymbols {
+ UnusedSymbols { resolver: self }
+ }
+
+ /// Return a type that can print the structure of the source tree.
+ pub fn hierarchy(&self) -> SourceHierarchy {
+ SourceHierarchy { resolver: self }
+ }
+
+ /// Return the ID of a source unit that contains an unresolved reference
+ /// to a symbol defined by this unit.
+ fn unit_resolved_by_unit(&self, source_unit: &SourceUnit) -> Option<usize> {
+ macro_rules! return_resolved_unit {
+ ($file:expr) => {
+ if let Some(symbols) = &$file.symbols {
+ if let Some(id) = self.unit_resolved_by_symbol(&symbols) {
+ return Some(id);
+ }
+ }
+ };
+ }
+ return_resolved_unit!(&source_unit.main);
+ if let Some(head) = &source_unit.head { return_resolved_unit!(&head) }
+ if let Some(tail) = &source_unit.tail { return_resolved_unit!(&tail) }
+ return None;
+ }
+
+ /// Returns the ID of a source unit that contains an unresolved reference
+ /// to a symbol defined by one of these symbols.
+ fn unit_resolved_by_symbol(&self, symbols: &[Symbol]) -> Option<usize> {
+ for symbol in symbols {
+ if let SymbolRole::Definition(_) = symbol.role {
+ for unresolved in &self.unresolved {
+ if unresolved.symbol.name == symbol.name {
+ return Some(unresolved.source_id);
+ }
+ }
+ }
+ }
+ return None;
+ }
+
+ /// Populate the .parent_ids field of every source unit. The parents of
+ /// each source unit are the units that define a symbol referenced by the
+ /// unit, where the definition type is MustPrecedeReference.
+ pub fn calculate_hierarchy(&mut self) {
+ // Clear the .parent_ids field of every source unit.
+ for source_unit in &mut self.source_units {
+ source_unit.parent_ids.clear();
+ }
+ // Populate the .parent_ids field of every source unit.
+ for reference in &self.resolved {
+ let predicate = |d: &&TrackedDefinition| d.tracked.symbol.name == reference.tracked.symbol.name;
+ if let Some(definition) = self.definitions.iter().find(predicate) {
+ // A unit cannot be its own parent.
+ let is_self = reference.tracked.source_id == definition.tracked.source_id;
+ let must_precede = SymbolRole::Definition(DefinitionType::MustPrecedeReference);
+ if is_self || definition.tracked.symbol.role != must_precede { continue; }
+ let referencing_unit = &mut self.source_units[reference.tracked.source_id];
+ referencing_unit.parent_ids.insert(definition.tracked.source_id);
+ };
+ }
+ }
+
+ /// Concatenate all included source units into one string.
+ /// If the source unit dependency graph contains a cycle, the IDs of the
+ /// source units involved in the cycle will be returned.
+ pub fn get_merged_source_code(&self, push: PushFn) -> Result<String, MergeError> {
+ // The ID of each source unit will come after the IDs of all
+ // parents of that unit.
+ let head_order = {
+ let mut included_source_ids = Vec::new();
+ let mut remaining_source_ids = (0..self.source_units.len()).collect::<Vec<_>>();
+
+ 'restart: while !remaining_source_ids.is_empty() {
+ // Iterate over source units, most-recently-included first.
+ 'next: for (i, id) in remaining_source_ids.iter().enumerate() {
+ let unit = &self.source_units[*id];
+ for parent_id in &unit.parent_ids {
+ // Skip this unit if a parent hasn't yet been included.
+ if !included_source_ids.contains(parent_id) {
+ continue 'next;
+ }
+ }
+ // Include this unit, then check remaining units from the start.
+ included_source_ids.push(*id);
+ remaining_source_ids.remove(i);
+ continue 'restart;
+ }
+ // All remaining source units depend on at least one remaining
+ // source unit, indicating a dependency cycle.
+ return Err(MergeError {
+ resolver: self,
+ cyclic_unit_ids: remaining_source_ids,
+ });
+ }
+ included_source_ids
+ };
+ let mut source_code = String::new();
+
+ // Push head source code in calculated parent-preceding order.
+ for id in &head_order {
+ let source_unit = &self.source_units[*id];
+ if let Some(head) = &source_unit.source_unit.head {
+ push(&mut source_code, head);
+ }
+ }
+ // Push main source code in source-added order.
+ // The root unit will be pushed first.
+ for source_unit in self.source_units.iter() {
+ let main = &source_unit.source_unit.main;
+ push(&mut source_code, &main);
+ }
+ // Push tail source code in reverse source-added order.
+ // The root unit will be pushed last.
+ for source_unit in self.source_units.iter().rev() {
+ if let Some(tail) = &source_unit.source_unit.tail {
+ push(&mut source_code, tail);
+ }
+ }
+ return Ok(source_code);
+ }
+}
+
+
+/// A source unit tracked with pointers to parents and dependents.
+pub struct HeirarchicalSourceUnit {
+ pub source_unit: SourceUnit,
+ /// Pointers to source units that resolve references this unit.
+ pub child_ids: HashSet<usize>,
+ /// Pointers to source units that must be included before this unit.
+ pub parent_ids: HashSet<usize>,
+}
+
+pub struct TrackedDefinition {
+ pub tracked: TrackedSymbol,
+ /// Points to resolved references in `resolved`.
+ pub references: Vec<usize>,
+}
+
+pub struct TrackedRedefinition {
+ pub tracked: TrackedSymbol,
+ /// Points to existing definition in `definitions`.
+ pub definition: usize,
+}
+
+pub struct TrackedReference {
+ pub tracked: TrackedSymbol,
+ /// Points to resolving definition in `definitions`.
+ pub definition: usize,
+}
+
+pub struct TrackedSymbol {
+ pub symbol: Symbol,
+ pub source_id: usize,
+ pub source_role: SourceRole,
+}
+
+impl TrackedSymbol {
+ pub fn context<'a>(&'a self, resolver: &'a Resolver) -> Context<'a> {
+ let source_unit = &resolver.source_units[self.source_id].source_unit;
+ let source_code = match self.source_role {
+ SourceRole::Main => source_unit.main.source_code.as_str(),
+ SourceRole::Head => match &source_unit.head {
+ Some(head) => head.source_code.as_str(),
+ None => unreachable!("Failed to find source code of head file"),
+ }
+ SourceRole::Tail => match &source_unit.tail {
+ Some(tail) => tail.source_code.as_str(),
+ None => unreachable!("Failed to find source code of tail file"),
+ }
+ };
+ Context { source_code, source: &self.symbol.source }
+ }
+}
+
+impl PartialEq for TrackedSymbol {
+ fn eq(&self, other: &TrackedSymbol) -> bool {
+ self.symbol.name.eq(&other.symbol.name)
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum SourceRole {
+ Main,
+ Head,
+ Tail,
+}
diff --git a/src/processors/tokeniser.rs b/src/processors/tokeniser.rs
new file mode 100644
index 0000000..0350afe
--- /dev/null
+++ b/src/processors/tokeniser.rs
@@ -0,0 +1,260 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+/// Break a character stream down into individual tokens.
+pub struct Tokeniser {
+ /// All characters to be parsed, characters are never removed.
+ pub chars: Vec<char>,
+ /// Path of the whole source file.
+ pub source_path: Option<PathBuf>,
+ /// Original path of the embedded source file.
+ pub embedded_path: Option<PathBuf>,
+ /// Line where the embedded source file begins.
+ pub embedded_first_line: usize,
+ /// Mark tracking the next character to parse.
+ pub mark: TokeniserMark,
+ /// Mark tracking the most recent character of the current token.
+ pub prev: TokeniserMark,
+ /// Position of the first character of the current token.
+ pub start: TokeniserMark,
+ /// Position after the final character of the current token.
+ pub end: TokeniserMark,
+ /// Position to begin subtokenisation from.
+ pub child: TokeniserMark,
+ /// List of characters that start a new token.
+ pub delimiters: Vec<char>,
+ /// List of characters that terminate a token.
+ pub terminators: Vec<char>,
+}
+
+impl Tokeniser {
+ pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ Self {
+ chars: source_code.chars().collect(),
+ source_path: path.map(|p| p.into()),
+ embedded_path: None,
+ embedded_first_line: 0,
+ mark: TokeniserMark::ZERO,
+ prev: TokeniserMark::ZERO,
+ start: TokeniserMark::ZERO,
+ end: TokeniserMark::ZERO,
+ child: TokeniserMark::ZERO,
+ delimiters: Vec::new(),
+ terminators: Vec::new(),
+ }
+ }
+
+ /// Create a tokeniser from child to end.
+ pub fn tokenise_child_span(&mut self) -> Self {
+ let mut start = self.child;
+ start.i = 0;
+ Self {
+ chars: self.get_chars(&self.child, &self.end),
+ source_path: self.source_path.clone(),
+ embedded_path: self.embedded_path.clone(),
+ embedded_first_line: self.embedded_first_line.clone(),
+ mark: start,
+ prev: start,
+ start: start,
+ end: start,
+ child: start,
+ delimiters: Vec::new(),
+ terminators: Vec::new(),
+ }
+ }
+
+ pub fn add_delimiters(&mut self, delimiters: &[char]) {
+ self.delimiters.extend_from_slice(delimiters);
+ }
+
+ pub fn add_terminators(&mut self, terminators: &[char]) {
+ self.terminators.extend_from_slice(terminators);
+ }
+
+ pub fn get_chars(&self, start: &TokeniserMark, end: &TokeniserMark) -> Vec<char> {
+ self.chars[start.i..end.i].iter().map(char::to_owned).collect()
+ }
+
+ /// Return the next character without consuming it.
+ pub fn peek_char(&self) -> Option<char> {
+ self.chars.get(self.mark.i).copied()
+ }
+
+ /// Consume and return the next character.
+ pub fn eat_char(&mut self) -> Option<char> {
+ let option = self.peek_char();
+ if let Some(c) = option {
+ self.prev = self.mark;
+ self.mark.advance(c);
+ self.mark_end();
+ }
+ return option;
+ }
+
+ /// Consume next characters if they match a pattern.
+ pub fn eat_if(&mut self, pattern: &str) -> Option<String> {
+ // Check that next characters match the pattern.
+ for (i, c) in pattern.chars().enumerate() {
+ if let Some(d) = self.chars.get(self.mark.i + i) {
+ if c == *d {
+ continue;
+ }
+ }
+ return None;
+ }
+ // Consume the next characters.
+ self.prev = self.mark;
+ for c in pattern.chars() {
+ self.mark.advance(c);
+ self.mark_end();
+ }
+ return Some(pattern.to_string());
+ }
+
+ /// Consume whitespace.
+ pub fn eat_whitespace(&mut self) {
+ while let Some(c) = self.peek_char() {
+ match c.is_whitespace() {
+ true => self.eat_char(),
+ false => break,
+ };
+ }
+ }
+
+ /// Remove a full token from the queue.
+ pub fn eat_token(&mut self) -> String {
+ let mut token = String::new();
+ while let Some(peek) = self.peek_char() {
+ if peek.is_whitespace() || self.delimiters.contains(&peek) {
+ break;
+ }
+ let c = self.eat_char().unwrap();
+ token.push(c);
+ if self.terminators.contains(&c) {
+ break;
+ }
+ }
+ return token;
+ }
+
+ /// Return all characters found until the predicate returns true.
+ /// Returns None if end of source is reached before delimiter is found.
+ pub fn track_until(&mut self, mut predicate: impl FnMut(&mut Self) -> bool) -> Option<String> {
+ let start = self.mark;
+ let mut end = self.mark;
+ while !predicate(self) {
+ self.peek_char()?;
+ end = self.mark;
+ }
+ self.end = self.prev;
+ return Some(self.get_chars(&start, &end).iter().collect());
+ }
+
+ /// Returns true if the remainder of the line is whitespace.
+ pub fn end_of_line(&self) -> bool {
+ for c in &self.chars[self.mark.i..] {
+ if *c == '\n' {
+ return true;
+ }
+ if !c.is_whitespace() {
+ return false
+ }
+ }
+ return true;
+ }
+
+ /// Mark the next character to be consumed as the start character.
+ pub fn mark_start(&mut self) {
+ self.start = self.mark;
+ }
+
+ /// Mark the most recently consumed character as the start character.
+ pub fn mark_start_prev(&mut self) {
+ self.start = self.prev;
+ }
+
+ /// Mark the next character as the character following the end character.
+ pub fn mark_end(&mut self) {
+ self.end = self.mark;
+ }
+
+ /// Mark the next character as the character following the end character.
+ pub fn mark_end_prev(&mut self) {
+ self.end = self.prev;
+ }
+
+ /// Mark the next character to be consumed as the start of the child.
+ pub fn mark_child(&mut self) {
+ self.child = self.mark;
+ }
+
+ /// Return the SourceSpan between the start and end marks.
+ pub fn get_source(&mut self) -> SourceSpan {
+ let in_merged = SourceLocation {
+ path: self.source_path.to_owned(),
+ start: self.start.position,
+ end: self.end.prev_position,
+ };
+ let in_source = if self.start.position.line >= self.embedded_first_line {
+ if let Some(embedded_path) = &self.embedded_path {
+ let offset = self.embedded_first_line;
+ Some(
+ SourceLocation {
+ path: Some(embedded_path.to_owned()),
+ start: SourcePosition {
+ line: in_merged.start.line.saturating_sub(offset),
+ column: in_merged.start.column,
+ },
+ end: SourcePosition {
+ line: in_merged.end.line.saturating_sub(offset),
+ column: in_merged.end.column,
+ }
+ }
+ )
+ } else {
+ None
+ }
+ } else {
+ None
+ };
+
+ let string = self.get_chars(&self.start, &self.end).iter().collect();
+ SourceSpan { string, in_merged, in_source, child: None }
+ }
+}
+
+
+#[derive(Clone, Copy)]
+pub struct TokeniserMark {
+ /// Position of the next character to be consumed.
+ pub position: SourcePosition,
+ /// Index of the next character to be consumed.
+ pub i: usize,
+ /// Position of the most recently consumed character.
+ pub prev_position: SourcePosition,
+ pub prev_prev_position: SourcePosition,
+}
+
+impl TokeniserMark {
+ pub const ZERO: Self = Self {
+ position: SourcePosition::ZERO,
+ i: 0,
+ prev_position: SourcePosition::ZERO,
+ prev_prev_position: SourcePosition::ZERO,
+ };
+
+ /// Advance to the next character.
+ pub fn advance(&mut self, c: char) {
+ self.prev_prev_position = self.prev_position;
+ self.prev_position = self.position;
+ self.position.advance(c);
+ self.i += 1;
+ }
+
+ /// Ignore the most recently consumed character.
+ pub fn undo(&mut self) {
+ self.prev_position = self.prev_prev_position;
+ }
+}