summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Bridle <ben@derelict.engineering>2025-02-05 12:58:02 +1300
committerBen Bridle <ben@derelict.engineering>2025-02-05 13:03:36 +1300
commit80da2af821385b2fc89091e9ac37a047349da4bd (patch)
tree2ba50368301e041f8d1b99145ab0a1fe28f91571
parent8d11be64f6c1747e7c4049105a6dd4ea9ab0d27f (diff)
downloadassembler-80da2af821385b2fc89091e9ac37a047349da4bd.zip
Implement source unit compilation, symbol resolution, error reporting
This library can now carry out all stages of assembly from collecting source fragments to resolving symbols to pruning unused libraries to generating a single compiled source file. Pretty-printing of state has also been implemented in this library. The source tree hierarchy, symbol resolution errors, and file read errors can all be printed in a tidy format.
-rw-r--r--Cargo.lock23
-rw-r--r--Cargo.toml3
-rw-r--r--src/context.rs8
-rw-r--r--src/errors/file_error.rs41
-rw-r--r--src/errors/merge_error.rs41
-rw-r--r--src/errors/mod.rs71
-rw-r--r--src/errors/resolver_error.rs30
-rw-r--r--src/lib.rs17
-rw-r--r--src/locators/mod.rs (renamed from src/locators.rs)0
-rw-r--r--src/resolver.rs296
-rw-r--r--src/source_hierarchy.rs58
-rw-r--r--src/source_unit.rs121
-rw-r--r--src/tokeniser.rs1
13 files changed, 708 insertions, 2 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 2d6c017..ad65887 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,5 +3,28 @@
version = 3
[[package]]
+name = "ansi"
+version = "1.0.0"
+source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c582410ad2a"
+
+[[package]]
name = "assembler"
version = "1.0.0"
+dependencies = [
+ "ansi",
+ "log",
+ "vagabond",
+]
+
+[[package]]
+name = "log"
+version = "1.1.2"
+source = "git+git://benbridle.com/log?tag=v1.1.2#3d5d1f7a19436151ba1dd52a2b50664969d90db6"
+dependencies = [
+ "ansi",
+]
+
+[[package]]
+name = "vagabond"
+version = "1.0.1"
+source = "git+git://benbridle.com/vagabond?tag=v1.0.1#08f3153fea62ea81a42438347eeee058f5bec199"
diff --git a/Cargo.toml b/Cargo.toml
index bba445c..d79bc8e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,3 +4,6 @@ version = "1.0.0"
edition = "2021"
[dependencies]
+ansi = { git = "git://benbridle.com/ansi", tag = "v1.0.0" }
+log = { git = "git://benbridle.com/log", tag = "v1.1.2" }
+vagabond = { git = "git://benbridle.com/vagabond", tag = "v1.0.1" }
diff --git a/src/context.rs b/src/context.rs
new file mode 100644
index 0000000..c015c7e
--- /dev/null
+++ b/src/context.rs
@@ -0,0 +1,8 @@
+use crate::*;
+
+
+/// Source context for a token: the source text paired with the span used to locate the token when reporting an issue.
+pub struct Context<'a> {
+ pub source_code: &'a str, // Text from which the reported source line is read.
+ pub source: &'a SourceSpan, // Span giving the line and columns of the token.
+}
diff --git a/src/errors/file_error.rs b/src/errors/file_error.rs
new file mode 100644
index 0000000..e601f94
--- /dev/null
+++ b/src/errors/file_error.rs
@@ -0,0 +1,41 @@
+pub use std::path::{Path, PathBuf};
+
+
+/// Reasons that a source file could not be loaded.
+pub enum FileError {
+    /// The file has an invalid extension.
+    InvalidExtension,
+    /// The file was not found.
+    NotFound,
+    /// The file is not readable.
+    NotReadable,
+    /// The path is a directory, not a file.
+    IsADirectory,
+    /// The file does not contain valid UTF-8 text.
+    InvalidUtf8,
+    /// Any other error raised while reading from the path.
+    Unknown,
+}
+
+impl std::fmt::Debug for FileError {
+    /// Write the human-readable description of this error.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::InvalidExtension => "File has invalid extension",
+            Self::NotFound => "File was not found",
+            Self::NotReadable => "File is not readable",
+            Self::IsADirectory => "File is a directory",
+            Self::InvalidUtf8 => "File does not contain valid UTF-8 text",
+            Self::Unknown => "Unknown error while attempting to read from path",
+        })
+    }
+}
+
+
+/// Read the file at `path` and return its contents as a string.
+///
+/// I/O failures are mapped onto the matching `FileError` variant, and
+/// contents that are not valid UTF-8 are reported as `InvalidUtf8`.
+pub fn read_file(path: &Path) -> Result<String, FileError> {
+    use std::io::ErrorKind;
+
+    let bytes = std::fs::read(path).map_err(|err| match err.kind() {
+        ErrorKind::NotFound => FileError::NotFound,
+        ErrorKind::PermissionDenied => FileError::NotReadable,
+        ErrorKind::IsADirectory => FileError::IsADirectory,
+        _ => FileError::Unknown,
+    })?;
+    String::from_utf8(bytes).map_err(|_| FileError::InvalidUtf8)
+}
diff --git a/src/errors/merge_error.rs b/src/errors/merge_error.rs
new file mode 100644
index 0000000..a694b71
--- /dev/null
+++ b/src/errors/merge_error.rs
@@ -0,0 +1,41 @@
+use crate::*;
+
+use ansi::*;
+use log::error;
+
+
+pub struct MergeError<'a> {
+ pub resolver: &'a Resolver,
+ /// A list of source units involved in a cycle.
+ pub cyclic_unit_ids: Vec<usize>,
+}
+
+impl MergeError<'_> {
+ pub fn report(&self) {
+ error!("A cyclic dependency was found between the following libraries:");
+ for id in &self.cyclic_unit_ids {
+ if let Some(unit) = self.resolver.source_units.get(*id) {
+ let path = &unit.source_unit.path();
+ match unit.source_unit.name() {
+ Some(name) =>
+ eprintln!("{name}{NORMAL}{DIM} ({path}){NORMAL}"),
+ None =>
+ eprintln!("{path}"),
+ };
+ // Print each parent involved in the dependency cycle.
+ for parent_id in &unit.parent_ids {
+ if !self.cyclic_unit_ids.contains(parent_id) { continue; }
+ if let Some(parent_unit) = self.resolver.source_units.get(*parent_id) {
+ let parent_path = &parent_unit.source_unit.path();
+ match parent_unit.source_unit.name() {
+ Some(parent_name) =>
+ eprintln!(" => {parent_name} {DIM}({parent_path}){NORMAL}"),
+ None =>
+ eprintln!(" => {parent_path}"),
+ };
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/errors/mod.rs b/src/errors/mod.rs
new file mode 100644
index 0000000..b0bf7e4
--- /dev/null
+++ b/src/errors/mod.rs
@@ -0,0 +1,71 @@
+mod file_error;
+mod merge_error;
+mod resolver_error;
+
+pub use file_error::*;
+pub use merge_error::*;
+pub use resolver_error::*;
+
+use crate::*;
+
+use ansi::*;
+use log::LogLevel;
+
+
+/// Log `message` at the given level, followed by the location of the issue
+/// and the offending line of source code with the affected columns
+/// coloured and underlined.
+pub fn report_source_issue(level: LogLevel, context: &Context, message: &str) {
+    let in_merged = &context.source.in_merged;
+    // The line index is used directly to look up the source line, and is
+    // displayed with one added.
+    let line_num = in_merged.start.line + 1;
+    let digits = line_num.to_string().len();
+    let w = digits + 3;
+    let arrow = "-->";
+    let space = " ";
+    // First highlighted column, and one past the last highlighted column.
+    let left = in_merged.start.column;
+    let right = in_merged.end.column + 1;
+    let colour = match level {
+        LogLevel::Info => BLUE,
+        LogLevel::Warn => YELLOW,
+        LogLevel::Error => RED,
+        LogLevel::Fatal => RED,
+    };
+    let source_line = context.source_code.split('\n').nth(in_merged.start.line)
+        .unwrap_or("<error reading line from source>");
+
+    // The whole report is accumulated into one string and logged at the end.
+    let mut string = message.to_string();
+    macro_rules! push {
+        ($($tokens:tt)*) => { string.push_str(&format!($($tokens)*)) };
+    }
+
+    // Locations.
+    push!("{NORMAL}\n");
+    push!("{BLUE}{arrow:>w$}{NORMAL} {in_merged}\n", w=w);
+    if let Some(in_source) = &context.source.in_source {
+        push!("{BLUE}{arrow:>w$}{NORMAL} {in_source}\n", w=w);
+    }
+
+    // Source code line, with the affected columns coloured.
+    push!("{BLUE} {line_num} | {NORMAL}");
+    for (i, c) in source_line.chars().enumerate() {
+        if i == left { push!("{colour}") }
+        if i == right { push!("{NORMAL}") }
+        string.push(c);
+    }
+    push!("{NORMAL}\n");
+
+    // Underline beneath the affected columns.
+    push!("{BLUE} {space:>w$} | {NORMAL}", w=digits);
+    string.push_str(&" ".repeat(left));
+    push!("{colour}");
+    string.push_str(&"^".repeat(right.saturating_sub(left)));
+    push!("{NORMAL}");
+
+    // Emit the completed message.
+    match level {
+        LogLevel::Info => log::info!("{}", string),
+        LogLevel::Warn => log::warn!("{}", string),
+        LogLevel::Error => log::error!("{}", string),
+        LogLevel::Fatal => log::fatal!("{}", string),
+    }
+}
diff --git a/src/errors/resolver_error.rs b/src/errors/resolver_error.rs
new file mode 100644
index 0000000..de8b8d1
--- /dev/null
+++ b/src/errors/resolver_error.rs
@@ -0,0 +1,30 @@
+use crate::*;
+
+use log::LogLevel;
+
+
+pub struct ResolverError<'a> {
+ pub resolver: &'a Resolver,
+}
+
+impl<'a> ResolverError<'a> {
+ pub fn report(&self) {
+ for reference in &self.resolver.unresolved {
+ let message = format!(
+ "Undefined symbol, no label or macro has been defined with the name {:?}",
+ &reference.symbol.source.string,
+ );
+ let context = reference.context(&self.resolver);
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ for redefinition in &self.resolver.redefinitions {
+ let definition = self.resolver.definitions.get(redefinition.1).unwrap();
+ let message = format!(
+ "Redefined symbol, first defined at {}",
+ &definition.symbol.source.in_merged,
+ );
+ let context = redefinition.0.context(&self.resolver);
+ report_source_issue(LogLevel::Error, &context, &message);
+ }
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 2ebe010..b8ce3c9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,18 @@
-mod locators;
-pub use locators::*;
+#![feature(extract_if)]
+#![feature(io_error_more)]
+mod context;
+mod errors;
+mod locators;
+mod resolver;
+mod source_hierarchy;
+mod source_unit;
mod tokeniser;
+
+pub use context::*;
+pub use errors::*;
+pub use locators::*;
+pub use resolver::*;
+pub use source_hierarchy::*;
+pub use source_unit::*;
pub use tokeniser::*;
diff --git a/src/locators.rs b/src/locators/mod.rs
index b7db1ee..b7db1ee 100644
--- a/src/locators.rs
+++ b/src/locators/mod.rs
diff --git a/src/resolver.rs b/src/resolver.rs
new file mode 100644
index 0000000..23dc73a
--- /dev/null
+++ b/src/resolver.rs
@@ -0,0 +1,296 @@
+use crate::*;
+
+use log::error;
+
+type PushFn = fn(&mut String, &SourceFile);
+
+
+/// Resolves undefined symbols in a source unit using definitions from other (library) source units.
+pub struct Resolver {
+ /// The first definition of each symbol name, from all included source units.
+ pub definitions: Vec<TrackedSymbol>,
+ /// All references in all included source units that match a definition.
+ pub resolved: Vec<TrackedSymbol>,
+ /// All references in all included source units that match no definition yet.
+ pub unresolved: Vec<TrackedSymbol>,
+ /// All redefined (duplicate) definitions in all included source units,
+ /// each paired with the 'definitions' index of the existing definition.
+ pub redefinitions: Vec<(TrackedSymbol, usize)>,
+ /// All included source units; the index of a unit is its source ID.
+ pub source_units: Vec<HeirarchicalSourceUnit>,
+ /// The 'source_units' indices of the root source units (units included without a parent).
+ pub root_unit_ids: Vec<usize>,
+ /// Source units that can be included later by resolve() to resolve symbols.
+ pub library_source_units: Vec<SourceUnit>,
+}
+
+
+impl Resolver {
+ /// Create a resolver containing a single root source unit.
+ pub fn new(source_unit: SourceUnit) -> Self {
+     let mut resolver = Self {
+         definitions: Vec::new(),
+         resolved: Vec::new(),
+         unresolved: Vec::new(),
+         redefinitions: Vec::new(),
+         source_units: Vec::new(),
+         root_unit_ids: Vec::new(),
+         library_source_units: Vec::new(),
+     };
+     // A unit included without a parent becomes a root unit.
+     resolver.include_source_unit(source_unit, None);
+     resolver
+ }
+
+ /// Include a source unit, tracking all of its symbols.
+ ///
+ /// The symbols are moved out of the main, head, and tail files of the
+ /// unit, leaving their symbol lists empty. If `parent_id` is given, the
+ /// unit is recorded as a child of that unit, otherwise it becomes a root.
+ pub fn include_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) {
+     use std::mem::take;
+     // The unit will be pushed to the end of the list of source units.
+     let source_id = self.source_units.len();
+
+     let main_symbols = take(&mut source_unit.main.symbols);
+     self.include_symbols(main_symbols, source_id, SourceRole::Main);
+     if let Some(head) = source_unit.head.as_mut() {
+         self.include_symbols(take(&mut head.symbols), source_id, SourceRole::Head);
+     }
+     if let Some(tail) = source_unit.tail.as_mut() {
+         self.include_symbols(take(&mut tail.symbols), source_id, SourceRole::Tail);
+     }
+
+     if let Some(parent_id) = parent_id {
+         if let Some(parent) = self.source_units.get_mut(parent_id) {
+             parent.child_ids.push(source_id);
+         } else {
+             error!("Could not find parent (#{parent_id}) of source unit #{source_id}");
+         }
+     } else {
+         self.root_unit_ids.push(source_id);
+     }
+
+     let unit = HeirarchicalSourceUnit {
+         source_unit,
+         child_ids: Vec::new(),
+         parent_ids: Vec::new(),
+     };
+     self.source_units.push(unit);
+ }
+
+ /// Track each symbol as a definition, redefinition, resolved reference,
+ /// or unresolved reference. Symbols are matched by name.
+ fn include_symbols(&mut self, symbols: Vec<Symbol>, source_id: usize, source_role: SourceRole) {
+     for symbol in symbols {
+         match symbol.role {
+             SymbolRole::Reference => {
+                 let reference = TrackedSymbol { symbol, source_id, source_role };
+                 // Tracked symbols compare equal when their names match.
+                 if self.definitions.contains(&reference) {
+                     self.resolved.push(reference);
+                 } else {
+                     self.unresolved.push(reference);
+                 }
+             }
+             SymbolRole::Definition(_) => {
+                 let existing = self.definitions.iter()
+                     .position(|d| d.symbol.name == symbol.name);
+                 match existing {
+                     // The symbol has already been defined.
+                     Some(original) => {
+                         let definition = TrackedSymbol { symbol, source_id, source_role };
+                         self.redefinitions.push((definition, original));
+                     }
+                     None => {
+                         // Resolve all unresolved references that match this symbol.
+                         let newly_resolved = self.unresolved
+                             .extract_if(|r| r.symbol.name == symbol.name);
+                         self.resolved.extend(newly_resolved);
+                         let definition = TrackedSymbol { symbol, source_id, source_role };
+                         self.definitions.push(definition);
+                     }
+                 }
+             }
+         }
+     }
+ }
+
+ /// Register source units that may later be included by `resolve` if they
+ /// define a symbol that is still unresolved.
+ pub fn add_library_source_units(&mut self, source_units: Vec<SourceUnit>) {
+     self.library_source_units.extend(source_units);
+ }
+
+ /// Attempt to resolve unresolved references with library source units.
+ ///
+ /// The first library unit that defines an unresolved symbol is included
+ /// as a child of the unit holding the reference, then the search starts
+ /// again from the first library unit. This repeats until no remaining
+ /// library unit resolves anything.
+ pub fn resolve(&mut self) {
+     loop {
+         let found = self.library_source_units.iter().enumerate()
+             .find_map(|(index, unit)| {
+                 self.unit_resolved_by_unit(unit).map(|parent_id| (index, parent_id))
+             });
+         let Some((index, parent_id)) = found else { break };
+         let source_unit = self.library_source_units.remove(index);
+         self.include_source_unit(source_unit, Some(parent_id));
+     }
+ }
+
+ /// Returns true if all references in all included source units have been
+ /// resolved.
+ pub fn error(&self) -> Option<ResolverError> {
+ match self.unresolved.is_empty() {
+ true => None,
+ false => Some(ResolverError { resolver: self })
+ }
+ }
+
+ /// Return a SourceHierarchy borrowing this resolver, which can print the tree of included source units.
+ pub fn hierarchy(&self) -> SourceHierarchy {
+ SourceHierarchy { resolver: self }
+ }
+
+ /// Return the ID of a source unit that contains an unresolved reference
+ /// to a symbol defined by this unit.
+ fn unit_resolved_by_unit(&self, source_unit: &SourceUnit) -> Option<usize> {
+ if let Some(id) = self.unit_resolved_by_symbol(&source_unit.main.symbols) {
+ return Some(id);
+ }
+ if let Some(head) = &source_unit.head {
+ if let Some(id) = self.unit_resolved_by_symbol(&head.symbols) {
+ return Some(id);
+ }
+ }
+ if let Some(tail) = &source_unit.tail {
+ if let Some(id) = self.unit_resolved_by_symbol(&tail.symbols) {
+ return Some(id);
+ }
+ }
+ return None;
+ }
+
+ /// Returns the ID of a source unit that contains an unresolved reference
+ /// to a symbol defined by one of these symbols.
+ fn unit_resolved_by_symbol(&self, symbols: &[Symbol]) -> Option<usize> {
+ for symbol in symbols {
+ if let SymbolRole::Definition(_) = symbol.role {
+ for unresolved in &self.unresolved {
+ if unresolved.symbol.name == symbol.name {
+ return Some(unresolved.source_id);
+ }
+ }
+ }
+ }
+ return None;
+ }
+
+ /// Populate the .parent_ids field of every source unit. The parents of
+ /// each source unit are the units that define a symbol referenced by the
+ /// unit, where the definition type is MustPrecedeReference.
+ ///
+ /// Each parent is recorded once per unit, however many symbols the unit
+ /// references from that parent.
+ pub fn calculate_hierarchy(&mut self) {
+     // Discard the results of any previous calculation.
+     for source_unit in &mut self.source_units {
+         source_unit.parent_ids.clear();
+     }
+     for reference in &self.resolved {
+         let definition = self.definitions.iter()
+             .find(|d| d.symbol.name == reference.symbol.name);
+         let Some(definition) = definition else { continue };
+         // Only definitions that must precede their references impose an order.
+         let must_precede = matches!(
+             definition.symbol.role,
+             SymbolRole::Definition(DefinitionType::MustPrecedeReference),
+         );
+         // A unit cannot be its own parent.
+         let is_self = reference.source_id == definition.source_id;
+         if is_self || !must_precede { continue; }
+         let parent_ids = &mut self.source_units[reference.source_id].parent_ids;
+         if !parent_ids.contains(&definition.source_id) {
+             parent_ids.push(definition.source_id);
+         }
+     }
+ }
+
+ /// Concatenate all included source units into one string, calling 'push' to append each file.
+ /// Heads come first (every unit after its parents), then mains, then tails in reverse order.
+ /// If the heads cannot be ordered, a MergeError holding the IDs of all unordered units is returned.
+ pub fn get_merged_source_code(&self, push: PushFn) -> Result<String, MergeError> {
+ // The ID of each source unit will come after the IDs of all
+ // parents of that unit.
+ let head_order = {
+ let mut included_source_ids = Vec::new();
+ let mut remaining_source_ids = (0..self.source_units.len()).collect::<Vec<_>>();
+
+ 'restart: while !remaining_source_ids.is_empty() {
+ // NOTE(review): this visits remaining units in ascending ID order (earliest-included first), not most-recent first; confirm intent.
+ 'next: for (i, id) in remaining_source_ids.iter().enumerate() {
+ let unit = &self.source_units[*id];
+ for parent_id in &unit.parent_ids {
+ // Skip this unit if a parent hasn't yet been included.
+ if !included_source_ids.contains(parent_id) {
+ continue 'next;
+ }
+ }
+ // Include this unit, then check remaining units from the start.
+ included_source_ids.push(*id);
+ remaining_source_ids.remove(i);
+ continue 'restart;
+ }
+ // No remaining unit has all of its parents included, indicating a
+ // dependency cycle among (or ahead of) the remaining source units.
+ return Err(MergeError {
+ resolver: self,
+ cyclic_unit_ids: remaining_source_ids,
+ });
+ }
+ included_source_ids
+ };
+ let mut source_code = String::new();
+
+ // Push head source code in calculated parent-preceding order.
+ for id in &head_order {
+ let source_unit = &self.source_units[*id];
+ if let Some(head) = &source_unit.source_unit.head {
+ push(&mut source_code, head);
+ }
+ }
+ // Push main source code in source-added order.
+ // The first unit included will be pushed first.
+ for source_unit in self.source_units.iter() {
+ let main = &source_unit.source_unit.main;
+ push(&mut source_code, &main);
+ }
+ // Push tail source code in reverse source-added order.
+ // The first unit included will be pushed last.
+ for source_unit in self.source_units.iter().rev() {
+ if let Some(tail) = &source_unit.source_unit.tail {
+ push(&mut source_code, tail);
+ }
+ }
+ return Ok(source_code);
+ }
+}
+
+
+/// A source unit tracked with the IDs ('source_units' indices) of related source units.
+pub struct HeirarchicalSourceUnit {
+ pub source_unit: SourceUnit,
+ /// IDs of the source units that were included to resolve references in this unit.
+ pub child_ids: Vec<usize>,
+ /// IDs of the source units that must be included before this unit (set by calculate_hierarchy).
+ pub parent_ids: Vec<usize>,
+}
+
+/// A symbol tagged with the source unit and the file within that unit
+/// that the symbol was read from.
+pub struct TrackedSymbol {
+    pub symbol: Symbol,
+    /// The 'source_units' index of the unit containing the symbol.
+    pub source_id: usize,
+    /// Which file of the source unit contains the symbol.
+    pub source_role: SourceRole,
+}
+
+impl TrackedSymbol {
+    /// Return the source context of this symbol, for reporting issues.
+    ///
+    /// Panics if the source ID is out of bounds for the resolver, or if the
+    /// head or tail file named by the source role is missing from the unit.
+    pub fn context<'a>(&'a self, resolver: &'a Resolver) -> Context<'a> {
+        let source_unit = &resolver.source_units[self.source_id].source_unit;
+        let source_file = match self.source_role {
+            SourceRole::Main => &source_unit.main,
+            SourceRole::Head => source_unit.head.as_ref()
+                .unwrap_or_else(|| unreachable!("Failed to find source code of head file")),
+            SourceRole::Tail => source_unit.tail.as_ref()
+                .unwrap_or_else(|| unreachable!("Failed to find source code of tail file")),
+        };
+        Context {
+            source_code: source_file.source_code.as_str(),
+            source: &self.symbol.source,
+        }
+    }
+}
+
+/// Tracked symbols are equal when their names are equal, regardless of
+/// role, location, or the source unit they came from.
+impl PartialEq for TrackedSymbol {
+    fn eq(&self, other: &Self) -> bool {
+        self.symbol.name == other.symbol.name
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum SourceRole { // Identifies which file of a source unit a symbol came from.
+ Main, // The main file of the unit.
+ Head, // The optional head file of the unit.
+ Tail, // The optional tail file of the unit.
+}
diff --git a/src/source_hierarchy.rs b/src/source_hierarchy.rs
new file mode 100644
index 0000000..9478c56
--- /dev/null
+++ b/src/source_hierarchy.rs
@@ -0,0 +1,58 @@
+use crate::*;
+
+use ansi::*;
+
+
+/// Prints the tree of source units included by a resolver.
+pub struct SourceHierarchy<'a> {
+    /// The resolver whose source units will be printed.
+    pub resolver: &'a Resolver,
+}
+
+impl<'a> SourceHierarchy<'a> {
+    /// Print the tree of included source units to stderr, starting from
+    /// the root units.
+    pub fn report(&self) {
+        eprintln!(".");
+        let mut root_ids = self.resolver.root_unit_ids.iter().peekable();
+        while let Some(&id) = root_ids.next() {
+            let end = root_ids.peek().is_none();
+            self.report_leaf(id, Vec::new(), end);
+        }
+    }
+
+    /// Print one source unit, followed recursively by all of its children.
+    ///
+    /// `levels` holds one entry per ancestor level, each true if all entries
+    /// in that level have been printed. `end` is true if all siblings of
+    /// this entry have been printed.
+    fn report_leaf(&self, id: usize, mut levels: Vec<bool>, end: bool) {
+        for &finished in &levels {
+            eprint!("{}", if finished { " " } else { "│ " });
+        }
+        eprint!("{}", if end { "└── " } else { "├── " });
+
+        let Some(unit) = self.resolver.source_units.get(id) else {
+            eprintln!("<error loading source unit details>");
+            return;
+        };
+        let path_str = unit.source_unit.main.path.as_os_str().to_string_lossy();
+        match unit.source_unit.name() {
+            Some(name_str) => {
+                eprint!("{name_str}{BLUE}");
+                if unit.source_unit.head.is_some() { eprint!(" +head") }
+                if unit.source_unit.tail.is_some() { eprint!(" +tail") }
+                // Show the number of unresolved references in this unit.
+                let unresolved = self.resolver.unresolved.iter()
+                    .filter(|symbol| symbol.source_id == id)
+                    .count();
+                if unresolved > 0 { eprint!("{RED} ({unresolved})"); }
+                eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}");
+            }
+            None => eprintln!("{path_str}"),
+        }
+
+        levels.push(end);
+        let mut child_ids = unit.child_ids.iter().peekable();
+        while let Some(&child_id) = child_ids.next() {
+            let end = child_ids.peek().is_none();
+            self.report_leaf(child_id, levels.clone(), end);
+        }
+    }
+}
diff --git a/src/source_unit.rs b/src/source_unit.rs
new file mode 100644
index 0000000..3e674be
--- /dev/null
+++ b/src/source_unit.rs
@@ -0,0 +1,121 @@
+use crate::*;
+
+use vagabond::*;
+
+
+type ParseFn = fn(&str, Option<&Path>) -> Vec<Symbol>;
+
+
+/// Gather all source units from a PATH-style environment variable.
+///
+/// The value of the variable is split with the path-list separator of the
+/// current platform, so that paths containing a drive-letter colon are kept
+/// intact on Windows. Source units are then gathered from each path in
+/// turn. An unset variable yields no source units.
+pub fn gather_from_path_variable(variable: &str, extension: &str, parse: ParseFn) -> Vec<SourceUnit> {
+    let Some(value) = std::env::var_os(variable) else {
+        return Vec::new();
+    };
+    std::env::split_paths(&value)
+        .flat_map(|path| gather_from_path(&path, extension, parse))
+        .collect()
+}
+
+/// Gather source units at or descending from a path.
+///
+/// A file path yields at most one source unit, and a directory path yields
+/// a source unit for every traversed entry that loads successfully. Paths
+/// and entries that cannot be read or loaded are skipped silently.
+pub fn gather_from_path(path: &Path, extension: &str, parse: ParseFn) -> Vec<SourceUnit> {
+    let mut source_units = Vec::new();
+    let Ok(entry) = Entry::from_path(path) else {
+        return source_units;
+    };
+    if entry.entry_type == EntryType::File {
+        source_units.extend(SourceUnit::from_path(&entry.path, extension, parse).ok());
+    } else if entry.entry_type == EntryType::Directory {
+        if let Ok(entries) = traverse_directory(entry.path) {
+            let units = entries.into_iter()
+                .filter_map(|entry| SourceUnit::from_path(&entry.path, extension, parse).ok());
+            source_units.extend(units);
+        }
+    }
+    source_units
+}
+
+
+/// A main source file with an optional associated head file and tail file.
+pub struct SourceUnit {
+    pub main: SourceFile,
+    pub head: Option<SourceFile>,
+    pub tail: Option<SourceFile>,
+}
+
+impl SourceUnit {
+    /// Load source from a main file and an associated head and tail file.
+    ///
+    /// For a main file `name.ext`, the head file is `name.head.ext` and the
+    /// tail file is `name.tail.ext`. A head or tail file that cannot be
+    /// read is treated as absent.
+    ///
+    /// Returns `FileError::InvalidExtension` if the path does not end with
+    /// `.ext`, or if the path is itself a head or tail file. Returns the
+    /// error from reading the main file if it cannot be read.
+    pub fn from_path<P: AsRef<Path>>(path: P, extension: &str, parse: ParseFn) -> Result<Self, FileError> {
+        let path = path.as_ref();
+        let main_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
+        let main_path_str = main_path.to_string_lossy();
+        let head_extension = format!("head.{extension}");
+        let tail_extension = format!("tail.{extension}");
+        // Only match a suffix that follows a dot, so that file names such
+        // as `overhead.ext` or `detail.ext` are not mistaken for head or
+        // tail files, and `nameext` is not mistaken for `name.ext`.
+        let has_extension = |suffix: &str| {
+            main_path_str.strip_suffix(suffix).is_some_and(|stem| stem.ends_with('.'))
+        };
+        let is_main = has_extension(extension);
+        let is_head = has_extension(&head_extension);
+        let is_tail = has_extension(&tail_extension);
+        // Head and tail files will be picked up later along with the main file.
+        if !is_main || is_head || is_tail {
+            return Err(FileError::InvalidExtension);
+        }
+
+        let source_code = read_file(path)?;
+        let symbols = parse(&source_code, Some(path));
+        let head_path = main_path.with_extension(&head_extension);
+        let tail_path = main_path.with_extension(&tail_extension);
+
+        // Load and parse an optional file, returning None if it can't be read.
+        let load_optional = |path: PathBuf| -> Option<SourceFile> {
+            let source_code = read_file(&path).ok()?;
+            let symbols = parse(&source_code, Some(path.as_path()));
+            Some(SourceFile { path, source_code, symbols })
+        };
+        let head = load_optional(head_path);
+        let tail = load_optional(tail_path);
+        let main = SourceFile { path: main_path, source_code, symbols };
+        Ok(SourceUnit { main, head, tail })
+    }
+
+    /// Load from a string of source code, with no head or tail file.
+    /// The path is canonicalized if possible, otherwise it is used as given.
+    pub fn from_string<P: AsRef<Path>>(source_code: String, path: P, parse: ParseFn) -> Self {
+        let path = path.as_ref();
+        let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
+        let symbols = parse(&source_code, Some(path.as_path()));
+        Self { main: SourceFile { path, source_code, symbols }, head: None, tail: None }
+    }
+
+    /// Return the file name of the main file, if the path has one.
+    pub fn name(&self) -> Option<String> {
+        self.main.path.file_name().map(|s| s.to_string_lossy().to_string())
+    }
+
+    /// Return the path of the main file as a string, replacing any invalid
+    /// Unicode.
+    pub fn path(&self) -> String {
+        self.main.path.as_os_str().to_string_lossy().to_string()
+    }
+}
+
+
+pub struct SourceFile { // A single file of source code with the symbols parsed from it.
+ pub path: PathBuf, // Path the file was loaded from.
+ pub source_code: String, // Full text of the file.
+ pub symbols: Vec<Symbol>, // Symbols returned by the parse function; emptied when the unit is included in a Resolver.
+}
+
+pub struct Symbol { // A named definition or reference found in source code.
+ pub name: String, // Name used to match references against definitions.
+ pub source: SourceSpan, // Location of the symbol, used when reporting issues.
+ pub role: SymbolRole, // Whether the symbol is a definition or a reference.
+}
+
+#[derive(PartialEq)]
+pub enum SymbolRole { // The part a symbol plays in symbol resolution.
+ Definition(DefinitionType), // Defines the name; can resolve references with the same name.
+ Reference, // Refers to a name that must be defined somewhere.
+}
+
+#[derive(PartialEq)]
+pub enum DefinitionType { // Ordering constraint between a definition and its references.
+ MustPrecedeReference, // The defining unit becomes a parent of each referencing unit in calculate_hierarchy.
+ CanFollowReference, // Imposes no ordering between units in calculate_hierarchy.
+}
diff --git a/src/tokeniser.rs b/src/tokeniser.rs
index eeab6e6..4ff3d0b 100644
--- a/src/tokeniser.rs
+++ b/src/tokeniser.rs
@@ -3,6 +3,7 @@ use crate::*;
use std::path::PathBuf;
+/// Break a character stream down into individual tokens.
pub struct Tokeniser {
/// Characters waiting to be parsed, in reverse order.
pub chars: Vec<char>,