summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock23
-rw-r--r--Cargo.toml3
-rw-r--r--src/context.rs8
-rw-r--r--src/errors/file_error.rs41
-rw-r--r--src/errors/merge_error.rs41
-rw-r--r--src/errors/mod.rs71
-rw-r--r--src/errors/resolver_error.rs30
-rw-r--r--src/lib.rs17
-rw-r--r--src/locators/mod.rs (renamed from src/locators.rs)0
-rw-r--r--src/resolver.rs296
-rw-r--r--src/source_hierarchy.rs58
-rw-r--r--src/source_unit.rs121
-rw-r--r--src/tokeniser.rs1
13 files changed, 708 insertions, 2 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 2d6c017..ad65887 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,5 +3,28 @@
version = 3
[[package]]
+name = "ansi"
+version = "1.0.0"
+source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c582410ad2a"
+
+[[package]]
name = "assembler"
version = "1.0.0"
+dependencies = [
+ "ansi",
+ "log",
+ "vagabond",
+]
+
+[[package]]
+name = "log"
+version = "1.1.2"
+source = "git+git://benbridle.com/log?tag=v1.1.2#3d5d1f7a19436151ba1dd52a2b50664969d90db6"
+dependencies = [
+ "ansi",
+]
+
+[[package]]
+name = "vagabond"
+version = "1.0.1"
+source = "git+git://benbridle.com/vagabond?tag=v1.0.1#08f3153fea62ea81a42438347eeee058f5bec199"
diff --git a/Cargo.toml b/Cargo.toml
index bba445c..d79bc8e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,3 +4,6 @@ version = "1.0.0"
edition = "2021"
[dependencies]
+ansi = { git = "git://benbridle.com/ansi", tag = "v1.0.0" }
+log = { git = "git://benbridle.com/log", tag = "v1.1.2" }
+vagabond = { git = "git://benbridle.com/vagabond", tag = "v1.0.1" }
diff --git a/src/context.rs b/src/context.rs
new file mode 100644
index 0000000..c015c7e
--- /dev/null
+++ b/src/context.rs
@@ -0,0 +1,8 @@
+use crate::*;
+
+
+/// Source context for a token: the source text it belongs to and its location.
+pub struct Context<'a> {
+ /// The source code text that the token's line is read from when reporting.
+ pub source_code: &'a str,
+ /// The span recording where the token is located.
+ pub source: &'a SourceSpan,
+}
diff --git a/src/errors/file_error.rs b/src/errors/file_error.rs
new file mode 100644
index 0000000..e601f94
--- /dev/null
+++ b/src/errors/file_error.rs
@@ -0,0 +1,41 @@
+pub use std::path::{Path, PathBuf};
+
+
+/// Reasons a source file could not be loaded.
+pub enum FileError {
+ /// The file name does not have the expected extension.
+ InvalidExtension,
+ /// No file exists at the path.
+ NotFound,
+ /// Permission to read the file was denied.
+ NotReadable,
+ /// The path refers to a directory.
+ IsADirectory,
+ /// The file contents are not valid UTF-8 text.
+ InvalidUtf8,
+ /// Any other error raised while reading from the path.
+ Unknown,
+}
+
+impl std::fmt::Debug for FileError {
+    /// Write the human-readable description of this error.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let description = match self {
+            Self::InvalidExtension => "File has invalid extension",
+            Self::NotFound => "File was not found",
+            Self::NotReadable => "File is not readable",
+            Self::IsADirectory => "File is a directory",
+            Self::InvalidUtf8 => "File does not contain valid UTF-8 text",
+            Self::Unknown => "Unknown error while attempting to read from path",
+        };
+        f.write_str(description)
+    }
+}
+
+
+/// Read the file at `path` and return its contents as a string.
+///
+/// I/O failures are mapped onto [`FileError`] by their error kind, and
+/// contents that are not valid UTF-8 produce [`FileError::InvalidUtf8`].
+pub fn read_file(path: &Path) -> Result<String, FileError> {
+    let bytes = std::fs::read(path).map_err(|err| match err.kind() {
+        std::io::ErrorKind::NotFound => FileError::NotFound,
+        std::io::ErrorKind::PermissionDenied => FileError::NotReadable,
+        std::io::ErrorKind::IsADirectory => FileError::IsADirectory,
+        _ => FileError::Unknown,
+    })?;
+    String::from_utf8(bytes).map_err(|_| FileError::InvalidUtf8)
+}
diff --git a/src/errors/merge_error.rs b/src/errors/merge_error.rs
new file mode 100644
index 0000000..a694b71
--- /dev/null
+++ b/src/errors/merge_error.rs
@@ -0,0 +1,41 @@
+use crate::*;
+
+use ansi::*;
+use log::error;
+
+
+/// Error returned when source units cannot be ordered for merging.
+pub struct MergeError<'a> {
+ /// The resolver whose source units could not be ordered.
+ pub resolver: &'a Resolver,
+ /// A list of source units involved in a cycle.
+ /// Each entry is an index into the resolver's `source_units`.
+ pub cyclic_unit_ids: Vec<usize>,
+}
+
+impl MergeError<'_> {
+    /// Log the cyclic dependency error, then print every unit in the cycle
+    /// to stderr, each followed by its parents that are also in the cycle.
+    pub fn report(&self) {
+        error!("A cyclic dependency was found between the following libraries:");
+        let all_units = &self.resolver.source_units;
+        // IDs that do not refer to a known source unit are silently skipped.
+        for unit in self.cyclic_unit_ids.iter().filter_map(|id| all_units.get(*id)) {
+            let path = unit.source_unit.path();
+            if let Some(name) = unit.source_unit.name() {
+                eprintln!("{name}{NORMAL}{DIM} ({path}){NORMAL}");
+            } else {
+                eprintln!("{path}");
+            }
+            // Print each parent involved in the dependency cycle.
+            let cyclic_parents = unit.parent_ids.iter()
+                .filter(|parent_id| self.cyclic_unit_ids.contains(*parent_id))
+                .filter_map(|parent_id| all_units.get(*parent_id));
+            for parent_unit in cyclic_parents {
+                let parent_path = parent_unit.source_unit.path();
+                if let Some(parent_name) = parent_unit.source_unit.name() {
+                    eprintln!(" => {parent_name} {DIM}({parent_path}){NORMAL}");
+                } else {
+                    eprintln!(" => {parent_path}");
+                }
+            }
+        }
+    }
+}
diff --git a/src/errors/mod.rs b/src/errors/mod.rs
new file mode 100644
index 0000000..b0bf7e4
--- /dev/null
+++ b/src/errors/mod.rs
@@ -0,0 +1,71 @@
+mod file_error;
+mod merge_error;
+mod resolver_error;
+
+pub use file_error::*;
+pub use merge_error::*;
+pub use resolver_error::*;
+
+use crate::*;
+
+use ansi::*;
+use log::LogLevel;
+
+
+/// Log a message about a span of source code, with the offending line shown.
+///
+/// The logged text is `message`, followed by one location line per known
+/// location, the source line with the span coloured, and a row of carets
+/// underlining the span.
+///
+/// * `level` - selects both the highlight colour and the log macro used.
+/// * `context` - the source text and span that the message refers to.
+/// * `message` - the text placed at the start of the logged output.
+pub fn report_source_issue(level: LogLevel, context: &Context, message: &str) {
+ // Prepare variables.
+ let in_merged = &context.source.in_merged;
+ // Stored line numbers are zero-based; add one for display.
+ let line_num = in_merged.start.line + 1;
+ let digits = line_num.to_string().len();
+ // Width used to right-align the arrow on the location lines.
+ let w = digits + 3;
+ let arrow = "-->";
+ let mut string = message.to_string();
+
+ // Append formatted text to the output string.
+ macro_rules! push {
+ ($($tokens:tt)*) => { string.push_str(&format!($($tokens)*)) };
+ }
+
+ // Format message and locations.
+ push!("{NORMAL}\n");
+ push!("{BLUE}{arrow:>w$}{NORMAL} {in_merged}\n", w=w);
+ if let Some(in_source) = &context.source.in_source {
+ push!("{BLUE}{arrow:>w$}{NORMAL} {in_source}\n", w=w);
+ }
+
+ // Format source context.
+ // The highlighted range is `left..right`, with `right` one past the end column.
+ let left = in_merged.start.column;
+ let right = in_merged.end.column + 1;
+ let source_line = context.source_code.split('\n').nth(in_merged.start.line)
+ .unwrap_or("<error reading line from source>");
+ let space = " ";
+ let colour = match level {
+ LogLevel::Info => BLUE,
+ LogLevel::Warn => YELLOW,
+ LogLevel::Error => RED,
+ LogLevel::Fatal => RED,
+ };
+
+ // Print source code line.
+ push!("{BLUE} {line_num} | {NORMAL}");
+ // Columns are compared against character indices, not byte offsets.
+ for (i, c) in source_line.chars().enumerate() {
+ if i == left { push!("{colour}") }
+ if i == right { push!("{NORMAL}") }
+ push!("{c}");
+ }
+ push!("{NORMAL}\n");
+
+ // Print source code underline.
+ push!("{BLUE} {space:>w$} | {NORMAL}", w=digits);
+ for _ in 0..left { push!(" "); }
+ push!("{colour}");
+ for _ in left..right { push!("^"); }
+ push!("{NORMAL}");
+
+ // Print the completed message.
+ match level {
+ LogLevel::Info => log::info!( "{}", string),
+ LogLevel::Warn => log::warn!( "{}", string),
+ LogLevel::Error => log::error!("{}", string),
+ LogLevel::Fatal => log::fatal!("{}", string),
+ }
+}
diff --git a/src/errors/resolver_error.rs b/src/errors/resolver_error.rs
new file mode 100644
index 0000000..de8b8d1
--- /dev/null
+++ b/src/errors/resolver_error.rs
@@ -0,0 +1,30 @@
+use crate::*;
+
+use log::LogLevel;
+
+
+/// Reports the unresolved references and redefinitions held by a resolver.
+pub struct ResolverError<'a> {
+ /// The resolver whose issues will be reported.
+ pub resolver: &'a Resolver,
+}
+
+impl<'a> ResolverError<'a> {
+    /// Report every unresolved reference, then every redefinition, as an
+    /// error with its source context.
+    pub fn report(&self) {
+        let resolver = self.resolver;
+        for reference in &resolver.unresolved {
+            let undefined_name = &reference.symbol.source.string;
+            let message = format!(
+                "Undefined symbol, no label or macro has been defined with the name {:?}",
+                undefined_name,
+            );
+            report_source_issue(LogLevel::Error, &reference.context(resolver), &message);
+        }
+        for (duplicate, original_index) in &resolver.redefinitions {
+            // Each redefinition stores the 'definitions' index of the original.
+            let original = resolver.definitions.get(*original_index).unwrap();
+            let first_location = &original.symbol.source.in_merged;
+            let message = format!(
+                "Redefined symbol, first defined at {}",
+                first_location,
+            );
+            report_source_issue(LogLevel::Error, &duplicate.context(resolver), &message);
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 2ebe010..b8ce3c9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,18 @@
-mod locators;
-pub use locators::*;
+#![feature(extract_if)]
+#![feature(io_error_more)]
+mod context;
+mod errors;
+mod locators;
+mod resolver;
+mod source_hierarchy;
+mod source_unit;
mod tokeniser;
+
+pub use context::*;
+pub use errors::*;
+pub use locators::*;
+pub use resolver::*;
+pub use source_hierarchy::*;
+pub use source_unit::*;
pub use tokeniser::*;
diff --git a/src/locators.rs b/src/locators/mod.rs
index b7db1ee..b7db1ee 100644
--- a/src/locators.rs
+++ b/src/locators/mod.rs
diff --git a/src/resolver.rs b/src/resolver.rs
new file mode 100644
index 0000000..23dc73a
--- /dev/null
+++ b/src/resolver.rs
@@ -0,0 +1,296 @@
+use crate::*;
+
+use log::error;
+
+/// Callback used when merging: it is handed the merged source string and a
+/// source file, and is expected to append that file's code to the string.
+type PushFn = fn(&mut String, &SourceFile);
+
+
+/// Resolve undeclared symbols in a source unit with definitions from other units.
+///
+/// Symbols are tracked by name across every included source unit, and
+/// library source units are pulled in on demand by `resolve`.
+pub struct Resolver {
+ /// Definitions from all included source units.
+ pub definitions: Vec<TrackedSymbol>,
+ /// All resolved references in all included source units.
+ pub resolved: Vec<TrackedSymbol>,
+ /// All unresolved references in all included source units.
+ pub unresolved: Vec<TrackedSymbol>,
+ /// All redefined (duplicate) definitions in all included source units.
+ /// Points to the 'definitions' index of the existing definition.
+ pub redefinitions: Vec<(TrackedSymbol, usize)>,
+ /// All included source units.
+ /// The ID of a source unit is its index in this list.
+ pub source_units: Vec<HeirarchicalSourceUnit>,
+ /// The 'source_units' indices of the root source units.
+ pub root_unit_ids: Vec<usize>,
+ /// Source units that can be included later to resolve symbols.
+ pub library_source_units: Vec<SourceUnit>,
+}
+
+
+impl Resolver {
+ /// Create a resolver and include `source_unit` as its first root unit.
+ pub fn new(source_unit: SourceUnit) -> Self {
+     let mut resolver = Self {
+         definitions: Vec::new(),
+         resolved: Vec::new(),
+         unresolved: Vec::new(),
+         redefinitions: Vec::new(),
+         source_units: Vec::new(),
+         root_unit_ids: Vec::new(),
+         library_source_units: Vec::new(),
+     };
+     resolver.include_source_unit(source_unit, None);
+     resolver
+ }
+
+ /// Include a source unit, tracking its symbols and its place in the tree.
+ ///
+ /// The symbols of the main, head, and tail files are moved out of the
+ /// source unit and into the resolver. With a `parent_id` the unit is
+ /// recorded as a child of that unit, otherwise it becomes a root unit.
+ pub fn include_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) {
+     // The new unit's ID is the index it will occupy once pushed.
+     let source_id = self.source_units.len();
+
+     let main_symbols = std::mem::take(&mut source_unit.main.symbols);
+     self.include_symbols(main_symbols, source_id, SourceRole::Main);
+     if let Some(head) = source_unit.head.as_mut() {
+         let head_symbols = std::mem::take(&mut head.symbols);
+         self.include_symbols(head_symbols, source_id, SourceRole::Head);
+     }
+     if let Some(tail) = source_unit.tail.as_mut() {
+         let tail_symbols = std::mem::take(&mut tail.symbols);
+         self.include_symbols(tail_symbols, source_id, SourceRole::Tail);
+     }
+
+     if let Some(parent_id) = parent_id {
+         if let Some(parent) = self.source_units.get_mut(parent_id) {
+             parent.child_ids.push(source_id);
+         } else {
+             error!("Could not find parent (#{parent_id}) of source unit #{source_id}");
+         }
+     } else {
+         self.root_unit_ids.push(source_id);
+     }
+
+     let tracked_unit = HeirarchicalSourceUnit {
+         source_unit,
+         child_ids: Vec::new(),
+         parent_ids: Vec::new(),
+     };
+     self.source_units.push(tracked_unit);
+ }
+
+ /// Track a list of symbols that came from one file of a source unit.
+ ///
+ /// Definitions are recorded as either new definitions or redefinitions,
+ /// and references are sorted into the resolved or unresolved lists.
+ /// Symbols are matched by name only.
+ fn include_symbols(&mut self, symbols: Vec<Symbol>, source_id: usize, source_role: SourceRole) {
+ for symbol in symbols {
+ match symbol.role {
+ SymbolRole::Definition(_) => {
+ // Check if the symbol has already been defined.
+ let equal = |d: &TrackedSymbol| { &d.symbol.name == &symbol.name };
+ if let Some(original) = self.definitions.iter().position(equal) {
+ // Keep the first definition and record this one as a duplicate of it.
+ let definition = TrackedSymbol { symbol, source_id, source_role };
+ let redefinition = (definition, original);
+ self.redefinitions.push(redefinition);
+ } else {
+ // Resolve all unresolved references that match this symbol.
+ let equal = |s: &mut TrackedSymbol| s.symbol.name == symbol.name;
+ for symbol in self.unresolved.extract_if(equal) {
+ self.resolved.push(symbol);
+ }
+ let definition = TrackedSymbol { symbol, source_id, source_role };
+ self.definitions.push(definition);
+ }
+ }
+ SymbolRole::Reference => {
+ let reference = TrackedSymbol { symbol, source_id, source_role };
+ // TrackedSymbol equality compares names only, so this checks
+ // whether a definition with the same name exists.
+ match self.definitions.contains(&reference) {
+ true => self.resolved.push(reference),
+ false => self.unresolved.push(reference),
+ }
+ }
+ }
+ }
+ }
+
+ /// Register extra source units that may later be included in order to
+ /// provide definitions for unresolved symbols.
+ pub fn add_library_source_units(&mut self, source_units: Vec<SourceUnit>) {
+     self.library_source_units.extend(source_units);
+ }
+
+ /// Attempt to resolve unresolved references with library source units.
+ ///
+ /// The first library unit that defines an unresolved symbol is included
+ /// as a child of the referencing unit, then the search starts again from
+ /// the first library unit. The search ends once no remaining library
+ /// unit resolves anything.
+ pub fn resolve(&mut self) {
+     loop {
+         let resolution = self.library_source_units.iter().enumerate()
+             .find_map(|(index, unit)| {
+                 self.unit_resolved_by_unit(unit).map(|parent_id| (index, parent_id))
+             });
+         match resolution {
+             Some((index, parent_id)) => {
+                 let unit = self.library_source_units.remove(index);
+                 self.include_source_unit(unit, Some(parent_id));
+             }
+             None => break,
+         }
+     }
+ }
+
+ /// Returns true if all references in all included source units have been
+ /// resolved.
+ pub fn error(&self) -> Option<ResolverError> {
+ match self.unresolved.is_empty() {
+ true => None,
+ false => Some(ResolverError { resolver: self })
+ }
+ }
+
+ /// Return a type that can print the structure of the source tree.
+ /// The returned value borrows this resolver.
+ pub fn hierarchy(&self) -> SourceHierarchy {
+ SourceHierarchy { resolver: self }
+ }
+
+ /// Return the ID of a source unit that contains an unresolved reference
+ /// to a symbol defined by this unit.
+ ///
+ /// The main file is searched first, then the head file and the tail
+ /// file where they exist.
+ fn unit_resolved_by_unit(&self, source_unit: &SourceUnit) -> Option<usize> {
+     std::iter::once(&source_unit.main)
+         .chain(source_unit.head.as_ref())
+         .chain(source_unit.tail.as_ref())
+         .find_map(|file| self.unit_resolved_by_symbol(&file.symbols))
+ }
+
+ /// Returns the ID of a source unit that contains an unresolved reference
+ /// to a symbol defined by one of these symbols.
+ ///
+ /// Only definition symbols are considered, in order, and the first
+ /// unresolved reference with a matching name decides the result.
+ fn unit_resolved_by_symbol(&self, symbols: &[Symbol]) -> Option<usize> {
+     symbols.iter()
+         .filter(|symbol| matches!(symbol.role, SymbolRole::Definition(_)))
+         .find_map(|definition| {
+             self.unresolved.iter()
+                 .find(|reference| reference.symbol.name == definition.name)
+                 .map(|reference| reference.source_id)
+         })
+ }
+
+ /// Populate the .parent_ids field of every source unit. The parents of
+ /// each source unit are the units that define a symbol referenced by the
+ /// unit, where the definition type is MustPrecedeReference.
+ ///
+ /// Each parent is recorded once per unit, however many of its symbols
+ /// the unit references, so that reports do not list a parent repeatedly.
+ pub fn calculate_hierarchy(&mut self) {
+     // Clear the .parent_ids field of every source unit.
+     for source_unit in &mut self.source_units {
+         source_unit.parent_ids.clear();
+     }
+     // Populate the .parent_ids field of every source unit.
+     let must_precede = SymbolRole::Definition(DefinitionType::MustPrecedeReference);
+     for reference in &self.resolved {
+         let definition = self.definitions.iter()
+             .find(|d| d.symbol.name == reference.symbol.name);
+         let Some(definition) = definition else { continue };
+         // A unit cannot be its own parent.
+         if reference.source_id == definition.source_id { continue; }
+         // Only definitions that must precede their references impose an order.
+         if definition.symbol.role != must_precede { continue; }
+         let parent_ids = &mut self.source_units[reference.source_id].parent_ids;
+         if !parent_ids.contains(&definition.source_id) {
+             parent_ids.push(definition.source_id);
+         }
+     }
+ }
+
+ /// Concatenate all included source units into one string.
+ ///
+ /// Head files are pushed first, ordered so that every unit follows all of
+ /// its parents, then main files in inclusion order, then tail files in
+ /// reverse inclusion order. The `push` callback is called once per file.
+ ///
+ /// If the source unit dependency graph contains a cycle, an error is
+ /// returned holding the IDs of every source unit that could not be
+ /// ordered, which are the units still waiting on a parent.
+ pub fn get_merged_source_code(&self, push: PushFn) -> Result<String, MergeError> {
+ // The ID of each source unit will come after the IDs of all
+ // parents of that unit.
+ let head_order = {
+ let mut included_source_ids = Vec::new();
+ let mut remaining_source_ids = (0..self.source_units.len()).collect::<Vec<_>>();
+
+ 'restart: while !remaining_source_ids.is_empty() {
+ // Iterate over the remaining source units in ascending ID order.
+ 'next: for (i, id) in remaining_source_ids.iter().enumerate() {
+ let unit = &self.source_units[*id];
+ for parent_id in &unit.parent_ids {
+ // Skip this unit if a parent hasn't yet been included.
+ if !included_source_ids.contains(parent_id) {
+ continue 'next;
+ }
+ }
+ // Include this unit, then check remaining units from the start.
+ included_source_ids.push(*id);
+ remaining_source_ids.remove(i);
+ continue 'restart;
+ }
+ // All remaining source units depend on at least one remaining
+ // source unit, indicating a dependency cycle.
+ return Err(MergeError {
+ resolver: self,
+ cyclic_unit_ids: remaining_source_ids,
+ });
+ }
+ included_source_ids
+ };
+ let mut source_code = String::new();
+
+ // Push head source code in calculated parent-preceding order.
+ for id in &head_order {
+ let source_unit = &self.source_units[*id];
+ if let Some(head) = &source_unit.source_unit.head {
+ push(&mut source_code, head);
+ }
+ }
+ // Push main source code in source-added order.
+ // The root unit will be pushed first.
+ for source_unit in self.source_units.iter() {
+ let main = &source_unit.source_unit.main;
+ push(&mut source_code, &main);
+ }
+ // Push tail source code in reverse source-added order.
+ // The root unit will be pushed last.
+ for source_unit in self.source_units.iter().rev() {
+ if let Some(tail) = &source_unit.source_unit.tail {
+ push(&mut source_code, tail);
+ }
+ }
+ return Ok(source_code);
+ }
+}
+
+
+/// A source unit tracked with pointers to parents and dependents.
+/// Each pointer is an index into the resolver's list of source units.
+pub struct HeirarchicalSourceUnit {
+ pub source_unit: SourceUnit,
+ /// Pointers to source units that were included to resolve references
+ /// made by this unit.
+ pub child_ids: Vec<usize>,
+ /// Pointers to source units that must be included before this unit.
+ pub parent_ids: Vec<usize>,
+}
+
+/// A symbol tagged with the source unit and file that it came from.
+pub struct TrackedSymbol {
+ pub symbol: Symbol,
+ /// The ID of the source unit that contains this symbol.
+ pub source_id: usize,
+ /// Which file of the source unit contains this symbol.
+ pub source_role: SourceRole,
+}
+
+impl TrackedSymbol {
+    /// Build the reporting context for this symbol, pairing its span with
+    /// the source code of the file that it came from.
+    ///
+    /// Panics if the source unit ID is out of range, or if the symbol is
+    /// marked as coming from a head or tail file that the unit lacks.
+    pub fn context<'a>(&'a self, resolver: &'a Resolver) -> Context<'a> {
+        let unit = &resolver.source_units[self.source_id].source_unit;
+        let file = match self.source_role {
+            SourceRole::Main => &unit.main,
+            SourceRole::Head => unit.head.as_ref()
+                .unwrap_or_else(|| unreachable!("Failed to find source code of head file")),
+            SourceRole::Tail => unit.tail.as_ref()
+                .unwrap_or_else(|| unreachable!("Failed to find source code of tail file")),
+        };
+        Context { source_code: file.source_code.as_str(), source: &self.symbol.source }
+    }
+}
+
+impl PartialEq for TrackedSymbol {
+    /// Tracked symbols are equal when their names match; the source unit
+    /// and source role are not compared.
+    fn eq(&self, other: &TrackedSymbol) -> bool {
+        self.symbol.name == other.symbol.name
+    }
+}
+
+/// Identifies which file of a source unit a symbol came from.
+#[derive(Clone, Copy, Debug)]
+pub enum SourceRole {
+ /// The main file of the source unit.
+ Main,
+ /// The optional head file of the source unit.
+ Head,
+ /// The optional tail file of the source unit.
+ Tail,
+}
diff --git a/src/source_hierarchy.rs b/src/source_hierarchy.rs
new file mode 100644
index 0000000..9478c56
--- /dev/null
+++ b/src/source_hierarchy.rs
@@ -0,0 +1,58 @@
+use crate::*;
+
+use ansi::*;
+
+
+/// Prints the tree of source units held by a resolver.
+pub struct SourceHierarchy<'a> {
+ /// The resolver whose source units will be printed.
+ pub resolver: &'a Resolver,
+}
+
+impl<'a> SourceHierarchy<'a> {
+ /// Print the source unit tree to stderr, starting from the root units.
+ pub fn report(&self) {
+ eprintln!(".");
+ let len = self.resolver.root_unit_ids.len();
+ for (i, id) in self.resolver.root_unit_ids.iter().enumerate() {
+ let end = i + 1 == len;
+ self.report_leaf(*id, Vec::new(), end);
+ }
+ }
+
+ /// Print one source unit as a tree entry, then recurse into its children.
+ ///
+ /// * `id` - the ID of the source unit to print.
+ /// * `levels` - one entry per ancestor level, deciding the indent drawn.
+ /// * `end` - true if this is the last entry among its siblings.
+ fn report_leaf(&self, id: usize, mut levels: Vec<bool>, end: bool) {
+ // A level entry is true if all entries in that level have been printed.
+ for level in &levels {
+ match level {
+ false => eprint!("│ "),
+ true => eprint!(" "),
+ }
+ }
+ // The end value is true if all siblings of this entry have been printed.
+ match end {
+ false => eprint!("├── "),
+ true => eprint!("└── "),
+ }
+ if let Some(unit) = self.resolver.source_units.get(id) {
+ let path_str = &unit.source_unit.main.path.as_os_str().to_string_lossy();
+ if let Some(name_str) = unit.source_unit.name() {
+ eprint!("{name_str}{BLUE}");
+ if unit.source_unit.head.is_some() { eprint!(" +head") }
+ if unit.source_unit.tail.is_some() { eprint!(" +tail") }
+ // Count the unresolved references that belong to this unit.
+ let mut unresolved = 0;
+ for symbol in &self.resolver.unresolved {
+ if symbol.source_id == id { unresolved += 1; }
+ }
+ if unresolved > 0 { eprint!("{RED} ({unresolved})"); }
+ eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}");
+ } else {
+ eprintln!("{path_str}");
+ }
+ // Children are drawn one level deeper than this entry.
+ levels.push(end);
+ let len = unit.child_ids.len();
+ for (i, id) in unit.child_ids.iter().enumerate() {
+ let end = i + 1 == len;
+ self.report_leaf(*id, levels.clone(), end);
+ }
+ } else {
+ eprintln!("<error loading source unit details>");
+ }
+ }
+}
diff --git a/src/source_unit.rs b/src/source_unit.rs
new file mode 100644
index 0000000..3e674be
--- /dev/null
+++ b/src/source_unit.rs
@@ -0,0 +1,121 @@
+use crate::*;
+
+use vagabond::*;
+
+
+/// Callback that extracts the symbols from a string of source code.
+/// The second argument is the path of the file the code came from.
+type ParseFn = fn(&str, Option<&Path>) -> Vec<Symbol>;
+
+
+/// Gather all source units from a PATH-style environment variable.
+///
+/// The value of `variable` is split with the platform's path-list rules
+/// (`:` on Unix, `;` on Windows), so Windows paths that contain a drive
+/// letter are kept intact. Source units are then gathered from every
+/// listed path. An unset variable yields an empty list.
+pub fn gather_from_path_variable(variable: &str, extension: &str, parse: ParseFn) -> Vec<SourceUnit> {
+    // Read the raw value so that non-UTF-8 paths are not discarded.
+    let Some(value) = std::env::var_os(variable) else {
+        return Vec::new();
+    };
+    std::env::split_paths(&value)
+        .flat_map(|path| gather_from_path(&path, extension, parse))
+        .collect()
+}
+
+/// Gather source units at or descending from a path.
+///
+/// A file path yields at most one source unit, and a directory path
+/// yields one for every traversed entry that loads successfully. Paths
+/// and entries that cannot be read or loaded are silently skipped.
+pub fn gather_from_path(path: &Path, extension: &str, parse: ParseFn) -> Vec<SourceUnit> {
+    let Ok(entry) = Entry::from_path(path) else {
+        return Vec::new();
+    };
+    if entry.entry_type == EntryType::File {
+        return SourceUnit::from_path(&entry.path, extension, parse).into_iter().collect();
+    }
+    if entry.entry_type != EntryType::Directory {
+        return Vec::new();
+    }
+    let Ok(entries) = traverse_directory(entry.path) else {
+        return Vec::new();
+    };
+    entries.into_iter()
+        .filter_map(|entry| SourceUnit::from_path(&entry.path, extension, parse).ok())
+        .collect()
+}
+
+
+/// A main source file together with its optional head and tail files.
+pub struct SourceUnit {
+ /// The main file of the unit.
+ pub main: SourceFile,
+ /// The head file, if one was found alongside the main file.
+ pub head: Option<SourceFile>,
+ /// The tail file, if one was found alongside the main file.
+ pub tail: Option<SourceFile>,
+}
+
+impl SourceUnit {
+ /// Load source from a main file and an associated head and tail file.
+ ///
+ /// `extension` is given without a leading dot. The head and tail files
+ /// share the main file's name with the extension replaced by
+ /// `head.<extension>` and `tail.<extension>`, and are loaded only if
+ /// they can be read.
+ ///
+ /// Returns `FileError::InvalidExtension` if the path is a head or tail
+ /// file or does not end in `.<extension>`, or another `FileError` if
+ /// the main file cannot be read.
+ pub fn from_path<P: AsRef<Path>>(path: P, extension: &str, parse: ParseFn) -> Result<Self, FileError> {
+     let main_path = { path.as_ref().canonicalize().unwrap_or_else(|_| path.as_ref().to_path_buf()) };
+     let file_name = main_path.file_name()
+         .map(|name| name.to_string_lossy().into_owned())
+         .unwrap_or_default();
+     let head_extension = format!("head.{extension}");
+     let tail_extension = format!("tail.{extension}");
+     // Match suffixes on a '.' boundary, so that a name like 'overhead.ext'
+     // or 'detail.ext' is not mistaken for a head or tail file.
+     let has_suffix = |suffix: &str| {
+         file_name == suffix || file_name.ends_with(&format!(".{suffix}"))
+     };
+     let is_head = has_suffix(&head_extension);
+     let is_tail = has_suffix(&tail_extension);
+     let is_main = file_name.ends_with(&format!(".{extension}"));
+     // Head and tail files will be picked up later along with the main file.
+     if !is_main || is_head || is_tail { return Err(FileError::InvalidExtension); }
+
+     let source_code = read_file(path.as_ref())?;
+     let symbols = parse(&source_code, Some(path.as_ref()));
+     let head_path = main_path.with_extension(head_extension);
+     let tail_path = main_path.with_extension(tail_extension);
+
+     // Read and parse an optional file, yielding None if it cannot be read.
+     macro_rules! parse_file {
+         ($path:expr) => {
+             read_file(&$path).ok().map(|source_code| {
+                 let symbols = parse(&source_code, Some(&$path));
+                 let path = $path;
+                 SourceFile { symbols, source_code, path }
+             })
+         };
+     }
+     let main = SourceFile { path: main_path, source_code, symbols };
+     let head = parse_file!(head_path);
+     let tail = parse_file!(tail_path);
+     Ok( SourceUnit { main, head, tail } )
+ }
+
+ /// Load from a string of source code.
+ ///
+ /// The path is canonicalized where possible and stored with the main
+ /// file. The unit is created without a head or tail file.
+ pub fn from_string<P: AsRef<Path>>(source_code: String, path: P, parse: ParseFn) -> Self {
+     let given_path = path.as_ref();
+     let path = match given_path.canonicalize() {
+         Ok(canonical) => canonical,
+         Err(_) => given_path.to_path_buf(),
+     };
+     let symbols = parse(&source_code, Some(&path));
+     let main = SourceFile { path, source_code, symbols };
+     Self { main, head: None, tail: None }
+ }
+
+ /// Return the file name of the main file, converted lossily to a
+ /// string, or `None` if the main path has no file name.
+ pub fn name(&self) -> Option<String> {
+     let file_name = self.main.path.file_name()?;
+     Some(file_name.to_string_lossy().into_owned())
+ }
+
+ /// Return the path of the main file, converted lossily to a string.
+ pub fn path(&self) -> String {
+     self.main.path.to_string_lossy().into_owned()
+ }
+}
+
+
+/// One file of source code and the symbols parsed from it.
+pub struct SourceFile {
+ /// The path that the file was loaded from.
+ pub path: PathBuf,
+ /// The full text of the file.
+ pub source_code: String,
+ /// The symbols parsed from the source code. This list is emptied when
+ /// the owning source unit is included in a resolver.
+ pub symbols: Vec<Symbol>,
+}
+
+/// A named definition or reference found in source code.
+pub struct Symbol {
+ /// The name used to match references with definitions.
+ pub name: String,
+ /// The location of the symbol in the source code.
+ pub source: SourceSpan,
+ /// Whether this symbol is a definition or a reference.
+ pub role: SymbolRole,
+}
+
+/// The part that a symbol plays in the source code.
+#[derive(PartialEq)]
+pub enum SymbolRole {
+ /// The symbol defines a name, with the given ordering requirement.
+ Definition(DefinitionType),
+ /// The symbol refers to a name defined elsewhere.
+ Reference,
+}
+
+/// The ordering requirement between a definition and its references.
+#[derive(PartialEq)]
+pub enum DefinitionType {
+ /// The unit holding the definition becomes a parent of every other unit
+ /// that references it, so its head file is merged before theirs.
+ MustPrecedeReference,
+ /// The definition places no ordering requirement on referencing units.
+ CanFollowReference,
+}
diff --git a/src/tokeniser.rs b/src/tokeniser.rs
index eeab6e6..4ff3d0b 100644
--- a/src/tokeniser.rs
+++ b/src/tokeniser.rs
@@ -3,6 +3,7 @@ use crate::*;
use std::path::PathBuf;
+/// Break a character stream down into individual tokens.
pub struct Tokeniser {
/// Characters waiting to be parsed, in reverse order.
pub chars: Vec<char>,