diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/context.rs | 8 | ||||
-rw-r--r-- | src/errors/file_error.rs | 41 | ||||
-rw-r--r-- | src/errors/merge_error.rs | 41 | ||||
-rw-r--r-- | src/errors/mod.rs | 71 | ||||
-rw-r--r-- | src/errors/resolver_error.rs | 30 | ||||
-rw-r--r-- | src/lib.rs | 17 | ||||
-rw-r--r-- | src/locators/mod.rs (renamed from src/locators.rs) | 0 | ||||
-rw-r--r-- | src/resolver.rs | 296 | ||||
-rw-r--r-- | src/source_hierarchy.rs | 58 | ||||
-rw-r--r-- | src/source_unit.rs | 121 | ||||
-rw-r--r-- | src/tokeniser.rs | 1 |
11 files changed, 682 insertions, 2 deletions
diff --git a/src/context.rs b/src/context.rs
new file mode 100644
index 0000000..c015c7e
--- /dev/null
+++ b/src/context.rs
@@ -0,0 +1,8 @@
+use crate::*;
+
+
+/// Source context for a token.
+pub struct Context<'a> {
+    pub source_code: &'a str,
+    pub source: &'a SourceSpan,
+}
diff --git a/src/errors/file_error.rs b/src/errors/file_error.rs
new file mode 100644
index 0000000..e601f94
--- /dev/null
+++ b/src/errors/file_error.rs
@@ -0,0 +1,41 @@
+pub use std::path::{Path, PathBuf};
+
+
+pub enum FileError {
+    InvalidExtension,
+    NotFound,
+    NotReadable,
+    IsADirectory,
+    InvalidUtf8,
+    Unknown,
+}
+
+impl std::fmt::Debug for FileError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        let message = match self {
+            Self::InvalidExtension => "File has invalid extension",
+            Self::NotFound => "File was not found",
+            Self::InvalidUtf8 => "File does not contain valid UTF-8 text",
+            Self::NotReadable => "File is not readable",
+            Self::IsADirectory => "File is a directory",
+            Self::Unknown => "Unknown error while attempting to read from path",
+        };
+        write!(f, "{message}")
+    }
+}
+
+
+pub fn read_file(path: &Path) -> Result<String, FileError> {
+    match std::fs::read(path) {
+        Ok(bytes) => match String::from_utf8(bytes) {
+            Ok(source) => Ok(source),
+            Err(_) => Err(FileError::InvalidUtf8),
+        }
+        Err(err) => Err( match err.kind() {
+            std::io::ErrorKind::NotFound => FileError::NotFound,
+            std::io::ErrorKind::PermissionDenied => FileError::NotReadable,
+            std::io::ErrorKind::IsADirectory => FileError::IsADirectory,
+            _ => FileError::Unknown,
+        } )
+    }
+}
diff --git a/src/errors/merge_error.rs b/src/errors/merge_error.rs
new file mode 100644
index 0000000..a694b71
--- /dev/null
+++ b/src/errors/merge_error.rs
@@ -0,0 +1,41 @@
+use crate::*;
+
+use ansi::*;
+use log::error;
+
+
+pub struct MergeError<'a> {
+    pub resolver: &'a Resolver,
+    /// The 'source_units' indices of the units that could not be ordered.
+    pub cyclic_unit_ids: Vec<usize>,
+}
+
+impl MergeError<'_> {
+    pub fn report(&self) {
+        error!("A cyclic dependency was found between the following libraries:");
+        for id in &self.cyclic_unit_ids {
+            if let Some(unit) = self.resolver.source_units.get(*id) {
+                let path = &unit.source_unit.path();
+                match unit.source_unit.name() {
+                    Some(name) =>
+                        eprintln!("{name}{NORMAL}{DIM} ({path}){NORMAL}"),
+                    None =>
+                        eprintln!("{path}"),
+                };
+                // Print each parent involved in the dependency cycle.
+                for parent_id in &unit.parent_ids {
+                    if !self.cyclic_unit_ids.contains(parent_id) { continue; }
+                    if let Some(parent_unit) = self.resolver.source_units.get(*parent_id) {
+                        let parent_path = &parent_unit.source_unit.path();
+                        match parent_unit.source_unit.name() {
+                            Some(parent_name) =>
+                                eprintln!(" => {parent_name} {DIM}({parent_path}){NORMAL}"),
+                            None =>
+                                eprintln!(" => {parent_path}"),
+                        };
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/errors/mod.rs b/src/errors/mod.rs
new file mode 100644
index 0000000..b0bf7e4
--- /dev/null
+++ b/src/errors/mod.rs
@@ -0,0 +1,71 @@
+mod file_error;
+mod merge_error;
+mod resolver_error;
+
+pub use file_error::*;
+pub use merge_error::*;
+pub use resolver_error::*;
+
+use crate::*;
+
+use ansi::*;
+use log::LogLevel;
+
+
+pub fn report_source_issue(level: LogLevel, context: &Context, message: &str) {
+    // Prepare variables.
+    let in_merged = &context.source.in_merged;
+    let line_num = in_merged.start.line + 1;
+    let digits = line_num.to_string().len();
+    let w = digits + 3;
+    let arrow = "-->";
+    let mut string = message.to_string();
+
+    macro_rules! push {
+        ($($tokens:tt)*) => { string.push_str(&format!($($tokens)*)) };
+    }
+
+    // Format message and locations.
+    push!("{NORMAL}\n");
+    push!("{BLUE}{arrow:>w$}{NORMAL} {in_merged}\n", w=w);
+    if let Some(in_source) = &context.source.in_source {
+        push!("{BLUE}{arrow:>w$}{NORMAL} {in_source}\n", w=w);
+    }
+
+    // Format source context.
+    let left = in_merged.start.column;
+    let right = in_merged.end.column + 1;
+    let source_line = context.source_code.split('\n').nth(in_merged.start.line)
+        .unwrap_or("<error reading line from source>");
+    let space = " ";
+    let colour = match level {
+        LogLevel::Info => BLUE,
+        LogLevel::Warn => YELLOW,
+        LogLevel::Error => RED,
+        LogLevel::Fatal => RED,
+    };
+
+    // Print source code line.
+    push!("{BLUE} {line_num} | {NORMAL}");
+    for (i, c) in source_line.chars().enumerate() {
+        if i == left { push!("{colour}") }
+        if i == right { push!("{NORMAL}") }
+        push!("{c}");
+    }
+    push!("{NORMAL}\n");
+
+    // Print source code underline.
+    push!("{BLUE} {space:>w$} | {NORMAL}", w=digits);
+    for _ in 0..left { push!(" "); }
+    push!("{colour}");
+    for _ in left..right { push!("^"); }
+    push!("{NORMAL}");
+
+    // Print the completed message.
+    match level {
+        LogLevel::Info => log::info!( "{}", string),
+        LogLevel::Warn => log::warn!( "{}", string),
+        LogLevel::Error => log::error!("{}", string),
+        LogLevel::Fatal => log::fatal!("{}", string),
+    }
+}
diff --git a/src/errors/resolver_error.rs b/src/errors/resolver_error.rs
new file mode 100644
index 0000000..de8b8d1
--- /dev/null
+++ b/src/errors/resolver_error.rs
@@ -0,0 +1,30 @@
+use crate::*;
+
+use log::LogLevel;
+
+
+pub struct ResolverError<'a> {
+    pub resolver: &'a Resolver,
+}
+
+impl<'a> ResolverError<'a> {
+    pub fn report(&self) {
+        for reference in &self.resolver.unresolved {
+            let message = format!(
+                "Undefined symbol, no label or macro has been defined with the name {:?}",
+                &reference.symbol.source.string,
+            );
+            let context = reference.context(&self.resolver);
+            report_source_issue(LogLevel::Error, &context, &message);
+        }
+        for redefinition in &self.resolver.redefinitions {
+            let definition = self.resolver.definitions.get(redefinition.1).unwrap();
+            let message = format!(
+                "Redefined symbol, first defined at {}",
+                &definition.symbol.source.in_merged,
+            );
+            let context = redefinition.0.context(&self.resolver);
+            report_source_issue(LogLevel::Error, &context, &message);
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,18 @@
-mod locators;
-pub use locators::*;
+#![feature(extract_if)]
+#![feature(io_error_more)]
 
+mod context;
+mod errors;
+mod locators;
+mod resolver;
+mod source_hierarchy;
+mod source_unit;
 mod tokeniser;
+
+pub use context::*;
+pub use errors::*;
+pub use locators::*;
+pub use resolver::*;
+pub use source_hierarchy::*;
+pub use source_unit::*;
 pub use tokeniser::*;
diff --git a/src/locators.rs b/src/locators/mod.rs
index b7db1ee..b7db1ee 100644
--- a/src/locators.rs
+++ b/src/locators/mod.rs
diff --git a/src/resolver.rs b/src/resolver.rs
new file mode 100644
index 0000000..23dc73a
--- /dev/null
+++ b/src/resolver.rs
@@ -0,0 +1,296 @@
+use crate::*;
+
+use log::error;
+
+type PushFn = fn(&mut String, &SourceFile);
+
+
+/// Resolve undeclared symbols in a source unit with definitions from other units.
+pub struct Resolver {
+    /// Definitions from all included source units.
+    pub definitions: Vec<TrackedSymbol>,
+    /// All resolved references in all included source units.
+    pub resolved: Vec<TrackedSymbol>,
+    /// All unresolved references in all included source units.
+    pub unresolved: Vec<TrackedSymbol>,
+    /// All redefined (duplicate) definitions in all included source units.
+    /// Points to the 'definitions' index of the existing definition.
+    pub redefinitions: Vec<(TrackedSymbol, usize)>,
+    /// All included source units.
+    pub source_units: Vec<HeirarchicalSourceUnit>,
+    /// The 'source_units' indices of the root source units.
+    pub root_unit_ids: Vec<usize>,
+    /// Source units that can be included later to resolve symbols.
+    pub library_source_units: Vec<SourceUnit>,
+}
+
+
+impl Resolver {
+    pub fn new(source_unit: SourceUnit) -> Self {
+        let mut new = Self {
+            definitions: Vec::new(),
+            resolved: Vec::new(),
+            unresolved: Vec::new(),
+            redefinitions: Vec::new(),
+            source_units: Vec::new(),
+            root_unit_ids: Vec::new(),
+            library_source_units: Vec::new(),
+        };
+        new.include_source_unit(source_unit, None);
+        return new;
+    }
+
+    pub fn include_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) {
+        let source_id = self.source_units.len();
+        use std::mem::take;
+
+        self.include_symbols(take(&mut source_unit.main.symbols), source_id, SourceRole::Main);
+        if let Some(head) = &mut source_unit.head {
+            self.include_symbols(take(&mut head.symbols), source_id, SourceRole::Head); }
+        if let Some(tail) = &mut source_unit.tail {
+            self.include_symbols(take(&mut tail.symbols), source_id, SourceRole::Tail); }
+
+        match parent_id {
+            Some(parent_id) => match self.source_units.get_mut(parent_id) {
+                Some(parent) => parent.child_ids.push(source_id),
+                None => error!("Could not find parent (#{parent_id}) of source unit #{source_id}"),
+            }
+            None => self.root_unit_ids.push(source_id),
+        }
+        self.source_units.push(
+            HeirarchicalSourceUnit {
+                source_unit,
+                child_ids: Vec::new(),
+                parent_ids: Vec::new(),
+            }
+        );
+    }
+
+    fn include_symbols(&mut self, symbols: Vec<Symbol>, source_id: usize, source_role: SourceRole) {
+        for symbol in symbols {
+            match symbol.role {
+                SymbolRole::Definition(_) => {
+                    // Check if the symbol has already been defined.
+                    let equal = |d: &TrackedSymbol| { &d.symbol.name == &symbol.name };
+                    if let Some(original) = self.definitions.iter().position(equal) {
+                        let definition = TrackedSymbol { symbol, source_id, source_role };
+                        let redefinition = (definition, original);
+                        self.redefinitions.push(redefinition);
+                    } else {
+                        // Resolve all unresolved references that match this symbol.
+                        let equal = |s: &mut TrackedSymbol| s.symbol.name == symbol.name;
+                        for symbol in self.unresolved.extract_if(equal) {
+                            self.resolved.push(symbol);
+                        }
+                        let definition = TrackedSymbol { symbol, source_id, source_role };
+                        self.definitions.push(definition);
+                    }
+                }
+                SymbolRole::Reference => {
+                    let reference = TrackedSymbol { symbol, source_id, source_role };
+                    match self.definitions.contains(&reference) {
+                        true => self.resolved.push(reference),
+                        false => self.unresolved.push(reference),
+                    }
+                }
+            }
+        }
+    }
+
+    /// Add a set of source units that might contain definitions for unresolved symbols.
+    pub fn add_library_source_units(&mut self, mut source_units: Vec<SourceUnit>) {
+        self.library_source_units.append(&mut source_units);
+    }
+
+    /// Attempt to resolve unresolved references with library source units.
+    pub fn resolve(&mut self) {
+        // Repeatedly test if each library source unit resolves an unresolved
+        // symbol, breaking the loop when no new resolutions are found.
+        'outer: loop {
+            for (i, source_unit) in self.library_source_units.iter().enumerate() {
+                if let Some(id) = self.unit_resolved_by_unit(&source_unit) {
+                    let source_unit = self.library_source_units.remove(i);
+                    self.include_source_unit(source_unit, Some(id));
+                    continue 'outer;
+                }
+            }
+            break;
+        }
+    }
+
+    /// Returns an error that can report every unresolved reference and every
+    /// redefinition, or None if the included source units contain neither.
+    pub fn error(&self) -> Option<ResolverError> {
+        match self.unresolved.is_empty() && self.redefinitions.is_empty() {
+            true => None,
+            false => Some(ResolverError { resolver: self })
+        }
+    }
+
+    /// Return a type that can print the structure of the source tree.
+    pub fn hierarchy(&self) -> SourceHierarchy {
+        SourceHierarchy { resolver: self }
+    }
+
+    /// Return the ID of a source unit that contains an unresolved reference
+    /// to a symbol defined by this unit.
+    fn unit_resolved_by_unit(&self, source_unit: &SourceUnit) -> Option<usize> {
+        if let Some(id) = self.unit_resolved_by_symbol(&source_unit.main.symbols) {
+            return Some(id);
+        }
+        if let Some(head) = &source_unit.head {
+            if let Some(id) = self.unit_resolved_by_symbol(&head.symbols) {
+                return Some(id);
+            }
+        }
+        if let Some(tail) = &source_unit.tail {
+            if let Some(id) = self.unit_resolved_by_symbol(&tail.symbols) {
+                return Some(id);
+            }
+        }
+        return None;
+    }
+
+    /// Returns the ID of a source unit that contains an unresolved reference
+    /// to a symbol defined by one of these symbols.
+    fn unit_resolved_by_symbol(&self, symbols: &[Symbol]) -> Option<usize> {
+        for symbol in symbols {
+            if let SymbolRole::Definition(_) = symbol.role {
+                for unresolved in &self.unresolved {
+                    if unresolved.symbol.name == symbol.name {
+                        return Some(unresolved.source_id);
+                    }
+                }
+            }
+        }
+        return None;
+    }
+
+    /// Populate the .parent_ids field of every source unit. The parents of
+    /// each source unit are the other units that define a symbol referenced
+    /// by the unit, where the definition type is MustPrecedeReference.
+    pub fn calculate_hierarchy(&mut self) {
+        // Clear the .parent_ids field of every source unit.
+        for source_unit in &mut self.source_units {
+            source_unit.parent_ids.clear();
+        }
+        // Populate the .parent_ids field of every source unit.
+        for reference in &self.resolved {
+            let predicate = |d: &&TrackedSymbol| d.symbol.name == reference.symbol.name;
+            if let Some(definition) = self.definitions.iter().find(predicate) {
+                // A unit cannot be its own parent, and each parent is listed once.
+                let is_self = reference.source_id == definition.source_id;
+                let must_precede = SymbolRole::Definition(DefinitionType::MustPrecedeReference);
+                if is_self || definition.symbol.role != must_precede { continue; }
+                let parent_ids = &mut self.source_units[reference.source_id].parent_ids;
+                if !parent_ids.contains(&definition.source_id) { parent_ids.push(definition.source_id); }
+            };
+        }
+    }
+
+    /// Concatenate all included source units into one string.
+    /// If the source unit dependency graph contains a cycle, the IDs of the
+    /// source units that could not be ordered will be returned.
+    pub fn get_merged_source_code(&self, push: PushFn) -> Result<String, MergeError> {
+        // The ID of each source unit will come after the IDs of all
+        // parents of that unit.
+        let head_order = {
+            let mut included_source_ids = Vec::new();
+            let mut remaining_source_ids = (0..self.source_units.len()).collect::<Vec<_>>();
+
+            'restart: while !remaining_source_ids.is_empty() {
+                // Iterate over remaining source units in the order they were included.
+                'next: for (i, id) in remaining_source_ids.iter().enumerate() {
+                    let unit = &self.source_units[*id];
+                    for parent_id in &unit.parent_ids {
+                        // Skip this unit if a parent hasn't yet been included.
+                        if !included_source_ids.contains(parent_id) {
+                            continue 'next;
+                        }
+                    }
+                    // Include this unit, then check remaining units from the start.
+                    included_source_ids.push(*id);
+                    remaining_source_ids.remove(i);
+                    continue 'restart;
+                }
+                // All remaining source units depend on at least one remaining
+                // source unit, indicating a dependency cycle.
+                return Err(MergeError {
+                    resolver: self,
+                    cyclic_unit_ids: remaining_source_ids,
+                });
+            }
+            included_source_ids
+        };
+        let mut source_code = String::new();
+
+        // Push head source code in calculated parent-preceding order.
+        for id in &head_order {
+            let source_unit = &self.source_units[*id];
+            if let Some(head) = &source_unit.source_unit.head {
+                push(&mut source_code, head);
+            }
+        }
+        // Push main source code in source-added order.
+        // The root unit will be pushed first.
+        for source_unit in self.source_units.iter() {
+            let main = &source_unit.source_unit.main;
+            push(&mut source_code, &main);
+        }
+        // Push tail source code in reverse source-added order.
+        // The root unit will be pushed last.
+        for source_unit in self.source_units.iter().rev() {
+            if let Some(tail) = &source_unit.source_unit.tail {
+                push(&mut source_code, tail);
+            }
+        }
+        return Ok(source_code);
+    }
+}
+
+
+/// A source unit tracked with pointers to parents and dependents.
+pub struct HeirarchicalSourceUnit {
+    pub source_unit: SourceUnit,
+    /// Pointers to source units that resolve references in this unit.
+    pub child_ids: Vec<usize>,
+    /// Pointers to source units that must be included before this unit.
+    pub parent_ids: Vec<usize>,
+}
+
+pub struct TrackedSymbol {
+    pub symbol: Symbol,
+    pub source_id: usize,
+    pub source_role: SourceRole,
+}
+
+impl TrackedSymbol {
+    pub fn context<'a>(&'a self, resolver: &'a Resolver) -> Context<'a> {
+        let source_unit = &resolver.source_units[self.source_id].source_unit;
+        let source_code = match self.source_role {
+            SourceRole::Main => source_unit.main.source_code.as_str(),
+            SourceRole::Head => match &source_unit.head {
+                Some(head) => head.source_code.as_str(),
+                None => unreachable!("Failed to find source code of head file"),
+            }
+            SourceRole::Tail => match &source_unit.tail {
+                Some(tail) => tail.source_code.as_str(),
+                None => unreachable!("Failed to find source code of tail file"),
+            }
+        };
+        Context { source_code, source: &self.symbol.source }
+    }
+}
+
+impl PartialEq for TrackedSymbol {
+    fn eq(&self, other: &TrackedSymbol) -> bool {
+        self.symbol.name.eq(&other.symbol.name)
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum SourceRole {
+    Main,
+    Head,
+    Tail,
+}
diff --git a/src/source_hierarchy.rs b/src/source_hierarchy.rs
new file mode 100644
index 0000000..9478c56
--- /dev/null
+++ b/src/source_hierarchy.rs
@@ -0,0 +1,58 @@
+use crate::*;
+
+use ansi::*;
+
+
+pub struct SourceHierarchy<'a> {
+    pub resolver: &'a Resolver,
+}
+
+impl<'a> SourceHierarchy<'a> {
+    pub fn report(&self) {
+        eprintln!(".");
+        let len = self.resolver.root_unit_ids.len();
+        for (i, id) in self.resolver.root_unit_ids.iter().enumerate() {
+            let end = i + 1 == len;
+            self.report_leaf(*id, Vec::new(), end);
+        }
+    }
+
+    fn report_leaf(&self, id: usize, mut levels: Vec<bool>, end: bool) {
+        // A level entry is true if all entries in that level have been printed.
+        for level in &levels {
+            match level {
+                false => eprint!("│   "),
+                true => eprint!("    "),
+            }
+        }
+        // The end value is true if all siblings of this entry have been printed.
+        match end {
+            false => eprint!("├── "),
+            true => eprint!("└── "),
+        }
+        if let Some(unit) = self.resolver.source_units.get(id) {
+            let path_str = &unit.source_unit.main.path.as_os_str().to_string_lossy();
+            if let Some(name_str) = unit.source_unit.name() {
+                eprint!("{name_str}{BLUE}");
+                if unit.source_unit.head.is_some() { eprint!(" +head") }
+                if unit.source_unit.tail.is_some() { eprint!(" +tail") }
+                let mut unresolved = 0;
+                for symbol in &self.resolver.unresolved {
+                    if symbol.source_id == id { unresolved += 1; }
+                }
+                if unresolved > 0 { eprint!("{RED} ({unresolved})"); }
+                eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}");
+            } else {
+                eprintln!("{path_str}");
+            }
+            levels.push(end);
+            let len = unit.child_ids.len();
+            for (i, id) in unit.child_ids.iter().enumerate() {
+                let end = i + 1 == len;
+                self.report_leaf(*id, levels.clone(), end);
+            }
+        } else {
+            eprintln!("<error loading source unit details>");
+        }
+    }
+}
diff --git a/src/source_unit.rs b/src/source_unit.rs
new file mode 100644
index 0000000..3e674be
--- /dev/null
+++ b/src/source_unit.rs
@@ -0,0 +1,121 @@
+use crate::*;
+
+use vagabond::*;
+
+
+type ParseFn = fn(&str, Option<&Path>) -> Vec<Symbol>;
+
+
+/// Gather all source units from a PATH-style environment variable.
+pub fn gather_from_path_variable(variable: &str, extension: &str, parse: ParseFn) -> Vec<SourceUnit> {
+    let mut source_units = Vec::new();
+    if let Ok(string) = std::env::var(variable) {
+        for path in std::env::split_paths(&string) {
+            source_units.extend(gather_from_path(&path, extension, parse));
+        }
+    };
+    return source_units;
+}
+
+/// Gather source units at or descending from a path.
+pub fn gather_from_path(path: &Path, extension: &str, parse: ParseFn) -> Vec<SourceUnit> {
+    let mut source_units = Vec::new();
+    if let Ok(entry) = Entry::from_path(path) {
+        if EntryType::File == entry.entry_type {
+            if let Ok(unit) = SourceUnit::from_path(&entry.path, extension, parse) {
+                source_units.push(unit);
+            }
+        } else if EntryType::Directory == entry.entry_type {
+            if let Ok(entries) = traverse_directory(entry.path) {
+                for entry in entries {
+                    if let Ok(unit) = SourceUnit::from_path(&entry.path, extension, parse) {
+                        source_units.push(unit);
+                    }
+                }
+            }
+        }
+    };
+    return source_units;
+}
+
+
+pub struct SourceUnit {
+    pub main: SourceFile,
+    pub head: Option<SourceFile>,
+    pub tail: Option<SourceFile>,
+}
+
+impl SourceUnit {
+    /// Load source from a main file and an associated head and tail file.
+    pub fn from_path<P: AsRef<Path>>(path: P, extension: &str, parse: ParseFn) -> Result<Self, FileError> {
+        let main_path = { path.as_ref().canonicalize().unwrap_or_else(|_| path.as_ref().to_path_buf()) };
+        let main_path_str = main_path.as_os_str().to_string_lossy().to_string();
+        let head_extension = format!("head.{extension}");
+        let tail_extension = format!("tail.{extension}");
+        let is_head = main_path_str.ends_with(&format!(".{head_extension}"));
+        let is_tail = main_path_str.ends_with(&format!(".{tail_extension}"));
+        let is_not_main = !main_path_str.ends_with(extension);
+        // Head and tail files will be picked up later along with the main file.
+        if is_not_main || is_head || is_tail { return Err(FileError::InvalidExtension); }
+
+        let source_code = read_file(path.as_ref())?;
+        let symbols = parse(&source_code, Some(path.as_ref()));
+        let head_path = main_path.with_extension(head_extension);
+        let tail_path = main_path.with_extension(tail_extension);
+
+        macro_rules! parse_file {
+            ($path:expr) => {
+                read_file(&$path).ok().map(|source_code| {
+                    let symbols = parse(&source_code, Some(&$path));
+                    let path = $path;
+                    SourceFile { symbols, source_code, path }
+                })
+            };
+        }
+        let main = SourceFile { path: main_path, source_code, symbols };
+        let head = parse_file!(head_path);
+        let tail = parse_file!(tail_path);
+        Ok( SourceUnit { main, head, tail } )
+    }
+
+    /// Load from a string of source code.
+    pub fn from_string<P: AsRef<Path>>(source_code: String, path: P, parse: ParseFn) -> Self {
+        let path = { path.as_ref().canonicalize().unwrap_or_else(|_| path.as_ref().to_path_buf()) };
+        let symbols = parse(&source_code, Some(&path));
+        Self { main: SourceFile { path, source_code, symbols }, head: None, tail: None }
+    }
+
+    pub fn name(&self) -> Option<String> {
+        self.main.path.file_name().map(|s| s.to_string_lossy().to_string())
+    }
+
+    pub fn path(&self) -> String {
+        self.main.path.as_os_str().to_string_lossy().to_string()
+
+    }
+}
+
+
+pub struct SourceFile {
+    pub path: PathBuf,
+    pub source_code: String,
+    pub symbols: Vec<Symbol>,
+}
+
+pub struct Symbol {
+    pub name: String,
+    pub source: SourceSpan,
+    pub role: SymbolRole,
+}
+
+#[derive(PartialEq)]
+pub enum SymbolRole {
+    Definition(DefinitionType),
+    Reference,
+}
+
+#[derive(PartialEq)]
+pub enum DefinitionType {
+    MustPrecedeReference,
+    CanFollowReference,
+}
diff --git a/src/tokeniser.rs b/src/tokeniser.rs
index eeab6e6..4ff3d0b 100644
--- a/src/tokeniser.rs
+++ b/src/tokeniser.rs
@@ -3,6 +3,7 @@ use crate::*;
 use std::path::PathBuf;
 
 
+/// Break a character stream down into individual tokens.
 pub struct Tokeniser {
     /// Characters waiting to be parsed, in reverse order.
     pub chars: Vec<char>,