diff options
Diffstat (limited to 'src/processors/resolver.rs')
-rw-r--r-- | src/processors/resolver.rs | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/src/processors/resolver.rs b/src/processors/resolver.rs new file mode 100644 index 0000000..6c1d0f1 --- /dev/null +++ b/src/processors/resolver.rs @@ -0,0 +1,344 @@ +use crate::*; + +use log::{info, error}; + +use std::collections::HashSet; + + +/// Resolve undeclared symbols in a source unit with definitions from other units. +pub struct Resolver { + /// Definitions from all included source units. + pub definitions: Vec<TrackedDefinition>, + /// All resolved references in all included source units. + pub resolved: Vec<TrackedReference>, + /// All unresolved references in all included source units. + pub unresolved: Vec<TrackedSymbol>, + /// All redefined (duplicate) definitions in all included source units. + pub redefinitions: Vec<TrackedRedefinition>, + /// All included source units. + pub source_units: Vec<HeirarchicalSourceUnit>, + /// The 'source_units' indices of the root source units. + pub root_unit_ids: Vec<usize>, + /// Source units that can be included later to resolve symbols. + pub library_source_units: Vec<SourceUnit>, +} + + +impl Resolver { + pub fn new() -> Self { + Self { + definitions: Vec::new(), + resolved: Vec::new(), + unresolved: Vec::new(), + redefinitions: Vec::new(), + source_units: Vec::new(), + root_unit_ids: Vec::new(), + library_source_units: Vec::new(), + } + } + + pub fn include_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { + let source_id = self.source_units.len(); + use std::mem::take; + + info!("Including source unit at {:?}", source_unit.main.path); + if let Some(symbols) = &mut source_unit.main.symbols { + self.include_symbols(take(symbols), source_id, SourceRole::Main); + } + if let Some(head) = &mut source_unit.head { + if let Some(symbols) = &mut head.symbols { + self.include_symbols(take(symbols), source_id, SourceRole::Head); + } + } + if let Some(tail) = &mut source_unit.tail { + if let Some(symbols) = &mut tail.symbols { + self.include_symbols(take(symbols), source_id, SourceRole::Tail); + } + } + + match parent_id { + Some(parent_id) => match self.source_units.get_mut(parent_id) { + Some(parent) => { parent.child_ids.insert(source_id); } + None => { error!("Could not find parent (#{parent_id}) of source unit #{source_id}"); } + } + None => self.root_unit_ids.push(source_id), + } + self.source_units.push( + HeirarchicalSourceUnit { + source_unit, + child_ids: HashSet::new(), + parent_ids: HashSet::new(), + } + ); + } + + fn include_symbols(&mut self, symbols: Vec<Symbol>, source_id: usize, source_role: SourceRole) { + for symbol in symbols { + match symbol.role { + SymbolRole::Definition(_) => { + // Check if the symbol has already been defined. + let equal = |d: &TrackedDefinition| { &d.tracked.symbol == &symbol }; + if let Some(original) = self.definitions.iter().position(equal) { + let tracked = TrackedSymbol { symbol, source_id, source_role }; + let definition = original; + self.redefinitions.push(TrackedRedefinition { tracked, definition }); + } else { + // Resolve all unresolved references that match this symbol. + let defines = |r: &mut TrackedSymbol| symbol.defines(&r.symbol); + let definition = self.definitions.len(); + let mut references = Vec::new(); + for tracked in self.unresolved.extract_if(.., defines) { + references.push(self.resolved.len()); + self.resolved.push(TrackedReference { tracked, definition }); + } + let tracked = TrackedSymbol { symbol, source_id, source_role }; + self.definitions.push(TrackedDefinition { tracked, references }); + } + } + SymbolRole::Reference => { + let tracked = TrackedSymbol { symbol, source_id, source_role }; + let defines = |d: &TrackedDefinition| d.tracked.symbol.defines(&tracked.symbol); + match self.definitions.iter().position(defines) { + Some(index) => { + let definition = &mut self.definitions[index]; + definition.references.push(self.resolved.len()); + self.resolved.push(TrackedReference { tracked, definition: index }) + } + None => self.unresolved.push(tracked), + } + } + } + } + } + + /// Add a set of source units that might contain definitions for unresolved symbols. + pub fn add_library_source_units(&mut self, source_units: Vec<SourceUnit>) { + for source_unit in source_units { + // Discard source units that have already been included. + let in_included = self.source_units.iter().any(|s| s.source_unit == source_unit); + let in_library = self.library_source_units.iter().any(|s| *s == source_unit); + if !in_included && !in_library { + self.library_source_units.push(source_unit); + } + } + } + + /// Attempt to resolve unresolved references with library source units. + pub fn resolve(&mut self) { + // Repeatedly test if each library source unit resolves an unresolved + // symbol, breaking the loop when no new resolutions are found. + 'outer: loop { + for (i, source_unit) in self.library_source_units.iter().enumerate() { + if let Some(id) = self.unit_resolved_by_unit(&source_unit) { + let source_unit = self.library_source_units.remove(i); + self.include_source_unit(source_unit, Some(id)); + continue 'outer; + } + } + break; + } + } + + /// Returns true if all references in all included source units have been + /// resolved. + pub fn error(&self) -> Option<ResolverError> { + match self.unresolved.is_empty() && self.redefinitions.is_empty() { + true => None, + false => Some(ResolverError { resolver: self }) + } + } + + /// Return a type that can print all symbol definitions and references. + pub fn symbols(&self) -> SourceSymbols { + SourceSymbols { resolver: self } + } + + /// Return a type that can print all unused symbol definitions. + pub fn unused(&self) -> UnusedSymbols { + UnusedSymbols { resolver: self } + } + + /// Return a type that can print the structure of the source tree. + pub fn hierarchy(&self) -> SourceHierarchy { + SourceHierarchy { resolver: self } + } + + /// Return the ID of a source unit that contains an unresolved reference + /// to a symbol defined by this unit. + fn unit_resolved_by_unit(&self, source_unit: &SourceUnit) -> Option<usize> { + macro_rules! return_resolved_unit { + ($file:expr) => { + if let Some(symbols) = &$file.symbols { + if let Some(id) = self.unit_resolved_by_symbol(&symbols) { + return Some(id); + } + } + }; + } + return_resolved_unit!(&source_unit.main); + if let Some(head) = &source_unit.head { return_resolved_unit!(&head) } + if let Some(tail) = &source_unit.tail { return_resolved_unit!(&tail) } + return None; + } + + /// Returns the ID of a source unit that contains an unresolved reference + /// to a symbol defined by one of these symbols. + fn unit_resolved_by_symbol(&self, symbols: &[Symbol]) -> Option<usize> { + for symbol in symbols { + if let SymbolRole::Definition(_) = symbol.role { + for unresolved in &self.unresolved { + if unresolved.symbol.name == symbol.name { + return Some(unresolved.source_id); + } + } + } + } + return None; + } + + /// Populate the .parent_ids field of every source unit. The parents of + /// each source unit are the units that define a symbol referenced by the + /// unit, where the definition type is MustPrecedeReference. + pub fn calculate_hierarchy(&mut self) { + // Clear the .parent_ids field of every source unit. + for source_unit in &mut self.source_units { + source_unit.parent_ids.clear(); + } + // Populate the .parent_ids field of every source unit. + for reference in &self.resolved { + let predicate = |d: &&TrackedDefinition| d.tracked.symbol.name == reference.tracked.symbol.name; + if let Some(definition) = self.definitions.iter().find(predicate) { + // A unit cannot be its own parent. + let is_self = reference.tracked.source_id == definition.tracked.source_id; + let must_precede = SymbolRole::Definition(DefinitionType::MustPrecedeReference); + if is_self || definition.tracked.symbol.role != must_precede { continue; } + let referencing_unit = &mut self.source_units[reference.tracked.source_id]; + referencing_unit.parent_ids.insert(definition.tracked.source_id); + }; + } + } + + /// Concatenate all included source units into one string. + /// If the source unit dependency graph contains a cycle, the IDs of the + /// source units involved in the cycle will be returned. + pub fn get_merged_source_code(&self, push: PushFn) -> Result<String, MergeError> { + // The ID of each source unit will come after the IDs of all + // parents of that unit. + let head_order = { + let mut included_source_ids = Vec::new(); + let mut remaining_source_ids = (0..self.source_units.len()).collect::<Vec<_>>(); + + 'restart: while !remaining_source_ids.is_empty() { + // Iterate over source units, most-recently-included first. + 'next: for (i, id) in remaining_source_ids.iter().enumerate() { + let unit = &self.source_units[*id]; + for parent_id in &unit.parent_ids { + // Skip this unit if a parent hasn't yet been included. + if !included_source_ids.contains(parent_id) { + continue 'next; + } + } + // Include this unit, then check remaining units from the start. + included_source_ids.push(*id); + remaining_source_ids.remove(i); + continue 'restart; + } + // All remaining source units depend on at least one remaining + // source unit, indicating a dependency cycle. + return Err(MergeError { + resolver: self, + cyclic_unit_ids: remaining_source_ids, + }); + } + included_source_ids + }; + let mut source_code = String::new(); + + // Push head source code in calculated parent-preceding order. + for id in &head_order { + let source_unit = &self.source_units[*id]; + if let Some(head) = &source_unit.source_unit.head { + push(&mut source_code, head); + } + } + // Push main source code in source-added order. + // The root unit will be pushed first. + for source_unit in self.source_units.iter() { + let main = &source_unit.source_unit.main; + push(&mut source_code, &main); + } + // Push tail source code in reverse source-added order. + // The root unit will be pushed last. + for source_unit in self.source_units.iter().rev() { + if let Some(tail) = &source_unit.source_unit.tail { + push(&mut source_code, tail); + } + } + return Ok(source_code); + } +} + + +/// A source unit tracked with pointers to parents and dependents. +pub struct HeirarchicalSourceUnit { + pub source_unit: SourceUnit, + /// Pointers to source units that resolve references this unit. + pub child_ids: HashSet<usize>, + /// Pointers to source units that must be included before this unit. + pub parent_ids: HashSet<usize>, +} + +pub struct TrackedDefinition { + pub tracked: TrackedSymbol, + /// Points to resolved references in `resolved`. + pub references: Vec<usize>, +} + +pub struct TrackedRedefinition { + pub tracked: TrackedSymbol, + /// Points to existing definition in `definitions`. + pub definition: usize, +} + +pub struct TrackedReference { + pub tracked: TrackedSymbol, + /// Points to resolving definition in `definitions`. + pub definition: usize, +} + +pub struct TrackedSymbol { + pub symbol: Symbol, + pub source_id: usize, + pub source_role: SourceRole, +} + +impl TrackedSymbol { + pub fn context<'a>(&'a self, resolver: &'a Resolver) -> Context<'a> { + let source_unit = &resolver.source_units[self.source_id].source_unit; + let source_code = match self.source_role { + SourceRole::Main => source_unit.main.source_code.as_str(), + SourceRole::Head => match &source_unit.head { + Some(head) => head.source_code.as_str(), + None => unreachable!("Failed to find source code of head file"), + } + SourceRole::Tail => match &source_unit.tail { + Some(tail) => tail.source_code.as_str(), + None => unreachable!("Failed to find source code of tail file"), + } + }; + Context { source_code, source: &self.symbol.source } + } +} + +impl PartialEq for TrackedSymbol { + fn eq(&self, other: &TrackedSymbol) -> bool { + self.symbol.name.eq(&other.symbol.name) + } +} + +#[derive(Clone, Copy, Debug)] +pub enum SourceRole { + Main, + Head, + Tail, +} |