diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-02-05 12:58:02 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-02-05 13:03:36 +1300 |
commit | 80da2af821385b2fc89091e9ac37a047349da4bd (patch) | |
tree | 2ba50368301e041f8d1b99145ab0a1fe28f91571 /src/resolver.rs | |
parent | 8d11be64f6c1747e7c4049105a6dd4ea9ab0d27f (diff) | |
download | assembler-80da2af821385b2fc89091e9ac37a047349da4bd.zip |
Implement source unit compilation, symbol resolution, error reporting
This library can now carry out all stages of assembly from collecting
source fragments to resolving symbols to pruning unused libraries to
generating a single compiled source file.
Pretty-printing of state has also been implemented in this library.
The source tree hierarchy, symbol resolution errors, and file read
errors can all be printed in a tidy format.
Diffstat (limited to 'src/resolver.rs')
-rw-r--r-- | src/resolver.rs | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/src/resolver.rs b/src/resolver.rs new file mode 100644 index 0000000..23dc73a --- /dev/null +++ b/src/resolver.rs @@ -0,0 +1,296 @@ +use crate::*; + +use log::error; + +type PushFn = fn(&mut String, &SourceFile); + + +/// Resolve undeclared symbols in a source unit with definitions from other units. +pub struct Resolver { + /// Definitions from all included source units. + pub definitions: Vec<TrackedSymbol>, + /// All resolved references in all included source units. + pub resolved: Vec<TrackedSymbol>, + /// All unresolved references in all included source units. + pub unresolved: Vec<TrackedSymbol>, + /// All redefined (duplicate) definitions in all included source units. + /// Points to the 'definitions' index of the existing definition. + pub redefinitions: Vec<(TrackedSymbol, usize)>, + /// All included source units. + pub source_units: Vec<HeirarchicalSourceUnit>, + /// The 'source_units' indices of the root source units. + pub root_unit_ids: Vec<usize>, + /// Source units that can be included later to resolve symbols. + pub library_source_units: Vec<SourceUnit>, +} + + +impl Resolver { + pub fn new(source_unit: SourceUnit) -> Self { + let mut new = Self { + definitions: Vec::new(), + resolved: Vec::new(), + unresolved: Vec::new(), + redefinitions: Vec::new(), + source_units: Vec::new(), + root_unit_ids: Vec::new(), + library_source_units: Vec::new(), + }; + new.include_source_unit(source_unit, None); + return new; + } + + pub fn include_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { + let source_id = self.source_units.len(); + use std::mem::take; + + self.include_symbols(take(&mut source_unit.main.symbols), source_id, SourceRole::Main); + if let Some(head) = &mut source_unit.head { + self.include_symbols(take(&mut head.symbols), source_id, SourceRole::Head); } + if let Some(tail) = &mut source_unit.tail { + self.include_symbols(take(&mut tail.symbols), source_id, SourceRole::Tail); } + + match parent_id { + Some(parent_id) => match self.source_units.get_mut(parent_id) { + Some(parent) => parent.child_ids.push(source_id), + None => error!("Could not find parent (#{parent_id}) of source unit #{source_id}"), + } + None => self.root_unit_ids.push(source_id), + } + self.source_units.push( + HeirarchicalSourceUnit { + source_unit, + child_ids: Vec::new(), + parent_ids: Vec::new(), + } + ); + } + + fn include_symbols(&mut self, symbols: Vec<Symbol>, source_id: usize, source_role: SourceRole) { + for symbol in symbols { + match symbol.role { + SymbolRole::Definition(_) => { + // Check if the symbol has already been defined. + let equal = |d: &TrackedSymbol| { &d.symbol.name == &symbol.name }; + if let Some(original) = self.definitions.iter().position(equal) { + let definition = TrackedSymbol { symbol, source_id, source_role }; + let redefinition = (definition, original); + self.redefinitions.push(redefinition); + } else { + // Resolve all unresolved references that match this symbol. + let equal = |s: &mut TrackedSymbol| s.symbol.name == symbol.name; + for symbol in self.unresolved.extract_if(equal) { + self.resolved.push(symbol); + } + let definition = TrackedSymbol { symbol, source_id, source_role }; + self.definitions.push(definition); + } + } + SymbolRole::Reference => { + let reference = TrackedSymbol { symbol, source_id, source_role }; + match self.definitions.contains(&reference) { + true => self.resolved.push(reference), + false => self.unresolved.push(reference), + } + } + } + } + } + + /// Add a set of source units that might contain definitions for unresolved symbols. + pub fn add_library_source_units(&mut self, mut source_units: Vec<SourceUnit>) { + self.library_source_units.append(&mut source_units); + } + + /// Attempt to resolve unresolved references with library source units. + pub fn resolve(&mut self) { + // Repeatedly test if each library source unit resolves an unresolved + // symbol, breaking the loop when no new resolutions are found. + 'outer: loop { + for (i, source_unit) in self.library_source_units.iter().enumerate() { + if let Some(id) = self.unit_resolved_by_unit(&source_unit) { + let source_unit = self.library_source_units.remove(i); + self.include_source_unit(source_unit, Some(id)); + continue 'outer; + } + } + break; + } + } + + /// Returns true if all references in all included source units have been + /// resolved. + pub fn error(&self) -> Option<ResolverError> { + match self.unresolved.is_empty() { + true => None, + false => Some(ResolverError { resolver: self }) + } + } + + /// Return a type that can print the structure of the source tree. + pub fn hierarchy(&self) -> SourceHierarchy { + SourceHierarchy { resolver: self } + } + + /// Return the ID of a source unit that contains an unresolved reference + /// to a symbol defined by this unit. + fn unit_resolved_by_unit(&self, source_unit: &SourceUnit) -> Option<usize> { + if let Some(id) = self.unit_resolved_by_symbol(&source_unit.main.symbols) { + return Some(id); + } + if let Some(head) = &source_unit.head { + if let Some(id) = self.unit_resolved_by_symbol(&head.symbols) { + return Some(id); + } + } + if let Some(tail) = &source_unit.tail { + if let Some(id) = self.unit_resolved_by_symbol(&tail.symbols) { + return Some(id); + } + } + return None; + } + + /// Returns the ID of a source unit that contains an unresolved reference + /// to a symbol defined by one of these symbols. + fn unit_resolved_by_symbol(&self, symbols: &[Symbol]) -> Option<usize> { + for symbol in symbols { + if let SymbolRole::Definition(_) = symbol.role { + for unresolved in &self.unresolved { + if unresolved.symbol.name == symbol.name { + return Some(unresolved.source_id); + } + } + } + } + return None; + } + + /// Populate the .parent_ids field of every source unit. The parents of + /// each source unit are the units that define a symbol referenced by the + /// unit, where the definition type is MustPrecedeReference. + pub fn calculate_hierarchy(&mut self) { + // Clear the .parent_ids field of every source unit. + for source_unit in &mut self.source_units { + source_unit.parent_ids.clear(); + } + // Populate the .parent_ids field of every source unit. + for reference in &self.resolved { + let predicate = |d: &&TrackedSymbol| d.symbol.name == reference.symbol.name; + if let Some(definition) = self.definitions.iter().find(predicate) { + // A unit cannot be its own parent. + let is_self = reference.source_id == definition.source_id; + let must_precede = SymbolRole::Definition(DefinitionType::MustPrecedeReference); + if is_self || definition.symbol.role != must_precede { continue; } + let referencing_unit = &mut self.source_units[reference.source_id]; + referencing_unit.parent_ids.push(definition.source_id); + }; + } + } + + /// Concatenate all included source units into one string. + /// If the source unit dependency graph contains a cycle, the IDs of the + /// source units involved in the cycle will be returned. + pub fn get_merged_source_code(&self, push: PushFn) -> Result<String, MergeError> { + // The ID of each source unit will come after the IDs of all + // parents of that unit. + let head_order = { + let mut included_source_ids = Vec::new(); + let mut remaining_source_ids = (0..self.source_units.len()).collect::<Vec<_>>(); + + 'restart: while !remaining_source_ids.is_empty() { + // Iterate over source units, most-recently-included first. + 'next: for (i, id) in remaining_source_ids.iter().enumerate() { + let unit = &self.source_units[*id]; + for parent_id in &unit.parent_ids { + // Skip this unit if a parent hasn't yet been included. + if !included_source_ids.contains(parent_id) { + continue 'next; + } + } + // Include this unit, then check remaining units from the start. + included_source_ids.push(*id); + remaining_source_ids.remove(i); + continue 'restart; + } + // All remaining source units depend on at least one remaining + // source unit, indicating a dependency cycle. + return Err(MergeError { + resolver: self, + cyclic_unit_ids: remaining_source_ids, + }); + } + included_source_ids + }; + let mut source_code = String::new(); + + // Push head source code in calculated parent-preceding order. + for id in &head_order { + let source_unit = &self.source_units[*id]; + if let Some(head) = &source_unit.source_unit.head { + push(&mut source_code, head); + } + } + // Push main source code in source-added order. + // The root unit will be pushed first. + for source_unit in self.source_units.iter() { + let main = &source_unit.source_unit.main; + push(&mut source_code, &main); + } + // Push tail source code in reverse source-added order. + // The root unit will be pushed last. + for source_unit in self.source_units.iter().rev() { + if let Some(tail) = &source_unit.source_unit.tail { + push(&mut source_code, tail); + } + } + return Ok(source_code); + } +} + + +/// A source unit tracked with pointers to parents and dependents. +pub struct HeirarchicalSourceUnit { + pub source_unit: SourceUnit, + /// Pointers to source units that resolve references this unit. + pub child_ids: Vec<usize>, + /// Pointers to source units that must be included before this unit. + pub parent_ids: Vec<usize>, +} + +pub struct TrackedSymbol { + pub symbol: Symbol, + pub source_id: usize, + pub source_role: SourceRole, +} + +impl TrackedSymbol { + pub fn context<'a>(&'a self, resolver: &'a Resolver) -> Context<'a> { + let source_unit = &resolver.source_units[self.source_id].source_unit; + let source_code = match self.source_role { + SourceRole::Main => source_unit.main.source_code.as_str(), + SourceRole::Head => match &source_unit.head { + Some(head) => head.source_code.as_str(), + None => unreachable!("Failed to find source code of head file"), + } + SourceRole::Tail => match &source_unit.tail { + Some(tail) => tail.source_code.as_str(), + None => unreachable!("Failed to find source code of tail file"), + } + }; + Context { source_code, source: &self.symbol.source } + } +} + +impl PartialEq for TrackedSymbol { + fn eq(&self, other: &TrackedSymbol) -> bool { + self.symbol.name.eq(&other.symbol.name) + } +} + +#[derive(Clone, Copy, Debug)] +pub enum SourceRole { + Main, + Head, + Tail, +} |