From 6cc0bec0e11d5fec757e90aebd7e51ed9393365c Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Wed, 30 Oct 2024 14:48:10 +1300 Subject: Implement an intelligent source merging strategy The previous source merging strategy was to concatenate source units in the reverse order that they were added to the resolver, which generally only worked when each source unit had at most one macro-resolving parent. An issue arose when some macros in a source unit were resolved by a source unit which had been added earlier in the order, as the required macro definitions would then be merged after they were referenced, preventing the program from assembling. The new source merging strategy finds an optimal merge order by first recording for a given source unit the ID of each unit which resolves a macro referenced by the given unit, and then only merging those source units whose macro-defining dependencies have already been merged. In the case that a cycle is detected, where two or more source units depend on one another, a message is printed and the assembly is aborted. --- src/bin/bedrock-asm.rs | 8 ++++- src/gather_libraries.rs | 19 +++++++++-- src/lib.rs | 1 + src/print.rs | 41 ++++++++++++++++++++--- src/symbol_resolver.rs | 88 ++++++++++++++++++++++++++++++++++++++++++------- 5 files changed, 136 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs index 2a29ee3..5cb962f 100644 --- a/src/bin/bedrock-asm.rs +++ b/src/bin/bedrock-asm.rs @@ -77,7 +77,13 @@ fn main() { if print_resolver_errors(&resolver) { std::process::exit(1); }; - let merged_source = resolver.get_merged_source_code(); + let merged_source = match resolver.get_merged_source_code() { + Ok(merged_source) => merged_source, + Err(ids) => { + print_cyclic_source_units(&ids, &resolver); + std::process::exit(1); + }, + }; if args.resolve { write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref()); } diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs index 0b5d2a6..0fd1131 100644 --- a/src/gather_libraries.rs +++ b/src/gather_libraries.rs @@ -117,15 +117,19 @@ fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbol let mut references = Vec::new(); for token in token_iter { + let source = token.source; match token.variant { SynVar::LabelDefinition(name) => { - definitions.push(Symbol { name, source: token.source }); + let variant = SymbolVariant::LabelDefinition; + definitions.push(Symbol { name, source, variant }); }, SynVar::MacroDefinition(name) => { - definitions.push(Symbol { name, source: token.source }); + let variant = SymbolVariant::MacroDefinition; + definitions.push(Symbol { name, source, variant }); } SynVar::Symbol(name) => { - references.push(Symbol { name, source: token.source }); + let variant = SymbolVariant::Reference; + references.push(Symbol { name, source, variant }); }, _ => (), } @@ -181,5 +185,14 @@ pub struct Symbols { pub struct Symbol { pub name: String, + pub variant: SymbolVariant, pub source: SourceSpan, } + + +#[derive(PartialEq)] +pub enum SymbolVariant { + LabelDefinition, + MacroDefinition, + Reference, +} diff --git a/src/lib.rs b/src/lib.rs index ff00605..3470e6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(extract_if)] #![feature(io_error_more)] #![feature(map_try_insert)] diff --git a/src/print.rs b/src/print.rs index 7f49db2..0c81c07 100644 --- a/src/print.rs +++ b/src/print.rs @@ -120,6 +120,36 @@ pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool { } +/// The `ids` argument contains a list of the IDs of the source units which +/// cyclicly depend on one another. +pub fn print_cyclic_source_units(ids: &[usize], resolver: &SymbolResolver) { + eprintln!("{BOLD}{RED}[ERROR]{WHITE}: Some libraries contain a dependency cycle{NORMAL}"); + for id in ids { + if let Some(unit) = resolver.source_units.get(*id) { + let path = &unit.source_unit.main.path; + let path_str = path.as_os_str().to_string_lossy(); + if let Some(name_str) = get_unit_name(&unit.source_unit) { + eprintln!("{name_str}{NORMAL}{DIM} ({path_str}){NORMAL}"); + } else { + eprintln!("{path_str}"); + }; + // Print parents involved in dependency cycle. + for parent_id in &unit.parent_ids { + if !ids.contains(parent_id) { continue; } + if let Some(parent_unit) = resolver.source_units.get(*parent_id) { + let parent_path = &parent_unit.source_unit.main.path; + let parent_path_str = parent_path.as_os_str().to_string_lossy(); + let parent_name_str = match get_unit_name(&parent_unit.source_unit) { + Some(parent_name_str) => parent_name_str, + None => parent_path_str.to_string(), + }; + eprintln!(" => {parent_name_str} {DIM}({parent_path_str}){NORMAL}"); + } + } + } + } +} + pub fn print_error(message: &str, context: Context) { print_source_issue(message, context, SourceIssueVariant::Error); @@ -207,10 +237,8 @@ fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec< true => eprint!("└── "), } if let Some(unit) = resolver.source_units.get(id) { - let path = &unit.source_unit.main.path; - let path_str = path.as_os_str().to_string_lossy(); - if let Some(name) = path.file_name() { - let name_str = name.to_string_lossy(); + let path_str = &unit.source_unit.main.path.as_os_str().to_string_lossy(); + if let Some(name_str) = get_unit_name(&unit.source_unit) { eprint!("{name_str}{BLUE}"); if unit.source_unit.head.is_some() { eprint!(" +head") } if unit.source_unit.tail.is_some() { eprint!(" +tail") } @@ -222,7 +250,7 @@ fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec< eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}"); } else { eprintln!("{path_str}"); - }; + } levels.push(end); let len = unit.child_ids.len(); for (i, id) in unit.child_ids.iter().enumerate() { @@ -235,3 +263,6 @@ fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec< } +fn get_unit_name(source_unit: &SourceUnit) -> Option { + source_unit.main.path.file_name().map(|s| s.to_string_lossy().to_string()) +} diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs index cced994..e19a7bf 100644 --- a/src/symbol_resolver.rs +++ b/src/symbol_resolver.rs @@ -6,6 +6,9 @@ use std::mem::take; /// Resolve symbol references across source units. pub struct SymbolResolver { pub definitions: Vec, + /// All resolved references. + pub resolved: Vec, + /// All unresolved references. pub unresolved: Vec, /// Contains the ID of the owner of the original definition. pub redefinitions: Vec<(TrackedSymbol, usize)>, @@ -20,6 +23,7 @@ impl SymbolResolver { pub fn from_source_unit(source_unit: SourceUnit) -> Self { let mut new = Self { definitions: Vec::new(), + resolved: Vec::new(), unresolved: Vec::new(), redefinitions: Vec::new(), source_units: Vec::new(), @@ -47,6 +51,20 @@ impl SymbolResolver { } break; } + + // For every macro reference in every unit, find the ID of the unit which + // resolves that reference and add it to the .parent_ids field of the + // referencing unit. + for reference in &self.resolved { + let predicate = |d: &&TrackedSymbol| d.symbol.name == reference.symbol.name; + if let Some(definition) = self.definitions.iter().find(predicate) { + let is_self = reference.source_id == definition.source_id; + let is_label = definition.symbol.variant == SymbolVariant::LabelDefinition; + if is_self || is_label { continue; } + let referencing_unit = &mut self.source_units[reference.source_id]; + referencing_unit.parent_ids.push(definition.source_id); + }; + } } /// Add a source unit to the resolver and link it to a parent unit. @@ -83,15 +101,21 @@ impl SymbolResolver { self.root_unit_ids.push(source_id); } - let source_unit = HeirarchicalSourceUnit { source_unit, child_ids: Vec::new() }; - self.source_units.push(source_unit); + self.source_units.push( + HeirarchicalSourceUnit { + source_unit, + child_ids: Vec::new(), + parent_ids: Vec::new(), + } + ); } fn add_references(&mut self, references: Vec, source_id: usize, source_role: SourceRole) { for symbol in references { let reference = TrackedSymbol { symbol, source_id, source_role }; - if !self.definitions.contains(&reference) { - self.unresolved.push(reference); + match self.definitions.contains(&reference) { + true => self.resolved.push(reference), + false => self.unresolved.push(reference), } } } @@ -104,6 +128,10 @@ impl SymbolResolver { let redefinition = (definition, def.source_id); self.redefinitions.push(redefinition); } else { + let predicate = |s: &mut TrackedSymbol| s.symbol.name == symbol.name; + for symbol in self.unresolved.extract_if(predicate) { + self.resolved.push(symbol); + } self.unresolved.retain(|s| s.symbol.name != symbol.name); let definition = TrackedSymbol { symbol, source_id, source_role }; self.definitions.push(definition); @@ -162,28 +190,61 @@ impl SymbolResolver { } /// Create a source file by concatenating all source units. - pub fn get_merged_source_code(&self) -> String { - // The first source unit is guaranteed to be the root unit, so we can - // just push source files in their current order. + /// If the source unit dependency graph contains a cycle, the IDs of the + /// source units involved in the cycle will be returned. + pub fn get_merged_source_code(&self) -> Result> { + // The ID of a given source unit will come after the IDs of all + // source units which define at least one symbol referenced in the + // given source unit. + let source_order = { + let mut included_source_ids: Vec = Vec::new(); + let mut remaining_source_ids: Vec = Vec::new(); + for i in 0..self.source_units.len() { + remaining_source_ids.push(i); + } + + 'restart: while !remaining_source_ids.is_empty() { + 'next: for (i, id) in remaining_source_ids.iter().enumerate() { + let unit = &self.source_units[*id]; + for parent_id in &unit.parent_ids { + if !included_source_ids.contains(&parent_id) { + continue 'next; + } + } + included_source_ids.push(*id); + remaining_source_ids.remove(i); + continue 'restart; + } + // All remaining source units depend on at least one remaining + // source unit, indicating a dependency cycle. + return Err(remaining_source_ids); + } + included_source_ids + }; + let mut source_code = String::new(); // Push head source code. - for source_unit in self.source_units.iter().rev() { + for id in &source_order { + let source_unit = &self.source_units[*id]; if let Some(head) = &source_unit.source_unit.head { push_source_code_to_string(&mut source_code, head); } } // Push main source code. - for source_unit in self.source_units.iter() { - push_source_code_to_string(&mut source_code, &source_unit.source_unit.main); + for id in source_order.iter().rev() { + let source_unit = &self.source_units[*id]; + let main = &source_unit.source_unit.main; + push_source_code_to_string(&mut source_code, &main); } // Push tail source code. - for source_unit in self.source_units.iter().rev() { + for id in &source_order { + let source_unit = &self.source_units[*id]; if let Some(tail) = &source_unit.source_unit.tail { push_source_code_to_string(&mut source_code, tail); } } - return source_code; + return Ok(source_code); } } @@ -204,7 +265,10 @@ fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) { pub struct HeirarchicalSourceUnit { pub source_unit: SourceUnit, + /// IDs of units which were added to resolve symbol references this unit. pub child_ids: Vec, + /// IDs of units which resolve macro references in this unit. + pub parent_ids: Vec, } -- cgit v1.2.3-70-g09d2