diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/bin/bedrock-asm.rs | 154 | ||||
-rw-r--r-- | src/compiler.rs | 94 | ||||
-rw-r--r-- | src/gather_libraries.rs | 198 | ||||
-rw-r--r-- | src/lib.rs | 14 | ||||
-rw-r--r-- | src/print.rs | 196 | ||||
-rw-r--r-- | src/symbol_resolver.rs | 296 | ||||
-rw-r--r-- | src/tokens/mod.rs (renamed from src/tokens.rs) | 0 | ||||
-rw-r--r-- | src/translators/mod.rs (renamed from src/translators.rs) | 0 |
8 files changed, 102 insertions, 850 deletions
diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs deleted file mode 100644 index 5cb962f..0000000 --- a/src/bin/bedrock-asm.rs +++ /dev/null @@ -1,154 +0,0 @@ -use bedrock_asm::*; - -use std::io::{Read, Write}; -use std::path::{Path, PathBuf}; - - -static mut VERBOSE: bool = false; - -macro_rules! verbose { - ($($tokens:tt)*) => { if unsafe { VERBOSE } { - eprint!("[INFO] "); eprintln!($($tokens)*); - } }; -} -macro_rules! error { - ($($tokens:tt)*) => {{ - eprint!("[ERROR] "); eprintln!($($tokens)*); std::process::exit(1); - }}; -} - - -fn main() { - let args = Arguments::from_env_or_exit(); - - // ----------------------------------------------------------------------- - // RESOLVE syntactic symbols - let ext = args.ext.unwrap_or(String::from("brc")); - let mut resolver = if let Some(path) = &args.source { - match SourceUnit::from_path(&path, &ext) { - Ok(source_unit) => SymbolResolver::from_source_unit(source_unit), - Err(err) => match err { - ParseError::InvalidExtension => error!( - "File {path:?} has invalid extension, must be '.{ext}'"), - ParseError::NotFound => error!( - "File {path:?} was not found"), - ParseError::InvalidUtf8 => error!( - "File {path:?} does not contain valid UTF-8 text"), - ParseError::NotReadable => error!( - "File {path:?} is not readable"), - ParseError::IsADirectory => error!( - "File {path:?} is a directory"), - ParseError::Unknown => error!( - "Unknown error while attempting to read from {path:?}") - } - } - } else { - let mut source_code = String::new(); - verbose!("Reading program source from standard input"); - if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { - eprintln!("Could not read from standard input, exiting."); - eprintln!("({err:?})"); - std::process::exit(1); - } - let path = "<standard input>"; - let source_unit = SourceUnit::from_source_code(source_code, path); - SymbolResolver::from_source_unit(source_unit) - }; - // Load project libraries. - if let Some(path) = &args.source { - if !args.no_libs && !args.no_project_libs { - let project_library = gather_project_libraries(path, &ext); - resolver.add_library_units(project_library); - } - } - // Load environment libraries. - if !args.no_libs && !args.no_env_libs { - for env_library in gather_environment_libraries(&ext) { - resolver.add_library_units(env_library); - } - } - resolver.resolve(); - - // ----------------------------------------------------------------------- - // PRINT information, generate merged source code - if args.tree { - print_source_tree(&resolver); - } - if print_resolver_errors(&resolver) { - std::process::exit(1); - }; - let merged_source = match resolver.get_merged_source_code() { - Ok(merged_source) => merged_source, - Err(ids) => { - print_cyclic_source_units(&ids, &resolver); - std::process::exit(1); - }, - }; - if args.resolve { - write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref()); - } - - // ----------------------------------------------------------------------- - // PARSE semantic tokens from merged source code - let path = Some("<merged source>"); - let mut semantic_tokens = generate_semantic_tokens(&merged_source, path); - if print_semantic_errors(&semantic_tokens, &merged_source) { - std::process::exit(1); - }; - - // ----------------------------------------------------------------------- - // GENERATE symbols file and bytecode - let bytecode = generate_bytecode(&mut semantic_tokens); - // let symbols = generate_symbols_file(&semantic_tokens); - write_bytes_and_exit(&bytecode, args.output.as_ref()); -} - - -fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { - if let Some(path) = path { - if let Err(err) = std::fs::write(path, bytes) { - eprintln!("Could not write to path {:?}, exiting.", path.as_ref()); - eprintln!("({err:?})"); - std::process::exit(1); - } - } else { - if let Err(err) = std::io::stdout().write_all(bytes) { - eprintln!("Could not write to standard output, exiting."); - eprintln!("({err:?})"); - std::process::exit(1); - } - } - std::process::exit(0); -} - - -xflags::xflags! { - cmd arguments { - /// Print additional debug information - optional --verbose - /// Print the assembler version and exit - optional --version - - - /// Bedrock source code file to assemble. - optional source: PathBuf - /// Destination path for assembler output. - optional output: PathBuf - /// File extension to identify source files. - optional ext: String - - /// Don't include libraries or resolve references. - optional --no-libs - /// Don't include project libraries - optional --no-project-libs - /// Don't include environment libraries. - optional --no-env-libs - - /// Show the resolved source file heirarchy - optional --tree - /// Assemble the program without saving any output - optional --check - /// Only return resolved source code. - optional --resolve - } -} diff --git a/src/compiler.rs b/src/compiler.rs new file mode 100644 index 0000000..3ac4854 --- /dev/null +++ b/src/compiler.rs @@ -0,0 +1,94 @@ +use crate::*; + + +/// Compiles multiple source code files into one. +pub struct Compiler { + pub source_path: PathBuf, + pub resolver: Resolver, +} + +impl Compiler { + pub fn from_string<P: AsRef<Path>>(source_code: String, path: P) -> Self { + let source_unit = SourceUnit::from_string(source_code, &path, parse_symbols); + Self { + source_path: path.as_ref().to_path_buf(), + resolver: Resolver::new(source_unit) + } + } + + pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, FileError> { + let source_unit = SourceUnit::from_path(&path, None, parse_symbols)?; + Ok(Self { + source_path: path.as_ref().to_path_buf(), + resolver: Resolver::new(source_unit) + }) + } + + /// Find library files descending from the parent directory. + pub fn include_libs_from_parent(&mut self, ext: &str) { + if let Some(parent_path) = self.source_path.parent() { + let parent_path = parent_path.to_owned(); + self.include_libs_from_path(&parent_path, ext); + } + } + + /// Find library files at or descending from a path. + pub fn include_libs_from_path(&mut self, path: &Path, ext: &str) { + let libraries = gather_from_path(path, Some(ext), parse_symbols); + self.resolver.add_library_source_units(libraries); + self.resolver.resolve(); + } + + /// Find library files from a PATH-style environment variable. + pub fn include_libs_from_path_variable(&mut self, name: &str, ext: &str) { + let libraries = gather_from_path_variable(name, Some(ext), parse_symbols); + self.resolver.add_library_source_units(libraries); + self.resolver.resolve(); + } + + pub fn error(&self) -> Option<ResolverError> { + self.resolver.error() + } + + pub fn get_compiled_source(&self) -> Result<String, MergeError> { + self.resolver.get_merged_source_code(push_source_code) + } +} + + +/// Parse all symbols from a source code string. +fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> { + use SyntacticTokenVariant as SynVar; + let mut symbols = Vec::new(); + for token in SyntacticParser::from_source_code(&source_code, path) { + let source = token.source; + let (name, role) = match token.variant { + SynVar::LabelDefinition(name) => + (name, SymbolRole::Definition(DefinitionType::CanFollowReference)), + SynVar::MacroDefinition(name) => + (name, SymbolRole::Definition(DefinitionType::MustPrecedeReference)), + SynVar::Symbol(name) => + (name, SymbolRole::Reference), + _ => continue, + }; + symbols.push(Symbol { name, source, role }); + } + return symbols; +} + +/// Push source code to a source compilation string. +fn push_source_code(compilation: &mut String, source_file: &SourceFile) { + // Skip blank files. + let source_code = &source_file.source_code; + if source_code.chars().all(|c| c.is_whitespace()) { return; } + // Ensure that the previous section is followed by two newline characters. + if !compilation.is_empty() { + if !compilation.ends_with('\n') { compilation.push('\n'); } + if !compilation.ends_with("\n\n") { compilation.push('\n'); } + } + // Push a path comment and the source code. + let path_str = source_file.path.as_os_str().to_string_lossy(); + let path_comment = format!("(: {path_str} )\n"); + compilation.push_str(&path_comment); + compilation.push_str(&source_code); +} diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs deleted file mode 100644 index 0fd1131..0000000 --- a/src/gather_libraries.rs +++ /dev/null @@ -1,198 +0,0 @@ -use crate::*; - -use vagabond::*; - - -/// Gather all library units from the given path. -pub fn gather_project_libraries(path: &Path, extension: &str) -> Vec<SourceUnit> { - match path.parent() { - Some(parent_path) => gather_source_units(parent_path, extension), - None => Vec::new(), - } -} - - -/// Gather all library units from the paths specified in an environment variable. -pub fn gather_environment_libraries(extension: &str) -> Vec<Vec<SourceUnit>> { - let mut environment_libraries = Vec::new(); - if let Ok(lib_var) = std::env::var("BEDROCK_LIBS") { - for path_str in lib_var.split(":") { - let lib_path = PathBuf::from(path_str); - let source_units = gather_source_units(&lib_path, extension); - if !source_units.is_empty() { - environment_libraries.push(source_units); - } - } - }; - return environment_libraries; -} - - -/// Gather all source units at or descended from the given entry. -fn gather_source_units(path: &Path, extension: &str) -> Vec<SourceUnit> { - let mut source_units = Vec::new(); - if let Ok(entry) = Entry::from_path(path) { - match entry.entry_type { - EntryType::File => { - if let Ok(source) = SourceUnit::from_path(entry.path, extension) { - source_units.push(source); - } - } - EntryType::Directory => { - if let Ok(entries) = traverse_directory(entry.path) { - for entry in entries { - if let Ok(source) = SourceUnit::from_path(entry.path, extension) { - source_units.push(source); - } - } - } - } - } - }; - return source_units; -} - - -pub struct SourceUnit { - pub main: SourceFile, - pub head: Option<SourceFile>, - pub tail: Option<SourceFile>, -} - - -impl SourceUnit { - /// Load from a source file and an associated head and tail file. - pub fn from_path<P: Into<PathBuf>>(path: P, extension: &str) -> Result<Self, ParseError> { - let main_path = canonicalize_path(path); - let main_path_str = main_path.as_os_str().to_string_lossy().to_string(); - let head_extension = format!("head.{extension}"); - let tail_extension = format!("tail.{extension}"); - let is_head = main_path_str.ends_with(&head_extension); - let is_tail = main_path_str.ends_with(&tail_extension); - let is_not_main = !main_path_str.ends_with(extension); - if is_not_main || is_head || is_tail { return Err(ParseError::InvalidExtension); } - - let symbols = parse_symbols_from_file(&main_path)?; - let head_path = main_path.with_extension(head_extension); - let tail_path = main_path.with_extension(tail_extension); - - let main = SourceFile { path: main_path, symbols }; - let head = match parse_symbols_from_file(&head_path) { - Ok(symbols) => Some(SourceFile { path: head_path, symbols }), - Err(_) => None, - }; - let tail = match parse_symbols_from_file(&tail_path) { - Ok(symbols) => Some(SourceFile { path: tail_path, symbols }), - Err(_) => None, - }; - Ok( SourceUnit { main, head, tail } ) - } - - /// Load from a string of source code. - pub fn from_source_code<P: Into<PathBuf>>(source_code: String, path: P) -> Self { - let path = canonicalize_path(path); - let symbols = parse_symbols_from_source(source_code, Some(&path)); - Self { - main: SourceFile { path, symbols }, - head: None, - tail: None, - } - } -} - - -/// Read and parse all symbols from a source file. -fn parse_symbols_from_file(path: &Path) -> Result<Symbols, ParseError> { - let source = read_source_from_file(path)?; - Ok(parse_symbols_from_source(source, Some(path))) -} - - -/// Parse all symbols from a source code string. -fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbols { - use SyntacticTokenVariant as SynVar; - - let token_iter = SyntacticParser::from_source_code(&source_code, path); - let mut definitions = Vec::new(); - let mut references = Vec::new(); - - for token in token_iter { - let source = token.source; - match token.variant { - SynVar::LabelDefinition(name) => { - let variant = SymbolVariant::LabelDefinition; - definitions.push(Symbol { name, source, variant }); - }, - SynVar::MacroDefinition(name) => { - let variant = SymbolVariant::MacroDefinition; - definitions.push(Symbol { name, source, variant }); - } - SynVar::Symbol(name) => { - let variant = SymbolVariant::Reference; - references.push(Symbol { name, source, variant }); - }, - _ => (), - } - } - - Symbols { - definitions: Some(definitions), - references: Some(references), - source_code, - } -} - - -/// Attempt to read program source from a file. -pub fn read_source_from_file(path: &Path) -> Result<String, ParseError> { - match std::fs::read(&path) { - Ok(bytes) => match String::from_utf8(bytes) { - Ok(source) => Ok(source), - Err(_) => return Err(ParseError::InvalidUtf8), - } - Err(err) => return Err( match err.kind() { - std::io::ErrorKind::NotFound => ParseError::NotFound, - std::io::ErrorKind::PermissionDenied => ParseError::NotReadable, - std::io::ErrorKind::IsADirectory => ParseError::IsADirectory, - _ => ParseError::Unknown, - } ) - } -} - - -fn canonicalize_path<P: Into<PathBuf>>(path: P) -> PathBuf { - let pathbuf = path.into(); - match pathbuf.canonicalize() { - Ok(canonical) => canonical, - Err(_) => pathbuf, - } -} - - - -pub struct SourceFile { - pub path: PathBuf, - pub symbols: Symbols, -} - - -pub struct Symbols { - pub definitions: Option<Vec<Symbol>>, - pub references: Option<Vec<Symbol>>, - pub source_code: String, -} - - -pub struct Symbol { - pub name: String, - pub variant: SymbolVariant, - pub source: SourceSpan, -} - - -#[derive(PartialEq)] -pub enum SymbolVariant { - LabelDefinition, - MacroDefinition, - Reference, -} @@ -2,21 +2,15 @@ #![feature(io_error_more)] #![feature(map_try_insert)] - -mod gather_libraries; -mod symbol_resolver; - -pub use gather_libraries::*; -pub use symbol_resolver::*; - +mod compiler; mod tokens; mod translators; +mod print; +pub use compiler::*; pub use tokens::*; pub use translators::*; - -mod print; pub use print::*; - pub use assembler::*; + diff --git a/src/print.rs b/src/print.rs index 2110d37..9306984 100644 --- a/src/print.rs +++ b/src/print.rs @@ -1,25 +1,12 @@ use crate::*; +use log::LogLevel; + use SemanticTokenVariant as SemVar; use SemanticParseError as SemErr; use SyntacticParseError as SynErr; -const NORMAL: &str = "\x1b[0m"; -const BOLD: &str = "\x1b[1m"; -const DIM: &str = "\x1b[2m"; -const WHITE: &str = "\x1b[37m"; -const RED: &str = "\x1b[31m"; -const YELLOW: &str = "\x1b[33m"; -const BLUE: &str = "\x1b[34m"; - - -pub struct Context<'a> { - pub source_code: &'a str, - pub source: &'a SourceSpan, -} - - /// Print all errors found in the semantic tokens, including those inside macro /// definitions. Returns true if at least one error was printed. pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &str) -> bool { @@ -54,7 +41,7 @@ pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &st fn print_semantic_error(error: &SemanticParseError, context: Context) { let message = get_message_for_semantic_error(error); - print_error(&message, context); + report_source_issue(LogLevel::Error, &context, &message); } fn get_message_for_semantic_error(error: &SemanticParseError) -> String { @@ -70,7 +57,7 @@ fn get_message_for_semantic_error(error: &SemanticParseError) -> String { SemErr::UnclosedBlock => format!("Block was not closed, add a '}}' character to close"), SemErr::UndefinedSymbol(name) => - format!("Undefined symbol, no label or macro has been defined with the name '{name}'"), + format!("Undefined symbol, no label or macro has been defined with the name {name:?}"), SemErr::RedefinedSymbol((_, source)) => format!("Redefined symbol, first defined at {}", source.location()), SemErr::MacroInvocationBeforeDefinition((_, source)) => @@ -87,178 +74,3 @@ fn get_message_for_semantic_error(error: &SemanticParseError) -> String { } } } - - -pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool { - let mut found_error = false; - for reference in &resolver.unresolved { - found_error = true; - let message = format!( - "Undefined symbol, no label or macro has been defined with the name '{}'", - &reference.symbol.source.string, - ); - let source_code = resolver.get_source_code_for_tracked_symbol(reference); - let source = &reference.symbol.source; - print_error(&message, Context { source_code, source } ) - } - for redefinition in &resolver.redefinitions { - found_error = true; - let definition = resolver.definitions.get(redefinition.1).unwrap(); - let message = format!( - "Redefined symbol, first defined at {}", - &definition.symbol.source.in_merged, - ); - let source_code = resolver.get_source_code_for_tracked_symbol(&redefinition.0); - let source = &redefinition.0.symbol.source; - print_error(&message, Context { source_code, source } ) - } - return found_error; -} - - -/// The `ids` argument contains a list of the IDs of the source units which -/// cyclicly depend on one another. -pub fn print_cyclic_source_units(ids: &[usize], resolver: &SymbolResolver) { - eprintln!("{BOLD}{RED}[ERROR]{WHITE}: Some libraries contain a dependency cycle{NORMAL}"); - for id in ids { - if let Some(unit) = resolver.source_units.get(*id) { - let path = &unit.source_unit.main.path; - let path_str = path.as_os_str().to_string_lossy(); - if let Some(name_str) = get_unit_name(&unit.source_unit) { - eprintln!("{name_str}{NORMAL}{DIM} ({path_str}){NORMAL}"); - } else { - eprintln!("{path_str}"); - }; - // Print parents involved in dependency cycle. - for parent_id in &unit.parent_ids { - if !ids.contains(parent_id) { continue; } - if let Some(parent_unit) = resolver.source_units.get(*parent_id) { - let parent_path = &parent_unit.source_unit.main.path; - let parent_path_str = parent_path.as_os_str().to_string_lossy(); - let parent_name_str = match get_unit_name(&parent_unit.source_unit) { - Some(parent_name_str) => parent_name_str, - None => parent_path_str.to_string(), - }; - eprintln!(" => {parent_name_str} {DIM}({parent_path_str}){NORMAL}"); - } - } - } - } -} - - -pub fn print_error(message: &str, context: Context) { - print_source_issue(message, context, SourceIssueVariant::Error); -} - -pub fn print_warning(message: &str, context: Context) { - print_source_issue(message, context, SourceIssueVariant::Warning); -} - -fn print_source_issue(message: &str, context: Context, variant: SourceIssueVariant) { - let (label, colour) = match variant { - SourceIssueVariant::Warning => ("WARNING", YELLOW), - SourceIssueVariant::Error => ("ERROR", RED), - }; - - // Prepare variables. - let location = &context.source.in_merged; - let y = location.start.line + 1; - let digits = y.to_string().len(); - let arrow = "-->"; - let space = " "; - - // Print message and file path. - eprintln!("{BOLD}{colour}[{label}]{WHITE}: {message}{NORMAL}"); - eprintln!("{BLUE}{arrow:>w$}{NORMAL} {location}{NORMAL}", w=digits+3); - if let Some(source) = &context.source.in_source { - eprintln!("{BLUE}{arrow:>w$}{NORMAL} {source}{NORMAL}", w=digits+3); - } - - let start = location.start.column; - let end = location.end.column + 1; - - // Print source code line. - eprint!("{BLUE} {y} | {NORMAL}"); - let line = get_line_from_source_code(context.source_code, location.start.line); - for (i, c) in line.chars().enumerate() { - if i == start { eprint!("{colour}") } - if i == end { eprint!("{NORMAL}") } - eprint!("{c}"); - } - eprintln!("{NORMAL}"); - - // Print source code underline. - eprint!("{BLUE} {space:>w$} | {NORMAL}", w=digits); - for _ in 0..start { eprint!(" "); } - eprint!("{colour}"); - for _ in start..end { eprint!("^"); } - eprintln!("{NORMAL}"); -} - - -fn get_line_from_source_code(source_code: &str, line: usize) -> &str { - source_code.split('\n').nth(line).unwrap_or("<error reading line from source>") -} - - -enum SourceIssueVariant { - Warning, - Error, -} - - -/// Print a tree containing the name and path of each source unit. -pub fn print_source_tree(resolver: &SymbolResolver) { - eprintln!("."); - let len = resolver.root_unit_ids.len(); - for (i, id) in resolver.root_unit_ids.iter().enumerate() { - let end = i + 1 == len; - print_source_tree_leaf(resolver, *id, Vec::new(), end); - } - eprintln!(); -} - -fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec<bool>, end: bool) { - // A level entry is true if all entries in that level have been printed. - for level in &levels { - match level { - false => eprint!("│ "), - true => eprint!(" "), - } - } - // The end value is true if all siblings of this entry have been printed. - match end { - false => eprint!("├── "), - true => eprint!("└── "), - } - if let Some(unit) = resolver.source_units.get(id) { - let path_str = &unit.source_unit.main.path.as_os_str().to_string_lossy(); - if let Some(name_str) = get_unit_name(&unit.source_unit) { - eprint!("{name_str}{BLUE}"); - if unit.source_unit.head.is_some() { eprint!(" +head") } - if unit.source_unit.tail.is_some() { eprint!(" +tail") } - let mut unresolved = 0; - for symbol in &resolver.unresolved { - if symbol.source_id == id { unresolved += 1; } - } - if unresolved > 0 { eprint!("{RED} ({unresolved})"); } - eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}"); - } else { - eprintln!("{path_str}"); - } - levels.push(end); - let len = unit.child_ids.len(); - for (i, id) in unit.child_ids.iter().enumerate() { - let end = i + 1 == len; - print_source_tree_leaf(resolver, *id, levels.clone(), end); - } - } else { - eprintln!("<error loading source unit details>"); - } -} - - -fn get_unit_name(source_unit: &SourceUnit) -> Option<String> { - source_unit.main.path.file_name().map(|s| s.to_string_lossy().to_string()) -} diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs deleted file mode 100644 index 0b89fb1..0000000 --- a/src/symbol_resolver.rs +++ /dev/null @@ -1,296 +0,0 @@ -use crate::*; - -use std::mem::take; - - -/// Resolve symbol references across source units. -pub struct SymbolResolver { - pub definitions: Vec<TrackedSymbol>, - /// All resolved references. - pub resolved: Vec<TrackedSymbol>, - /// All unresolved references. - pub unresolved: Vec<TrackedSymbol>, - /// Contains the `definitions` index of the original definition. - pub redefinitions: Vec<(TrackedSymbol, usize)>, - pub source_units: Vec<HeirarchicalSourceUnit>, - pub root_unit_ids: Vec<usize>, - pub unused_library_units: Vec<SourceUnit>, -} - - -impl SymbolResolver { - /// Construct a resolver from a root source unit. - pub fn from_source_unit(source_unit: SourceUnit) -> Self { - let mut new = Self { - definitions: Vec::new(), - resolved: Vec::new(), - unresolved: Vec::new(), - redefinitions: Vec::new(), - source_units: Vec::new(), - root_unit_ids: Vec::new(), - unused_library_units: Vec::new(), - }; - new.add_source_unit(source_unit, None); - return new; - } - - pub fn add_library_units(&mut self, mut source_units: Vec<SourceUnit>) { - self.unused_library_units.append(&mut source_units); - } - - pub fn resolve(&mut self) { - // Repeatedly test if any unused source unit resolves an unresolved symbol, - // breaking the loop when no new resolutions are found. - 'outer: loop { - for (i, source_unit) in self.unused_library_units.iter().enumerate() { - if let Some(id) = self.resolves_reference(&source_unit) { - let source_unit = self.unused_library_units.remove(i); - self.add_source_unit(source_unit, Some(id)); - continue 'outer; - } - } - break; - } - - // For every macro reference in every unit, find the ID of the unit which - // resolves that reference and add it to the .parent_ids field of the - // referencing unit. - for reference in &self.resolved { - let predicate = |d: &&TrackedSymbol| d.symbol.name == reference.symbol.name; - if let Some(definition) = self.definitions.iter().find(predicate) { - let is_self = reference.source_id == definition.source_id; - let is_label = definition.symbol.variant == SymbolVariant::LabelDefinition; - if is_self || is_label { continue; } - let referencing_unit = &mut self.source_units[reference.source_id]; - referencing_unit.parent_ids.push(definition.source_id); - }; - } - } - - /// Add a source unit to the resolver and link it to a parent unit. - pub fn add_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { - let source_id = self.source_units.len(); - - // Add all main symbols. - if let Some(definitions) = take(&mut source_unit.main.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Main); } - if let Some(references) = take(&mut source_unit.main.symbols.references) { - self.add_references(references, source_id, SourceRole::Main); } - - // Add all head symbols. - if let Some(head) = &mut source_unit.head { - if let Some(references) = take(&mut head.symbols.references) { - self.add_references(references, source_id, SourceRole::Head); } - if let Some(definitions) = take(&mut head.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Head); } - } - - // Add all tail symbols. - if let Some(tail) = &mut source_unit.tail { - if let Some(references) = take(&mut tail.symbols.references) { - self.add_references(references, source_id, SourceRole::Tail); } - if let Some(definitions) = take(&mut tail.symbols.definitions) { - self.add_definitions(definitions, source_id, SourceRole::Tail); } - } - - if let Some(parent_id) = parent_id { - if let Some(parent_unit) = self.source_units.get_mut(parent_id) { - parent_unit.child_ids.push(source_id); - } - } else { - self.root_unit_ids.push(source_id); - } - - self.source_units.push( - HeirarchicalSourceUnit { - source_unit, - child_ids: Vec::new(), - parent_ids: Vec::new(), - } - ); - } - - fn add_references(&mut self, references: Vec<Symbol>, source_id: usize, source_role: SourceRole) { - for symbol in references { - let reference = TrackedSymbol { symbol, source_id, source_role }; - match self.definitions.contains(&reference) { - true => self.resolved.push(reference), - false => self.unresolved.push(reference), - } - } - } - - fn add_definitions(&mut self, definitions: Vec<Symbol>, source_id: usize, source_role: SourceRole) { - for symbol in definitions { - let predicate = |d: &TrackedSymbol| { &d.symbol.name == &symbol.name }; - if let Some(original) = self.definitions.iter().position(predicate) { - let definition = TrackedSymbol { symbol, source_id, source_role }; - let redefinition = (definition, original); - self.redefinitions.push(redefinition); - } else { - let predicate = |s: &mut TrackedSymbol| s.symbol.name == symbol.name; - for symbol in self.unresolved.extract_if(predicate) { - self.resolved.push(symbol); - } - self.unresolved.retain(|s| s.symbol.name != symbol.name); - let definition = TrackedSymbol { symbol, source_id, source_role }; - self.definitions.push(definition); - } - } - } - - /// Returns the ID of the owner of a symbol resolved by this unit. - pub fn resolves_reference(&self, source_unit: &SourceUnit) -> Option<usize> { - if let Some(definitions) = &source_unit.main.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - if let Some(head) = &source_unit.head { - if let Some(definitions) = &head.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - } - if let Some(tail) = &source_unit.tail { - if let Some(definitions) = &tail.symbols.definitions { - if let Some(id) = self.source_id_of_unresolved(&definitions) { - return Some(id); - } - } - } - return None; - } - - /// Returns the ID of the owner of a reference to one of these symbols. - fn source_id_of_unresolved(&self, symbols: &[Symbol]) -> Option<usize> { - for symbol in symbols { - let opt = self.unresolved.iter().find(|s| s.symbol.name == symbol.name); - if let Some(unresolved) = opt { - return Some(unresolved.source_id); - } - } - return None; - } - - pub fn get_source_code_for_tracked_symbol(&self, symbol: &TrackedSymbol) -> &str { - let source_unit = &self.source_units[symbol.source_id].source_unit; - match symbol.source_role { - SourceRole::Main => source_unit.main.symbols.source_code.as_str(), - SourceRole::Head => match &source_unit.head { - Some(head) => head.symbols.source_code.as_str(), - None => unreachable!("Failed to find source for token"), - } - SourceRole::Tail => match &source_unit.tail { - Some(tail) => tail.symbols.source_code.as_str(), - None => unreachable!("Failed to find source for token"), - } - } - } - - /// Create a source file by concatenating all source units. - /// If the source unit dependency graph contains a cycle, the IDs of the - /// source units involved in the cycle will be returned. - pub fn get_merged_source_code(&self) -> Result<String, Vec<usize>> { - // The ID of a given source unit will come after the IDs of all - // source units which define at least one symbol referenced in the - // given source unit. - let head_order = { - let mut included_source_ids: Vec<usize> = Vec::new(); - let mut remaining_source_ids: Vec<usize> = Vec::new(); - // Reverse the order so that the root unit is the last to be added. - for i in (0..self.source_units.len()).rev() { - remaining_source_ids.push(i); - } - - 'restart: while !remaining_source_ids.is_empty() { - 'next: for (i, id) in remaining_source_ids.iter().enumerate() { - let unit = &self.source_units[*id]; - for parent_id in &unit.parent_ids { - if !included_source_ids.contains(&parent_id) { - continue 'next; - } - } - included_source_ids.push(*id); - remaining_source_ids.remove(i); - continue 'restart; - } - // All remaining source units depend on at least one remaining - // source unit, indicating a dependency cycle. - return Err(remaining_source_ids); - } - included_source_ids - }; - - let mut source_code = String::new(); - - // Push head source code in macro-definition order. - for id in &head_order { - let source_unit = &self.source_units[*id]; - if let Some(head) = &source_unit.source_unit.head { - push_source_code_to_string(&mut source_code, head); - } - } - // Push main source code in source-added order. - for source_unit in self.source_units.iter() { - let main = &source_unit.source_unit.main; - push_source_code_to_string(&mut source_code, &main); - } - // Push tail source code in reverse source-added order. - for source_unit in self.source_units.iter().rev() { - if let Some(tail) = &source_unit.source_unit.tail { - push_source_code_to_string(&mut source_code, tail); - } - } - return Ok(source_code); - } -} - - -fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) { - // Don't push source code if it contains only whitespace. - let source_code = &source_file.symbols.source_code; - if source_code.chars().all(|c| c.is_whitespace()) { return; } - // Ensure that sections are separated by two newlines. - if !string.is_empty() { - if !string.ends_with('\n') { string.push('\n'); } - if !string.ends_with("\n\n") { string.push('\n'); } - } - // Write a path comment to the string. - let path_str = source_file.path.as_os_str().to_string_lossy(); - let path_comment = format!("(: {path_str} )\n"); - string.push_str(&path_comment); - string.push_str(&source_code); -} - - -pub struct HeirarchicalSourceUnit { - pub source_unit: SourceUnit, - /// IDs of units which were added to resolve symbol references this unit. - pub child_ids: Vec<usize>, - /// IDs of units which resolve macro references in this unit. - pub parent_ids: Vec<usize>, -} - - -pub struct TrackedSymbol { - pub symbol: Symbol, - pub source_id: usize, - pub source_role: SourceRole, -} - - -#[derive(Clone, Copy)] -pub enum SourceRole { - Main, - Head, - Tail, -} - - -impl PartialEq for TrackedSymbol { - fn eq(&self, other: &TrackedSymbol) -> bool { - self.symbol.name.eq(&other.symbol.name) - } -} diff --git a/src/tokens.rs b/src/tokens/mod.rs index 81bf9d5..81bf9d5 100644 --- a/src/tokens.rs +++ b/src/tokens/mod.rs diff --git a/src/translators.rs b/src/translators/mod.rs index cce5633..cce5633 100644 --- a/src/translators.rs +++ b/src/translators/mod.rs |