-rw-r--r--   Cargo.lock                                                          |  23
-rw-r--r--   Cargo.toml                                                          |   3
-rw-r--r--   src/bin/bedrock-asm.rs                                              | 154
-rw-r--r--   src/gather_libraries.rs                                             | 198
-rw-r--r--   src/lib.rs                                                          |  25
-rw-r--r--   src/print.rs                                                        | 264
-rw-r--r--   src/stages/bytecode.rs                                              | 137
-rw-r--r--   src/stages/bytecode_tokens.rs                                       |  13
-rw-r--r--   src/stages/compiler.rs                                              |  80
-rw-r--r--   src/stages/mod.rs                                                   |  27
-rw-r--r--   src/stages/semantic.rs                                              | 141
-rw-r--r--   src/stages/semantic_tokens.rs                                       | 101
-rw-r--r--   src/stages/syntactic.rs                                             | 185
-rw-r--r--   src/stages/syntactic_tokens.rs                                      | 107
-rw-r--r--   src/symbol_resolver.rs                                              | 296
-rw-r--r--   src/tokens.rs                                                       |   9
-rw-r--r--   src/tokens/semantic.rs                                              |  90
-rw-r--r--   src/tokens/syntactic.rs                                             |  39
-rw-r--r--   src/translators.rs                                                  |   9
-rw-r--r--   src/translators/bytecode_generator.rs                               | 131
-rw-r--r--   src/translators/semantic_parser.rs                                  | 245
-rw-r--r--   src/translators/symbols_generator.rs                                |  20
-rw-r--r--   src/translators/syntactic_parser.rs                                 | 117
-rw-r--r--   src/types/instruction.rs (renamed from src/tokens/instruction.rs)   |   5
-rw-r--r--   src/types/mod.rs                                                    |   5
-rw-r--r--   src/types/value.rs (renamed from src/tokens/value.rs)               |  16

26 files changed, 842 insertions, 1598 deletions
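This commit replaces the old gather_libraries / symbol_resolver / translators modules with a three-stage pipeline under src/stages/. The sketch below is an editor's illustration rather than part of the commit: it shows how the entry points added in this diff appear to chain together. The parse_syntactic, parse_semantic, and generate_bytecode functions, the report_* helpers, and the AssembledProgram type are taken from the added files; the assemble() wrapper itself is hypothetical.

    // Hypothetical driver chaining the three stages added in this commit.
    // Assumes the stage functions are re-exported from the crate root, as
    // src/lib.rs does with `pub use stages::*;`.
    use bedrock_asm::*;

    fn assemble(source_code: &str) -> Option<Vec<u8>> {
        // Stage 1: source text -> syntactic tokens (src/stages/syntactic.rs).
        let syntactic = match parse_syntactic(source_code, None::<std::path::PathBuf>) {
            Ok(tokens) => tokens,
            Err(errors) => { report_syntactic_errors(&errors, source_code); return None; }
        };
        // Stage 2: syntactic tokens -> semantic program (src/stages/semantic.rs).
        let program = match parse_semantic(syntactic) {
            Ok(program) => program,
            Err(errors) => { report_semantic_errors(&errors, source_code); return None; }
        };
        // Stage 3: semantic program -> bytecode and symbol list (src/stages/bytecode.rs).
        let assembled = generate_bytecode(&program);
        Some(assembled.bytecode)
    }

The raw diff follows.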
| @@ -3,20 +3,39 @@  version = 4  [[package]] -name = "assembler" +name = "ansi"  version = "1.0.0" -source = "git+git://benbridle.com/assembler?tag=v1.0.0#93b2bc229a8347a925df819b2d6199a1a6066cf0" +source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c582410ad2a" + +[[package]] +name = "assembler" +version = "2.2.0" +source = "git+git://benbridle.com/assembler?tag=v2.2.0#24080dd75092ea5ef8c10fd179aa28b8db534c7f" +dependencies = [ + "ansi", + "log", + "vagabond", +]  [[package]]  name = "bedrock-asm"  version = "4.0.6"  dependencies = [   "assembler", + "log",   "vagabond",   "xflags",  ]  [[package]] +name = "log" +version = "1.1.2" +source = "git+git://benbridle.com/log?tag=v1.1.2#3d5d1f7a19436151ba1dd52a2b50664969d90db6" +dependencies = [ + "ansi", +] + +[[package]]  name = "vagabond"  version = "1.0.1"  source = "git+git://benbridle.com/vagabond?tag=v1.0.1#08f3153fea62ea81a42438347eeee058f5bec199" @@ -7,7 +7,8 @@ description = "Assembler program for the Bedrock assembly language"  [dependencies] -assembler = { git = "git://benbridle.com/assembler", tag = "v1.0.0" } +assembler = { git = "git://benbridle.com/assembler", tag = "v2.2.0" } +log = { git = "git://benbridle.com/log", tag = "v1.1.2" }  vagabond = { git = "git://benbridle.com/vagabond", tag = "v1.0.1" }  xflags = "0.4.0-pre" diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs deleted file mode 100644 index 5cb962f..0000000 --- a/src/bin/bedrock-asm.rs +++ /dev/null @@ -1,154 +0,0 @@ -use bedrock_asm::*; - -use std::io::{Read, Write}; -use std::path::{Path, PathBuf}; - - -static mut VERBOSE: bool = false; - -macro_rules! verbose { -    ($($tokens:tt)*) => { if unsafe { VERBOSE } { -            eprint!("[INFO] "); eprintln!($($tokens)*); -    } }; -} -macro_rules! 
error { -    ($($tokens:tt)*) => {{ -        eprint!("[ERROR] "); eprintln!($($tokens)*); std::process::exit(1); -    }}; -} - - -fn main() { -    let args = Arguments::from_env_or_exit(); - -    // ----------------------------------------------------------------------- -    // RESOLVE syntactic symbols -    let ext = args.ext.unwrap_or(String::from("brc")); -    let mut resolver = if let Some(path) = &args.source { -        match SourceUnit::from_path(&path, &ext) { -            Ok(source_unit) => SymbolResolver::from_source_unit(source_unit), -            Err(err) => match err { -                ParseError::InvalidExtension => error!( -                    "File {path:?} has invalid extension, must be '.{ext}'"), -                ParseError::NotFound => error!( -                    "File {path:?} was not found"), -                ParseError::InvalidUtf8 => error!( -                    "File {path:?} does not contain valid UTF-8 text"), -                ParseError::NotReadable => error!( -                    "File {path:?} is not readable"), -                ParseError::IsADirectory => error!( -                    "File {path:?} is a directory"), -                ParseError::Unknown => error!( -                    "Unknown error while attempting to read from {path:?}") -            } -        } -    } else { -        let mut source_code = String::new(); -        verbose!("Reading program source from standard input"); -        if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { -            eprintln!("Could not read from standard input, exiting."); -            eprintln!("({err:?})"); -            std::process::exit(1); -        } -        let path = "<standard input>"; -        let source_unit = SourceUnit::from_source_code(source_code, path); -        SymbolResolver::from_source_unit(source_unit) -    }; -    // Load project libraries. -    if let Some(path) = &args.source { -        if !args.no_libs && !args.no_project_libs { -            let project_library = gather_project_libraries(path, &ext); -            resolver.add_library_units(project_library); -        } -    } -    // Load environment libraries. 
-    if !args.no_libs && !args.no_env_libs { -        for env_library in gather_environment_libraries(&ext) { -            resolver.add_library_units(env_library); -        } -    } -    resolver.resolve(); - -    // ----------------------------------------------------------------------- -    // PRINT information, generate merged source code -    if args.tree { -        print_source_tree(&resolver); -    } -    if print_resolver_errors(&resolver) { -        std::process::exit(1); -    }; -    let merged_source = match resolver.get_merged_source_code() { -        Ok(merged_source) => merged_source, -        Err(ids) => { -            print_cyclic_source_units(&ids, &resolver); -            std::process::exit(1); -        }, -    }; -    if args.resolve { -        write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref()); -    } - -    // ----------------------------------------------------------------------- -    // PARSE semantic tokens from merged source code -    let path = Some("<merged source>"); -    let mut semantic_tokens = generate_semantic_tokens(&merged_source, path); -    if print_semantic_errors(&semantic_tokens, &merged_source) { -        std::process::exit(1); -    }; - -    // ----------------------------------------------------------------------- -    // GENERATE symbols file and bytecode -    let bytecode = generate_bytecode(&mut semantic_tokens); -    // let symbols = generate_symbols_file(&semantic_tokens); -    write_bytes_and_exit(&bytecode, args.output.as_ref()); -} - - -fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { -    if let Some(path) = path { -        if let Err(err) = std::fs::write(path, bytes) { -            eprintln!("Could not write to path {:?}, exiting.", path.as_ref()); -            eprintln!("({err:?})"); -            std::process::exit(1); -        } -    } else { -        if let Err(err) = std::io::stdout().write_all(bytes) { -            eprintln!("Could not write to standard output, exiting."); -            eprintln!("({err:?})"); -            std::process::exit(1); -        } -    } -    std::process::exit(0); -} - - -xflags::xflags! { -    cmd arguments { -        /// Print additional debug information -        optional --verbose -        /// Print the assembler version and exit -        optional --version - - -        /// Bedrock source code file to assemble. -        optional source: PathBuf -        /// Destination path for assembler output. -        optional output: PathBuf -        /// File extension to identify source files. -        optional ext: String - -        /// Don't include libraries or resolve references. -        optional --no-libs -        /// Don't include project libraries -        optional --no-project-libs -        /// Don't include environment libraries. -        optional --no-env-libs - -        /// Show the resolved source file heirarchy -        optional --tree -        /// Assemble the program without saving any output -        optional --check -        /// Only return resolved source code. -        optional --resolve -    } -} diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs deleted file mode 100644 index 0fd1131..0000000 --- a/src/gather_libraries.rs +++ /dev/null @@ -1,198 +0,0 @@ -use crate::*; - -use vagabond::*; - - -/// Gather all library units from the given path. 
-pub fn gather_project_libraries(path: &Path, extension: &str) -> Vec<SourceUnit> { -    match path.parent() { -        Some(parent_path) => gather_source_units(parent_path, extension), -        None => Vec::new(), -    } -} - - -/// Gather all library units from the paths specified in an environment variable. -pub fn gather_environment_libraries(extension: &str) -> Vec<Vec<SourceUnit>> { -    let mut environment_libraries = Vec::new(); -    if let Ok(lib_var) = std::env::var("BEDROCK_LIBS") { -        for path_str in lib_var.split(":") { -            let lib_path = PathBuf::from(path_str); -            let source_units = gather_source_units(&lib_path, extension); -            if !source_units.is_empty() { -                environment_libraries.push(source_units); -            } -        } -    }; -    return environment_libraries; -} - - -/// Gather all source units at or descended from the given entry. -fn gather_source_units(path: &Path, extension: &str) -> Vec<SourceUnit> { -    let mut source_units = Vec::new(); -    if let Ok(entry) = Entry::from_path(path) { -        match entry.entry_type { -            EntryType::File => { -                if let Ok(source) = SourceUnit::from_path(entry.path, extension) { -                    source_units.push(source); -                } -            } -            EntryType::Directory => { -                if let Ok(entries) = traverse_directory(entry.path) { -                    for entry in entries { -                        if let Ok(source) = SourceUnit::from_path(entry.path, extension) { -                            source_units.push(source); -                        } -                    } -                } -            } -        } -    }; -    return source_units; -} - - -pub struct SourceUnit { -    pub main: SourceFile, -    pub head: Option<SourceFile>, -    pub tail: Option<SourceFile>, -} - - -impl SourceUnit { -    /// Load from a source file and an associated head and tail file. -    pub fn from_path<P: Into<PathBuf>>(path: P, extension: &str) -> Result<Self, ParseError> { -        let main_path = canonicalize_path(path); -        let main_path_str = main_path.as_os_str().to_string_lossy().to_string(); -        let head_extension = format!("head.{extension}"); -        let tail_extension = format!("tail.{extension}"); -        let is_head = main_path_str.ends_with(&head_extension); -        let is_tail = main_path_str.ends_with(&tail_extension); -        let is_not_main = !main_path_str.ends_with(extension); -        if is_not_main || is_head || is_tail { return Err(ParseError::InvalidExtension); } - -        let symbols = parse_symbols_from_file(&main_path)?; -        let head_path = main_path.with_extension(head_extension); -        let tail_path = main_path.with_extension(tail_extension); - -        let main = SourceFile { path: main_path, symbols }; -        let head = match parse_symbols_from_file(&head_path) { -            Ok(symbols) => Some(SourceFile { path: head_path, symbols }), -            Err(_) => None, -        }; -        let tail = match parse_symbols_from_file(&tail_path) { -            Ok(symbols) => Some(SourceFile { path: tail_path, symbols }), -            Err(_) => None, -        }; -        Ok( SourceUnit { main, head, tail } ) -    } - -    /// Load from a string of source code. 
-    pub fn from_source_code<P: Into<PathBuf>>(source_code: String, path: P) -> Self { -        let path = canonicalize_path(path); -        let symbols = parse_symbols_from_source(source_code, Some(&path)); -        Self { -            main: SourceFile { path, symbols }, -            head: None, -            tail: None, -        } -    } -} - - -/// Read and parse all symbols from a source file. -fn parse_symbols_from_file(path: &Path) -> Result<Symbols, ParseError> { -    let source = read_source_from_file(path)?; -    Ok(parse_symbols_from_source(source, Some(path))) -} - - -/// Parse all symbols from a source code string. -fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbols { -    use SyntacticTokenVariant as SynVar; - -    let token_iter = SyntacticParser::from_source_code(&source_code, path); -    let mut definitions = Vec::new(); -    let mut references = Vec::new(); - -    for token in token_iter { -        let source = token.source; -        match token.variant { -            SynVar::LabelDefinition(name) => { -                let variant = SymbolVariant::LabelDefinition; -                definitions.push(Symbol { name, source, variant }); -            }, -            SynVar::MacroDefinition(name) => { -                let variant = SymbolVariant::MacroDefinition; -                definitions.push(Symbol { name, source, variant }); -            } -            SynVar::Symbol(name) => { -                let variant = SymbolVariant::Reference; -                references.push(Symbol { name, source, variant }); -            }, -            _ => (), -        } -    } - -    Symbols { -        definitions: Some(definitions), -        references: Some(references), -        source_code, -    } -} - - -/// Attempt to read program source from a file. 
-pub fn read_source_from_file(path: &Path) -> Result<String, ParseError> { -    match std::fs::read(&path) { -        Ok(bytes) => match String::from_utf8(bytes) { -            Ok(source) => Ok(source), -            Err(_) => return Err(ParseError::InvalidUtf8), -        } -        Err(err) => return Err( match err.kind() { -            std::io::ErrorKind::NotFound => ParseError::NotFound, -            std::io::ErrorKind::PermissionDenied => ParseError::NotReadable, -            std::io::ErrorKind::IsADirectory => ParseError::IsADirectory, -            _ => ParseError::Unknown, -        } ) -    } -} - - -fn canonicalize_path<P: Into<PathBuf>>(path: P) -> PathBuf { -    let pathbuf = path.into(); -    match pathbuf.canonicalize() { -        Ok(canonical) => canonical, -        Err(_) => pathbuf, -    } -} - - - -pub struct SourceFile { -    pub path: PathBuf, -    pub symbols: Symbols, -} - - -pub struct Symbols { -    pub definitions: Option<Vec<Symbol>>, -    pub references: Option<Vec<Symbol>>, -    pub source_code: String, -} - - -pub struct Symbol { -    pub name: String, -    pub variant: SymbolVariant, -    pub source: SourceSpan, -} - - -#[derive(PartialEq)] -pub enum SymbolVariant { -    LabelDefinition, -    MacroDefinition, -    Reference, -} @@ -1,22 +1,7 @@ -#![feature(extract_if)] -#![feature(io_error_more)] -#![feature(map_try_insert)] +mod types; +mod stages; +pub use types::*; +pub use stages::*; -mod gather_libraries; -mod symbol_resolver; - -pub use gather_libraries::*; -pub use symbol_resolver::*; - -mod tokens; -mod translators; - -pub use tokens::*; -pub use translators::*; - -mod print; -pub use print::*; - - -pub use assembler::*; +use assembler::*; diff --git a/src/print.rs b/src/print.rs deleted file mode 100644 index 2110d37..0000000 --- a/src/print.rs +++ /dev/null @@ -1,264 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; -use SemanticParseError as SemErr; -use SyntacticParseError as SynErr; - - -const NORMAL: &str = "\x1b[0m"; -const BOLD:   &str = "\x1b[1m"; -const DIM:    &str = "\x1b[2m"; -const WHITE:  &str = "\x1b[37m"; -const RED:    &str = "\x1b[31m"; -const YELLOW: &str = "\x1b[33m"; -const BLUE:   &str = "\x1b[34m"; - - -pub struct Context<'a> { -    pub source_code: &'a str, -    pub source: &'a SourceSpan, -} - - -/// Print all errors found in the semantic tokens, including those inside macro -/// definitions. Returns true if at least one error was printed. 
-pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &str) -> bool { -    let mut found_error = false; -    for semantic_token in semantic_tokens { -        match &semantic_token.variant { -            SemVar::Error(err) => { -                let context = Context { -                    source_code: source_code, -                    source: &semantic_token.source, -                }; -                found_error = true; -                print_semantic_error(&err, context) -            } -            SemVar::MacroDefinition(definition) => { -                for body_token in &definition.body_tokens { -                    if let SemVar::Error(err) = &body_token.variant { -                        let context = Context { -                            source_code: source_code, -                            source: &body_token.source, -                        }; -                        found_error = true; -                        print_semantic_error(err, context) -                    } -                } -            } -            _ => (), -        } -    } -    return found_error; -} - -fn print_semantic_error(error: &SemanticParseError, context: Context) { -    let message = get_message_for_semantic_error(error); -    print_error(&message, context); -} - -fn get_message_for_semantic_error(error: &SemanticParseError) -> String { -    match error { -        SemErr::LabelDefinitionInMacroDefinition => -            format!("Label cannot be defined inside a macro"), -        SemErr::MacroDefinitionInMacroDefinition => -            format!("Macro cannot be defined inside a macro"), -        SemErr::StrayMacroTerminator => -            format!("Macro definition terminator is missing a macro definition"), -        SemErr::StrayBlockClose => -            format!("Block was not opened, add a '{{' character to open"), -        SemErr::UnclosedBlock => -            format!("Block was not closed, add a '}}' character to close"), -        SemErr::UndefinedSymbol(name) => -            format!("Undefined symbol, no label or macro has been defined with the name '{name}'"), -        SemErr::RedefinedSymbol((_, source)) => -            format!("Redefined symbol, first defined at {}", source.location()), -        SemErr::MacroInvocationBeforeDefinition((_, source)) => -            format!("Macro used before definition, definition is at {}", source.location()), -        SemErr:: SyntaxError(syntax_error) => match syntax_error { -            SynErr::UnterminatedComment => -                format!("Unclosed comment, add a ')' character to close"), -            SynErr::UnterminatedRawString => -                format!("Unclosed string, add a ' character to close"), -            SynErr::UnterminatedNullString => -                format!("Unclosed string, add a \" character to close"), -            SynErr::InvalidPaddingValue(_) => -                format!("Padding value must be two or four hexadecimal digits"), -        } -    } -} - - -pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool { -    let mut found_error = false; -    for reference in &resolver.unresolved { -        found_error = true; -        let message = format!( -            "Undefined symbol, no label or macro has been defined with the name '{}'", -            &reference.symbol.source.string, -        ); -        let source_code = resolver.get_source_code_for_tracked_symbol(reference); -        let source =  &reference.symbol.source; -        print_error(&message, Context { source_code, source } ) -    } -    for 
redefinition in &resolver.redefinitions { -        found_error = true; -        let definition = resolver.definitions.get(redefinition.1).unwrap(); -        let message = format!( -            "Redefined symbol, first defined at {}", -            &definition.symbol.source.in_merged, -        ); -        let source_code = resolver.get_source_code_for_tracked_symbol(&redefinition.0); -        let source = &redefinition.0.symbol.source; -        print_error(&message, Context { source_code, source } ) -    } -    return found_error; -} - - -/// The `ids` argument contains a list of the IDs of the source units which -/// cyclicly depend on one another. -pub fn print_cyclic_source_units(ids: &[usize], resolver: &SymbolResolver) { -    eprintln!("{BOLD}{RED}[ERROR]{WHITE}: Some libraries contain a dependency cycle{NORMAL}"); -    for id in ids { -        if let Some(unit) = resolver.source_units.get(*id) { -            let path = &unit.source_unit.main.path; -            let path_str = path.as_os_str().to_string_lossy(); -            if let Some(name_str) = get_unit_name(&unit.source_unit) { -                eprintln!("{name_str}{NORMAL}{DIM} ({path_str}){NORMAL}"); -            } else { -                eprintln!("{path_str}"); -            }; -            // Print parents involved in dependency cycle. -            for parent_id in &unit.parent_ids { -                if !ids.contains(parent_id) { continue; } -                if let Some(parent_unit) = resolver.source_units.get(*parent_id) { -                    let parent_path = &parent_unit.source_unit.main.path; -                    let parent_path_str = parent_path.as_os_str().to_string_lossy(); -                    let parent_name_str = match get_unit_name(&parent_unit.source_unit) { -                        Some(parent_name_str) => parent_name_str, -                        None => parent_path_str.to_string(), -                    }; -                    eprintln!("  => {parent_name_str} {DIM}({parent_path_str}){NORMAL}"); -                } -            } -        } -    } -} - - -pub fn print_error(message: &str, context: Context) { -    print_source_issue(message, context, SourceIssueVariant::Error); -} - -pub fn print_warning(message: &str, context: Context) { -    print_source_issue(message, context, SourceIssueVariant::Warning); -} - -fn print_source_issue(message: &str, context: Context, variant: SourceIssueVariant) { -    let (label, colour) = match variant { -        SourceIssueVariant::Warning => ("WARNING", YELLOW), -        SourceIssueVariant::Error => ("ERROR", RED), -    }; - -    // Prepare variables. -    let location = &context.source.in_merged; -    let y = location.start.line + 1; -    let digits = y.to_string().len(); -    let arrow = "-->"; -    let space = " "; - -    // Print message and file path. -    eprintln!("{BOLD}{colour}[{label}]{WHITE}: {message}{NORMAL}"); -    eprintln!("{BLUE}{arrow:>w$}{NORMAL} {location}{NORMAL}", w=digits+3); -    if let Some(source) = &context.source.in_source { -        eprintln!("{BLUE}{arrow:>w$}{NORMAL} {source}{NORMAL}", w=digits+3); -    } - -    let start = location.start.column; -    let end = location.end.column + 1; - -    // Print source code line. 
-    eprint!("{BLUE} {y} | {NORMAL}"); -    let line = get_line_from_source_code(context.source_code, location.start.line); -    for (i, c) in line.chars().enumerate() { -        if i == start { eprint!("{colour}") } -        if i == end { eprint!("{NORMAL}") } -        eprint!("{c}"); -    } -    eprintln!("{NORMAL}"); - -    // Print source code underline. -    eprint!("{BLUE} {space:>w$} | {NORMAL}", w=digits); -    for _ in 0..start { eprint!(" "); } -    eprint!("{colour}"); -    for _ in start..end { eprint!("^"); } -    eprintln!("{NORMAL}"); -} - - -fn get_line_from_source_code(source_code: &str, line: usize) -> &str { -    source_code.split('\n').nth(line).unwrap_or("<error reading line from source>") -} - - -enum SourceIssueVariant { -    Warning, -    Error, -} - - -/// Print a tree containing the name and path of each source unit. -pub fn print_source_tree(resolver: &SymbolResolver) { -    eprintln!("."); -    let len = resolver.root_unit_ids.len(); -    for (i, id) in resolver.root_unit_ids.iter().enumerate() { -        let end = i + 1 == len; -        print_source_tree_leaf(resolver, *id, Vec::new(), end); -    } -    eprintln!(); -} - -fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec<bool>, end: bool) { -    // A level entry is true if all entries in that level have been printed. -    for level in &levels { -        match level { -            false => eprint!("│   "), -            true  => eprint!("    "), -        } -    } -    // The end value is true if all siblings of this entry have been printed. -    match end { -            false => eprint!("├── "), -            true  => eprint!("└── "), -    } -    if let Some(unit) = resolver.source_units.get(id) { -        let path_str = &unit.source_unit.main.path.as_os_str().to_string_lossy(); -        if let Some(name_str) = get_unit_name(&unit.source_unit) { -            eprint!("{name_str}{BLUE}"); -            if unit.source_unit.head.is_some() { eprint!(" +head") } -            if unit.source_unit.tail.is_some() { eprint!(" +tail") } -            let mut unresolved = 0; -            for symbol in &resolver.unresolved { -                if symbol.source_id == id { unresolved += 1; } -            } -            if unresolved > 0 { eprint!("{RED} ({unresolved})"); } -            eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}"); -        } else { -            eprintln!("{path_str}"); -        } -        levels.push(end); -        let len = unit.child_ids.len(); -        for (i, id) in unit.child_ids.iter().enumerate() { -            let end = i + 1 == len; -            print_source_tree_leaf(resolver, *id, levels.clone(), end); -        } -    } else { -        eprintln!("<error loading source unit details>"); -    } -} - - -fn get_unit_name(source_unit: &SourceUnit) -> Option<String> { -    source_unit.main.path.file_name().map(|s| s.to_string_lossy().to_string()) -} diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..db6ff6d --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,137 @@ +use crate::*; + +use std::collections::HashMap; + + +/// Doesn't truncate trailing null bytes. 
+pub fn generate_bytecode(semantic: &Program) -> AssembledProgram { +    let mut generator = BytecodeGenerator::new(&semantic.definitions); +    generator.parse(&semantic.tokens, false); +    generator.fill_slots(); +    let mut symbols = Vec::new(); +    for (name, information) in generator.labels { +        let source = semantic.definitions.get(&name).unwrap().source.clone(); +        let address = information.address; +        symbols.push(AssembledSymbol { name, address, source }); +    } +    AssembledProgram { +        bytecode: generator.bytecode, +        symbols, +    } +} + + +pub struct BytecodeGenerator<'a> { +    definitions: &'a HashMap<String, Tracked<Definition>>, +    labels: HashMap<String, LabelInformation>, +    stack: Vec<usize>, +    bytecode: Vec<u8>, +} + +struct LabelInformation { +    address: usize, +    slots: Vec<usize>, +} + +impl<'a> BytecodeGenerator<'a> { +    pub fn new(definitions: &'a HashMap<String, Tracked<Definition>>) -> Self { +        let mut labels = HashMap::new(); +        for (name, definition) in definitions { +            if let DefinitionKind::LabelDefinition = definition.kind { +                labels.insert(name, LabelInformation { address: 0, slots: Vec::new() }); +            } +        } +        Self { +            definitions, +            labels: HashMap::new(), +            stack: Vec::new(), +            bytecode: Vec::new(), +        } +    } + +    pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) { +        macro_rules! byte { +            ($byte:expr) => { +                self.bytecode.push($byte) +            }; +        } +        macro_rules! double { +            ($double:expr) => {{ +                let [high, low] = u16::to_be_bytes($double); +                self.bytecode.push(high); +                self.bytecode.push(low); +            }}; +        } + +        for token in tokens { +            let i = self.bytecode.len(); +            match &token.value { +                SemanticToken::Comment(_) => (), + +                SemanticToken::LabelDefinition(name) => if in_macro { +                    unreachable!("Uncaught label definition in macro"); +                } else { +                    let information = self.labels.get_mut(name).unwrap(); +                    information.address = i; +                } +                SemanticToken::MacroDefinition{ .. 
} => if in_macro { +                    unreachable!("Uncaught macro definition in macro"); +                } + +                SemanticToken::RawValue(value) => match value { +                    Value::Byte(byte) => byte!(*byte), +                    Value::Double(double) => double!(*double), +                } +                SemanticToken::Instruction(instruction) => { +                    byte!(instruction.value) +                } +                SemanticToken::Invocation(name) => { +                    if let Some(definition) = self.definitions.get(name) { +                        match &definition.kind { +                            DefinitionKind::MacroDefinition(body) => { +                                self.parse(body, true); +                            } +                            DefinitionKind::LabelDefinition => { +                                let information = self.labels.get_mut(name).unwrap(); +                                information.slots.push(i); +                                double!(0); +                            } +                        } +                    } else { +                        unreachable!("Uncaught undefined symbol '{name}'"); +                    } +                } + +                SemanticToken::Padding(value) => { +                    self.bytecode.resize(i + usize::from(value), 0); +                }, +                SemanticToken::String(bytes) => { +                    self.bytecode.extend_from_slice(bytes) +                }, + +                SemanticToken::BlockOpen(_) => { +                    self.stack.push(i); +                    double!(0); +                } +                SemanticToken::BlockClose(_) => { +                    let Some(addr) = self.stack.pop() else { +                        unreachable!("Uncaught unmatched block terminator"); +                    }; +                    let [high, low] = (addr as u16).to_be_bytes(); +                    self.bytecode[addr] = high; +                    self.bytecode[addr+1] = low; +                } +            } +        } +    } + +    pub fn fill_slots(&mut self) { +        for information in self.labels.values() { +            let [high, low] = (information.address as u16).to_be_bytes(); +            for addr in &information.slots { +                self.bytecode[*addr] = high; +                self.bytecode[*addr + 1] = low; +            } +        } +    } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..aef27f9 --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,13 @@ +use crate::*; + + +pub struct AssembledProgram { +    pub bytecode: Vec<u8>, +    pub symbols: Vec<AssembledSymbol>, +} + +pub struct AssembledSymbol { +    pub name: String, +    pub address: usize, +    pub source: SourceSpan, +} diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs new file mode 100644 index 0000000..7dad5e4 --- /dev/null +++ b/src/stages/compiler.rs @@ -0,0 +1,80 @@ +use crate::*; +use assembler::SymbolRole::*; +use assembler::DefinitionType::*; + + +pub fn new_compiler() -> Compiler { +    Compiler::new(parse_symbols, push_code) +} + + +/// Parse all symbols from a source code string. 
+pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { +    let syntactic = match parse_syntactic(source_code, path) { +        Ok(syntactic) => syntactic, +        Err(_errors) => return None, +    }; +    Some(SymbolParser::new().parse(&syntactic)) +} + +/// Push source code to a source compilation string. +pub fn push_code(compilation: &mut String, source_file: &SourceFile) { +    // Skip blank files. +    let source_code = &source_file.source_code; +    if source_code.chars().all(|c| c.is_whitespace()) { return; } +    // Ensure that the previous section is followed by two newline characters. +    if !compilation.is_empty() { +        if !compilation.ends_with('\n') { compilation.push('\n'); } +        if !compilation.ends_with("\n\n") { compilation.push('\n'); } +    } +    // Push a path comment and the source code. +    let path_str = source_file.path.as_os_str().to_string_lossy(); +    let path_comment = format!("(: {path_str} )\n"); +    compilation.push_str(&path_comment); +    compilation.push_str(&source_code); +} + + +// Extract symbol definitions from a list of semantic tokens. +pub struct SymbolParser { +    pub symbols: Vec<Symbol>, +} + +impl SymbolParser { +    pub fn new() -> Self { +        Self { +            symbols: Vec::new(), +        } +    } + +    fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { +        let name = name.to_string(); +        let namespace = Vec::new(); +        let source = source.to_owned(); +        self.symbols.push(Symbol { name, namespace, source, role }); +    } + +    pub fn parse(mut self, semantic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> { +        for token in semantic { +            match &token.value { +                SyntacticToken::MacroDefinition(definition) => { +                    self.record_symbol( +                        &definition.name, +                        &definition.name.source, +                        Definition(MustPrecedeReference), +                    ); +                    for token in &definition.body { +                        if let SyntacticToken::Invocation(name) = &token.value { +                            self.record_symbol(&name, &token.source, Reference); +                        } +                    } +                } +                SyntacticToken::LabelDefinition(name) => { +                    self.record_symbol(&name, &token.source, Definition(CanFollowReference)); +                } +                _ => (), +            } +        } +        return self.symbols; +    } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..65d14d7 --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,27 @@ +mod compiler; +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod bytecode; +mod bytecode_tokens; + +pub use compiler::*; +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! 
indent { +    (0, $($tokens:tt)*) => {{ +        println!($($tokens)*); +    }}; +    ($indent:expr, $($tokens:tt)*) => {{ +        for _ in 0..$indent { print!("  "); } +        println!($($tokens)*); +    }}; +} diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..da804ec --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,141 @@ +use crate::*; + +use std::collections::{HashMap, HashSet}; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> { +    // Record all label definitions and macro names up front. +    let mut definitions = HashMap::new(); +    let mut macro_names = HashSet::new(); +    for token in &syntactic { +        match &token.value { +            SyntacticToken::LabelDefinition(name) => { +                let name = name.clone(); +                let definition = Definition::new(0, DefinitionKind::LabelDefinition); +                let tracked = Tracked::from(definition, token.source.clone()); +                if let Some(_) = definitions.insert(name.clone(), tracked) { +                    unreachable!("Uncaught duplicate label definition '{name}'"); +                } +            } +            SyntacticToken::MacroDefinition(definition) => { +                let name = definition.name.clone(); +                if !macro_names.insert(name.clone()) { +                    unreachable!("Uncaught duplicate macro definition '{name}'") +                } +            } +            _ => (), +        } +    } + +    // Convert syntactic tokens to semantic tokens. +    let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new(); +    let mut errors = Vec::new(); +    let mut stack = Vec::new(); + +    for syn_token in syntactic { +        let i = tokens.len(); +        let sem_token = match syn_token.value { +            SyntacticToken::Comment(string) => SemanticToken::Comment(string), + +            SyntacticToken::LabelDefinition(name) => { +                let definition = definitions.get_mut(&name).unwrap(); +                definition.value.definition = i; +                SemanticToken::LabelDefinition(name) +            } +            SyntacticToken::MacroDefinition(definition) => { +                let source = definition.name.source.clone(); +                let name = definition.name.clone(); + +                let mut body: Vec<Tracked<SemanticToken>> = Vec::new(); +                let mut body_stack = Vec::new(); +                for syn_token in definition.body { +                    let j = body.len(); +                    let sem_token = match syn_token.value { +                        SyntacticToken::Comment(string) => +                            SemanticToken::Comment(string), + +                        SyntacticToken::LabelDefinition(label) => +                            unreachable!("Uncaught label definition '{label}' in macro '{name}'"), +                        SyntacticToken::MacroDefinition(definition) => +                            unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name), + +                        SyntacticToken::RawValue(value) => SemanticToken::RawValue(value), +                        SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), +                        SyntacticToken::Invocation(symbol) => { +                            if let Some(definition) = definitions.get_mut(&symbol) { +                                definition.value.deep_references.push((i, j)); +     
                       } else if macro_names.contains(&symbol) { +                                let error = SemanticError::InvocationBeforeDefinition; +                                errors.push(Tracked::from(error, syn_token.source.clone())); +                            } else { +                                unreachable!("Uncaught undefined symbol '{symbol}'"); +                            }; +                            SemanticToken::Invocation(symbol) +                        } + +                        SyntacticToken::Padding(value) => SemanticToken::Padding(value), +                        SyntacticToken::String(bytes) => SemanticToken::String(bytes), + +                        SyntacticToken::BlockOpen => { +                            body_stack.push(j); +                            SemanticToken::BlockOpen(0) +                        } +                        SyntacticToken::BlockClose => { +                            let Some(k) = body_stack.pop() else { +                                unreachable!("Uncaught unmatched block terminator in macro {name}"); +                            }; +                            body[k].value = SemanticToken::BlockOpen(j); +                            SemanticToken::BlockClose(k) +                        } +                    }; +                    body.push(Tracked::from(sem_token, syn_token.source)); +                } + +                let kind = DefinitionKind::MacroDefinition(body); +                let tracked = Tracked::from(Definition::new(i, kind), source); +                if let Some(_) = definitions.insert(name.value.clone(), tracked) { +                    unreachable!("Uncaught duplicate definition '{name}'") +                }; + +                if !body_stack.is_empty() { +                    unreachable!("Uncaught unterminated block in macro {name}"); +                } +                SemanticToken::MacroDefinition(name) +            } + +            SyntacticToken::RawValue(value) => SemanticToken::RawValue(value), +            SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction), +            SyntacticToken::Invocation(symbol) => { +                let Some(definition) = definitions.get_mut(&symbol) else { +                    unreachable!("Uncaught undefined symbol '{symbol}'"); +                }; +                definition.value.references.push(i); +                SemanticToken::Invocation(symbol) +            } + +            SyntacticToken::Padding(value) => SemanticToken::Padding(value), +            SyntacticToken::String(bytes) => SemanticToken::String(bytes), + +            SyntacticToken::BlockOpen => { +                stack.push(i); +                SemanticToken::BlockOpen(0) +            } +            SyntacticToken::BlockClose => { +                let Some(k) = stack.pop() else { +                    unreachable!("Uncaught unmatched block terminator"); +                }; +                tokens[k].value = SemanticToken::BlockOpen(i); +                SemanticToken::BlockClose(k) +            } +        }; +        tokens.push(Tracked::from(sem_token, syn_token.source)); +    } + +    if !stack.is_empty() { +        unreachable!("Uncaught unterminated block"); +    } +    match errors.is_empty() { +        true => Ok(Program { definitions, tokens }), +        false => Err(errors), +    } +} diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs new file mode 100644 index 0000000..ca5b27a --- /dev/null +++ b/src/stages/semantic_tokens.rs @@ 
-0,0 +1,101 @@ +use crate::*; + +use std::collections::HashMap; + + +pub struct Program { +    pub definitions: HashMap<String, Tracked<Definition>>, +    pub tokens: Vec<Tracked<SemanticToken>>, +} + +pub struct Definition { +    pub kind: DefinitionKind, +    pub definition: usize, +    pub references: Vec<usize>, +    pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { +    pub fn new(i: usize, kind: DefinitionKind) -> Self { +        Self { +            kind, +            definition: i, +            references: Vec::new(), +            deep_references: Vec::new(), +        } +    } +} + +pub enum DefinitionKind { +    MacroDefinition(Vec<Tracked<SemanticToken>>), +    LabelDefinition, +} + +pub enum SemanticToken { +    Comment(String), + +    LabelDefinition(String), +    MacroDefinition(Tracked<String>), + +    RawValue(Value), +    Instruction(Instruction), +    Invocation(String), + +    Padding(Value), +    String(Vec<u8>), + +    BlockOpen(usize), +    BlockClose(usize), +} + +pub enum SemanticError { +    InvocationBeforeDefinition, +} + + +pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { +    for error in errors { +        report_semantic_error(error, source_code); +    } +} + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        SemanticError::InvocationBeforeDefinition => +            "Invocation before definition", +    }; + +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &HashMap<String, Tracked<Definition>>) { +    match token { +        SemanticToken::Comment(_) => +            indent!(i, "Comment"), + +        SemanticToken::LabelDefinition(name) => +            indent!(i, "LabelDefinition({name})"), +        SemanticToken::MacroDefinition(name) => { +            indent!(i, "MacroDefinition({name})"); +            if let Some(definition) = definitions.get(name.as_str()) { +                if let DefinitionKind::MacroDefinition(body) = &definition.kind { +                    for token in body { +                        print_semantic_token(i+1, token, definitions); +                    } +                } +            } +        } + +        SemanticToken::RawValue(value) => indent!(i, "RawValue({value})"), +        SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), +        SemanticToken::Invocation(name) => indent!(i, "Invocation({name})"), + +        SemanticToken::Padding(value) => indent!(i, "Padding({value})"), +        SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + +        SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"), +        SemanticToken::BlockClose(pointer) => indent!(i, "BlockOpen(*{pointer})"), +    } +} diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..c680700 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,185 @@ +use crate::*; + +use std::path::PathBuf; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +    parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path)) +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, 
Vec<Tracked<SyntacticError>>> { +    t.add_delimiters(&['@','&','%',';','{','}','(',')','[',']','#','~','"','\'']); +    t.add_terminators(&[':']); +    let mut tokens = Vec::new(); +    let mut errors = Vec::new(); +    let mut label_name = String::new(); + +    macro_rules! err { +        ($error:expr) => {{ +            err!($error, t.get_source()); +        }}; +        ($error:expr, $source:expr) => {{ +            errors.push(Tracked::from($error, $source)); +            continue; +        }}; +    } + +    macro_rules! is_any { +        ($close:expr) => { +            |t: &mut Tokeniser| { t.eat_char() == Some($close) } +        }; +    } + +    loop { +        t.eat_whitespace(); +        t.mark_start(); +        let Some(c) = t.eat_char() else { break }; +        let token = match c { +            '"' => { +                let source = t.get_source(); +                match t.track_until(is_any!('"')) { +                    Some(string) => { +                        let mut bytes = string.into_bytes(); +                        bytes.push(0x00); +                        SyntacticToken::String(bytes) +                    } +                    None => err!(SyntacticError::UnterminatedNullString, source), +                } +            } +            '\'' => { +                let source = t.get_source(); +                match t.track_until(is_any!('\'')) { +                    Some(string) => SyntacticToken::String(string.into_bytes()), +                    None => err!(SyntacticError::UnterminatedRawString, source), +                } +            } +            '(' => { +                let source = t.get_source(); +                if let Some(string) = t.track_until(is_any!(')')) { +                    // Check if the comment fills the entire line. 
+                    if t.start.position.column == 0 && t.end_of_line() { +                        if let Some(path) = string.strip_prefix(": ") { +                            t.embedded_path = Some(PathBuf::from(path.trim())); +                            t.embedded_first_line = t.start.position.line + 1; +                            continue; +                        } +                    } +                    SyntacticToken::Comment(string) +                } else { +                    err!(SyntacticError::UnterminatedComment, source) +                } +            } +            '%' => { +                let name = t.eat_token(); +                let source = t.get_source(); +                t.mark_child(); +                if let Some(_) = t.track_until(is_any!(';')) { +                    let child = t.tokenise_child_span(); +                    match parse_body_from_tokeniser(child) { +                        Ok(body) => { +                            let name = Tracked::from(name, source); +                            let definition = SyntacticMacroDefinition { name, body }; +                            SyntacticToken::MacroDefinition(definition) +                        } +                        Err(mut err) => { +                            errors.append(&mut err); +                            continue; +                        } +                    } +                } else { +                    err!(SyntacticError::UnterminatedMacroDefinition, source); +                } +            } +            '{' => SyntacticToken::BlockOpen, +            '}' => SyntacticToken::BlockClose, +            '[' => continue, +            ']' => continue, + +            ')' => err!(SyntacticError::UnmatchedCommentTerminator), +            ';' => err!(SyntacticError::UnmatchedMacroTerminator), + +            '@' => { +                label_name = t.eat_token(); +                SyntacticToken::LabelDefinition(label_name.clone()) +            } +            '&' => { +                let name = t.eat_token(); +                SyntacticToken::LabelDefinition(format!("{label_name}/{name}")) +            } +            '~' => { +                let name = t.eat_token(); +                SyntacticToken::Invocation(format!("{label_name}/{name}")) +            } +            '#' => { +                let token = t.eat_token(); +                match token.parse::<Value>() { +                    Ok(value) => SyntacticToken::Padding(value), +                    Err(_) => err!(SyntacticError::InvalidPaddingValue), +                } +            }, +            c => { +                let token = format!("{c}{}", t.eat_token()); +                match token.parse::<Value>() { +                    Ok(value) => SyntacticToken::RawValue(value), +                    Err(_) => match token.parse::<Instruction>() { +                        Ok(instruction) => SyntacticToken::Instruction(instruction), +                        Err(_) => SyntacticToken::Invocation(token), +                    } +                } +            } +        }; + +        t.mark_end(); +        let source = t.get_source(); +        tokens.push(Tracked::from(token, source)); +    } + +    // Check that every block open matches a block close. 
+    let mut stack = Vec::new(); +    for token in &tokens { +        match &token.value { +            SyntacticToken::BlockOpen => stack.push(token.source.clone()), +            SyntacticToken::BlockClose => if let None = stack.pop() { +                let error = SyntacticError::UnmatchedBlockTerminator; +                errors.push(Tracked::from(error, token.source.clone())); +            } +            _ => (), +        } +    } +    for source in stack { +        let error = SyntacticError::UnterminatedBlock; +        errors.push(Tracked::from(error, source)); +    } + +    match errors.is_empty() { +        true => Ok(tokens), +        false => Err(errors), +    } +} + + +fn parse_body_from_tokeniser(t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +    let mut tokens = Vec::new(); +    let mut errors = Vec::new(); + +    for token in parse_syntactic_from_tokeniser(t)? { +        match token.value { +            SyntacticToken::LabelDefinition(_) => { +                let error = SyntacticError::LabelDefinitionInMacroDefinition; +                errors.push(Tracked::from(error, token.source)); +                continue; +            } +            SyntacticToken::MacroDefinition(_) => { +                let error = SyntacticError::MacroDefinitionInMacroDefinition; +                errors.push(Tracked::from(error, token.source)); +                continue; +            } +            _ => tokens.push(token), +        }; +    } + +    match errors.is_empty() { +        true => Ok(tokens), +        false => Err(errors), +    } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..57e78e7 --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,107 @@ +use crate::*; + +pub enum SyntacticToken { +    Comment(String), + +    LabelDefinition(String), +    MacroDefinition(SyntacticMacroDefinition), + +    RawValue(Value), +    Instruction(Instruction), +    Invocation(String), + +    Padding(Value), +    String(Vec<u8>), + +    BlockOpen, +    BlockClose, +} + +pub struct SyntacticMacroDefinition { +    pub name: Tracked<String>, +    pub body: Vec<Tracked<SyntacticToken>>, +} + +pub enum SyntacticError { +    UnterminatedBlock, +    UnterminatedComment, +    UnterminatedRawString, +    UnterminatedNullString, +    UnterminatedMacroDefinition, + +    UnmatchedBlockTerminator, +    UnmatchedCommentTerminator, +    UnmatchedMacroTerminator, + +    InvalidPaddingValue, + +    MacroDefinitionInMacroDefinition, +    LabelDefinitionInMacroDefinition, +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { +    for error in errors { +        report_syntactic_error(error, source_code); +    } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        SyntacticError::UnterminatedBlock => +            "Block was not terminated, add a '}}' character to terminate", +        SyntacticError::UnterminatedComment => +            "Comment was not terminated, add a ')' character to terminate", +        SyntacticError::UnterminatedRawString => +            "Raw string was not terminated, add a ' character to terminate", +        SyntacticError::UnterminatedNullString => +            "Null-terminated string was not terminated, add a '\"' character to terminate", +        SyntacticError::UnterminatedMacroDefinition 
=> +            "Macro definition was not terminated, add a ';' character to terminate", + +        SyntacticError::UnmatchedBlockTerminator => +            "Attempted to terminate a block, but no block was in progress", +        SyntacticError::UnmatchedCommentTerminator => +            "Attempted to terminate a comment, but no comment was in progress", +        SyntacticError::UnmatchedMacroTerminator => +            "Attempted to terminate a macro definition, but no macro definition was in progress", + +        SyntacticError::InvalidPaddingValue => +            "The padding value must be either two or four hexadecimal digits", + +        SyntacticError::MacroDefinitionInMacroDefinition => +            "A macro cannot be defined inside another macro", +        SyntacticError::LabelDefinitionInMacroDefinition => +            "A label cannot be defined inside a macro", +    }; + +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { +    match token { +        SyntacticToken::Comment(_) => +            indent!(i, "Comment"), + +        SyntacticToken::LabelDefinition(name) => +            indent!(i, "LabelDefinition({name})"), +        SyntacticToken::MacroDefinition(definition) => { +            indent!(i, "MacroDefinition({})", definition.name); +            for token in &definition.body { +                print_syntactic_token(i+1, token); +            } +        } + +        SyntacticToken::RawValue(value) => indent!(i, "RawValue({value})"), +        SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"), +        SyntacticToken::Invocation(name) => indent!(i, "Invocation({name})"), + +        SyntacticToken::Padding(value) => indent!(i, "Padding({value})"), +        SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)), + +        SyntacticToken::BlockOpen => indent!(i, "BlockOpen"), +        SyntacticToken::BlockClose => indent!(i, "BlockOpen"), +    } +} diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs deleted file mode 100644 index 0b89fb1..0000000 --- a/src/symbol_resolver.rs +++ /dev/null @@ -1,296 +0,0 @@ -use crate::*; - -use std::mem::take; - - -/// Resolve symbol references across source units. -pub struct SymbolResolver { -    pub definitions: Vec<TrackedSymbol>, -    /// All resolved references. -    pub resolved: Vec<TrackedSymbol>, -    /// All unresolved references. -    pub unresolved: Vec<TrackedSymbol>, -    /// Contains the `definitions` index of the original definition. -    pub redefinitions: Vec<(TrackedSymbol, usize)>, -    pub source_units: Vec<HeirarchicalSourceUnit>, -    pub root_unit_ids: Vec<usize>, -    pub unused_library_units: Vec<SourceUnit>, -} - - -impl SymbolResolver { -    /// Construct a resolver from a root source unit. 
-    pub fn from_source_unit(source_unit: SourceUnit) -> Self { -        let mut new = Self { -            definitions: Vec::new(), -            resolved: Vec::new(), -            unresolved: Vec::new(), -            redefinitions: Vec::new(), -            source_units: Vec::new(), -            root_unit_ids: Vec::new(), -            unused_library_units: Vec::new(), -        }; -        new.add_source_unit(source_unit, None); -        return new; -    } - -    pub fn add_library_units(&mut self, mut source_units: Vec<SourceUnit>) { -        self.unused_library_units.append(&mut source_units); -    } - -    pub fn resolve(&mut self) { -        // Repeatedly test if any unused source unit resolves an unresolved symbol, -        // breaking the loop when no new resolutions are found. -        'outer: loop { -            for (i, source_unit) in self.unused_library_units.iter().enumerate() { -                if let Some(id) = self.resolves_reference(&source_unit) { -                    let source_unit = self.unused_library_units.remove(i); -                    self.add_source_unit(source_unit, Some(id)); -                    continue 'outer; -                } -            } -            break; -        } - -        // For every macro reference in every unit, find the ID of the unit which -        // resolves that reference and add it to the .parent_ids field of the -        // referencing unit. -        for reference in &self.resolved { -            let predicate = |d: &&TrackedSymbol| d.symbol.name == reference.symbol.name; -            if let Some(definition) = self.definitions.iter().find(predicate) { -                let is_self = reference.source_id == definition.source_id; -                let is_label = definition.symbol.variant == SymbolVariant::LabelDefinition; -                if  is_self || is_label { continue; } -                let referencing_unit = &mut self.source_units[reference.source_id]; -                referencing_unit.parent_ids.push(definition.source_id); -            }; -        } -    } - -    /// Add a source unit to the resolver and link it to a parent unit. -    pub fn add_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { -        let source_id = self.source_units.len(); - -        // Add all main symbols. -        if let Some(definitions) = take(&mut source_unit.main.symbols.definitions) { -            self.add_definitions(definitions, source_id, SourceRole::Main); } -        if let Some(references) = take(&mut source_unit.main.symbols.references) { -            self.add_references(references, source_id, SourceRole::Main); } - -        // Add all head symbols. -        if let Some(head) = &mut source_unit.head { -            if let Some(references) = take(&mut head.symbols.references) { -                self.add_references(references, source_id, SourceRole::Head); } -            if let Some(definitions) = take(&mut head.symbols.definitions) { -                self.add_definitions(definitions, source_id, SourceRole::Head); } -        } - -        // Add all tail symbols. 
-        if let Some(tail) = &mut source_unit.tail { -            if let Some(references) = take(&mut tail.symbols.references) { -                self.add_references(references, source_id, SourceRole::Tail); } -            if let Some(definitions) = take(&mut tail.symbols.definitions) { -                self.add_definitions(definitions, source_id, SourceRole::Tail); } -        } - -        if let Some(parent_id) = parent_id { -            if let Some(parent_unit) = self.source_units.get_mut(parent_id) { -                parent_unit.child_ids.push(source_id); -            } -        } else { -            self.root_unit_ids.push(source_id); -        } - -        self.source_units.push( -            HeirarchicalSourceUnit { -                source_unit, -                child_ids: Vec::new(), -                parent_ids: Vec::new(), -            } -        ); -    } - -    fn add_references(&mut self, references: Vec<Symbol>, source_id: usize, source_role: SourceRole) { -        for symbol in references { -            let reference = TrackedSymbol { symbol, source_id, source_role }; -            match self.definitions.contains(&reference) { -                true => self.resolved.push(reference), -                false => self.unresolved.push(reference), -            } -        } -    } - -    fn add_definitions(&mut self, definitions: Vec<Symbol>, source_id: usize, source_role: SourceRole) { -        for symbol in definitions { -            let predicate = |d: &TrackedSymbol| { &d.symbol.name == &symbol.name }; -            if let Some(original) = self.definitions.iter().position(predicate) { -                let definition = TrackedSymbol { symbol, source_id, source_role }; -                let redefinition = (definition, original); -                self.redefinitions.push(redefinition); -            } else { -                let predicate = |s: &mut TrackedSymbol| s.symbol.name == symbol.name; -                for symbol in self.unresolved.extract_if(predicate) { -                    self.resolved.push(symbol); -                } -                self.unresolved.retain(|s| s.symbol.name != symbol.name); -                let definition = TrackedSymbol { symbol, source_id, source_role }; -                self.definitions.push(definition); -            } -        } -    } - -    /// Returns the ID of the owner of a symbol resolved by this unit. -    pub fn resolves_reference(&self, source_unit: &SourceUnit) -> Option<usize> { -        if let Some(definitions) = &source_unit.main.symbols.definitions { -            if let Some(id) = self.source_id_of_unresolved(&definitions) { -                return Some(id); -            } -        } -        if let Some(head) = &source_unit.head { -            if let Some(definitions) = &head.symbols.definitions { -                if let Some(id) = self.source_id_of_unresolved(&definitions) { -                    return Some(id); -                } -            } -        } -        if let Some(tail) = &source_unit.tail { -            if let Some(definitions) = &tail.symbols.definitions { -                if let Some(id) = self.source_id_of_unresolved(&definitions) { -                    return Some(id); -                } -            } -        } -        return None; -    } - -    /// Returns the ID of the owner of a reference to one of these symbols. 
-    fn source_id_of_unresolved(&self, symbols: &[Symbol]) -> Option<usize> { -        for symbol in symbols { -            let opt = self.unresolved.iter().find(|s| s.symbol.name == symbol.name); -            if let Some(unresolved) = opt { -                return Some(unresolved.source_id); -            } -        } -        return None; -    } - -    pub fn get_source_code_for_tracked_symbol(&self, symbol: &TrackedSymbol) -> &str { -        let source_unit = &self.source_units[symbol.source_id].source_unit; -        match symbol.source_role { -            SourceRole::Main => source_unit.main.symbols.source_code.as_str(), -            SourceRole::Head => match &source_unit.head { -                Some(head) => head.symbols.source_code.as_str(), -                None => unreachable!("Failed to find source for token"), -            } -            SourceRole::Tail => match &source_unit.tail { -                Some(tail) => tail.symbols.source_code.as_str(), -                None => unreachable!("Failed to find source for token"), -            } -        } -    } - -    /// Create a source file by concatenating all source units. -    /// If the source unit dependency graph contains a cycle, the IDs of the -    /// source units involved in the cycle will be returned. -    pub fn get_merged_source_code(&self) -> Result<String, Vec<usize>> { -        // The ID of a given source unit will come after the IDs of all -        // source units which define at least one symbol referenced in the -        // given source unit. -        let head_order = { -            let mut included_source_ids: Vec<usize> = Vec::new(); -            let mut remaining_source_ids: Vec<usize> = Vec::new(); -            // Reverse the order so that the root unit is the last to be added. -            for i in (0..self.source_units.len()).rev() { -                remaining_source_ids.push(i); -            } - -            'restart: while !remaining_source_ids.is_empty() { -                'next: for (i, id) in remaining_source_ids.iter().enumerate() { -                    let unit = &self.source_units[*id]; -                    for parent_id in &unit.parent_ids { -                        if !included_source_ids.contains(&parent_id) { -                            continue 'next; -                        } -                    } -                    included_source_ids.push(*id); -                    remaining_source_ids.remove(i); -                    continue 'restart; -                } -                // All remaining source units depend on at least one remaining -                // source unit, indicating a dependency cycle. -                return Err(remaining_source_ids); -            } -            included_source_ids -        }; - -        let mut source_code = String::new(); - -        // Push head source code in macro-definition order. -        for id in &head_order { -            let source_unit = &self.source_units[*id]; -            if let Some(head) = &source_unit.source_unit.head { -                push_source_code_to_string(&mut source_code, head); -            } -        } -        // Push main source code in source-added order. -        for source_unit in self.source_units.iter() { -            let main = &source_unit.source_unit.main; -            push_source_code_to_string(&mut source_code, &main); -        } -        // Push tail source code in reverse source-added order. 
-        for source_unit in self.source_units.iter().rev() { -            if let Some(tail) = &source_unit.source_unit.tail { -                push_source_code_to_string(&mut source_code, tail); -            } -        } -        return Ok(source_code); -    } -} - - -fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) { -    // Don't push source code if it contains only whitespace. -    let source_code = &source_file.symbols.source_code; -    if source_code.chars().all(|c| c.is_whitespace()) { return; } -    // Ensure that sections are separated by two newlines. -    if !string.is_empty() { -        if !string.ends_with('\n') { string.push('\n'); } -        if !string.ends_with("\n\n") { string.push('\n'); } -    } -    // Write a path comment to the string. -    let path_str = source_file.path.as_os_str().to_string_lossy(); -    let path_comment = format!("(: {path_str} )\n"); -    string.push_str(&path_comment); -    string.push_str(&source_code); -} - - -pub struct HeirarchicalSourceUnit { -    pub source_unit: SourceUnit, -    /// IDs of units which were added to resolve symbol references this unit. -    pub child_ids: Vec<usize>, -    /// IDs of units which resolve macro references in this unit. -    pub parent_ids: Vec<usize>, -} - - -pub struct TrackedSymbol { -    pub symbol: Symbol, -    pub source_id: usize, -    pub source_role: SourceRole, -} - - -#[derive(Clone, Copy)] -pub enum SourceRole { -    Main, -    Head, -    Tail, -} - - -impl PartialEq for TrackedSymbol { -    fn eq(&self, other: &TrackedSymbol) -> bool { -        self.symbol.name.eq(&other.symbol.name) -    } -} diff --git a/src/tokens.rs b/src/tokens.rs deleted file mode 100644 index 81bf9d5..0000000 --- a/src/tokens.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod syntactic; -mod semantic; -mod instruction; -mod value; - -pub use syntactic::*; -pub use semantic::*; -pub use instruction::*; -pub use value::*; diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs deleted file mode 100644 index ac5179c..0000000 --- a/src/tokens/semantic.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub struct SemanticToken { -    pub source: SourceSpan, -    pub bytecode: BytecodeSpan, -    pub variant: SemanticTokenVariant, -} - - -pub enum SemanticTokenVariant { -    LabelDefinition(LabelDefinition), -    MacroDefinition(MacroDefinition), - -    /// Pointer to the matching label definition. -    LabelReference(usize), -    /// Pointer to the matching macro definition. -    MacroInvocation(usize), - -    Literal(Value), -    Padding(Value), -    Instruction(Instruction), - -    Comment(String), -    String(Vec<u8>), - -    /// Pointer to the matching block close. -    BlockOpen(usize), -    /// Pointer to the matching block open. 
-    BlockClose(usize), -    MarkOpen, -    MarkClose, - -    Error(SemanticParseError), -} - -impl std::fmt::Debug for SemanticToken { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        match &self.variant { -            SemVar::LabelDefinition(def) => write!(f, "LabelDefinition({})", def.name), -            SemVar::MacroDefinition(def) => write!(f, "MacroDefinition({})", def.name), -            SemVar::LabelReference(pointer) => write!(f, "LabelReference(*{pointer})"), -            SemVar::MacroInvocation(pointer) => write!(f, "MacroInvocation(*{pointer})"), -            SemVar::Literal(value) => write!(f, "Literal({value})"), -            SemVar::Padding(value) => write!(f, "Padding({value})"), -            SemVar::Instruction(instr) => write!(f, "Instruction(0x{:02x})", instr.value), -            SemVar::Comment(comment) => write!(f, "Comment({comment})"), -            SemVar::String(string) => write!(f, "String({})", String::from_utf8_lossy(&string)), -            SemVar::BlockOpen(_) => write!(f, "BlockOpen"), -            SemVar::BlockClose(_) => write!(f, "BlockClose"), -            SemVar::MarkOpen => write!(f, "MarkOpen"), -            SemVar::MarkClose => write!(f, "MarkClose"), -            SemVar::Error(_) => write!(f, "Error"), -        } -    } -} - - -pub struct LabelDefinition { -    /// The absolute name of the label or sublabel. -    pub name: String, -    /// List of pointers to label reference tokens. -    pub references: Vec<usize>, -} - - -pub struct MacroDefinition { -    pub name: String, -    pub references: Vec<usize>, -    pub body_tokens: Vec<SemanticToken>, -} - - -pub enum SemanticParseError { -    LabelDefinitionInMacroDefinition, -    MacroDefinitionInMacroDefinition, - -    StrayMacroTerminator, -    StrayBlockClose, -    UnclosedBlock, - -    UndefinedSymbol(String), -    RedefinedSymbol((String, SourceSpan)), - -    MacroInvocationBeforeDefinition((String, SourceSpan)), - -    SyntaxError(SyntacticParseError) -} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs deleted file mode 100644 index 8684ed9..0000000 --- a/src/tokens/syntactic.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::*; - - -pub struct SyntacticToken { -    /// Location of token in source files. 
-    pub source: SourceSpan, -    pub variant: SyntacticTokenVariant, -} - - -pub enum SyntacticTokenVariant { -    LabelDefinition(String), -    MacroDefinition(String), -    MacroDefinitionTerminator, - -    Literal(Value), -    Padding(Value), -    Instruction(Instruction), - -    Comment(String), -    String(Vec<u8>), - -    BlockOpen, -    BlockClose, -    MarkOpen, -    MarkClose, - -    Symbol(String), - -    Error(SyntacticParseError), -} - - -pub enum SyntacticParseError { -    UnterminatedComment, -    UnterminatedRawString, -    UnterminatedNullString, -    InvalidPaddingValue(String), -} diff --git a/src/translators.rs b/src/translators.rs deleted file mode 100644 index cce5633..0000000 --- a/src/translators.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod syntactic_parser; -mod semantic_parser; -mod bytecode_generator; -mod symbols_generator; - -pub use syntactic_parser::*; -pub use semantic_parser::*; -pub use bytecode_generator::*; -pub use symbols_generator::*; diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs deleted file mode 100644 index 956aca5..0000000 --- a/src/translators/bytecode_generator.rs +++ /dev/null @@ -1,131 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> { -    let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens); -    generator.generate() -} - - -/// Translate semantic tokens into bytecode. -struct BytecodeGenerator<'a> { -    semantic_tokens: &'a mut [SemanticToken], -    block_stack: Vec<usize>, -    bytecode: Vec<u8>, -    /// (address in bytecode, label definition token index) -    label_references: Vec<(usize, usize)>, -} - -impl<'a> BytecodeGenerator<'a> { -    pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self { -        Self { -            semantic_tokens, -            block_stack: Vec::new(), -            bytecode: Vec::new(), -            label_references: Vec::new(), -        } -    } - -    pub fn generate(mut self) -> Vec<u8> { -        for i in 0..self.semantic_tokens.len() { -            let address = self.bytecode.len(); -            self.generate_bytecode_for_token(i, None); -            self.semantic_tokens[i].bytecode = BytecodeSpan { -                bytes: self.bytecode[address..].to_vec(), -                location: BytecodeLocation { -                    address, -                    length: self.bytecode.len().saturating_sub(address), -                } -            }; -        } - -        // Replace blank label references in bytecode with real label addresses. -        // The layer of indirection is necessary because the iteration borrows -        // self immutably. -        let mut insertions: Vec<(usize, u16)> = Vec::new(); -        for (bytecode_address, token_pointer) in &self.label_references { -            let label_token = &self.semantic_tokens[*token_pointer]; -            // TODO: If greater than u16, print a warning. -            let address_value = label_token.bytecode.location.address as u16; -            insertions.push((*bytecode_address, address_value)); -        } -        for (bytecode_address, address_value) in insertions { -            self.replace_address_in_bytecode(bytecode_address, address_value); -        } - -        // Strip trailing null bytes from the bytecode. 
-        let mut length = self.bytecode.len();
-        for (i, byte) in self.bytecode.iter().enumerate().rev() {
-            match *byte == 0 {
-                true => length = i,
-                false => break,
-            };
-        }
-        self.bytecode.truncate(length);
-
-        return self.bytecode;
-    }
-
-    fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
-        macro_rules! push_byte {
-            ($byte:expr) => { self.bytecode.push($byte) }; }
-        macro_rules! push_double {
-            ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; }
-        macro_rules! pad {
-            ($len:expr) => { for _ in 0..$len { push_byte!(0); } } }
-
-        let semantic_token = if let Some(macro_pointer) = macro_pointer {
-            let macro_definition = &self.semantic_tokens[macro_pointer];
-            if let SemVar::MacroDefinition(def) = &macro_definition.variant {
-                &def.body_tokens[pointer]
-            } else { unreachable!() }
-        } else {
-                &self.semantic_tokens[pointer]
-        };
-        match &semantic_token.variant {
-            SemVar::MacroInvocation(pointer) => {
-                let macro_definition = &self.semantic_tokens[*pointer];
-                if let SemVar::MacroDefinition(def) = &macro_definition.variant {
-                    let length = def.body_tokens.len();
-                    let macro_pointer = Some(*pointer);
-                    for body_pointer in 0..length {
-                        // Recurse, generate bytecode for each macro body token.
-                        self.generate_bytecode_for_token(body_pointer, macro_pointer);
-                    }
-                } else { unreachable!() }
-            }
-            SemVar::Literal(value) => match value {
-                Value::Byte(value) => push_byte!(*value),
-                Value::Double(value) => push_double!(value),
-            }
-            SemVar::Padding(value) => match value {
-                Value::Byte(value) => pad!(*value),
-                Value::Double(value) => pad!(*value),
-            }
-            SemVar::Instruction(instr) => push_byte!(instr.value),
-            SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes),
-            SemVar::LabelReference(pointer) => {
-                self.label_references.push((self.bytecode.len(), *pointer));
-                push_double!(0u16);
-            }
-            SemVar::BlockOpen(_) => {
-                self.block_stack.push(self.bytecode.len());
-                push_double!(0u16);
-            }
-            SemVar::BlockClose(_) => {
-                let bytecode_address = self.block_stack.pop().unwrap();
-                // TODO: If greater than u16, print a warning.
-                let address_value = self.bytecode.len() as u16; -                self.replace_address_in_bytecode(bytecode_address, address_value); -            } -            _ => (), -        }; -    } - -    fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) { -        let range = bytecode_address..bytecode_address+2; -        self.bytecode[range].clone_from_slice(&address_value.to_be_bytes()); -    } -} diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs deleted file mode 100644 index cb6a435..0000000 --- a/src/translators/semantic_parser.rs +++ /dev/null @@ -1,245 +0,0 @@ -use crate::*; - -use std::collections::HashMap; -use std::path::PathBuf; - -use SyntacticTokenVariant as SynVar; -use SemanticTokenVariant as SemVar; -use SemanticParseError as SemErr; - - -pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> { -    let semantic_parser = SemanticParser::from_source_code(source_code, path); -    semantic_parser.parse() -} - - -/// Translate syntactic tokens into semantic tokens. -struct SemanticParser { -    labels: HashMap<String, Definition>, -    macros: HashMap<String, Definition>, -    syntactic_tokens: Vec<SyntacticToken>, -    /// Index of the current outer token. -    current_outer_index: usize, -} - -impl SemanticParser { -    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { -        let mut labels = HashMap::new(); -        let mut macros = HashMap::new(); -        let mut syntactic_tokens = Vec::new(); - -        let parser = SyntacticParser::from_source_code(source_code, path); -        for syntactic_token in parser { -            let definition = Definition::new(syntactic_token.source.clone()); -            match &syntactic_token.variant { -                SynVar::LabelDefinition(name) => { -                    let _ = labels.try_insert(name.to_owned(), definition); -                }, -                SynVar::MacroDefinition(name) => { -                    let _ = macros.try_insert(name.to_owned(), definition); -                }, -                _ => (), -            } -            syntactic_tokens.push(syntactic_token); -        } - -        Self { -            labels, -            macros, -            syntactic_tokens, -            current_outer_index: 0, -        } -    } - -    /// Parse syntactic tokens as semantic tokens. -    pub fn parse(mut self) -> Vec<SemanticToken> { -        let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens); -        let mut syntactic = syntactic_tokens.into_iter(); -        let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false); - -        // Insert real label definition pointers into label reference tokens. -        for definition in self.labels.values_mut() { -            if let Some(definition_pointer) = definition.pointer { -                // Insert definition pointer into reference tokens. -                for reference_pointer in &definition.references { -                    let reference_token = &mut semantic_tokens[*reference_pointer]; -                    reference_token.variant = SemVar::LabelReference(definition_pointer); -                } -                // Insert reference pointers into definition token. 
-                let definition_token = &mut semantic_tokens[definition_pointer]; -                if let SemVar::LabelDefinition(ref mut def) = definition_token.variant { -                    def.references = std::mem::take(&mut definition.references); -                } else { unreachable!() } -                // Insert definition pointer into reference tokens inside macros. -                for (outer, inner) in &definition.deep_references { -                    let macro_token = &mut semantic_tokens[*outer]; -                    if let SemVar::MacroDefinition(ref mut def) = macro_token.variant { -                        let reference_token = &mut def.body_tokens[*inner]; -                        reference_token.variant = SemVar::LabelReference(definition_pointer); -                    } else { unreachable!() } -                } -                // TODO: Record deep references in macro and label definitions? -            } -        } - -        return semantic_tokens; -    } - -    fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken> -    where I: Iterator<Item = SyntacticToken> -    { -        let mut semantic_tokens: Vec<SemanticToken> = Vec::new(); -        let mut block_stack: Vec<usize> = Vec::new(); - -        while let Some(syntactic_token) = parser.next() { -            let current_index = semantic_tokens.len(); -            if !in_macro { -                self.current_outer_index = current_index; -            } - -            let semantic_token_variant = match syntactic_token.variant { -                SynVar::LabelDefinition(name) => { -                    if in_macro { -                        SemVar::Error(SemErr::LabelDefinitionInMacroDefinition) -                    } else if let Some(definition) = self.macros.get(&name) { -                        let source = definition.source.clone(); -                        SemVar::Error(SemErr::RedefinedSymbol((name, source))) -                    } else if let Some(definition) = self.labels.get_mut(&name) { -                        if definition.pointer.is_some() { -                            let source = definition.source.clone(); -                            SemVar::Error(SemErr::RedefinedSymbol((name, source))) -                        } else { -                            definition.pointer = Some(current_index); -                            let references = Vec::new(); -                            SemVar::LabelDefinition(LabelDefinition { name, references }) -                        } -                    } else { -                        unreachable!() -                    } -                } -                SynVar::MacroDefinition(name) => { -                    if in_macro { -                        SemVar::Error(SemErr::MacroDefinitionInMacroDefinition) -                    } else if let Some(definition) = self.labels.get(&name) { -                        let source = definition.source.clone(); -                        SemVar::Error(SemErr::RedefinedSymbol((name, source))) -                    } else if let Some(definition) = self.macros.get_mut(&name) { -                        if definition.pointer.is_some() { -                            let source = definition.source.clone(); -                            SemVar::Error(SemErr::RedefinedSymbol((name, source))) -                        } else { -                            definition.pointer = Some(current_index); -                            let references = Vec::new(); -                            let body_tokens = 
self.pull_semantic_tokens(parser, true); -                            SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens }) -                        } -                    } else { -                        unreachable!() -                    } -                } -                SynVar::MacroDefinitionTerminator => if in_macro { -                    break; -                } else { -                    SemVar::Error(SemErr::StrayMacroTerminator) -                } -                SynVar::Literal(value) => { -                    SemVar::Literal(value) -                } -                SynVar::Padding(value) => { -                    SemVar::Padding(value) -                } -                SynVar::Instruction(instr) => { -                    SemVar::Instruction(instr) -                } -                SynVar::Comment(comment) => { -                    SemVar::Comment(comment) -                } -                SynVar::String(bytes) => { -                    SemVar::String(bytes) -                } -                SynVar::BlockOpen => { -                    block_stack.push(current_index); -                    SemVar::BlockOpen(0) -                } -                SynVar::BlockClose => { -                    if let Some(pointer) = block_stack.pop() { -                        let open = &mut semantic_tokens[pointer]; -                        open.variant = SemVar::BlockOpen(current_index); -                        SemVar::BlockClose(pointer) -                    } else { -                        SemVar::Error(SemErr::StrayBlockClose) -                    } -                } -                SynVar::MarkOpen => { -                    SemVar::MarkOpen -                } -                SynVar::MarkClose => { -                    SemVar::MarkClose -                } -                SynVar::Symbol(name) => { -                    if let Some(definition) = self.labels.get_mut(&name) { -                        if in_macro { -                            let pointer = (self.current_outer_index, current_index); -                            definition.deep_references.push(pointer); -                        } else { -                            definition.references.push(current_index); -                        } -                        SemVar::LabelReference(0) -                    } else if let Some(definition) = self.macros.get_mut(&name) { -                        if let Some(pointer) = definition.pointer { -                            if !in_macro { definition.references.push(current_index); } -                            SemVar::MacroInvocation(pointer) -                        } else { -                            let source = definition.source.clone(); -                            SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source))) -                        } -                    } else { -                        SemVar::Error(SemErr::UndefinedSymbol(name)) -                    } -                } -                SynVar::Error(syntax_err) => { -                    SemVar::Error(SemErr::SyntaxError(syntax_err)) -                } -            }; - -            let semantic_token = SemanticToken { -                source: syntactic_token.source, -                bytecode: BytecodeSpan::default(), -                variant: semantic_token_variant, -            }; -            semantic_tokens.push(semantic_token); -        } - -        if in_macro { -            //TODO: UnterminatedMacroDefinition -        } - -        // Replace each unclosed 
BlockOpen token with an error. -        for block_pointer in block_stack { -            semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock); -        } - -        return semantic_tokens; -    } -} - - -struct Definition { -    pub source: SourceSpan, -    pub pointer: Option<usize>, -    pub references: Vec<usize>, -    /// (macro index, label reference index) -    pub deep_references: Vec<(usize, usize)>, -} - -impl Definition { -    pub fn new(source: SourceSpan) -> Self { -        Self { -            source, -            pointer: None, -            references: Vec::new(), -            deep_references: Vec::new(), -        } -    } -} diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs deleted file mode 100644 index d30facd..0000000 --- a/src/translators/symbols_generator.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::*; - -use SemanticTokenVariant as SemVar; - - -pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String { -    let mut symbols = String::new(); - -    for token in semantic_tokens { -        if let SemVar::LabelDefinition(definition) = &token.variant { -            let address = token.bytecode.location.address; -            if address > 0xffff { break; } -            let name = &definition.name; -            let location = token.source.location(); -            symbols.push_str(&format!("{address:04x} {name} {location}\n")); -        } -    } - -    return symbols; -} diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs deleted file mode 100644 index 8f0850b..0000000 --- a/src/translators/syntactic_parser.rs +++ /dev/null @@ -1,117 +0,0 @@ -use crate::*; - -use std::path::PathBuf; - - -/// Translate raw source code characters into syntactic tokens. -pub struct SyntacticParser { -    tokeniser: Tokeniser, -    /// The name of the most recently parsed label. -    label: String, -} - - -impl SyntacticParser { -    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { -        let mut tokeniser = Tokeniser::new(source_code, path); -        tokeniser.add_delimiters(&['@','&','%',';','[',']','{','}','(','"','\'','#','~']); -        tokeniser.add_terminators(&[':']); -        Self { tokeniser, label: String::new() } -    } -} - - -impl Iterator for SyntacticParser { -    type Item = SyntacticToken; - -    /// Sequentially parse tokens from the source code. -    fn next(&mut self) -> Option<SyntacticToken> { -        use SyntacticTokenVariant as SynVar; -        use SyntacticParseError as SynErr; -        let t = &mut self.tokeniser; - -        t.drop_whitespace(); -        t.mark_start_position(); - -        let variant = match t.eat_char()? 
{ -            '@' => { -                self.label = t.eat_token(); -                SynVar::LabelDefinition(self.label.clone()) -            } -            '&' => { -                let token = t.eat_token(); -                SynVar::LabelDefinition(format!("{}/{token}", self.label)) -            } -            '%' => SynVar::MacroDefinition(t.eat_token()), -            ';' => SynVar::MacroDefinitionTerminator, -            '[' => SynVar::MarkOpen, -            ']' => SynVar::MarkClose, -            '{' => SynVar::BlockOpen, -            '}' => SynVar::BlockClose, -            '(' => match t.eat_to_delimiter(')') { -                Some(string) => SynVar::Comment(string), -                None => SynVar::Error(SynErr::UnterminatedComment), -            } -            '\'' => match t.eat_to_delimiter('\'') { -                Some(string) => SynVar::String(string.as_bytes().to_vec()), -                None => SynVar::Error(SynErr::UnterminatedRawString), -            } -            '"' => match t.eat_to_delimiter('"') { -                Some(string) => { -                    let mut bytes = string.as_bytes().to_vec(); -                    bytes.push(0x00); -                    SynVar::String(bytes) -                } -                None => SynVar::Error(SynErr::UnterminatedNullString), -            } -            '#' => { -                let token = t.eat_token(); -                match token.parse::<Value>() { -                    Ok(value) => SynVar::Padding(value), -                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), -                } -            }, -            '~' => { -                let token = t.eat_token(); -                let symbol = format!("{}/{token}", self.label); -                SynVar::Symbol(symbol) -            } -            ':' => SynVar::Symbol(String::from(':')), -            c => { -                let token = format!("{c}{}", t.eat_token()); -                match token.parse::<Value>() { -                    Ok(value) => SynVar::Literal(value), -                    Err(_) => match token.parse::<Instruction>() { -                        Ok(instruction) => SynVar::Instruction(instruction), -                        Err(_) => SynVar::Symbol(token), -                    } -                } -            } -        }; - -        // Parse source path comments. -        if let SynVar::Comment(comment) = &variant { -            // Check if the comment fills the entire line. 
-            if t.start_position.column == 0 && t.end_of_line() { -                if let Some(path) = comment.strip_prefix(": ") { -                    t.source_path = Some(PathBuf::from(path.trim())); -                    t.embedded_first_line = t.start_position.line + 1; -                } -            } -        } - -        let source = t.get_source_span(); -        Some( SyntacticToken { source, variant } ) -    } -} - - -#[derive(Debug)] -pub enum ParseError { -    InvalidExtension, -    NotFound, -    NotReadable, -    IsADirectory, -    InvalidUtf8, -    Unknown, -} diff --git a/src/tokens/instruction.rs b/src/types/instruction.rs index d5fb3e5..daf4ce1 100644 --- a/src/tokens/instruction.rs +++ b/src/types/instruction.rs @@ -5,7 +5,6 @@ pub struct Instruction {      pub value: u8,  } -  impl Instruction {      pub fn operation(&self) -> Operation {          match self.value & 0x1f { @@ -25,11 +24,11 @@ impl Instruction {          self.value & 0x80 != 0      } -    pub fn literal_mode(&self) -> bool { +    pub fn immediate_mode(&self) -> bool {          self.value & 0x40 != 0      } -    pub fn double_mode(&self) -> bool { +    pub fn wide_mode(&self) -> bool {          self.value & 0x20 != 0      }  } diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..998bc33 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,5 @@ +mod instruction; +mod value; + +pub use instruction::*; +pub use value::*; diff --git a/src/tokens/value.rs b/src/types/value.rs index e421bd5..fe82710 100644 --- a/src/tokens/value.rs +++ b/src/types/value.rs @@ -1,8 +1,24 @@ +#[derive(Clone, Copy)]  pub enum Value {      Byte(u8),      Double(u16),  } +impl From<Value> for usize { +    fn from(value: Value) -> Self { +        match value { +            Value::Byte(byte) => byte.into(), +            Value::Double(double) => double.into(), +        } +    } +} + +impl From<&Value> for usize { +    fn from(value: &Value) -> Self { +        (*value).into() +    } +} +  impl std::fmt::Display for Value {      fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {          match self { | 
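For context, a minimal usage sketch of the renamed Instruction mode accessors and the new Value-to-usize conversions introduced above; the helper functions here are illustrative only and are not part of this patch.

// Hypothetical helpers, assuming the Instruction and Value types shown above
// and the crate-wide `use crate::*;` re-export convention used elsewhere.
use crate::*;

/// Number of bytes a padding token reserves; relies on `From<&Value> for usize`.
fn padding_length(value: &Value) -> usize {
    usize::from(value)
}

/// Summarise an instruction byte using the renamed accessors
/// (`wide_mode` was `double_mode`, `immediate_mode` was `literal_mode`).
fn describe(instruction: &Instruction) -> String {
    format!(
        "0x{:02x} wide={} immediate={}",
        instruction.value,
        instruction.wide_mode(),
        instruction.immediate_mode(),
    )
}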
