diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/addressing.rs | 44 | ||||
| -rw-r--r-- | src/assembler.rs | 278 | ||||
| -rw-r--r-- | src/bin/bedrock-asm.rs | 148 | ||||
| -rw-r--r-- | src/error.rs | 10 | ||||
| -rw-r--r-- | src/gather_libraries.rs | 185 | ||||
| -rw-r--r-- | src/lib.rs | 35 | ||||
| -rw-r--r-- | src/locators.rs | 5 | ||||
| -rw-r--r-- | src/locators/bytecode.rs | 39 | ||||
| -rw-r--r-- | src/locators/source.rs | 69 | ||||
| -rw-r--r-- | src/main.rs | 43 | ||||
| -rw-r--r-- | src/print.rs | 237 | ||||
| -rw-r--r-- | src/semantic_token.rs | 116 | ||||
| -rw-r--r-- | src/symbol_resolver.rs | 230 | ||||
| -rw-r--r-- | src/syntactic_token.rs | 43 | ||||
| -rw-r--r-- | src/tokenizer.rs | 235 | ||||
| -rw-r--r-- | src/tokens.rs | 9 | ||||
| -rw-r--r-- | src/tokens/instruction.rs | 170 | ||||
| -rw-r--r-- | src/tokens/semantic.rs | 90 | ||||
| -rw-r--r-- | src/tokens/syntactic.rs | 39 | ||||
| -rw-r--r-- | src/tokens/value.rs | 32 | ||||
| -rw-r--r-- | src/translators.rs | 9 | ||||
| -rw-r--r-- | src/translators/bytecode_generator.rs | 131 | ||||
| -rw-r--r-- | src/translators/semantic_parser.rs | 245 | ||||
| -rw-r--r-- | src/translators/symbols_generator.rs | 28 | ||||
| -rw-r--r-- | src/translators/syntactic_parser.rs | 247 | 
25 files changed, 1930 insertions, 787 deletions
| diff --git a/src/addressing.rs b/src/addressing.rs deleted file mode 100644 index dd7638e..0000000 --- a/src/addressing.rs +++ /dev/null @@ -1,44 +0,0 @@ -#[derive(Clone,Copy)] -pub struct CharAddress { -    /// The number of lines that precede this line in the file. -    pub line:usize, -    /// The number of characters that precede this character in the line. -    pub column:usize, -} -impl CharAddress { -    pub fn new(line:usize, column:usize) -> Self { -        Self { line, column } -    } -    pub fn zero() -> Self { -        Self::new(0,0) -    } -} - -pub struct SourceLocation { -    /// The slice of the source file from which this token was parsed. -    pub source: String, -    /// The address of the first character of this token. -    pub start: CharAddress, -    /// The address of the final character of this token. -    pub end: CharAddress -} -impl SourceLocation { -    pub fn new(source:String, start:CharAddress, end:CharAddress) -> Self { -        Self { source, start, end } -    } -    pub fn zero() -> Self { -        Self { source:String::new(), start:CharAddress::zero(), end:CharAddress::zero() } -    } -} - -pub struct BytecodeLocation { -    /// The number of bytes that precede this byte sequence in the bytecode. -    pub start: u16, -    /// The length of this byte sequence, in bytes. -    pub length: u16, -} -impl BytecodeLocation { -    pub fn zero() -> Self { -        Self { start:0, length:0 } -    } -} diff --git a/src/assembler.rs b/src/assembler.rs deleted file mode 100644 index 692eb14..0000000 --- a/src/assembler.rs +++ /dev/null @@ -1,278 +0,0 @@ -use std::mem::take; -use std::collections::hash_map::Entry; - -use SyntacticTokenType as Syn; -use SemanticTokenType as Sem; -use crate::*; - -use std::collections::HashMap; - -/// The inner value is the index of the token that defines this symbol. 
-pub enum SymbolDefinition { -    Macro(usize), -    Label(usize), -} - -pub struct Assembler { -    /// The contents of the program as a list of syntactic tokens. -    syntactic_tokens: Vec<SyntacticToken>, -    /// The contents of the program as a list of semantic tokens. -    semantic_tokens: Vec<SemanticToken>, -    /// Map the name of each defined symbol to the index of the defining token. -    symbol_definitions: HashMap<String, SymbolDefinition>, -    /// Map each macro definition token index to a list of syntactic body tokens. -    syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>, -    /// Map each macro definition token index to a list of semantic body tokens. -    semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>, -} - -impl Assembler { -    pub fn new() -> Self { -        Self { -            syntactic_tokens: Vec::new(), -            semantic_tokens: Vec::new(), -            symbol_definitions: HashMap::new(), -            syntactic_macro_bodies: HashMap::new(), -            semantic_macro_bodies: HashMap::new(), -        } -    } - -    pub fn tokenise_source(&mut self, source_code: &str) { -        // The index of the current macro definition token -        let mut macro_definition: Option<usize> = None; -        let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new(); - -        for mut token in TokenIterator::from_str(source_code) { -            let next_index = self.syntactic_tokens.len(); -            if let Some(index) = macro_definition { -                token.use_in_macro_body(); -                if token.is_macro_terminator() { -                    // Commit the current macro definition -                    macro_definition_body_tokens.push(token); -                    self.syntactic_macro_bodies.insert( -                        index, take(&mut macro_definition_body_tokens)); -                    macro_definition = None; -                } else { -                    macro_definition_body_tokens.push(token); 
-                } -            } else { -                if let Syn::MacroDefinition(ref name) = token.r#type { -                    macro_definition = Some(next_index); -                    match self.symbol_definitions.entry(name.to_string()) { -                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);} -                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));} -                    } -                } else if let Syn::LabelDefinition(ref name) = token.r#type { -                    match self.symbol_definitions.entry(name.to_string()) { -                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);} -                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));} -                    } -                } else if token.is_macro_terminator() { -                    token.set_error(Error::OrphanedMacroDefinitionTerminator); -                } -                self.syntactic_tokens.push(token); -            } -        } -    } - -    pub fn resolve_references(&mut self) { -        let syntactic_tokens = take(&mut self.syntactic_tokens); -        let syntactic_token_count = syntactic_tokens.len(); -        let mut parent_label = None; - -        for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() { -            if let SyntacticTokenType::LabelDefinition(name) = &syntactic_token.r#type { -                parent_label = Some(name.to_owned()); -            } -            let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index, parent_label.clone()); -            self.semantic_tokens.push(semantic_token); -        } -        assert_eq!(syntactic_token_count, self.semantic_tokens.len()); - -        // Find all cyclic macros -        let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter( -            |i| !self.traverse_macro_definition(*i, 0)).collect(); -        // Replace 
each cyclic macro reference in a macro definition with an error -        for body_tokens in &mut self.semantic_macro_bodies.values_mut() { -            for body_token in body_tokens { -                if let Sem::MacroReference(i) = body_token.r#type { -                    if cyclic_macros.contains(&i) { -                        let name = body_token.source_location.source.clone(); -                        body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference); -                    } -                } -            } -        } - -    } - -    /// Attempt to recursively traverse the body tokens of a macro definition, returning -    /// false if the depth exceeds a preset maximum, and returning true otherwise. -    fn traverse_macro_definition(&self, index: usize, level: usize) -> bool { -        if level == 16 { -            false -        } else { -            self.semantic_macro_bodies[&index].iter().all( -                |token| if let Sem::MacroReference(i) = token.r#type { -                    self.traverse_macro_definition(i, level+1) -                } else { -                    true -                } -            ) -        } -    } - -    pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) { -        let mut bytecode: Vec<u8> = Vec::new(); -        // Map each label definition token index to the bytecode addresses of the references -        let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new(); -        // Map each label and macro definition token to a list of reference token indices -        let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new(); - -        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};} -        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};} -        macro_rules! 
pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};} - -        let mut semantic_tokens = take(&mut self.semantic_tokens); - -        // Translate semantic tokens into bytecode -        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() { -            let start_addr = bytecode.len() as u16; -            match &mut semantic_token.r#type { -                Sem::LabelReference(i) => { -                    reference_tokens.entry(*i).or_default().push(index); -                    reference_addresses.entry(*i).or_default().push(start_addr); -                    push_u16!(0); -                } -                Sem::MacroReference(i) => { -                    reference_tokens.entry(*i).or_default().push(index); -                    self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses); -                } -                Sem::LabelDefinition(def) => def.address=start_addr, -                Sem::MacroDefinition(_) => (), - -                Sem::Padding(p) => pad!(*p), -                Sem::ByteLiteral(b) => push_u8!(*b), -                Sem::ShortLiteral(s) => push_u16!(*s), -                Sem::Instruction(b) => push_u8!(*b), - -                Sem::MacroDefinitionTerminator => unreachable!(), -                Sem::Comment => (), -                Sem::Error(..) 
=> (), -            }; -            let end_addr = bytecode.len() as u16; -            semantic_token.bytecode_location.start = start_addr; -            semantic_token.bytecode_location.length = end_addr - start_addr; -        } - -        // Fill each label reference with the address of the matching label definition -        for (index, slots) in reference_addresses { -            if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type { -                let [h,l] = definition.address.to_be_bytes(); -                for slot in slots { -                    bytecode[slot as usize] = h; -                    bytecode[slot.wrapping_add(1) as usize] = l; -                } -            } else { unreachable!() } -        } - -        // Move references and macro body tokens into label and macro definition tokens -        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() { -            if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type { -                definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap(); -                if let Some(references) = reference_tokens.remove(&index) { -                    definition.references = references; -                } -            } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type { -                if let Some(references) = reference_tokens.remove(&index) { -                    definition.references = references; -                } -            } -        } -        assert_eq!(reference_tokens.len(), 0); - -        // Remove trailing null bytes from the bytecode -        if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) { -            let truncated_length = final_nonnull_byte + 1; -            let removed_byte_count = bytecode.len() - truncated_length; -            if removed_byte_count > 0 { -                bytecode.truncate(truncated_length); -            } -        } - -        (bytecode, 
semantic_tokens) -    } - -    fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize, parent_label: Option<String>) -> SemanticToken { -        SemanticToken { -            r#type: { -                if let Some(err) = syn_token.error { -                    Sem::Error(syn_token.r#type, err) -                } else { -                    match syn_token.r#type { -                        Syn::Reference(ref name) => { -                            match self.symbol_definitions.get(name) { -                                Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i), -                                Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i), -                                None => Sem::Error(syn_token.r#type, Error::UnresolvedReference), -                            } -                        } -                        Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))}, -                        Syn::MacroDefinition(name) => { -                            let mut sem_body_tokens = Vec::new(); -                            for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() { -                                // Make the source location of the macro definition token span the entire definition -                                if syn_body_token.is_macro_terminator() { -                                    syn_token.source_location.end = syn_body_token.source_location.start; -                                } -                                let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0, parent_label.clone()); -                                sem_body_tokens.push(sem_body_token); -                            } -                            self.semantic_macro_bodies.insert(index, sem_body_tokens); -                            Sem::MacroDefinition(MacroDefinition::new(name)) -                        }, -                        
Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator, -                        Syn::Padding(v) => Sem::Padding(v), -                        Syn::ByteLiteral(v) => Sem::ByteLiteral(v), -                        Syn::ShortLiteral(v) => Sem::ShortLiteral(v), -                        Syn::Instruction(v) => Sem::Instruction(v), -                        Syn::Comment => Sem::Comment, -                    } -                } -            }, -            source_location: syn_token.source_location, -            bytecode_location: BytecodeLocation::zero(), -            parent_label, -        } -    } - -    fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) { -        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};} -        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};} -        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};} - -        for body_token in self.semantic_macro_bodies.get(&index).unwrap() { -            let start_addr = bytecode.len() as u16; -            match &body_token.r#type { -                Sem::LabelReference(i) => { -                    reference_addresses.entry(*i).or_default().push(start_addr); -                    push_u16!(0u16); -                }, -                Sem::MacroReference(i) => { -                    self.expand_macro_reference(*i, bytecode, reference_addresses); -                }, -                Sem::LabelDefinition(_) => unreachable!(), -                Sem::MacroDefinition(_) => unreachable!(), - -                Sem::Padding(p) => pad!(*p), -                Sem::ByteLiteral(b) => push_u8!(*b), -                Sem::ShortLiteral(s) => push_u16!(*s), -                Sem::Instruction(b) => push_u8!(*b), - -                Sem::MacroDefinitionTerminator => (), -                Sem::Comment => (), -                Sem::Error(..) 
=> (), -            }; -        } -    } -} diff --git a/src/bin/bedrock-asm.rs b/src/bin/bedrock-asm.rs new file mode 100644 index 0000000..2a29ee3 --- /dev/null +++ b/src/bin/bedrock-asm.rs @@ -0,0 +1,148 @@ +use bedrock_asm::*; + +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + + +static mut VERBOSE: bool = false; + +macro_rules! verbose { +    ($($tokens:tt)*) => { if unsafe { VERBOSE } { +            eprint!("[INFO] "); eprintln!($($tokens)*); +    } }; +} +macro_rules! error { +    ($($tokens:tt)*) => {{ +        eprint!("[ERROR] "); eprintln!($($tokens)*); std::process::exit(1); +    }}; +} + + +fn main() { +    let args = Arguments::from_env_or_exit(); + +    // ----------------------------------------------------------------------- +    // RESOLVE syntactic symbols +    let ext = args.ext.unwrap_or(String::from("brc")); +    let mut resolver = if let Some(path) = &args.source { +        match SourceUnit::from_path(&path, &ext) { +            Ok(source_unit) => SymbolResolver::from_source_unit(source_unit), +            Err(err) => match err { +                ParseError::InvalidExtension => error!( +                    "File {path:?} has invalid extension, must be '.{ext}'"), +                ParseError::NotFound => error!( +                    "File {path:?} was not found"), +                ParseError::InvalidUtf8 => error!( +                    "File {path:?} does not contain valid UTF-8 text"), +                ParseError::NotReadable => error!( +                    "File {path:?} is not readable"), +                ParseError::IsADirectory => error!( +                    "File {path:?} is a directory"), +                ParseError::Unknown => error!( +                    "Unknown error while attempting to read from {path:?}") +            } +        } +    } else { +        let mut source_code = String::new(); +        verbose!("Reading program source from standard input"); +        if let Err(err) = std::io::stdin().read_to_string(&mut 
source_code) { +            eprintln!("Could not read from standard input, exiting."); +            eprintln!("({err:?})"); +            std::process::exit(1); +        } +        let path = "<standard input>"; +        let source_unit = SourceUnit::from_source_code(source_code, path); +        SymbolResolver::from_source_unit(source_unit) +    }; +    // Load project libraries. +    if let Some(path) = &args.source { +        if !args.no_libs && !args.no_project_libs { +            let project_library = gather_project_libraries(path, &ext); +            resolver.add_library_units(project_library); +        } +    } +    // Load environment libraries. +    if !args.no_libs && !args.no_env_libs { +        for env_library in gather_environment_libraries(&ext) { +            resolver.add_library_units(env_library); +        } +    } +    resolver.resolve(); + +    // ----------------------------------------------------------------------- +    // PRINT information, generate merged source code +    if args.tree { +        print_source_tree(&resolver); +    } +    if print_resolver_errors(&resolver) { +        std::process::exit(1); +    }; +    let merged_source = resolver.get_merged_source_code(); +    if args.resolve { +        write_bytes_and_exit(merged_source.as_bytes(), args.output.as_ref()); +    } + +    // ----------------------------------------------------------------------- +    // PARSE semantic tokens from merged source code +    let path = Some("<merged source>"); +    let mut semantic_tokens = generate_semantic_tokens(&merged_source, path); +    if print_semantic_errors(&semantic_tokens, &merged_source) { +        std::process::exit(1); +    }; + +    // ----------------------------------------------------------------------- +    // GENERATE symbols file and bytecode +    let bytecode = generate_bytecode(&mut semantic_tokens); +    // let symbols = generate_symbols_file(&semantic_tokens); +    write_bytes_and_exit(&bytecode, args.output.as_ref()); +} + + 
+fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! { +    if let Some(path) = path { +        if let Err(err) = std::fs::write(path, bytes) { +            eprintln!("Could not write to path {:?}, exiting.", path.as_ref()); +            eprintln!("({err:?})"); +            std::process::exit(1); +        } +    } else { +        if let Err(err) = std::io::stdout().write_all(bytes) { +            eprintln!("Could not write to standard output, exiting."); +            eprintln!("({err:?})"); +            std::process::exit(1); +        } +    } +    std::process::exit(0); +} + + +xflags::xflags! { +    cmd arguments { +        /// Print additional debug information +        optional --verbose +        /// Print the assembler version and exit +        optional --version + + +        /// Bedrock source code file to assemble. +        optional source: PathBuf +        /// Destination path for assembler output. +        optional output: PathBuf +        /// File extension to identify source files. +        optional ext: String + +        /// Don't include libraries or resolve references. +        optional --no-libs +        /// Don't include project libraries +        optional --no-project-libs +        /// Don't include environment libraries. +        optional --no-env-libs + +        /// Show the resolved source file heirarchy +        optional --tree +        /// Assemble the program without saving any output +        optional --check +        /// Only return resolved source code. 
+        optional --resolve +    } +} diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index 8a6c0d6..0000000 --- a/src/error.rs +++ /dev/null @@ -1,10 +0,0 @@ -#[derive(Clone)] -pub enum Error { -    UnresolvedReference, -    DuplicateDefinition, -    InvalidPaddingValue, -    InvalidTypeInMacroDefinition, -    OrphanedMacroDefinitionTerminator, -    CyclicMacroReference, -} - diff --git a/src/gather_libraries.rs b/src/gather_libraries.rs new file mode 100644 index 0000000..0b5d2a6 --- /dev/null +++ b/src/gather_libraries.rs @@ -0,0 +1,185 @@ +use crate::*; + +use vagabond::*; + + +/// Gather all library units from the given path. +pub fn gather_project_libraries(path: &Path, extension: &str) -> Vec<SourceUnit> { +    match path.parent() { +        Some(parent_path) => gather_source_units(parent_path, extension), +        None => Vec::new(), +    } +} + + +/// Gather all library units from the paths specified in an environment variable. +pub fn gather_environment_libraries(extension: &str) -> Vec<Vec<SourceUnit>> { +    let mut environment_libraries = Vec::new(); +    if let Ok(lib_var) = std::env::var("BEDROCK_LIBS") { +        for path_str in lib_var.split(":") { +            let lib_path = PathBuf::from(path_str); +            let source_units = gather_source_units(&lib_path, extension); +            if !source_units.is_empty() { +                environment_libraries.push(source_units); +            } +        } +    }; +    return environment_libraries; +} + + +/// Gather all source units at or descended from the given entry. 
+fn gather_source_units(path: &Path, extension: &str) -> Vec<SourceUnit> { +    let mut source_units = Vec::new(); +    if let Ok(entry) = Entry::from_path(path) { +        match entry.entry_type { +            EntryType::File => { +                if let Ok(source) = SourceUnit::from_path(entry.path, extension) { +                    source_units.push(source); +                } +            } +            EntryType::Directory => { +                if let Ok(entries) = traverse_directory(entry.path) { +                    for entry in entries { +                        if let Ok(source) = SourceUnit::from_path(entry.path, extension) { +                            source_units.push(source); +                        } +                    } +                } +            } +        } +    }; +    return source_units; +} + + +pub struct SourceUnit { +    pub main: SourceFile, +    pub head: Option<SourceFile>, +    pub tail: Option<SourceFile>, +} + + +impl SourceUnit { +    /// Load from a source file and an associated head and tail file. 
+    pub fn from_path<P: Into<PathBuf>>(path: P, extension: &str) -> Result<Self, ParseError> { +        let main_path = canonicalize_path(path); +        let main_path_str = main_path.as_os_str().to_string_lossy().to_string(); +        let head_extension = format!("head.{extension}"); +        let tail_extension = format!("tail.{extension}"); +        let is_head = main_path_str.ends_with(&head_extension); +        let is_tail = main_path_str.ends_with(&tail_extension); +        let is_not_main = !main_path_str.ends_with(extension); +        if is_not_main || is_head || is_tail { return Err(ParseError::InvalidExtension); } + +        let symbols = parse_symbols_from_file(&main_path)?; +        let head_path = main_path.with_extension(head_extension); +        let tail_path = main_path.with_extension(tail_extension); + +        let main = SourceFile { path: main_path, symbols }; +        let head = match parse_symbols_from_file(&head_path) { +            Ok(symbols) => Some(SourceFile { path: head_path, symbols }), +            Err(_) => None, +        }; +        let tail = match parse_symbols_from_file(&tail_path) { +            Ok(symbols) => Some(SourceFile { path: tail_path, symbols }), +            Err(_) => None, +        }; +        Ok( SourceUnit { main, head, tail } ) +    } + +    /// Load from a string of source code. +    pub fn from_source_code<P: Into<PathBuf>>(source_code: String, path: P) -> Self { +        let path = canonicalize_path(path); +        let symbols = parse_symbols_from_source(source_code, Some(&path)); +        Self { +            main: SourceFile { path, symbols }, +            head: None, +            tail: None, +        } +    } +} + + +/// Read and parse all symbols from a source file. +fn parse_symbols_from_file(path: &Path) -> Result<Symbols, ParseError> { +    let source = read_source_from_file(path)?; +    Ok(parse_symbols_from_source(source, Some(path))) +} + + +/// Parse all symbols from a source code string. 
+fn parse_symbols_from_source(source_code: String, path: Option<&Path>) -> Symbols { +    use SyntacticTokenVariant as SynVar; + +    let token_iter = SyntacticParser::from_source_code(&source_code, path); +    let mut definitions = Vec::new(); +    let mut references = Vec::new(); + +    for token in token_iter { +        match token.variant { +            SynVar::LabelDefinition(name) => { +                definitions.push(Symbol { name, source: token.source }); +            }, +            SynVar::MacroDefinition(name) => { +                definitions.push(Symbol { name, source: token.source }); +            } +            SynVar::Symbol(name) => { +                references.push(Symbol { name, source: token.source }); +            }, +            _ => (), +        } +    } + +    Symbols { +        definitions: Some(definitions), +        references: Some(references), +        source_code, +    } +} + + +/// Attempt to read program source from a file. +pub fn read_source_from_file(path: &Path) -> Result<String, ParseError> { +    match std::fs::read(&path) { +        Ok(bytes) => match String::from_utf8(bytes) { +            Ok(source) => Ok(source), +            Err(_) => return Err(ParseError::InvalidUtf8), +        } +        Err(err) => return Err( match err.kind() { +            std::io::ErrorKind::NotFound => ParseError::NotFound, +            std::io::ErrorKind::PermissionDenied => ParseError::NotReadable, +            std::io::ErrorKind::IsADirectory => ParseError::IsADirectory, +            _ => ParseError::Unknown, +        } ) +    } +} + + +fn canonicalize_path<P: Into<PathBuf>>(path: P) -> PathBuf { +    let pathbuf = path.into(); +    match pathbuf.canonicalize() { +        Ok(canonical) => canonical, +        Err(_) => pathbuf, +    } +} + + + +pub struct SourceFile { +    pub path: PathBuf, +    pub symbols: Symbols, +} + + +pub struct Symbols { +    pub definitions: Option<Vec<Symbol>>, +    pub references: Option<Vec<Symbol>>, +    pub 
source_code: String, +} + + +pub struct Symbol { +    pub name: String, +    pub source: SourceSpan, +} @@ -1,21 +1,20 @@ -mod addressing; -mod syntactic_token; -mod semantic_token; -mod tokenizer; -mod error; -mod assembler; +#![feature(io_error_more)] +#![feature(map_try_insert)] -pub use addressing::{CharAddress, SourceLocation, BytecodeLocation}; -pub use syntactic_token::{SyntacticToken, SyntacticTokenType}; -pub use semantic_token::{SemanticToken, SemanticTokenType, LabelDefinition, MacroDefinition}; -pub use error::Error; -pub use tokenizer::TokenIterator; -pub use assembler::Assembler; -pub fn assemble(source_code: &str) -> (Vec<u8>, Vec<SemanticToken>) { -    let mut assembler = Assembler::new(); -    assembler.tokenise_source(source_code); -    assembler.resolve_references(); -    assembler.generate_bytecode() -} +mod gather_libraries; +mod symbol_resolver; +pub use gather_libraries::*; +pub use symbol_resolver::*; + +mod locators; +mod tokens; +mod translators; + +pub use locators::*; +pub use tokens::*; +pub use translators::*; + +mod print; +pub use print::*; diff --git a/src/locators.rs b/src/locators.rs new file mode 100644 index 0000000..b7db1ee --- /dev/null +++ b/src/locators.rs @@ -0,0 +1,5 @@ +mod bytecode; +mod source; + +pub use bytecode::*; +pub use source::*; diff --git a/src/locators/bytecode.rs b/src/locators/bytecode.rs new file mode 100644 index 0000000..500e9f0 --- /dev/null +++ b/src/locators/bytecode.rs @@ -0,0 +1,39 @@ +pub struct BytecodeSpan { +    /// The location of this span in the assembled bytecode. +    pub location: BytecodeLocation, +    /// The bytes which this span represents. 
+    pub bytes: Vec<u8>, +} + + +impl Default for BytecodeSpan { +    fn default() -> Self { +        Self { +            location: BytecodeLocation { +                address: 0, +                length: 0, +            }, +            bytes: Vec::new(), +        } +    } +} + + +#[derive(Clone, Copy)] +pub struct BytecodeLocation { +    // Address of the first byte. +    pub address: usize, +    // Length as a number of bytes. +    pub length: usize, +} + + +impl std::fmt::Display for BytecodeLocation { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        write!(f, "0x{:>04x}", self.address)?; +        if self.length > 0 { +            write!(f, "-0x{:>04x}", self.address + self.length)?; +        } +        Ok(()) +    } +} diff --git a/src/locators/source.rs b/src/locators/source.rs new file mode 100644 index 0000000..2f10bd9 --- /dev/null +++ b/src/locators/source.rs @@ -0,0 +1,69 @@ +use std::path::PathBuf; + + +#[derive(Clone)] +pub struct SourceSpan { +    /// The source characters which this span represents. +    pub string: String, +    /// The location of this span in the merged source file. +    pub in_merged: SourceLocation, +    /// The location of this span in the original source file. +    pub in_source: Option<SourceLocation>, +} + + +#[derive(Clone)] +pub struct SourceLocation { +    /// File path the source was loaded from. +    pub path: Option<PathBuf>, +    /// Position of the first character of the string. +    pub start: Position, +    /// Position of the final character of the string. 
+    pub end: Position, +} + +impl std::fmt::Display for SourceLocation { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let y = self.start.line + 1; +        let x = self.start.column + 1; +        match &self.path { +            Some(path) => write!(f, "{}:{y}:{x}", path.as_os_str().to_string_lossy()), +            None => write!(f, "<unknown>:{y}:{x}"), +        } +    } +} + + +#[derive(Clone, Copy)] +pub struct Position { +    /// The number of lines that precede this line in the file. +    pub line: usize, +    /// The number of characters that precede this character in the line. +    pub column: usize, +} + +impl Position { +    pub fn to_next_char(&mut self) { +        self.column += 1; +    } + +    pub fn to_next_line(&mut self) { +        self.line += 1; +        self.column = 0; +    } + +    pub fn advance(&mut self, c: char) { +        match c { +            '\n' => self.to_next_line(), +            _ => self.to_next_char(), +        } +    } +} + +impl std::fmt::Display for Position { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let y = self.line + 1; +        let x = self.column + 1; +        write!(f, "{y}:{x}") +    } +} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 11ce42b..0000000 --- a/src/main.rs +++ /dev/null @@ -1,43 +0,0 @@ -use std::io::{Read, Write}; -use bedrock_asm::*; - -fn main() { -    // Read source code from standard input -    let mut source_code = String::new(); -    if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { -        eprintln!("Could not read from standard input, quitting."); -        eprintln!("({err:?})"); -        std::process::exit(1); -    }; - -    let (bytecode, tokens) = assemble(&source_code); -    let mut is_error = false; -    for token in &tokens { -        if token.print_error(&source_code) { is_error = true }; -    } -    if !is_error { -        for token in &tokens { -          
  if let SemanticTokenType::LabelDefinition(def) = &token.r#type { -                if def.references.is_empty() { -                    eprintln!("Unused label definition: {}", def.name); -                } -            } -        } -        eprintln!(); -    } - -    let byte_count = bytecode.len(); -    let byte_percentage = (byte_count as f32 / 65536.0 * 100.0).round() as u16; -    eprintln!("Assembled program in {byte_count} bytes ({byte_percentage}% of maximum)."); - -    if is_error { -        std::process::exit(1) -    } - -    // Write bytecode to standard output -    if let Err(_) = std::io::stdout().write_all(&bytecode) { -        eprintln!("Could not write to standard output, quitting."); -        std::process::exit(1); -    } -} - diff --git a/src/print.rs b/src/print.rs new file mode 100644 index 0000000..7f49db2 --- /dev/null +++ b/src/print.rs @@ -0,0 +1,237 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; +use SemanticParseError as SemErr; +use SyntacticParseError as SynErr; + + +const NORMAL: &str = "\x1b[0m"; +const BOLD:   &str = "\x1b[1m"; +const DIM:    &str = "\x1b[2m"; +const WHITE:  &str = "\x1b[37m"; +const RED:    &str = "\x1b[31m"; +const YELLOW: &str = "\x1b[33m"; +const BLUE:   &str = "\x1b[34m"; + + +pub struct Context<'a> { +    pub source_code: &'a str, +    pub source: &'a SourceSpan, +} + + +/// Print all errors found in the semantic tokens, including those inside macro +/// definitions. Returns true if at least one error was printed. 
+pub fn print_semantic_errors(semantic_tokens: &[SemanticToken], source_code: &str) -> bool { +    let mut found_error = false; +    for semantic_token in semantic_tokens { +        match &semantic_token.variant { +            SemVar::Error(err) => { +                let context = Context { +                    source_code: source_code, +                    source: &semantic_token.source, +                }; +                found_error = true; +                print_semantic_error(&err, context) +            } +            SemVar::MacroDefinition(definition) => { +                for body_token in &definition.body_tokens { +                    if let SemVar::Error(err) = &body_token.variant { +                        let context = Context { +                            source_code: source_code, +                            source: &body_token.source, +                        }; +                        found_error = true; +                        print_semantic_error(err, context) +                    } +                } +            } +            _ => (), +        } +    } +    return found_error; +} + +fn print_semantic_error(error: &SemanticParseError, context: Context) { +    let message = get_message_for_semantic_error(error); +    print_error(&message, context); +} + +fn get_message_for_semantic_error(error: &SemanticParseError) -> String { +    match error { +        SemErr::LabelDefinitionInMacroDefinition => +            format!("Labels cannot be defined inside a macro"), +        SemErr::MacroDefinitionInMacroDefinition => +            format!("Macros cannot be defined inside a macro"), +        SemErr::StrayMacroTerminator => +            format!("Macro definition terminator is missing a macro definition"), +        SemErr::StrayBlockClose => +            format!("Block was not opened, add a '{{' character to open"), +        SemErr::UnclosedBlock => +            format!("Block was not closed, add a '}}' character to close"), +        
SemErr::UndefinedSymbol(name) => +            format!("Undefined symbol, no label or macro has been defined with the name '{name}'"), +        SemErr::RedefinedSymbol((_, source)) => { +            let location = source.in_source.as_ref().unwrap_or(&source.in_merged); +            format!("Redefined symbol, first defined at {location}") +        } +        SemErr::MacroInvocationBeforeDefinition((_, source)) => { +            let location = source.in_source.as_ref().unwrap_or(&source.in_merged); +            format!("Macro used before definition, definition is at {location}") +        } +        SemErr:: SyntaxError(syntax_error) => match syntax_error { +            SynErr::UnterminatedComment => +                format!("Unclosed comment, add a ')' character to close"), +            SynErr::UnterminatedRawString => +                format!("Unclosed string, add a ' character to close"), +            SynErr::UnterminatedNullString => +                format!("Unclosed string, add a \" character to close"), +            SynErr::InvalidPaddingValue(_) => +                format!("Padding value must be two or four hexidecimal digits"), +        } +    } +} + + +pub fn print_resolver_errors(resolver: &SymbolResolver) -> bool { +    let mut found_error = false; +    for reference in &resolver.unresolved { +        found_error = true; +        let message = format!( +            "Undefined symbol, no label or macro has been defined with the name '{}'", +            &reference.symbol.source.string, +        ); +        let source_code = resolver.get_source_code_for_tracked_symbol(reference); +        let source =  &reference.symbol.source; +        print_error(&message, Context { source_code, source } ) +    } +    for redefinition in &resolver.redefinitions { +        found_error = true; +        let definition = resolver.definitions.get(redefinition.1).unwrap(); +        let message = format!( +            "Redefined symbol, first defined at {}", +            
&definition.symbol.source.in_merged, +        ); +        let source_code = resolver.get_source_code_for_tracked_symbol(&redefinition.0); +        let source = &redefinition.0.symbol.source; +        print_error(&message, Context { source_code, source } ) +    } +    return found_error; +} + + + +pub fn print_error(message: &str, context: Context) { +    print_source_issue(message, context, SourceIssueVariant::Error); +} + +pub fn print_warning(message: &str, context: Context) { +    print_source_issue(message, context, SourceIssueVariant::Warning); +} + +fn print_source_issue(message: &str, context: Context, variant: SourceIssueVariant) { +    let (label, colour) = match variant { +        SourceIssueVariant::Warning => ("WARNING", YELLOW), +        SourceIssueVariant::Error => ("ERROR", RED), +    }; + +    // Prepare variables. +    let location = &context.source.in_merged; +    let digits = location.start.line.to_string().len(); +    let y = location.start.line + 1; +    let arrow = "-->"; +    let space = " "; + +    // Print message and file path. +    eprintln!("{BOLD}{colour}[{label}]{WHITE}: {message}{NORMAL}"); +    eprintln!("{BLUE}{arrow:>w$}{NORMAL} {location}{NORMAL}", w=digits+3); +    if let Some(source) = &context.source.in_source { +        eprintln!("{BLUE}{arrow:>w$}{NORMAL} {source}{NORMAL}", w=digits+3); +    } + +    let start = location.start.column; +    let end = location.end.column + 1; + +    // Print source code line. +    eprint!("{BLUE} {y} | {NORMAL}"); +    let line = get_line_from_source_code(context.source_code, location.start.line); +    for (i, c) in line.chars().enumerate() { +        if i == start { eprint!("{colour}") } +        if i == end { eprint!("{NORMAL}") } +        eprint!("{c}"); +    } +    eprintln!("{NORMAL}"); + +    // Print source code underline. 
+    eprint!("{BLUE} {space:>w$} | {NORMAL}", w=digits); +    for _ in 0..start { eprint!(" "); } +    eprint!("{colour}"); +    for _ in start..end { eprint!("^"); } +    eprintln!("{NORMAL}"); +} + + +fn get_line_from_source_code(source_code: &str, line: usize) -> &str { +    source_code.split('\n').nth(line).unwrap_or("<error reading line from source>") +} + + +enum SourceIssueVariant { +    Warning, +    Error, +} + + +/// Print a tree containing the name and path of each source unit. +pub fn print_source_tree(resolver: &SymbolResolver) { +    eprintln!("."); +    let len = resolver.root_unit_ids.len(); +    for (i, id) in resolver.root_unit_ids.iter().enumerate() { +        let end = i + 1 == len; +        print_source_tree_leaf(resolver, *id, Vec::new(), end); +    } +    eprintln!(); +} + +fn print_source_tree_leaf(resolver: &SymbolResolver, id: usize, mut levels: Vec<bool>, end: bool) { +    // A level entry is true if all entries in that level have been printed. +    for level in &levels { +        match level { +            false => eprint!("│   "), +            true  => eprint!("    "), +        } +    } +    // The end value is true if all siblings of this entry have been printed. 
+    match end { +            false => eprint!("├── "), +            true  => eprint!("└── "), +    } +    if let Some(unit) = resolver.source_units.get(id) { +        let path = &unit.source_unit.main.path; +        let path_str = path.as_os_str().to_string_lossy(); +        if let Some(name) = path.file_name() { +            let name_str = name.to_string_lossy(); +            eprint!("{name_str}{BLUE}"); +            if unit.source_unit.head.is_some() { eprint!(" +head") } +            if unit.source_unit.tail.is_some() { eprint!(" +tail") } +            let mut unresolved = 0; +            for symbol in &resolver.unresolved { +                if symbol.source_id == id { unresolved += 1; } +            } +            if unresolved > 0 { eprint!("{RED} ({unresolved})"); } +            eprintln!("{NORMAL} {DIM}({path_str}){NORMAL}"); +        } else { +            eprintln!("{path_str}"); +        }; +        levels.push(end); +        let len = unit.child_ids.len(); +        for (i, id) in unit.child_ids.iter().enumerate() { +            let end = i + 1 == len; +            print_source_tree_leaf(resolver, *id, levels.clone(), end); +        } +    } else { +        eprintln!("<error loading source unit details>"); +    } +} + + diff --git a/src/semantic_token.rs b/src/semantic_token.rs deleted file mode 100644 index 265db91..0000000 --- a/src/semantic_token.rs +++ /dev/null @@ -1,116 +0,0 @@ -use crate::*; - -pub enum SemanticTokenType { -    LabelReference(usize), -    MacroReference(usize), - -    LabelDefinition(LabelDefinition), -    MacroDefinition(MacroDefinition), - -    Padding(u16), -    ByteLiteral(u8), -    ShortLiteral(u16), -    Instruction(u8), - -    MacroDefinitionTerminator, -    Comment, -    Error(SyntacticTokenType, Error), -} - -pub struct SemanticToken { -    pub r#type: SemanticTokenType, -    pub source_location: SourceLocation, -    pub bytecode_location: BytecodeLocation, -    pub parent_label: Option<String>, -} - -impl SemanticToken { 
-    /// Returns true if an error was printed. -    pub fn print_error(&self, source_code: &str) -> bool { -        let mut is_error = false; -        macro_rules! red {()=>{eprint!("\x1b[31m")};} -        macro_rules! dim {()=>{eprint!("\x1b[0;2m")};} -        macro_rules! normal {()=>{eprint!("\x1b[0m")};} - -        if let SemanticTokenType::Error(token, error) = &self.r#type { -            is_error = true; - -            red!(); eprint!("[ERROR] "); normal!(); -            let source = &self.source_location.source; -            match error { -                Error::UnresolvedReference => { -                    eprintln!("Unresolved reference, no label or macro has been defined with the name '{source}'") } -                Error::DuplicateDefinition => { -                    eprintln!("Duplicate definition, a label or macro has already been defined with the name '{source}'") } -                Error::OrphanedMacroDefinitionTerminator => { -                    eprintln!("Unmatched macro definition terminator, no macro definition is in progress") } -                Error::InvalidPaddingValue => { -                    eprintln!("Invalid value for padding, the value must be at least one and at most four hexadecimal characters") } -                Error::CyclicMacroReference => { -                    eprintln!("Cyclic macro reference, this macro reference contains a reference to the macro being defined") } -                Error::InvalidTypeInMacroDefinition => { -                    let name = match token { -                        SyntacticTokenType::Reference(_) => "references", -                        SyntacticTokenType::LabelDefinition(_) => "label definitions", -                        SyntacticTokenType::MacroDefinition(_) => "macro definitions", -                        SyntacticTokenType::MacroDefinitionTerminator => "macro definition terminators", -                        SyntacticTokenType::Padding(_) => "padding", -                        
SyntacticTokenType::ByteLiteral(_) => "byte literals", -                        SyntacticTokenType::ShortLiteral(_) => "short literals", -                        SyntacticTokenType::Instruction(_) => "instructions", -                        SyntacticTokenType::Comment => "comments", -                    }; -                    eprintln!("Invalid token in macro definition, macro definitions are not allowed to contain {name}") } -            } - -            if let Some(label) = &self.parent_label { -                eprint!("  ... "); red!(); eprint!("| "); dim!(); eprintln!("@{label} "); normal!(); -            } - -            let line = source_code.split('\n').nth(self.source_location.start.line).unwrap(); -            eprint!("{:>5} ", self.source_location.start.line+1); -            red!(); eprint!("| "); normal!(); -            for (i, c) in line.chars().enumerate() { -                if i == self.source_location.start.column { red!() } -                eprint!("{c}"); -                if i == self.source_location.end.column { normal!() } -            } -            eprintln!(); red!(); eprint!("      | "); -            for i in 0..=self.source_location.end.column { -                if i < self.source_location.start.column { eprint!(" ") } else { eprint!("^") }; -            } -            normal!(); eprintln!(); -        } -        else if let SemanticTokenType::MacroDefinition(definition) = &self.r#type { -            for token in &definition.body_tokens { -                if token.print_error(source_code) { is_error = true } -            } -        } -        is_error -    } -} - -pub struct LabelDefinition { -    pub name: String, -    pub address: u16, -    /// A list of pointers to label reference tokens -    pub references: Vec<usize>, -} -impl LabelDefinition { -    pub fn new(name: String) -> Self { -        Self { name, address:0, references:Vec::new() } -    } -} - -pub struct MacroDefinition { -    pub name: String, -    pub body_tokens: 
Vec<SemanticToken>, -    /// A list of pointers to macro reference tokens -    pub references: Vec<usize>, -} -impl MacroDefinition { -    pub fn new(name: String) -> Self { -        Self { name, body_tokens:Vec::new(), references:Vec::new() } -    } -} - diff --git a/src/symbol_resolver.rs b/src/symbol_resolver.rs new file mode 100644 index 0000000..cced994 --- /dev/null +++ b/src/symbol_resolver.rs @@ -0,0 +1,230 @@ +use crate::*; + +use std::mem::take; + + +/// Resolve symbol references across source units. +pub struct SymbolResolver { +    pub definitions: Vec<TrackedSymbol>, +    pub unresolved: Vec<TrackedSymbol>, +    /// Contains the ID of the owner of the original definition. +    pub redefinitions: Vec<(TrackedSymbol, usize)>, +    pub source_units: Vec<HeirarchicalSourceUnit>, +    pub root_unit_ids: Vec<usize>, +    pub unused_library_units: Vec<SourceUnit>, +} + + +impl SymbolResolver { +    /// Construct a resolver from a root source unit. +    pub fn from_source_unit(source_unit: SourceUnit) -> Self { +        let mut new = Self { +            definitions: Vec::new(), +            unresolved: Vec::new(), +            redefinitions: Vec::new(), +            source_units: Vec::new(), +            root_unit_ids: Vec::new(), +            unused_library_units: Vec::new(), +        }; +        new.add_source_unit(source_unit, None); +        return new; +    } + +    pub fn add_library_units(&mut self, mut source_units: Vec<SourceUnit>) { +        self.unused_library_units.append(&mut source_units); +    } + +    pub fn resolve(&mut self) { +        // Repeatedly test if any unused source unit resolves an unresolved symbol, +        // breaking the loop when no new resolutions are found. 
+        'outer: loop { +            for (i, source_unit) in self.unused_library_units.iter().enumerate() { +                if let Some(id) = self.resolves_reference(&source_unit) { +                    let source_unit = self.unused_library_units.remove(i); +                    self.add_source_unit(source_unit, Some(id)); +                    continue 'outer; +                } +            } +            break; +        } +    } + +    /// Add a source unit to the resolver and link it to a parent unit. +    pub fn add_source_unit(&mut self, mut source_unit: SourceUnit, parent_id: Option<usize>) { +        let source_id = self.source_units.len(); + +        // Add all main symbols. +        if let Some(definitions) = take(&mut source_unit.main.symbols.definitions) { +            self.add_definitions(definitions, source_id, SourceRole::Main); } +        if let Some(references) = take(&mut source_unit.main.symbols.references) { +            self.add_references(references, source_id, SourceRole::Main); } + +        // Add all head symbols. +        if let Some(head) = &mut source_unit.head { +            if let Some(references) = take(&mut head.symbols.references) { +                self.add_references(references, source_id, SourceRole::Head); } +            if let Some(definitions) = take(&mut head.symbols.definitions) { +                self.add_definitions(definitions, source_id, SourceRole::Head); } +        } + +        // Add all tail symbols. 
+        if let Some(tail) = &mut source_unit.tail { +            if let Some(references) = take(&mut tail.symbols.references) { +                self.add_references(references, source_id, SourceRole::Tail); } +            if let Some(definitions) = take(&mut tail.symbols.definitions) { +                self.add_definitions(definitions, source_id, SourceRole::Tail); } +        } + +        if let Some(parent_id) = parent_id { +            if let Some(parent_unit) = self.source_units.get_mut(parent_id) { +                parent_unit.child_ids.push(source_id); +            } +        } else { +            self.root_unit_ids.push(source_id); +        } + +        let source_unit = HeirarchicalSourceUnit { source_unit, child_ids: Vec::new() }; +        self.source_units.push(source_unit); +    } + +    fn add_references(&mut self, references: Vec<Symbol>, source_id: usize, source_role: SourceRole) { +        for symbol in references { +            let reference = TrackedSymbol { symbol, source_id, source_role }; +            if !self.definitions.contains(&reference) { +                self.unresolved.push(reference); +            } +        } +    } + +    fn add_definitions(&mut self, definitions: Vec<Symbol>, source_id: usize, source_role: SourceRole) { +        for symbol in definitions { +            let predicate = |d: &&TrackedSymbol| { &d.symbol.name == &symbol.name }; +            if let Some(def) = self.definitions.iter().find(predicate) { +                let definition = TrackedSymbol { symbol, source_id, source_role }; +                let redefinition = (definition, def.source_id); +                self.redefinitions.push(redefinition); +            } else { +                self.unresolved.retain(|s| s.symbol.name != symbol.name); +                let definition = TrackedSymbol { symbol, source_id, source_role }; +                self.definitions.push(definition); +            } +        } +    } + +    /// Returns the ID of the owner of a symbol resolved 
by this unit. +    pub fn resolves_reference(&self, source_unit: &SourceUnit) -> Option<usize> { +        if let Some(definitions) = &source_unit.main.symbols.definitions { +            if let Some(id) = self.source_id_of_unresolved(&definitions) { +                return Some(id); +            } +        } +        if let Some(head) = &source_unit.head { +            if let Some(definitions) = &head.symbols.definitions { +                if let Some(id) = self.source_id_of_unresolved(&definitions) { +                    return Some(id); +                } +            } +        } +        if let Some(tail) = &source_unit.tail { +            if let Some(definitions) = &tail.symbols.definitions { +                if let Some(id) = self.source_id_of_unresolved(&definitions) { +                    return Some(id); +                } +            } +        } +        return None; +    } + +    /// Returns the ID of the owner of a reference to one of these symbols. +    fn source_id_of_unresolved(&self, symbols: &[Symbol]) -> Option<usize> { +        for symbol in symbols { +            let opt = self.unresolved.iter().find(|s| s.symbol.name == symbol.name); +            if let Some(unresolved) = opt { +                return Some(unresolved.source_id); +            } +        } +        return None; +    } + +    pub fn get_source_code_for_tracked_symbol(&self, symbol: &TrackedSymbol) -> &str { +        let source_unit = &self.source_units[symbol.source_id].source_unit; +        match symbol.source_role { +            SourceRole::Main => source_unit.main.symbols.source_code.as_str(), +            SourceRole::Head => match &source_unit.head { +                Some(head) => head.symbols.source_code.as_str(), +                None => unreachable!("Failed to find source for token"), +            } +            SourceRole::Tail => match &source_unit.tail { +                Some(tail) => tail.symbols.source_code.as_str(), +                None => unreachable!("Failed to 
find source for token"), +            } +        } +    } + +    /// Create a source file by concatenating all source units. +    pub fn get_merged_source_code(&self) -> String { +        // The first source unit is guaranteed to be the root unit, so we can +        // just push source files in their current order. +        let mut source_code = String::new(); + +        // Push head source code. +        for source_unit in self.source_units.iter().rev() { +            if let Some(head) = &source_unit.source_unit.head { +                push_source_code_to_string(&mut source_code, head); +            } +        } +        // Push main source code. +        for source_unit in self.source_units.iter() { +            push_source_code_to_string(&mut source_code, &source_unit.source_unit.main); +        } +        // Push tail source code. +        for source_unit in self.source_units.iter().rev() { +            if let Some(tail) = &source_unit.source_unit.tail { +                push_source_code_to_string(&mut source_code, tail); +            } +        } +        return source_code; +    } +} + + +fn push_source_code_to_string(string: &mut String, source_file: &SourceFile) { +    // Ensure that sections are separated by two newlines. +    if !string.is_empty() { +        if !string.ends_with('\n') { string.push('\n'); } +        if !string.ends_with("\n\n") { string.push('\n'); } +    } +    // Write a path comment to the string. 
+    let path_str = source_file.path.as_os_str().to_string_lossy(); +    let path_comment = format!("(: {path_str} )\n"); +    string.push_str(&path_comment); +    string.push_str(&source_file.symbols.source_code); +} + + +pub struct HeirarchicalSourceUnit { +    pub source_unit: SourceUnit, +    pub child_ids: Vec<usize>, +} + + +pub struct TrackedSymbol { +    pub symbol: Symbol, +    pub source_id: usize, +    pub source_role: SourceRole, +} + + +#[derive(Clone, Copy)] +pub enum SourceRole { +    Main, +    Head, +    Tail, +} + + +impl PartialEq for TrackedSymbol { +    fn eq(&self, other: &TrackedSymbol) -> bool { +        self.symbol.name.eq(&other.symbol.name) +    } +} diff --git a/src/syntactic_token.rs b/src/syntactic_token.rs deleted file mode 100644 index 4a50e8a..0000000 --- a/src/syntactic_token.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::*; - -pub enum SyntacticTokenType { -    Reference(String), - -    LabelDefinition(String), -    MacroDefinition(String), -    MacroDefinitionTerminator, - -    Padding(u16), -    ByteLiteral(u8), -    ShortLiteral(u16), -    Instruction(u8), - -    Comment, -} - - - -pub struct SyntacticToken { -    pub r#type: SyntacticTokenType, -    pub source_location: SourceLocation, -    pub error: Option<Error>, -} - -impl SyntacticToken { -    // Call when this token is found inside a macro definition. -    pub fn use_in_macro_body(&mut self) { -        match self.r#type { -            SyntacticTokenType::LabelDefinition(..) | -            SyntacticTokenType::MacroDefinition(..) 
=> { -                self.set_error(Error::InvalidTypeInMacroDefinition) -            } -            _ => (), -        }; -    } -    pub fn set_error(&mut self, error: Error) { -        self.error = Some(error); -    } -    pub fn is_macro_terminator(&self) -> bool { -        if let SyntacticTokenType::MacroDefinitionTerminator = self.r#type {true} else {false} -    } -} diff --git a/src/tokenizer.rs b/src/tokenizer.rs deleted file mode 100644 index 02bf490..0000000 --- a/src/tokenizer.rs +++ /dev/null @@ -1,235 +0,0 @@ -use std::mem::take; -use crate::*; - -#[derive(PartialEq)] -enum StringLiteral { -    None, -    Raw, -    NullTerminated, -} - -pub struct TokenIterator { -    /// The characters that comprise the program souce code. -    chars: Vec<char>, -    /// The index of the next character to read. -    i: usize, -    /// The address of the next character to read. -    addr: CharAddress, -    /// If true, skip over any whitespace characters. If false, stop reading -    /// when a whitespace character is encountered. -    skip_whitespace: bool, -    /// The name of the most recently defined label. -    label: String, -    /// If not None, each individual character will be tokenised as a ByteLiteral. -    parse_string_literal: StringLiteral, - - -    /// The address of the first character of the current token. -    start: CharAddress, -    /// The address of the final character of the current token. -    end: CharAddress, -    /// The entire current token. -    source: String, -    /// The first character of the current token. -    prefix: char, -    /// The second and remaining characters of the current token. -    suffix: String, -} - -impl TokenIterator { -    /// Create an iterator from a string of program source code. 
-    pub fn from_str(source_code: &str) -> Self { -        Self { -            chars: source_code.chars().collect(), -            i: 0, -            addr: CharAddress::zero(), -            skip_whitespace: true, -            parse_string_literal: StringLiteral::None, -            label: String::new(), -            start: CharAddress::zero(), -            end: CharAddress::zero(), -            source: String::new(), -            prefix: ' ', -            suffix: String::new(), -        } -    } -    /// Append a character to the current token. -    fn push(&mut self, c:char) { -        self.end = self.addr; -        self.source.push(c); -        self.suffix.push(c); -        self.next(c); -    } -    /// Move forward to the next source character. -    fn next(&mut self, c: char) { -        self.addr.column += 1; -        self.i += 1; -        if c == '\n' { -            self.addr.column = 0; -            self.addr.line += 1; -        } -    } -    /// Mark the current character as being the first character of a new token. 
-    fn mark_start(&mut self, c:char) { -        if c == '"' { -            self.parse_string_literal = StringLiteral::NullTerminated; -        } else if c == '\'' { -            self.parse_string_literal = StringLiteral::Raw; -        } else { -            self.start=self.addr; -            self.end=self.addr; -            self.prefix=c; -            self.source.push(c); -            self.skip_whitespace=false; -        } -        self.next(c); -    } -} - -impl Iterator for TokenIterator { -    type Item = SyntacticToken; - -    fn next(&mut self) -> Option<SyntacticToken> { -        // Initialise values before reading the next token -        let mut is_comment = false; -        self.skip_whitespace = true; - -        // Iterate over source characters until a full token is read -        while let Some(c) = self.chars.get(self.i) { -            let c = *c; -            // Parse individual characters from a string literal -            if self.parse_string_literal != StringLiteral::None { -                if c == '"' && self.parse_string_literal == StringLiteral::NullTerminated  { -                    self.parse_string_literal = StringLiteral::None; -                    let token = SyntacticToken { -                        r#type: SyntacticTokenType::ByteLiteral(0), -                        source_location: SourceLocation { -                            source: c.to_string(), start:self.addr, end:self.addr }, -                        error: None, -                    }; -                    self.next(c); -                    return Some(token); -                } else if c == '\'' && self.parse_string_literal == StringLiteral::Raw  { -                    self.parse_string_literal = StringLiteral::None; -                    self.next(c); -                    continue -                } else { -                    self.next(c); -                    return Some(SyntacticToken { -                        r#type: SyntacticTokenType::ByteLiteral(c as u8), -                  
      source_location: SourceLocation { -                            source: c.to_string(), start:self.addr, end:self.addr }, -                        error: None, -                    }); -                } -            } -            // Intercept comments -            if is_comment { -                self.push(c); if c == ')' { break } else { continue }; } -            else if self.skip_whitespace && c == '(' { -                is_comment = true; self.mark_start(c); continue } - -            // Allow a semicolon at the end of a token to be handled as a separate token -            if self.source.len() > 0 && c == ';' { break } -            // Handle the current character -            match (is_whitespace(c), self.skip_whitespace) { -                (true, true) => self.next(c),        // c is the expected leading whitespace -                (false, true) => self.mark_start(c), // c is the first character of the token -                (false, false) => self.push(c),      // c is a character of the token -                (true, false) => break,              // c is trailing whitespace -            } -            // Allow literal values to be chained to the end of the previous token -            if self.source.len() > 0 && c == ':' { break } -        } - -        // If no source characters were grabbed then we have read through the entire source file -        if self.source.len() == 0 { return None; } -        // Allow handling macro terminators and symbols of length 1 in the match expression -        if self.suffix.len() == 0 { self.prefix = '\0'; } -        // Consume the collected characters to be used in the match expression -        let full = take(&mut self.source); -        let suffix = take(&mut self.suffix); -        let mut error = None; -        let mut parse_padding_value = |v| { -            parse_short(v).or_else(|| { -                error = Some(Error::InvalidPaddingValue); Some(0) -            }).unwrap() -        }; - -        let r#type = match 
self.prefix { -            '(' => { SyntacticTokenType::Comment } -            '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) } -            '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) } -            '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) } -            '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) } -            '%' => if let Some(("", sublabel)) = suffix.split_once("~") { -                SyntacticTokenType::MacroDefinition(format!("{}/{}", self.label, sublabel)) -            } else { -                SyntacticTokenType::MacroDefinition(suffix) -            } -            _ => { -                if ";" == &full                                    { SyntacticTokenType::MacroDefinitionTerminator } -                else if let Some(value) = parse_byte_lit(&full)    { SyntacticTokenType::ByteLiteral(value) } -                else if let Some(value) = parse_short_lit(&full)   { SyntacticTokenType::ShortLiteral(value) } -                else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) } -                else                                               { SyntacticTokenType::Reference(full.clone()) } -            } -        }; -        Some(SyntacticToken { -            r#type, -            source_location:SourceLocation::new(full,self.start,self.end), -            error, -        }) -    } -} - - -fn parse_byte_lit(token: &str) -> Option<u8> { -    match token.len() { 2 => u8::from_str_radix(token, 16).ok(), _ => None } } -fn parse_short_lit(token: &str) -> Option<u16> { -    match token.len() { 4 => u16::from_str_radix(token, 16).ok(), _ => None } } -fn parse_short(token: &str) -> Option<u16> { -    match token.len() { 1..=4 => u16::from_str_radix(token, 16).ok(), _ => None } } -fn is_whitespace(c: char) -> bool { -    match c { ' '|'\t'|'\n'|'\r'|'['|']'|'(' =>true, _=>false } } -fn 
parse_instruction(token: &str) -> Option<u8> { -    Some(match token { -        // Control operators -        "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2"  =>0x60,"DB3" =>0x80,"DB4"  =>0xA0,"DB5"  =>0xC0,"DB6"   =>0xE0, -        "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1, -        "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2, -        "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3, -        "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4, -        "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5, -        "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6, -        "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7, -        // Stack operators -        "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8, -        "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9, -        "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA, -        "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB, -        "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC, -        "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED, -        "SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE, -        
"ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF, -        // Numeric operators -        "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0, -        "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1, -        "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2, -        "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3, -        "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4, -        "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5, -        "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6, -        "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7, -        // Bitwise operators -        "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8, -        "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9, -        "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA, -        "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB, -        "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC, -        "SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD, -        "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE, -        
"REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF, -        _ => return None, -    }) -} diff --git a/src/tokens.rs b/src/tokens.rs new file mode 100644 index 0000000..81bf9d5 --- /dev/null +++ b/src/tokens.rs @@ -0,0 +1,9 @@ +mod syntactic; +mod semantic; +mod instruction; +mod value; + +pub use syntactic::*; +pub use semantic::*; +pub use instruction::*; +pub use value::*; diff --git a/src/tokens/instruction.rs b/src/tokens/instruction.rs new file mode 100644 index 0000000..d5fb3e5 --- /dev/null +++ b/src/tokens/instruction.rs @@ -0,0 +1,170 @@ +use Operation as Op; + + +pub struct Instruction { +    pub value: u8, +} + + +impl Instruction { +    pub fn operation(&self) -> Operation { +        match self.value & 0x1f { +            0x00=>Op::HLT, 0x01=>Op::JMP, 0x02=>Op::JCN, 0x03=>Op::JCK, +            0x04=>Op::LDA, 0x05=>Op::STA, 0x06=>Op::LDD, 0x07=>Op::STD, +            0x08=>Op::PSH, 0x09=>Op::POP, 0x0a=>Op::CPY, 0x0b=>Op::SPL, +            0x0c=>Op::DUP, 0x0d=>Op::OVR, 0x0e=>Op::SWP, 0x0f=>Op::ROT, +            0x10=>Op::ADD, 0x11=>Op::SUB, 0x12=>Op::INC, 0x13=>Op::DEC, +            0x14=>Op::LTH, 0x15=>Op::GTH, 0x16=>Op::EQU, 0x17=>Op::NQK, +            0x18=>Op::IOR, 0x19=>Op::XOR, 0x1a=>Op::AND, 0x1b=>Op::NOT, +            0x1c=>Op::SHF, 0x1d=>Op::SHC, 0x1e=>Op::TAL, 0x1f=>Op::REV, +            _ => unreachable!(), +        } +    } + +    pub fn return_mode(&self) -> bool { +        self.value & 0x80 != 0 +    } + +    pub fn literal_mode(&self) -> bool { +        self.value & 0x40 != 0 +    } + +    pub fn double_mode(&self) -> bool { +        self.value & 0x20 != 0 +    } +} + + +impl std::fmt::Display for Instruction { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +       write!(f, "{}", match self.value { +           // Control operators +            0x00=>"HLT",0x20=>"NOP" ,0x40=>"DB1" ,0x60=>"DB2"  ,0x80=>"DB3" ,0xA0=>"DB4"  ,0xC0=>"DB5"  ,0xE0=>"DB6"   , 
+            0x01=>"JMP",0x21=>"JMS" ,0x41=>"JMP:",0x61=>"JMS:" ,0x81=>"JMPr",0xA1=>"JMSr" ,0xC1=>"JMPr:",0xE1=>"JMSr:" , +            0x02=>"JCN",0x22=>"JCS" ,0x42=>"JCN:",0x62=>"JCS:" ,0x82=>"JCNr",0xA2=>"JCSr" ,0xC2=>"JCNr:",0xE2=>"JCSr:" , +            0x03=>"JCK",0x23=>"JCK*",0x43=>"JCK:",0x63=>"JCK*:",0x83=>"JCKr",0xA3=>"JCKr*",0xC3=>"JCKr:",0xE3=>"JCKr*:", +            0x04=>"LDA",0x24=>"LDA*",0x44=>"LDA:",0x64=>"LDA*:",0x84=>"LDAr",0xA4=>"LDAr*",0xC4=>"LDAr:",0xE4=>"LDAr*:", +            0x05=>"STA",0x25=>"STA*",0x45=>"STA:",0x65=>"STA*:",0x85=>"STAr",0xA5=>"STAr*",0xC5=>"STAr:",0xE5=>"STAr*:", +            0x06=>"LDD",0x26=>"LDD*",0x46=>"LDD:",0x66=>"LDD*:",0x86=>"LDDr",0xA6=>"LDDr*",0xC6=>"LDDr:",0xE6=>"LDDr*:", +            0x07=>"STD",0x27=>"STD*",0x47=>"STD:",0x67=>"STD*:",0x87=>"STDr",0xA7=>"STDr*",0xC7=>"STDr:",0xE7=>"STDr*:", +            // Stack operators +            0x08=>"PSH",0x28=>"PSH*",0x48=>"PSH:",0x68=>"PSH*:",0x88=>"PSHr",0xA8=>"PSHr*",0xC8=>"PSHr:",0xE8=>"PSHr*:", +            0x09=>"POP",0x29=>"POP*",0x49=>"POP:",0x69=>"POP*:",0x89=>"POPr",0xA9=>"POPr*",0xC9=>"POPr:",0xE9=>"POPr*:", +            0x0A=>"CPY",0x2A=>"CPY*",0x4A=>"CPY:",0x6A=>"CPY*:",0x8A=>"CPYr",0xAA=>"CPYr*",0xCA=>"CPYr:",0xEA=>"CPYr*:", +            0x0B=>"SPL",0x2B=>"SPL*",0x4B=>"SPL:",0x6B=>"SPL*:",0x8B=>"SPLr",0xAB=>"SPLr*",0xCB=>"SPLr:",0xEB=>"SPLr*:", +            0x0C=>"DUP",0x2C=>"DUP*",0x4C=>"DUP:",0x6C=>"DUP*:",0x8C=>"DUPr",0xAC=>"DUPr*",0xCC=>"DUPr:",0xEC=>"DUPr*:", +            0x0D=>"OVR",0x2D=>"OVR*",0x4D=>"OVR:",0x6D=>"OVR*:",0x8D=>"OVRr",0xAD=>"OVRr*",0xCD=>"OVRr:",0xED=>"OVRr*:", +            0x0E=>"SWP",0x2E=>"SWP*",0x4E=>"SWP:",0x6E=>"SWP*:",0x8E=>"SWPr",0xAE=>"SWPr*",0xCE=>"SWPr:",0xEE=>"SWPr*:", +            0x0F=>"ROT",0x2F=>"ROT*",0x4F=>"ROT:",0x6F=>"ROT*:",0x8F=>"ROTr",0xAF=>"ROTr*",0xCF=>"ROTr:",0xEF=>"ROTr*:", +            // Numeric operators +            
0x10=>"ADD",0x30=>"ADD*",0x50=>"ADD:",0x70=>"ADD*:",0x90=>"ADDr",0xB0=>"ADDr*",0xD0=>"ADDr:",0xF0=>"ADDr*:", +            0x11=>"SUB",0x31=>"SUB*",0x51=>"SUB:",0x71=>"SUB*:",0x91=>"SUBr",0xB1=>"SUBr*",0xD1=>"SUBr:",0xF1=>"SUBr*:", +            0x12=>"INC",0x32=>"INC*",0x52=>"INC:",0x72=>"INC*:",0x92=>"INCr",0xB2=>"INCr*",0xD2=>"INCr:",0xF2=>"INCr*:", +            0x13=>"DEC",0x33=>"DEC*",0x53=>"DEC:",0x73=>"DEC*:",0x93=>"DECr",0xB3=>"DECr*",0xD3=>"DECr:",0xF3=>"DECr*:", +            0x14=>"LTH",0x34=>"LTH*",0x54=>"LTH:",0x74=>"LTH*:",0x94=>"LTHr",0xB4=>"LTHr*",0xD4=>"LTHr:",0xF4=>"LTHr*:", +            0x15=>"GTH",0x35=>"GTH*",0x55=>"GTH:",0x75=>"GTH*:",0x95=>"GTHr",0xB5=>"GTHr*",0xD5=>"GTHr:",0xF5=>"GTHr*:", +            0x16=>"EQU",0x36=>"EQU*",0x56=>"EQU:",0x76=>"EQU*:",0x96=>"EQUr",0xB6=>"EQUr*",0xD6=>"EQUr:",0xF6=>"EQUr*:", +            0x17=>"NQK",0x37=>"NQK*",0x57=>"NQK:",0x77=>"NQK*:",0x97=>"NQKr",0xB7=>"NQKr*",0xD7=>"NQKr:",0xF7=>"NQKr*:", +            // Bitwise operators +            0x18=>"IOR",0x38=>"IOR*",0x58=>"IOR:",0x78=>"IOR*:",0x98=>"IORr",0xB8=>"IORr*",0xD8=>"IORr:",0xF8=>"IORr*:", +            0x19=>"XOR",0x39=>"XOR*",0x59=>"XOR:",0x79=>"XOR*:",0x99=>"XORr",0xB9=>"XORr*",0xD9=>"XORr:",0xF9=>"XORr*:", +            0x1A=>"AND",0x3A=>"AND*",0x5A=>"AND:",0x7A=>"AND*:",0x9A=>"ANDr",0xBA=>"ANDr*",0xDA=>"ANDr:",0xFA=>"ANDr*:", +            0x1B=>"NOT",0x3B=>"NOT*",0x5B=>"NOT:",0x7B=>"NOT*:",0x9B=>"NOTr",0xBB=>"NOTr*",0xDB=>"NOTr:",0xFB=>"NOTr*:", +            0x1C=>"SHF",0x3C=>"SHF*",0x5C=>"SHF:",0x7C=>"SHF*:",0x9C=>"SHFr",0xBC=>"SHFr*",0xDC=>"SHFr:",0xFC=>"SHFr*:", +            0x1D=>"SHC",0x3D=>"SHC*",0x5D=>"SHC:",0x7D=>"SHC*:",0x9D=>"SHCr",0xBD=>"SHCr*",0xDD=>"SHCr:",0xFD=>"SHCr*:", +            0x1E=>"TAL",0x3E=>"TAL*",0x5E=>"TAL:",0x7E=>"TAL*:",0x9E=>"TALr",0xBE=>"TALr*",0xDE=>"TALr:",0xFE=>"TALr*:", +            0x1F=>"REV",0x3F=>"REV*",0x5F=>"REV:",0x7F=>"REV*:",0x9F=>"REVr",0xBF=>"REVr*",0xDF=>"REVr:",0xFF=>"REVr*:", +        }) +    } +} + + 
+impl std::str::FromStr for Instruction { +    type Err = (); + +    fn from_str(token: &str) -> Result<Self, Self::Err> { +        Ok( Instruction { value: match token { +           // Control operators +            "HLT"=>0x00,"NOP" =>0x20,"DB1" =>0x40,"DB2"  =>0x60,"DB3" =>0x80,"DB4"  =>0xA0,"DB5"  =>0xC0,"DB6"   =>0xE0, +            "JMP"=>0x01,"JMS" =>0x21,"JMP:"=>0x41,"JMS:" =>0x61,"JMPr"=>0x81,"JMSr" =>0xA1,"JMPr:"=>0xC1,"JMSr:" =>0xE1, +            "JCN"=>0x02,"JCS" =>0x22,"JCN:"=>0x42,"JCS:" =>0x62,"JCNr"=>0x82,"JCSr" =>0xA2,"JCNr:"=>0xC2,"JCSr:" =>0xE2, +            "JCK"=>0x03,"JCK*"=>0x23,"JCK:"=>0x43,"JCK*:"=>0x63,"JCKr"=>0x83,"JCKr*"=>0xA3,"JCKr:"=>0xC3,"JCKr*:"=>0xE3, +            "LDA"=>0x04,"LDA*"=>0x24,"LDA:"=>0x44,"LDA*:"=>0x64,"LDAr"=>0x84,"LDAr*"=>0xA4,"LDAr:"=>0xC4,"LDAr*:"=>0xE4, +            "STA"=>0x05,"STA*"=>0x25,"STA:"=>0x45,"STA*:"=>0x65,"STAr"=>0x85,"STAr*"=>0xA5,"STAr:"=>0xC5,"STAr*:"=>0xE5, +            "LDD"=>0x06,"LDD*"=>0x26,"LDD:"=>0x46,"LDD*:"=>0x66,"LDDr"=>0x86,"LDDr*"=>0xA6,"LDDr:"=>0xC6,"LDDr*:"=>0xE6, +            "STD"=>0x07,"STD*"=>0x27,"STD:"=>0x47,"STD*:"=>0x67,"STDr"=>0x87,"STDr*"=>0xA7,"STDr:"=>0xC7,"STDr*:"=>0xE7, +            // Stack operators +            "PSH"=>0x08,"PSH*"=>0x28,"PSH:"=>0x48,"PSH*:"=>0x68,"PSHr"=>0x88,"PSHr*"=>0xA8,"PSHr:"=>0xC8,"PSHr*:"=>0xE8, +            "POP"=>0x09,"POP*"=>0x29,"POP:"=>0x49,"POP*:"=>0x69,"POPr"=>0x89,"POPr*"=>0xA9,"POPr:"=>0xC9,"POPr*:"=>0xE9, +            "CPY"=>0x0A,"CPY*"=>0x2A,"CPY:"=>0x4A,"CPY*:"=>0x6A,"CPYr"=>0x8A,"CPYr*"=>0xAA,"CPYr:"=>0xCA,"CPYr*:"=>0xEA, +            "SPL"=>0x0B,"SPL*"=>0x2B,"SPL:"=>0x4B,"SPL*:"=>0x6B,"SPLr"=>0x8B,"SPLr*"=>0xAB,"SPLr:"=>0xCB,"SPLr*:"=>0xEB, +            "DUP"=>0x0C,"DUP*"=>0x2C,"DUP:"=>0x4C,"DUP*:"=>0x6C,"DUPr"=>0x8C,"DUPr*"=>0xAC,"DUPr:"=>0xCC,"DUPr*:"=>0xEC, +            "OVR"=>0x0D,"OVR*"=>0x2D,"OVR:"=>0x4D,"OVR*:"=>0x6D,"OVRr"=>0x8D,"OVRr*"=>0xAD,"OVRr:"=>0xCD,"OVRr*:"=>0xED, +            
"SWP"=>0x0E,"SWP*"=>0x2E,"SWP:"=>0x4E,"SWP*:"=>0x6E,"SWPr"=>0x8E,"SWPr*"=>0xAE,"SWPr:"=>0xCE,"SWPr*:"=>0xEE, +            "ROT"=>0x0F,"ROT*"=>0x2F,"ROT:"=>0x4F,"ROT*:"=>0x6F,"ROTr"=>0x8F,"ROTr*"=>0xAF,"ROTr:"=>0xCF,"ROTr*:"=>0xEF, +            // Numeric operators +            "ADD"=>0x10,"ADD*"=>0x30,"ADD:"=>0x50,"ADD*:"=>0x70,"ADDr"=>0x90,"ADDr*"=>0xB0,"ADDr:"=>0xD0,"ADDr*:"=>0xF0, +            "SUB"=>0x11,"SUB*"=>0x31,"SUB:"=>0x51,"SUB*:"=>0x71,"SUBr"=>0x91,"SUBr*"=>0xB1,"SUBr:"=>0xD1,"SUBr*:"=>0xF1, +            "INC"=>0x12,"INC*"=>0x32,"INC:"=>0x52,"INC*:"=>0x72,"INCr"=>0x92,"INCr*"=>0xB2,"INCr:"=>0xD2,"INCr*:"=>0xF2, +            "DEC"=>0x13,"DEC*"=>0x33,"DEC:"=>0x53,"DEC*:"=>0x73,"DECr"=>0x93,"DECr*"=>0xB3,"DECr:"=>0xD3,"DECr*:"=>0xF3, +            "LTH"=>0x14,"LTH*"=>0x34,"LTH:"=>0x54,"LTH*:"=>0x74,"LTHr"=>0x94,"LTHr*"=>0xB4,"LTHr:"=>0xD4,"LTHr*:"=>0xF4, +            "GTH"=>0x15,"GTH*"=>0x35,"GTH:"=>0x55,"GTH*:"=>0x75,"GTHr"=>0x95,"GTHr*"=>0xB5,"GTHr:"=>0xD5,"GTHr*:"=>0xF5, +            "EQU"=>0x16,"EQU*"=>0x36,"EQU:"=>0x56,"EQU*:"=>0x76,"EQUr"=>0x96,"EQUr*"=>0xB6,"EQUr:"=>0xD6,"EQUr*:"=>0xF6, +            "NQK"=>0x17,"NQK*"=>0x37,"NQK:"=>0x57,"NQK*:"=>0x77,"NQKr"=>0x97,"NQKr*"=>0xB7,"NQKr:"=>0xD7,"NQKr*:"=>0xF7, +            // Bitwise operators +            "IOR"=>0x18,"IOR*"=>0x38,"IOR:"=>0x58,"IOR*:"=>0x78,"IORr"=>0x98,"IORr*"=>0xB8,"IORr:"=>0xD8,"IORr*:"=>0xF8, +            "XOR"=>0x19,"XOR*"=>0x39,"XOR:"=>0x59,"XOR*:"=>0x79,"XORr"=>0x99,"XORr*"=>0xB9,"XORr:"=>0xD9,"XORr*:"=>0xF9, +            "AND"=>0x1A,"AND*"=>0x3A,"AND:"=>0x5A,"AND*:"=>0x7A,"ANDr"=>0x9A,"ANDr*"=>0xBA,"ANDr:"=>0xDA,"ANDr*:"=>0xFA, +            "NOT"=>0x1B,"NOT*"=>0x3B,"NOT:"=>0x5B,"NOT*:"=>0x7B,"NOTr"=>0x9B,"NOTr*"=>0xBB,"NOTr:"=>0xDB,"NOTr*:"=>0xFB, +            "SHF"=>0x1C,"SHF*"=>0x3C,"SHF:"=>0x5C,"SHF*:"=>0x7C,"SHFr"=>0x9C,"SHFr*"=>0xBC,"SHFr:"=>0xDC,"SHFr*:"=>0xFC, +            
"SHC"=>0x1D,"SHC*"=>0x3D,"SHC:"=>0x5D,"SHC*:"=>0x7D,"SHCr"=>0x9D,"SHCr*"=>0xBD,"SHCr:"=>0xDD,"SHCr*:"=>0xFD, +            "TAL"=>0x1E,"TAL*"=>0x3E,"TAL:"=>0x5E,"TAL*:"=>0x7E,"TALr"=>0x9E,"TALr*"=>0xBE,"TALr:"=>0xDE,"TALr*:"=>0xFE, +            "REV"=>0x1F,"REV*"=>0x3F,"REV:"=>0x5F,"REV*:"=>0x7F,"REVr"=>0x9F,"REVr*"=>0xBF,"REVr:"=>0xDF,"REVr*:"=>0xFF, +            _ => return Err(()), +        }}) +    } +} + + +pub enum Operation { +    HLT, JMP, JCN, JCK, +    LDA, STA, LDD, STD, +    PSH, POP, CPY, SPL, +    DUP, OVR, SWP, ROT, +    ADD, SUB, INC, DEC, +    LTH, GTH, EQU, NQK, +    IOR, XOR, AND, NOT, +    SHF, SHC, TAL, REV, +} + + +impl From<Operation> for u8 { +    fn from(operation: Operation) -> Self { +        match operation { +            Op::HLT=>0x00, Op::JMP=>0x01, Op::JCN=>0x02, Op::JCK=>0x03, +            Op::LDA=>0x04, Op::STA=>0x05, Op::LDD=>0x06, Op::STD=>0x07, +            Op::PSH=>0x08, Op::POP=>0x09, Op::CPY=>0x0a, Op::SPL=>0x0b, +            Op::DUP=>0x0c, Op::OVR=>0x0d, Op::SWP=>0x0e, Op::ROT=>0x0f, +            Op::ADD=>0x10, Op::SUB=>0x11, Op::INC=>0x12, Op::DEC=>0x13, +            Op::LTH=>0x14, Op::GTH=>0x15, Op::EQU=>0x16, Op::NQK=>0x17, +            Op::IOR=>0x18, Op::XOR=>0x19, Op::AND=>0x1a, Op::NOT=>0x1b, +            Op::SHF=>0x1c, Op::SHC=>0x1d, Op::TAL=>0x1e, Op::REV=>0x1f, +        } +    } +} + + +impl std::fmt::Display for Operation { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        write!(f, "{}", match self { +            Op::HLT=>"HLT", Op::JMP=>"JMP", Op::JCN=>"JCN", Op::JCK=>"JCK", +            Op::LDA=>"LDA", Op::STA=>"STA", Op::LDD=>"LDD", Op::STD=>"STD", +            Op::PSH=>"PSH", Op::POP=>"POP", Op::CPY=>"CPY", Op::SPL=>"SPL", +            Op::DUP=>"DUP", Op::OVR=>"OVR", Op::SWP=>"SWP", Op::ROT=>"ROT", +            Op::ADD=>"ADD", Op::SUB=>"SUB", Op::INC=>"INC", Op::DEC=>"DEC", +            Op::LTH=>"LTH", Op::GTH=>"GTH", Op::EQU=>"EQU", Op::NQK=>"NQK", +            
Op::IOR=>"IOR", Op::XOR=>"XOR", Op::AND=>"AND", Op::NOT=>"NOT", +            Op::SHF=>"SHF", Op::SHC=>"SHC", Op::TAL=>"TAL", Op::REV=>"REV", +        }) +    } +} diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs new file mode 100644 index 0000000..ac5179c --- /dev/null +++ b/src/tokens/semantic.rs @@ -0,0 +1,90 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; + + +pub struct SemanticToken { +    pub source: SourceSpan, +    pub bytecode: BytecodeSpan, +    pub variant: SemanticTokenVariant, +} + + +pub enum SemanticTokenVariant { +    LabelDefinition(LabelDefinition), +    MacroDefinition(MacroDefinition), + +    /// Pointer to the matching label definition. +    LabelReference(usize), +    /// Pointer to the matching macro definition. +    MacroInvocation(usize), + +    Literal(Value), +    Padding(Value), +    Instruction(Instruction), + +    Comment(String), +    String(Vec<u8>), + +    /// Pointer to the matching block close. +    BlockOpen(usize), +    /// Pointer to the matching block open. 
+    BlockClose(usize), +    MarkOpen, +    MarkClose, + +    Error(SemanticParseError), +} + +impl std::fmt::Debug for SemanticToken { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        match &self.variant { +            SemVar::LabelDefinition(def) => write!(f, "LabelDefinition({})", def.name), +            SemVar::MacroDefinition(def) => write!(f, "MacroDefinition({})", def.name), +            SemVar::LabelReference(pointer) => write!(f, "LabelReference(*{pointer})"), +            SemVar::MacroInvocation(pointer) => write!(f, "MacroInvocation(*{pointer})"), +            SemVar::Literal(value) => write!(f, "Literal({value})"), +            SemVar::Padding(value) => write!(f, "Padding({value})"), +            SemVar::Instruction(instr) => write!(f, "Instruction(0x{:02x})", instr.value), +            SemVar::Comment(comment) => write!(f, "Comment({comment})"), +            SemVar::String(string) => write!(f, "String({})", String::from_utf8_lossy(&string)), +            SemVar::BlockOpen(_) => write!(f, "BlockOpen"), +            SemVar::BlockClose(_) => write!(f, "BlockClose"), +            SemVar::MarkOpen => write!(f, "MarkOpen"), +            SemVar::MarkClose => write!(f, "MarkClose"), +            SemVar::Error(_) => write!(f, "Error"), +        } +    } +} + + +pub struct LabelDefinition { +    /// The absolute name of the label or sublabel. +    pub name: String, +    /// List of pointers to label reference tokens. 
+    pub references: Vec<usize>, +} + + +pub struct MacroDefinition { +    pub name: String, +    pub references: Vec<usize>, +    pub body_tokens: Vec<SemanticToken>, +} + + +pub enum SemanticParseError { +    LabelDefinitionInMacroDefinition, +    MacroDefinitionInMacroDefinition, + +    StrayMacroTerminator, +    StrayBlockClose, +    UnclosedBlock, + +    UndefinedSymbol(String), +    RedefinedSymbol((String, SourceSpan)), + +    MacroInvocationBeforeDefinition((String, SourceSpan)), + +    SyntaxError(SyntacticParseError) +} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs new file mode 100644 index 0000000..8684ed9 --- /dev/null +++ b/src/tokens/syntactic.rs @@ -0,0 +1,39 @@ +use crate::*; + + +pub struct SyntacticToken { +    /// Location of token in source files. +    pub source: SourceSpan, +    pub variant: SyntacticTokenVariant, +} + + +pub enum SyntacticTokenVariant { +    LabelDefinition(String), +    MacroDefinition(String), +    MacroDefinitionTerminator, + +    Literal(Value), +    Padding(Value), +    Instruction(Instruction), + +    Comment(String), +    String(Vec<u8>), + +    BlockOpen, +    BlockClose, +    MarkOpen, +    MarkClose, + +    Symbol(String), + +    Error(SyntacticParseError), +} + + +pub enum SyntacticParseError { +    UnterminatedComment, +    UnterminatedRawString, +    UnterminatedNullString, +    InvalidPaddingValue(String), +} diff --git a/src/tokens/value.rs b/src/tokens/value.rs new file mode 100644 index 0000000..e421bd5 --- /dev/null +++ b/src/tokens/value.rs @@ -0,0 +1,32 @@ +pub enum Value { +    Byte(u8), +    Double(u16), +} + +impl std::fmt::Display for Value { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        match self { +            Self::Byte(value) => write!(f, "0x{value:02x}"), +            Self::Double(value) => write!(f, "0x{value:04x}"), +        } +    } +} + + +impl std::str::FromStr for Value { +    type Err = (); + +    fn from_str(token: &str) -> 
Result<Self, Self::Err> { +        match token.len() { +            2 => match u8::from_str_radix(&token, 16) { +                Ok(value) => Ok(Value::Byte(value)), +                Err(_) => Err(()), +            } +            4 => match u16::from_str_radix(&token, 16) { +                Ok(value) => Ok(Value::Double(value)), +                Err(_) => Err(()), +            } +            _ => Err(()), +        } +    } +} diff --git a/src/translators.rs b/src/translators.rs new file mode 100644 index 0000000..cce5633 --- /dev/null +++ b/src/translators.rs @@ -0,0 +1,9 @@ +mod syntactic_parser; +mod semantic_parser; +mod bytecode_generator; +mod symbols_generator; + +pub use syntactic_parser::*; +pub use semantic_parser::*; +pub use bytecode_generator::*; +pub use symbols_generator::*; diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs new file mode 100644 index 0000000..956aca5 --- /dev/null +++ b/src/translators/bytecode_generator.rs @@ -0,0 +1,131 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; + + +pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> { +    let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens); +    generator.generate() +} + + +/// Translate semantic tokens into bytecode. 
+struct BytecodeGenerator<'a> { +    semantic_tokens: &'a mut [SemanticToken], +    block_stack: Vec<usize>, +    bytecode: Vec<u8>, +    /// (address in bytecode, label definition token index) +    label_references: Vec<(usize, usize)>, +} + +impl<'a> BytecodeGenerator<'a> { +    pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self { +        Self { +            semantic_tokens, +            block_stack: Vec::new(), +            bytecode: Vec::new(), +            label_references: Vec::new(), +        } +    } + +    pub fn generate(mut self) -> Vec<u8> { +        for i in 0..self.semantic_tokens.len() { +            let address = self.bytecode.len(); +            self.generate_bytecode_for_token(i, None); +            self.semantic_tokens[i].bytecode = BytecodeSpan { +                bytes: self.bytecode[address..].to_vec(), +                location: BytecodeLocation { +                    address, +                    length: self.bytecode.len().saturating_sub(address), +                } +            }; +        } + +        // Replace blank label references in bytecode with real label addresses. +        // The layer of indirection is necessary because the iteration borrows +        // self immutably. +        let mut insertions: Vec<(usize, u16)> = Vec::new(); +        for (bytecode_address, token_pointer) in &self.label_references { +            let label_token = &self.semantic_tokens[*token_pointer]; +            // TODO: If greater than u16, print a warning. +            let address_value = label_token.bytecode.location.address as u16; +            insertions.push((*bytecode_address, address_value)); +        } +        for (bytecode_address, address_value) in insertions { +            self.replace_address_in_bytecode(bytecode_address, address_value); +        } + +        // Strip trailing null bytes from the bytecode. 
+        let mut length = self.bytecode.len(); +        for (i, byte) in self.bytecode.iter().enumerate().rev() { +            match *byte == 0 { +                true => length = i, +                false => break, +            }; +        } +        self.bytecode.truncate(length); + +        return self.bytecode; +    } + +    fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) { +        macro_rules! push_byte { +            ($byte:expr) => { self.bytecode.push($byte) }; } +        macro_rules! push_double { +            ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; } +        macro_rules! pad { +            ($len:expr) => { for _ in 0..$len { push_byte!(0); } } } + +        let semantic_token = if let Some(macro_pointer) = macro_pointer { +            let macro_definition = &self.semantic_tokens[macro_pointer]; +            if let SemVar::MacroDefinition(def) = &macro_definition.variant { +                &def.body_tokens[pointer] +            } else { unreachable!() } +        } else { +                &self.semantic_tokens[pointer] +        }; +        match &semantic_token.variant { +            SemVar::MacroInvocation(pointer) => { +                let macro_definition = &self.semantic_tokens[*pointer]; +                if let SemVar::MacroDefinition(def) = &macro_definition.variant { +                    let length = def.body_tokens.len(); +                    let macro_pointer = Some(*pointer); +                    for body_pointer in 0..length { +                        // Recurse, generate bytecode for each macro body token. 
+                        self.generate_bytecode_for_token(body_pointer, macro_pointer); +                    } +                } else { unreachable!() } +            } +            SemVar::Literal(value) => match value { +                Value::Byte(value) => push_byte!(*value), +                Value::Double(value) => push_double!(value), +            } +            SemVar::Padding(value) => match value { +                Value::Byte(value) => pad!(*value), +                Value::Double(value) => pad!(*value), +            } +            SemVar::Instruction(instr) => push_byte!(instr.value), +            SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes), +            SemVar::LabelReference(pointer) => { +                self.label_references.push((self.bytecode.len(), *pointer)); +                push_double!(0u16); +            } +            SemVar::BlockOpen(_) => { +                self.block_stack.push(self.bytecode.len()); +                push_double!(0u16); +            } +            SemVar::BlockClose(_) => { +                let bytecode_address = self.block_stack.pop().unwrap(); +                // TODO: If greater than u16, print a warning. 
+                let address_value = self.bytecode.len() as u16; +                self.replace_address_in_bytecode(bytecode_address, address_value); +            } +            _ => (), +        }; +    } + +    fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) { +        let range = bytecode_address..bytecode_address+2; +        self.bytecode[range].clone_from_slice(&address_value.to_be_bytes()); +    } +} diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs new file mode 100644 index 0000000..cb6a435 --- /dev/null +++ b/src/translators/semantic_parser.rs @@ -0,0 +1,245 @@ +use crate::*; + +use std::collections::HashMap; +use std::path::PathBuf; + +use SyntacticTokenVariant as SynVar; +use SemanticTokenVariant as SemVar; +use SemanticParseError as SemErr; + + +pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> { +    let semantic_parser = SemanticParser::from_source_code(source_code, path); +    semantic_parser.parse() +} + + +/// Translate syntactic tokens into semantic tokens. +struct SemanticParser { +    labels: HashMap<String, Definition>, +    macros: HashMap<String, Definition>, +    syntactic_tokens: Vec<SyntacticToken>, +    /// Index of the current outer token. 
+    current_outer_index: usize, +} + +impl SemanticParser { +    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { +        let mut labels = HashMap::new(); +        let mut macros = HashMap::new(); +        let mut syntactic_tokens = Vec::new(); + +        let parser = SyntacticParser::from_source_code(source_code, path); +        for syntactic_token in parser { +            let definition = Definition::new(syntactic_token.source.clone()); +            match &syntactic_token.variant { +                SynVar::LabelDefinition(name) => { +                    let _ = labels.try_insert(name.to_owned(), definition); +                }, +                SynVar::MacroDefinition(name) => { +                    let _ = macros.try_insert(name.to_owned(), definition); +                }, +                _ => (), +            } +            syntactic_tokens.push(syntactic_token); +        } + +        Self { +            labels, +            macros, +            syntactic_tokens, +            current_outer_index: 0, +        } +    } + +    /// Parse syntactic tokens as semantic tokens. +    pub fn parse(mut self) -> Vec<SemanticToken> { +        let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens); +        let mut syntactic = syntactic_tokens.into_iter(); +        let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false); + +        // Insert real label definition pointers into label reference tokens. +        for definition in self.labels.values_mut() { +            if let Some(definition_pointer) = definition.pointer { +                // Insert definition pointer into reference tokens. 
+                for reference_pointer in &definition.references { +                    let reference_token = &mut semantic_tokens[*reference_pointer]; +                    reference_token.variant = SemVar::LabelReference(definition_pointer); +                } +                // Insert reference pointers into definition token. +                let definition_token = &mut semantic_tokens[definition_pointer]; +                if let SemVar::LabelDefinition(ref mut def) = definition_token.variant { +                    def.references = std::mem::take(&mut definition.references); +                } else { unreachable!() } +                // Insert definition pointer into reference tokens inside macros. +                for (outer, inner) in &definition.deep_references { +                    let macro_token = &mut semantic_tokens[*outer]; +                    if let SemVar::MacroDefinition(ref mut def) = macro_token.variant { +                        let reference_token = &mut def.body_tokens[*inner]; +                        reference_token.variant = SemVar::LabelReference(definition_pointer); +                    } else { unreachable!() } +                } +                // TODO: Record deep references in macro and label definitions? 
+            } +        } + +        return semantic_tokens; +    } + +    fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken> +    where I: Iterator<Item = SyntacticToken> +    { +        let mut semantic_tokens: Vec<SemanticToken> = Vec::new(); +        let mut block_stack: Vec<usize> = Vec::new(); + +        while let Some(syntactic_token) = parser.next() { +            let current_index = semantic_tokens.len(); +            if !in_macro { +                self.current_outer_index = current_index; +            } + +            let semantic_token_variant = match syntactic_token.variant { +                SynVar::LabelDefinition(name) => { +                    if in_macro { +                        SemVar::Error(SemErr::LabelDefinitionInMacroDefinition) +                    } else if let Some(definition) = self.macros.get(&name) { +                        let source = definition.source.clone(); +                        SemVar::Error(SemErr::RedefinedSymbol((name, source))) +                    } else if let Some(definition) = self.labels.get_mut(&name) { +                        if definition.pointer.is_some() { +                            let source = definition.source.clone(); +                            SemVar::Error(SemErr::RedefinedSymbol((name, source))) +                        } else { +                            definition.pointer = Some(current_index); +                            let references = Vec::new(); +                            SemVar::LabelDefinition(LabelDefinition { name, references }) +                        } +                    } else { +                        unreachable!() +                    } +                } +                SynVar::MacroDefinition(name) => { +                    if in_macro { +                        SemVar::Error(SemErr::MacroDefinitionInMacroDefinition) +                    } else if let Some(definition) = self.labels.get(&name) { +                        let source 
= definition.source.clone(); +                        SemVar::Error(SemErr::RedefinedSymbol((name, source))) +                    } else if let Some(definition) = self.macros.get_mut(&name) { +                        if definition.pointer.is_some() { +                            let source = definition.source.clone(); +                            SemVar::Error(SemErr::RedefinedSymbol((name, source))) +                        } else { +                            definition.pointer = Some(current_index); +                            let references = Vec::new(); +                            let body_tokens = self.pull_semantic_tokens(parser, true); +                            SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens }) +                        } +                    } else { +                        unreachable!() +                    } +                } +                SynVar::MacroDefinitionTerminator => if in_macro { +                    break; +                } else { +                    SemVar::Error(SemErr::StrayMacroTerminator) +                } +                SynVar::Literal(value) => { +                    SemVar::Literal(value) +                } +                SynVar::Padding(value) => { +                    SemVar::Padding(value) +                } +                SynVar::Instruction(instr) => { +                    SemVar::Instruction(instr) +                } +                SynVar::Comment(comment) => { +                    SemVar::Comment(comment) +                } +                SynVar::String(bytes) => { +                    SemVar::String(bytes) +                } +                SynVar::BlockOpen => { +                    block_stack.push(current_index); +                    SemVar::BlockOpen(0) +                } +                SynVar::BlockClose => { +                    if let Some(pointer) = block_stack.pop() { +                        let open = &mut semantic_tokens[pointer]; +                  
      open.variant = SemVar::BlockOpen(current_index); +                        SemVar::BlockClose(pointer) +                    } else { +                        SemVar::Error(SemErr::StrayBlockClose) +                    } +                } +                SynVar::MarkOpen => { +                    SemVar::MarkOpen +                } +                SynVar::MarkClose => { +                    SemVar::MarkClose +                } +                SynVar::Symbol(name) => { +                    if let Some(definition) = self.labels.get_mut(&name) { +                        if in_macro { +                            let pointer = (self.current_outer_index, current_index); +                            definition.deep_references.push(pointer); +                        } else { +                            definition.references.push(current_index); +                        } +                        SemVar::LabelReference(0) +                    } else if let Some(definition) = self.macros.get_mut(&name) { +                        if let Some(pointer) = definition.pointer { +                            if !in_macro { definition.references.push(current_index); } +                            SemVar::MacroInvocation(pointer) +                        } else { +                            let source = definition.source.clone(); +                            SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source))) +                        } +                    } else { +                        SemVar::Error(SemErr::UndefinedSymbol(name)) +                    } +                } +                SynVar::Error(syntax_err) => { +                    SemVar::Error(SemErr::SyntaxError(syntax_err)) +                } +            }; + +            let semantic_token = SemanticToken { +                source: syntactic_token.source, +                bytecode: BytecodeSpan::default(), +                variant: semantic_token_variant, +            }; +            
semantic_tokens.push(semantic_token); +        } + +        if in_macro { +            //TODO: UnterminatedMacroDefinition +        } + +        // Replace each unclosed BlockOpen token with an error. +        for block_pointer in block_stack { +            semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock); +        } + +        return semantic_tokens; +    } +} + + +struct Definition { +    pub source: SourceSpan, +    pub pointer: Option<usize>, +    pub references: Vec<usize>, +    /// (macro index, label reference index) +    pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { +    pub fn new(source: SourceSpan) -> Self { +        Self { +            source, +            pointer: None, +            references: Vec::new(), +            deep_references: Vec::new(), +        } +    } +} diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs new file mode 100644 index 0000000..06bbaa8 --- /dev/null +++ b/src/translators/symbols_generator.rs @@ -0,0 +1,28 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; + + +pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String { +    let mut symbols = String::new(); + +    for token in semantic_tokens { +        if let SemVar::LabelDefinition(definition) = &token.variant { +            let address = token.bytecode.location.address; +            if address > 0xffff { break; } +            let name = &definition.name; +            let path = match &token.source.in_source { +                Some(source) => &source.path, +                None => &token.source.in_merged.path, +            }; +            if let Some(path) = path { +                let path = path.as_os_str().to_string_lossy(); +                symbols.push_str(&format!("{address:04x} {name} {path}\n")); +            } else { +                symbols.push_str(&format!("{address:04x} {name}\n")); +            } +        } +    } + +    return symbols; +} diff --git 
a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs new file mode 100644 index 0000000..7279daf --- /dev/null +++ b/src/translators/syntactic_parser.rs @@ -0,0 +1,247 @@ +use crate::*; + +use std::path::PathBuf; + + +/// Translate raw source code characters into syntactic tokens. +pub struct SyntacticParser { +    /// Path of file from which the source was read. +    path: Option<PathBuf>, +    /// Path of the original source file. +    source_path: Option<PathBuf>, +    /// Position of the next character to be read. +    position: Position, +    /// Previous value of the position field. +    prev_position: Position, +    /// Line where the embedded source file begins. +    source_line_start: usize, +    /// Characters waiting to be parsed, in reverse order. +    chars: Vec<char>, +    /// The token currently being parsed. +    token_source_string: String, +    /// The name of the most recently parsed label. +    label: String, +} + + +impl SyntacticParser { +    /// Parse source code. +    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { +        Self { +            path: path.map(|p| p.into()), +            source_path: None, +            position: Position { line: 0, column: 0 }, +            prev_position: Position { line: 0, column: 0 }, +            source_line_start: 0, +            chars: source_code.chars().rev().collect(), +            token_source_string: String::new(), +            label: String::new(), +        } +    } + +    /// Return the next character, keeping it on the queue. +    fn peek_char(&self) -> Option<char> { +        self.chars.last().copied() +    } + +    /// Return the next character, removing it from the queue. 
+    fn eat_char(&mut self) -> Option<char> { +        let option = self.chars.pop(); +        if let Some(c) = option { +            self.prev_position = self.position; +            self.position.advance(c); +            self.token_source_string.push(c); +        } +        return option; +    } + +    /// Remove the next character from the queue. +    fn drop_char(&mut self) { +        if let Some(c) = self.chars.pop() { +            self.prev_position = self.position; +            self.position.advance(c); +        } +    } + +    /// Remove leading whitespace. +    fn drop_whitespace(&mut self) { +        while let Some(c) = self.peek_char() { +            match c.is_whitespace() { +                true => self.drop_char(), +                false => break, +            } +        } +    } + +    /// Remove a full token from the queue. +    fn eat_token(&mut self) -> String { +        const DELIMITERS: [char; 13] = +            ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; +        let mut token = String::new(); +        while let Some(peek) = self.peek_char() { +            if peek.is_whitespace() || DELIMITERS.contains(&peek) { +                break; +            } +            let c = self.eat_char().unwrap(); +            token.push(c); +            if c == ':' { +                break; +            } +        } +        token +    } + +    /// Return all characters until the delimiter, removing all returned +    /// characters and the delimiter from the queue. Returns None if end +    /// of source is reached before delimiter is found. 
+    fn eat_to_delim(&mut self, delim: char) -> Option<String> { +        let mut token = String::new(); +        while let Some(c) = self.eat_char() { +            self.token_source_string.push(c); +            match c == delim { +                true => return Some(token), +                false => token.push(c), +            } +        } +        return None; +    } + +    fn is_line_empty(&self) -> bool { +        for c in self.chars.iter().rev() { +            if *c == '\n' { +                return true; +            } +            if !c.is_whitespace() { +                return false +            } +        } +        return false; +    } +} + + +impl Iterator for SyntacticParser { +    type Item = SyntacticToken; + +    /// Sequentially parse tokens from the source code. +    fn next(&mut self) -> Option<SyntacticToken> { +        use SyntacticTokenVariant as SynVar; +        use SyntacticParseError as SynErr; + +        self.drop_whitespace(); +        let start = self.position; + +        let variant = match self.eat_char()? 
{ +            '@' => { +                self.label = self.eat_token(); +                SynVar::LabelDefinition(self.label.clone()) +            } +            '&' => { +                let token = self.eat_token(); +                let sublabel = format!("{}/{token}", self.label); +                SynVar::LabelDefinition(sublabel) +            } +            '%' => SynVar::MacroDefinition(self.eat_token()), +            ';' => SynVar::MacroDefinitionTerminator, +            '[' => SynVar::MarkOpen, +            ']' => SynVar::MarkClose, +            '{' => SynVar::BlockOpen, +            '}' => SynVar::BlockClose, +            '(' => match self.eat_to_delim(')') { +                Some(string) => SynVar::Comment(string), +                None => SynVar::Error(SynErr::UnterminatedComment), +            } +            '\'' => match self.eat_to_delim('\'') { +                Some(string) => SynVar::String(string.as_bytes().to_vec()), +                None => SynVar::Error(SynErr::UnterminatedRawString), +            } +            '"' => match self.eat_to_delim('"') { +                Some(string) => { +                    let mut bytes = string.as_bytes().to_vec(); +                    bytes.push(0x00); +                    SynVar::String(bytes) +                } +                None => SynVar::Error(SynErr::UnterminatedNullString), +            } +            '#' => { +                let token = self.eat_token(); +                match token.parse::<Value>() { +                    Ok(value) => SynVar::Padding(value), +                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), +                } +            }, +            '~' => { +                let token = self.eat_token(); +                let symbol = format!("{}/{token}", self.label); +                SynVar::Symbol(symbol) +            } +            ':' => SynVar::Symbol(String::from(':')), +            c => { +                let token = format!("{c}{}", self.eat_token()); +      
          match token.parse::<Value>() { +                    Ok(value) => SynVar::Literal(value), +                    Err(_) => match token.parse::<Instruction>() { +                        Ok(instruction) => SynVar::Instruction(instruction), +                        Err(_) => SynVar::Symbol(token), +                    } +                } +            } +        }; + +        // Parse source path comments. +        if let SynVar::Comment(comment) = &variant { +            // Check that the comment fills the entire line. +            if start.column == 0 && self.is_line_empty() { +                if let Some(path) = comment.strip_prefix(": ") { +                    self.source_path = Some(PathBuf::from(path.trim())); +                    self.source_line_start = start.line + 1; +                } +            } +        } + +        // Find location in current merged file. +        let in_merged = SourceLocation { +            path: self.path.to_owned(), +            start, +            end: self.prev_position, +        }; + +        // Find location in original source file. 
+        let in_source = if start.line >= self.source_line_start { +            match &self.source_path { +                Some(path) => { +                    let offset = self.source_line_start; +                    Some( SourceLocation { +                        path: Some(path.to_owned()), +                        start: Position { +                            line: in_merged.start.line.saturating_sub(offset), +                            column: in_merged.start.column, +                        }, +                        end: Position { +                            line: in_merged.end.line.saturating_sub(offset), +                            column: in_merged.end.column, +                        } +                    }) +                } +                None => None, +            } +        } else { +            None +        }; + +        let string = std::mem::take(&mut self.token_source_string); +        let source = SourceSpan { string, in_merged, in_source }; +        Some( SyntacticToken { source, variant } ) +    } +} + + +#[derive(Debug)] +pub enum ParseError { +    InvalidExtension, +    NotFound, +    NotReadable, +    IsADirectory, +    InvalidUtf8, +    Unknown, +} | 
