diff options
Diffstat (limited to 'src/assembler.rs')
-rw-r--r-- | src/assembler.rs | 273 |
1 file changed, 273 insertions, 0 deletions
use std::mem::take;
use std::collections::hash_map::Entry;

use SyntacticTokenType as Syn;
use SemanticTokenType as Sem;
use crate::*;

use std::collections::HashMap;

/// The inner value is the index of the token that defines this symbol.
pub enum SymbolDefinition {
    Macro(usize),
    Label(usize),
}

/// Assembles source code in three passes:
/// 1. `tokenise_source` — build the syntactic token list and record symbol definitions;
/// 2. `resolve_references` — convert syntactic tokens to semantic tokens, resolving
///    each reference to its defining token and flagging cyclic macros;
/// 3. `generate_bytecode` — emit bytecode, backpatching label addresses.
pub struct Assembler {
    /// The contents of the program as a list of syntactic tokens.
    syntactic_tokens: Vec<SyntacticToken>,
    /// The contents of the program as a list of semantic tokens.
    semantic_tokens: Vec<SemanticToken>,
    /// Map the name of each defined symbol to the index of the defining token.
    symbol_definitions: HashMap<String, SymbolDefinition>,
    /// Map each macro definition token index to a list of syntactic body tokens.
    syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>,
    /// Map each macro definition token index to a list of semantic body tokens.
    semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>,
}

impl Assembler {
    /// Create an empty assembler with no tokens or symbol definitions.
    pub fn new() -> Self {
        Self {
            syntactic_tokens: Vec::new(),
            semantic_tokens: Vec::new(),
            symbol_definitions: HashMap::new(),
            syntactic_macro_bodies: HashMap::new(),
            semantic_macro_bodies: HashMap::new(),
        }
    }

    /// First pass: split `source_code` into syntactic tokens.
    ///
    /// Macro body tokens are diverted into `syntactic_macro_bodies` (keyed by the
    /// index of the macro definition token) rather than the main token list, so
    /// `syntactic_tokens` only ever contains top-level tokens. Duplicate symbol
    /// names and orphaned macro terminators are recorded as errors on the token.
    pub fn tokenise_source(&mut self, source_code: &str) {
        // The index of the current macro definition token
        let mut macro_definition: Option<usize> = None;
        let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();

        for mut token in TokenIterator::from_str(source_code) {
            // Index this token would get if pushed to the top-level list.
            let next_index = self.syntactic_tokens.len();
            if let Some(index) = macro_definition {
                // Inside a macro definition: accumulate body tokens until the
                // terminator. Note the terminator itself is kept in the body.
                token.use_in_macro_body();
                if token.is_macro_terminator() {
                    // Commit the current macro definition
                    macro_definition_body_tokens.push(token);
                    self.syntactic_macro_bodies.insert(
                        index, take(&mut macro_definition_body_tokens));
                    macro_definition = None;
                } else {
                    macro_definition_body_tokens.push(token);
                }
            } else {
                if let Syn::MacroDefinition(ref name) = token.r#type {
                    macro_definition = Some(next_index);
                    // Entry API: first definition wins; later duplicates are
                    // marked as errors instead of overwriting the original.
                    match self.symbol_definitions.entry(name.to_string()) {
                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));}
                    }
                } else if let Syn::LabelDefinition(ref name) = token.r#type {
                    match self.symbol_definitions.entry(name.to_string()) {
                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));}
                    }
                } else if token.is_macro_terminator() {
                    // A terminator outside any macro definition is an error.
                    token.set_error(Error::OrphanedMacroDefinitionTerminator);
                }
                self.syntactic_tokens.push(token);
            }
        }
    }

    /// Second pass: convert every syntactic token into a semantic token,
    /// resolving symbol references via `symbol_definitions`, then detect
    /// macros whose bodies (transitively) reference themselves and replace
    /// those references with `Error::CyclicMacroReference`.
    pub fn resolve_references(&mut self) {
        // Take ownership of the syntactic tokens; they are consumed here.
        let syntactic_tokens = take(&mut self.syntactic_tokens);
        let syntactic_token_count = syntactic_tokens.len();

        for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() {
            let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index);
            self.semantic_tokens.push(semantic_token);
        }
        // Conversion is one-to-one: token indices stay valid across the passes,
        // which generate_bytecode relies on when resolving references by index.
        assert_eq!(syntactic_token_count, self.semantic_tokens.len());

        // Find all cyclic macros
        let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter(
            |i| !self.traverse_macro_definition(*i, 0)).collect();
        // Replace each cyclic macro reference in a macro definition with an error
        for body_tokens in &mut self.semantic_macro_bodies.values_mut() {
            for body_token in body_tokens {
                if let Sem::MacroReference(i) = body_token.r#type {
                    if cyclic_macros.contains(&i) {
                        let name = body_token.source_location.source.clone();
                        body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference);
                    }
                }
            }
        }

    }

    /// Attempt to recursively traverse the body tokens of a macro definition, returning
    /// false if the depth exceeds a preset maximum, and returning true otherwise.
    ///
    /// A depth cap of 16 is used as the cycle detector: a genuinely cyclic macro
    /// chain will always exceed it, while a legitimate (acyclic) nesting deeper
    /// than 16 would also be rejected.
    fn traverse_macro_definition(&self, index: usize, level: usize) -> bool {
        if level == 16 {
            false
        } else {
            self.semantic_macro_bodies[&index].iter().all(
                |token| if let Sem::MacroReference(i) = token.r#type {
                    self.traverse_macro_definition(i, level+1)
                } else {
                    true
                }
            )
        }
    }

    /// Third pass: translate the semantic tokens into bytecode.
    ///
    /// Label references are emitted as placeholder zero u16 slots and backpatched
    /// once all label addresses are known; macro references are expanded inline.
    /// Each token's bytecode location (start/length) is recorded, reference lists
    /// and macro bodies are moved into their definition tokens, and trailing null
    /// bytes are trimmed from the output. Returns the bytecode together with the
    /// (consumed) semantic token list.
    pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) {
        let mut bytecode: Vec<u8> = Vec::new();
        // Map each label definition token index to the bytecode addresses of the references
        let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
        // Map each label and macro definition token to a list of reference token indices
        let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new();

        // Emission helpers; pad appends $p zero bytes (relative padding).
        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}

        let mut semantic_tokens = take(&mut self.semantic_tokens);

        // Translate semantic tokens into bytecode
        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
            // Addresses are u16: the output is limited to a 64 KiB address space.
            let start_addr = bytecode.len() as u16;
            match &mut semantic_token.r#type {
                Sem::LabelReference(i) => {
                    reference_tokens.entry(*i).or_default().push(index);
                    // Reserve a zeroed u16 slot; backpatched below.
                    reference_addresses.entry(*i).or_default().push(start_addr);
                    push_u16!(0);
                }
                Sem::MacroReference(i) => {
                    reference_tokens.entry(*i).or_default().push(index);
                    self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses);
                }
                // Definitions emit no bytes; a label just learns its address here.
                Sem::LabelDefinition(def) => def.address=start_addr,
                Sem::MacroDefinition(_) => (),

                Sem::Padding(p) => pad!(*p),
                Sem::ByteLiteral(b) => push_u8!(*b),
                Sem::ShortLiteral(s) => push_u16!(*s),
                Sem::Instruction(b) => push_u8!(*b),

                // Terminators never reach the top-level token list: tokenise_source
                // either keeps them inside a macro body or marks them as errors.
                Sem::MacroDefinitionTerminator => unreachable!(),
                Sem::Comment => (),
                Sem::Error(..) => (),
            };
            let end_addr = bytecode.len() as u16;
            semantic_token.bytecode_location.start = start_addr;
            semantic_token.bytecode_location.length = end_addr - start_addr;
        }

        // Fill each label reference with the address of the matching label definition
        for (index, slots) in reference_addresses {
            if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type {
                // Big-endian: high byte first, low byte in the following slot.
                let [h,l] = definition.address.to_be_bytes();
                for slot in slots {
                    bytecode[slot as usize] = h;
                    bytecode[slot.wrapping_add(1) as usize] = l;
                }
            // reference_addresses is only ever keyed by label definition indices.
            } else { unreachable!() }
        }

        // Move references and macro body tokens into label and macro definition tokens
        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
            if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type {
                definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap();
                if let Some(references) = reference_tokens.remove(&index) {
                    definition.references = references;
                }
            } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type {
                if let Some(references) = reference_tokens.remove(&index) {
                    definition.references = references;
                }
            }
        }
        // Every recorded reference must have been claimed by a definition token.
        assert_eq!(reference_tokens.len(), 0);

        // Remove trailing null bytes from the bytecode
        if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
            let truncated_length = final_nonnull_byte + 1;
            let removed_byte_count = bytecode.len() - truncated_length;
            if removed_byte_count > 0 {
                bytecode.truncate(truncated_length);
            }
        }

        (bytecode, semantic_tokens)
    }

    /// Convert one syntactic token (at top-level index `index`) into a semantic
    /// token. A token that already carries an error becomes `Sem::Error`;
    /// otherwise references resolve against `symbol_definitions`, and a macro
    /// definition recursively converts its stored body tokens, moving them from
    /// `syntactic_macro_bodies` into `semantic_macro_bodies`.
    fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize) -> SemanticToken {
        SemanticToken {
            r#type: {
                if let Some(err) = syn_token.error {
                    Sem::Error(syn_token.r#type, err)
                } else {
                    match syn_token.r#type {
                        Syn::Reference(ref name) => {
                            match self.symbol_definitions.get(name) {
                                Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i),
                                Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i),
                                None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
                            }
                        }
                        Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))},
                        Syn::MacroDefinition(name) => {
                            let mut sem_body_tokens = Vec::new();
                            for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() {
                                // Make the source location of the macro definition token span the entire definition
                                if syn_body_token.is_macro_terminator() {
                                    syn_token.source_location.end = syn_body_token.source_location.start;
                                }
                                // Body tokens cannot themselves be macro definitions,
                                // so the `index` argument is unused for them; pass 0.
                                let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0);
                                sem_body_tokens.push(sem_body_token);
                            }
                            self.semantic_macro_bodies.insert(index, sem_body_tokens);
                            Sem::MacroDefinition(MacroDefinition::new(name))
                        },
                        Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator,
                        Syn::Padding(v) => Sem::Padding(v),
                        Syn::ByteLiteral(v) => Sem::ByteLiteral(v),
                        Syn::ShortLiteral(v) => Sem::ShortLiteral(v),
                        Syn::Instruction(v) => Sem::Instruction(v),
                        Syn::Comment => Sem::Comment,
                    }
                }
            },
            source_location: syn_token.source_location,
            // Filled in later by generate_bytecode.
            bytecode_location: BytecodeLocation::zero(),
        }
    }

    /// Emit the body of the macro at definition-token `index` into `bytecode`,
    /// recursing into nested macro references. Label references inside the body
    /// reserve placeholder slots in `reference_addresses` exactly as at top level,
    /// so they are backpatched by the same pass in `generate_bytecode`.
    /// Recursion terminates because cyclic macros were already converted to
    /// error tokens by `resolve_references`.
    fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) {
        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}

        for body_token in self.semantic_macro_bodies.get(&index).unwrap() {
            let start_addr = bytecode.len() as u16;
            match &body_token.r#type {
                Sem::LabelReference(i) => {
                    reference_addresses.entry(*i).or_default().push(start_addr);
                    push_u16!(0u16);
                },
                Sem::MacroReference(i) => {
                    self.expand_macro_reference(*i, bytecode, reference_addresses);
                },
                // tokenise_source never routes definitions into a macro body.
                Sem::LabelDefinition(_) => unreachable!(),
                Sem::MacroDefinition(_) => unreachable!(),

                Sem::Padding(p) => pad!(*p),
                Sem::ByteLiteral(b) => push_u8!(*b),
                Sem::ShortLiteral(s) => push_u16!(*s),
                Sem::Instruction(b) => push_u8!(*b),

                // Unlike the top level, bodies do contain their terminator token;
                // it emits nothing.
                Sem::MacroDefinitionTerminator => (),
                Sem::Comment => (),
                Sem::Error(..) => (),
            };
        }
    }
}