summaryrefslogtreecommitdiff
path: root/src/assembler.rs
diff options
context:
space:
mode:
authorBen Bridle <bridle.benjamin@gmail.com>2023-05-08 12:05:57 +1200
committerBen Bridle <bridle.benjamin@gmail.com>2023-05-08 12:05:57 +1200
commitafa81e9ae6a56efe2eae2990e09c672b74328715 (patch)
treee13ceca3104a8f4bded3668f8efd743bcfbe4e35 /src/assembler.rs
parente38f108921c61e1e66d65a368f2a67a763d61e69 (diff)
downloadbedrock-asm-afa81e9ae6a56efe2eae2990e09c672b74328715.zip
Added detection of cyclic macro references, and made assembler binary usable
Diffstat (limited to 'src/assembler.rs')
-rw-r--r--src/assembler.rs273
1 files changed, 273 insertions, 0 deletions
diff --git a/src/assembler.rs b/src/assembler.rs
new file mode 100644
index 0000000..cb6b6f1
--- /dev/null
+++ b/src/assembler.rs
@@ -0,0 +1,273 @@
+use std::mem::take;
+use std::collections::hash_map::Entry;
+
+use SyntacticTokenType as Syn;
+use SemanticTokenType as Sem;
+use crate::*;
+
+use std::collections::HashMap;
+
+/// The inner value is the index of the token that defines this symbol.
+pub enum SymbolDefinition {
+ Macro(usize),
+ Label(usize),
+}
+
+pub struct Assembler {
+ /// The contents of the program as a list of syntactic tokens.
+ syntactic_tokens: Vec<SyntacticToken>,
+ /// The contents of the program as a list of semantic tokens.
+ semantic_tokens: Vec<SemanticToken>,
+ /// Map the name of each defined symbol to the index of the defining token.
+ symbol_definitions: HashMap<String, SymbolDefinition>,
+ /// Map each macro definition token index to a list of syntactic body tokens.
+ syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>,
+ /// Map each macro definition token index to a list of semantic body tokens.
+ semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>,
+}
+
+impl Assembler {
+ pub fn new() -> Self {
+ Self {
+ syntactic_tokens: Vec::new(),
+ semantic_tokens: Vec::new(),
+ symbol_definitions: HashMap::new(),
+ syntactic_macro_bodies: HashMap::new(),
+ semantic_macro_bodies: HashMap::new(),
+ }
+ }
+
+ pub fn tokenise_source(&mut self, source_code: &str) {
+ // The index of the current macro definition token
+ let mut macro_definition: Option<usize> = None;
+ let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();
+
+ for mut token in TokenIterator::from_str(source_code) {
+ let next_index = self.syntactic_tokens.len();
+ if let Some(index) = macro_definition {
+ token.use_in_macro_body();
+ if token.is_macro_terminator() {
+ // Commit the current macro definition
+ macro_definition_body_tokens.push(token);
+ self.syntactic_macro_bodies.insert(
+ index, take(&mut macro_definition_body_tokens));
+ macro_definition = None;
+ } else {
+ macro_definition_body_tokens.push(token);
+ }
+ } else {
+ if let Syn::MacroDefinition(ref name) = token.r#type {
+ macro_definition = Some(next_index);
+ match self.symbol_definitions.entry(name.to_string()) {
+ Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
+ Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));}
+ }
+ } else if let Syn::LabelDefinition(ref name) = token.r#type {
+ match self.symbol_definitions.entry(name.to_string()) {
+ Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
+ Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));}
+ }
+ } else if token.is_macro_terminator() {
+ token.set_error(Error::OrphanedMacroDefinitionTerminator);
+ }
+ self.syntactic_tokens.push(token);
+ }
+ }
+ }
+
+ pub fn resolve_references(&mut self) {
+ let syntactic_tokens = take(&mut self.syntactic_tokens);
+ let syntactic_token_count = syntactic_tokens.len();
+
+ for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() {
+ let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index);
+ self.semantic_tokens.push(semantic_token);
+ }
+ assert_eq!(syntactic_token_count, self.semantic_tokens.len());
+
+ // Find all cyclic macros
+ let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().map(|i|*i).filter(
+ |i| !self.traverse_macro_definition(*i, 0)).collect();
+ // Replace each cyclic macro reference in a macro definition with an error
+ for body_tokens in &mut self.semantic_macro_bodies.values_mut() {
+ for body_token in body_tokens {
+ if let Sem::MacroReference(i) = body_token.r#type {
+ if cyclic_macros.contains(&i) {
+ let name = body_token.source_location.source.clone();
+ body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference);
+ }
+ }
+ }
+ }
+
+ }
+
+ /// Attempt to recursively traverse the body tokens of a macro definition, returning
+ /// false if the depth exceeds a preset maximum, and returning true otherwise.
+ fn traverse_macro_definition(&self, index: usize, level: usize) -> bool {
+ if level == 16 {
+ false
+ } else {
+ self.semantic_macro_bodies[&index].iter().all(
+ |token| if let Sem::MacroReference(i) = token.r#type {
+ self.traverse_macro_definition(i, level+1)
+ } else {
+ true
+ }
+ )
+ }
+ }
+
+ pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) {
+ let mut bytecode: Vec<u8> = Vec::new();
+ // Map each label definition token index to the bytecode addresses of the references
+ let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
+ // Map each label and macro definition token to a list of reference token indices
+ let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new();
+
+ macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
+ macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
+ macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
+
+ let mut semantic_tokens = take(&mut self.semantic_tokens);
+
+ // Translate semantic tokens into bytecode
+ for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
+ let start_addr = bytecode.len() as u16;
+ match &mut semantic_token.r#type {
+ Sem::LabelReference(i) => {
+ reference_tokens.entry(*i).or_default().push(index);
+ reference_addresses.entry(*i).or_default().push(start_addr);
+ push_u16!(0);
+ }
+ Sem::MacroReference(i) => {
+ reference_tokens.entry(*i).or_default().push(index);
+ self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses);
+ }
+ Sem::LabelDefinition(def) => def.address=start_addr,
+ Sem::MacroDefinition(_) => (),
+
+ Sem::Padding(p) => pad!(*p),
+ Sem::ByteLiteral(b) => push_u8!(*b),
+ Sem::ShortLiteral(s) => push_u16!(*s),
+ Sem::Instruction(b) => push_u8!(*b),
+
+ Sem::MacroDefinitionTerminator => unreachable!(),
+ Sem::Comment => (),
+ Sem::Error(..) => (),
+ };
+ let end_addr = bytecode.len() as u16;
+ semantic_token.bytecode_location.start = start_addr;
+ semantic_token.bytecode_location.length = end_addr - start_addr;
+ }
+
+ // Fill each label reference with the address of the matching label definition
+ for (index, slots) in reference_addresses {
+ if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type {
+ let [h,l] = definition.address.to_be_bytes();
+ for slot in slots {
+ bytecode[slot as usize] = h;
+ bytecode[slot.wrapping_add(1) as usize] = l;
+ }
+ } else { unreachable!() }
+ }
+
+ // Move references and macro body tokens into label and macro definition tokens
+ for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
+ if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type {
+ definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap();
+ if let Some(references) = reference_tokens.remove(&index) {
+ definition.references = references;
+ }
+ } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type {
+ if let Some(references) = reference_tokens.remove(&index) {
+ definition.references = references;
+ }
+ }
+ }
+ assert_eq!(reference_tokens.len(), 0);
+
+ // Remove trailing null bytes from the bytecode
+ if let Some(final_nonnull_byte) = bytecode.iter().rposition(|b| *b != 0) {
+ let truncated_length = final_nonnull_byte + 1;
+ let removed_byte_count = bytecode.len() - truncated_length;
+ if removed_byte_count > 0 {
+ bytecode.truncate(truncated_length);
+ }
+ }
+
+ (bytecode, semantic_tokens)
+ }
+
+ fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize) -> SemanticToken {
+ SemanticToken {
+ r#type: {
+ if let Some(err) = syn_token.error {
+ Sem::Error(syn_token.r#type, err)
+ } else {
+ match syn_token.r#type {
+ Syn::Reference(ref name) => {
+ match self.symbol_definitions.get(name) {
+ Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i),
+ Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i),
+ None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
+ }
+ }
+ Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))},
+ Syn::MacroDefinition(name) => {
+ let mut sem_body_tokens = Vec::new();
+ for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() {
+ // Make the source location of the macro definition token span the entire definition
+ if syn_body_token.is_macro_terminator() {
+ syn_token.source_location.end = syn_body_token.source_location.start;
+ }
+ let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0);
+ sem_body_tokens.push(sem_body_token);
+ }
+ self.semantic_macro_bodies.insert(index, sem_body_tokens);
+ Sem::MacroDefinition(MacroDefinition::new(name))
+ },
+ Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator,
+ Syn::Padding(v) => Sem::Padding(v),
+ Syn::ByteLiteral(v) => Sem::ByteLiteral(v),
+ Syn::ShortLiteral(v) => Sem::ShortLiteral(v),
+ Syn::Instruction(v) => Sem::Instruction(v),
+ Syn::Comment => Sem::Comment,
+ }
+ }
+ },
+ source_location: syn_token.source_location,
+ bytecode_location: BytecodeLocation::zero(),
+ }
+ }
+
+ fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) {
+ macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
+ macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
+ macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}
+
+ for body_token in self.semantic_macro_bodies.get(&index).unwrap() {
+ let start_addr = bytecode.len() as u16;
+ match &body_token.r#type {
+ Sem::LabelReference(i) => {
+ reference_addresses.entry(*i).or_default().push(start_addr);
+ push_u16!(0u16);
+ },
+ Sem::MacroReference(i) => {
+ self.expand_macro_reference(*i, bytecode, reference_addresses);
+ },
+ Sem::LabelDefinition(_) => unreachable!(),
+ Sem::MacroDefinition(_) => unreachable!(),
+
+ Sem::Padding(p) => pad!(*p),
+ Sem::ByteLiteral(b) => push_u8!(*b),
+ Sem::ShortLiteral(s) => push_u16!(*s),
+ Sem::Instruction(b) => push_u8!(*b),
+
+ Sem::MacroDefinitionTerminator => (),
+ Sem::Comment => (),
+ Sem::Error(..) => (),
+ };
+ }
+ }
+}