summaryrefslogtreecommitdiff
path: root/src/assembler.rs
blob: 692eb14febe8e064eb881773f85519786152ec3c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
use std::mem::take;
use std::collections::hash_map::Entry;

use SyntacticTokenType as Syn;
use SemanticTokenType as Sem;
use crate::*;

use std::collections::HashMap;

/// A symbol defined in the source program, distinguishing macros from labels.
///
/// The inner value is the index of the token that defines this symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SymbolDefinition {
    /// Defined by a macro definition token at the contained index.
    Macro(usize),
    /// Defined by a label definition token at the contained index.
    Label(usize),
}

/// Multi-pass assembler state: tokenise the source, resolve references,
/// then generate bytecode. Populated incrementally by the methods on the
/// `impl` block below.
pub struct Assembler {
    /// The contents of the program as a list of syntactic tokens.
    /// Macro body tokens are excluded; they live in `syntactic_macro_bodies`.
    syntactic_tokens: Vec<SyntacticToken>,
    /// The contents of the program as a list of semantic tokens.
    /// Filled by `resolve_references`, one per syntactic token.
    semantic_tokens: Vec<SemanticToken>,
    /// Map the name of each defined symbol to the index of the defining token.
    symbol_definitions: HashMap<String, SymbolDefinition>,
    /// Map each macro definition token index to a list of syntactic body tokens.
    syntactic_macro_bodies: HashMap<usize, Vec<SyntacticToken>>,
    /// Map each macro definition token index to a list of semantic body tokens.
    /// Drained into the owning `MacroDefinition` tokens by `generate_bytecode`.
    semantic_macro_bodies: HashMap<usize, Vec<SemanticToken>>,
}

impl Assembler {
    /// Maximum macro expansion depth before a macro is considered cyclic.
    const MAX_MACRO_DEPTH: usize = 16;

    /// Create an empty assembler with no tokens or symbol definitions.
    pub fn new() -> Self {
        Self {
            syntactic_tokens: Vec::new(),
            semantic_tokens: Vec::new(),
            symbol_definitions: HashMap::new(),
            syntactic_macro_bodies: HashMap::new(),
            semantic_macro_bodies: HashMap::new(),
        }
    }

    /// First pass: split the source text into syntactic tokens.
    ///
    /// Tokens inside a macro definition are diverted into
    /// `syntactic_macro_bodies` (keyed by the index of the defining token)
    /// instead of the main token list. Duplicate symbol names and orphaned
    /// macro terminators are marked with errors on the offending token.
    pub fn tokenise_source(&mut self, source_code: &str) {
        // The index of the current macro definition token, if the iterator
        // is currently inside a macro body.
        let mut macro_definition: Option<usize> = None;
        let mut macro_definition_body_tokens: Vec<SyntacticToken> = Vec::new();

        for mut token in TokenIterator::from_str(source_code) {
            let next_index = self.syntactic_tokens.len();
            if let Some(index) = macro_definition {
                token.use_in_macro_body();
                if token.is_macro_terminator() {
                    // Commit the current macro definition, terminator included.
                    macro_definition_body_tokens.push(token);
                    self.syntactic_macro_bodies.insert(
                        index, take(&mut macro_definition_body_tokens));
                    macro_definition = None;
                } else {
                    macro_definition_body_tokens.push(token);
                }
            } else {
                if let Syn::MacroDefinition(ref name) = token.r#type {
                    macro_definition = Some(next_index);
                    // A macro name may not shadow any earlier macro or label.
                    match self.symbol_definitions.entry(name.to_string()) {
                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Macro(next_index));}
                    }
                } else if let Syn::LabelDefinition(ref name) = token.r#type {
                    match self.symbol_definitions.entry(name.to_string()) {
                        Entry::Occupied(_) => {token.set_error(Error::DuplicateDefinition);}
                        Entry::Vacant(v) => {v.insert(SymbolDefinition::Label(next_index));}
                    }
                } else if token.is_macro_terminator() {
                    // A terminator outside any macro definition is an error.
                    token.set_error(Error::OrphanedMacroDefinitionTerminator);
                }
                self.syntactic_tokens.push(token);
            }
        }
    }

    /// Second pass: convert every syntactic token into a semantic token,
    /// resolving each symbol reference to the index of its defining token.
    ///
    /// Macros whose expansion would recurse beyond `Self::MAX_MACRO_DEPTH`
    /// are treated as cyclic, and every reference to them inside a macro
    /// body is replaced with an error token so that later expansion in
    /// `generate_bytecode` cannot recurse forever.
    pub fn resolve_references(&mut self) {
        let syntactic_tokens = take(&mut self.syntactic_tokens);
        let syntactic_token_count = syntactic_tokens.len();
        // Name of the most recently seen label definition; attached to each
        // converted token as its parent label.
        let mut parent_label = None;

        for (index, syntactic_token) in syntactic_tokens.into_iter().enumerate() {
            if let SyntacticTokenType::LabelDefinition(name) = &syntactic_token.r#type {
                parent_label = Some(name.to_owned());
            }
            let semantic_token = self.convert_syn_token_to_sem_token(syntactic_token, index, parent_label.clone());
            self.semantic_tokens.push(semantic_token);
        }
        // Every syntactic token must convert to exactly one semantic token,
        // preserving index correspondence between the two lists.
        assert_eq!(syntactic_token_count, self.semantic_tokens.len());

        // Find all cyclic macros.
        let cyclic_macros: Vec<usize> = self.semantic_macro_bodies.keys().copied().filter(
            |i| !self.traverse_macro_definition(*i, 0)).collect();
        // Replace each cyclic macro reference in a macro definition with an error.
        for body_tokens in self.semantic_macro_bodies.values_mut() {
            for body_token in body_tokens {
                if let Sem::MacroReference(i) = body_token.r#type {
                    if cyclic_macros.contains(&i) {
                        let name = body_token.source_location.source.clone();
                        body_token.r#type = Sem::Error(Syn::Reference(name), Error::CyclicMacroReference);
                    }
                }
            }
        }

    }

    /// Attempt to recursively traverse the body tokens of a macro definition,
    /// returning false if the depth exceeds [`Self::MAX_MACRO_DEPTH`]
    /// (indicating a reference cycle), and returning true otherwise.
    fn traverse_macro_definition(&self, index: usize, level: usize) -> bool {
        if level >= Self::MAX_MACRO_DEPTH {
            false
        } else {
            self.semantic_macro_bodies[&index].iter().all(
                |token| if let Sem::MacroReference(i) = token.r#type {
                    self.traverse_macro_definition(i, level+1)
                } else {
                    true
                }
            )
        }
    }

    /// Third pass: translate the semantic tokens into bytecode, returning the
    /// bytecode together with the annotated semantic tokens.
    ///
    /// Label references are emitted as 16-bit big-endian placeholders and
    /// back-patched once every label address is known. Trailing null bytes
    /// are stripped from the final bytecode. Addresses are 16-bit, so the
    /// assembled program is assumed to fit in 64 KiB.
    pub fn generate_bytecode(&mut self) -> (Vec<u8>, Vec<SemanticToken>) {
        let mut bytecode: Vec<u8> = Vec::new();
        // Map each label definition token index to the bytecode addresses of the references
        let mut reference_addresses: HashMap<usize, Vec<u16>> = HashMap::new();
        // Map each label and macro definition token to a list of reference token indices
        let mut reference_tokens: HashMap<usize, Vec<usize>> = HashMap::new();

        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}

        let mut semantic_tokens = take(&mut self.semantic_tokens);

        // Translate semantic tokens into bytecode
        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
            let start_addr = bytecode.len() as u16;
            match &mut semantic_token.r#type {
                Sem::LabelReference(i) => {
                    reference_tokens.entry(*i).or_default().push(index);
                    // Reserve a two-byte slot; the real address is patched below.
                    reference_addresses.entry(*i).or_default().push(start_addr);
                    push_u16!(0);
                }
                Sem::MacroReference(i) => {
                    reference_tokens.entry(*i).or_default().push(index);
                    self.expand_macro_reference(*i, &mut bytecode, &mut reference_addresses);
                }
                Sem::LabelDefinition(def) => def.address=start_addr,
                Sem::MacroDefinition(_) => (),

                Sem::Padding(p) => pad!(*p),
                Sem::ByteLiteral(b) => push_u8!(*b),
                Sem::ShortLiteral(s) => push_u16!(*s),
                Sem::Instruction(b) => push_u8!(*b),

                // Terminators never reach the top level; they are consumed
                // into macro bodies during tokenisation.
                Sem::MacroDefinitionTerminator => unreachable!(),
                Sem::Comment => (),
                Sem::Error(..) => (),
            };
            let end_addr = bytecode.len() as u16;
            semantic_token.bytecode_location.start = start_addr;
            semantic_token.bytecode_location.length = end_addr - start_addr;
        }

        // Fill each label reference with the address of the matching label definition
        for (index, slots) in reference_addresses {
            if let Sem::LabelDefinition(definition) = &semantic_tokens[index].r#type {
                let [h,l] = definition.address.to_be_bytes();
                for slot in slots {
                    bytecode[slot as usize] = h;
                    bytecode[slot.wrapping_add(1) as usize] = l;
                }
            } else { unreachable!() }
        }

        // Move references and macro body tokens into label and macro definition tokens
        for (index, semantic_token) in semantic_tokens.iter_mut().enumerate() {
            if let Sem::MacroDefinition(definition) = &mut semantic_token.r#type {
                definition.body_tokens = self.semantic_macro_bodies.remove(&index).unwrap();
                if let Some(references) = reference_tokens.remove(&index) {
                    definition.references = references;
                }
            } else if let Sem::LabelDefinition(definition) = &mut semantic_token.r#type {
                if let Some(references) = reference_tokens.remove(&index) {
                    definition.references = references;
                }
            }
        }
        // Every recorded reference must belong to some definition token.
        assert!(reference_tokens.is_empty());

        // Remove trailing null bytes from the bytecode. If every byte is
        // null, the bytecode is truncated to zero length.
        let truncated_length = bytecode.iter()
            .rposition(|b| *b != 0)
            .map_or(0, |final_nonnull_byte| final_nonnull_byte + 1);
        bytecode.truncate(truncated_length);

        (bytecode, semantic_tokens)
    }

    /// Convert a single syntactic token (at `index` in the syntactic token
    /// list) into a semantic token, resolving references against the symbol
    /// table and recursively converting macro body tokens.
    fn convert_syn_token_to_sem_token(&mut self, mut syn_token: SyntacticToken, index: usize, parent_label: Option<String>) -> SemanticToken {
        SemanticToken {
            r#type: {
                if let Some(err) = syn_token.error {
                    // A tokenisation error overrides the token's meaning.
                    Sem::Error(syn_token.r#type, err)
                } else {
                    match syn_token.r#type {
                        Syn::Reference(ref name) => {
                            match self.symbol_definitions.get(name) {
                                Some(SymbolDefinition::Macro(i)) => Sem::MacroReference(*i),
                                Some(SymbolDefinition::Label(i)) => Sem::LabelReference(*i),
                                None => Sem::Error(syn_token.r#type, Error::UnresolvedReference),
                            }
                        }
                        Syn::LabelDefinition(name) => {Sem::LabelDefinition(LabelDefinition::new(name))},
                        Syn::MacroDefinition(name) => {
                            let mut sem_body_tokens = Vec::new();
                            for syn_body_token in self.syntactic_macro_bodies.remove(&index).unwrap() {
                                // Make the source location of the macro definition token span the entire definition
                                if syn_body_token.is_macro_terminator() {
                                    syn_token.source_location.end = syn_body_token.source_location.start;
                                }
                                // NOTE(review): body tokens are converted with a dummy
                                // index of 0 — assumes a macro body can never itself
                                // contain a macro definition; confirm in TokenIterator.
                                let sem_body_token = self.convert_syn_token_to_sem_token(syn_body_token, 0, parent_label.clone());
                                sem_body_tokens.push(sem_body_token);
                            }
                            self.semantic_macro_bodies.insert(index, sem_body_tokens);
                            Sem::MacroDefinition(MacroDefinition::new(name))
                        },
                        Syn::MacroDefinitionTerminator => Sem::MacroDefinitionTerminator,
                        Syn::Padding(v) => Sem::Padding(v),
                        Syn::ByteLiteral(v) => Sem::ByteLiteral(v),
                        Syn::ShortLiteral(v) => Sem::ShortLiteral(v),
                        Syn::Instruction(v) => Sem::Instruction(v),
                        Syn::Comment => Sem::Comment,
                    }
                }
            },
            source_location: syn_token.source_location,
            bytecode_location: BytecodeLocation::zero(),
            parent_label,
        }
    }

    /// Append the expanded bytecode of the macro at definition-token `index`,
    /// recording the placeholder slot of every label reference encountered in
    /// `reference_addresses` for later back-patching.
    ///
    /// Recursion terminates because `resolve_references` has already replaced
    /// every cyclic macro reference with an error token.
    fn expand_macro_reference(&self, index: usize, bytecode: &mut Vec<u8>, reference_addresses: &mut HashMap<usize, Vec<u16>>) {
        macro_rules! push_u8 {($v:expr) => {bytecode.push($v)};}
        macro_rules! push_u16 {($v:expr) => {bytecode.extend_from_slice(&u16::to_be_bytes($v))};}
        macro_rules! pad {($p:expr) => {bytecode.resize((bytecode.len() + $p as usize), 0)};}

        for body_token in self.semantic_macro_bodies.get(&index).unwrap() {
            let start_addr = bytecode.len() as u16;
            match &body_token.r#type {
                Sem::LabelReference(i) => {
                    // Reserve a placeholder slot, patched in generate_bytecode.
                    reference_addresses.entry(*i).or_default().push(start_addr);
                    push_u16!(0u16);
                },
                Sem::MacroReference(i) => {
                    self.expand_macro_reference(*i, bytecode, reference_addresses);
                },
                // Definitions cannot occur inside an expanded macro body.
                Sem::LabelDefinition(_) => unreachable!(),
                Sem::MacroDefinition(_) => unreachable!(),

                Sem::Padding(p) => pad!(*p),
                Sem::ByteLiteral(b) => push_u8!(*b),
                Sem::ShortLiteral(s) => push_u16!(*s),
                Sem::Instruction(b) => push_u8!(*b),

                Sem::MacroDefinitionTerminator => (),
                Sem::Comment => (),
                Sem::Error(..) => (),
            };
        }
    }
}