Diffstat (limited to 'src/translators')
-rw-r--r-- | src/translators/bytecode_generator.rs | 131
-rw-r--r-- | src/translators/semantic_parser.rs    | 245
-rw-r--r-- | src/translators/symbols_generator.rs  |  20
-rw-r--r-- | src/translators/syntactic_parser.rs   | 117
4 files changed, 0 insertions, 513 deletions
diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs
deleted file mode 100644
index 956aca5..0000000
--- a/src/translators/bytecode_generator.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-use crate::*;
-
-use SemanticTokenVariant as SemVar;
-
-
-pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> {
-    let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens);
-    generator.generate()
-}
-
-
-/// Translate semantic tokens into bytecode.
-struct BytecodeGenerator<'a> {
-    semantic_tokens: &'a mut [SemanticToken],
-    block_stack: Vec<usize>,
-    bytecode: Vec<u8>,
-    /// (address in bytecode, label definition token index)
-    label_references: Vec<(usize, usize)>,
-}
-
-impl<'a> BytecodeGenerator<'a> {
-    pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self {
-        Self {
-            semantic_tokens,
-            block_stack: Vec::new(),
-            bytecode: Vec::new(),
-            label_references: Vec::new(),
-        }
-    }
-
-    pub fn generate(mut self) -> Vec<u8> {
-        for i in 0..self.semantic_tokens.len() {
-            let address = self.bytecode.len();
-            self.generate_bytecode_for_token(i, None);
-            self.semantic_tokens[i].bytecode = BytecodeSpan {
-                bytes: self.bytecode[address..].to_vec(),
-                location: BytecodeLocation {
-                    address,
-                    length: self.bytecode.len().saturating_sub(address),
-                }
-            };
-        }
-
-        // Replace blank label references in bytecode with real label addresses.
-        // The layer of indirection is necessary because the iteration borrows
-        // self immutably.
-        let mut insertions: Vec<(usize, u16)> = Vec::new();
-        for (bytecode_address, token_pointer) in &self.label_references {
-            let label_token = &self.semantic_tokens[*token_pointer];
-            // TODO: If greater than u16, print a warning.
-            let address_value = label_token.bytecode.location.address as u16;
-            insertions.push((*bytecode_address, address_value));
-        }
-        for (bytecode_address, address_value) in insertions {
-            self.replace_address_in_bytecode(bytecode_address, address_value);
-        }
-
-        // Strip trailing null bytes from the bytecode.
-        let mut length = self.bytecode.len();
-        for (i, byte) in self.bytecode.iter().enumerate().rev() {
-            match *byte == 0 {
-                true => length = i,
-                false => break,
-            };
-        }
-        self.bytecode.truncate(length);
-
-        return self.bytecode;
-    }
-
-    fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
-        macro_rules! push_byte {
-            ($byte:expr) => { self.bytecode.push($byte) }; }
-        macro_rules! push_double {
-            ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; }
-        macro_rules! pad {
-            ($len:expr) => { for _ in 0..$len { push_byte!(0); } } }
-
-        let semantic_token = if let Some(macro_pointer) = macro_pointer {
-            let macro_definition = &self.semantic_tokens[macro_pointer];
-            if let SemVar::MacroDefinition(def) = &macro_definition.variant {
-                &def.body_tokens[pointer]
-            } else { unreachable!() }
-        } else {
-            &self.semantic_tokens[pointer]
-        };
-        match &semantic_token.variant {
-            SemVar::MacroInvocation(pointer) => {
-                let macro_definition = &self.semantic_tokens[*pointer];
-                if let SemVar::MacroDefinition(def) = &macro_definition.variant {
-                    let length = def.body_tokens.len();
-                    let macro_pointer = Some(*pointer);
-                    for body_pointer in 0..length {
-                        // Recurse, generate bytecode for each macro body token.
-                        self.generate_bytecode_for_token(body_pointer, macro_pointer);
-                    }
-                } else { unreachable!() }
-            }
-            SemVar::Literal(value) => match value {
-                Value::Byte(value) => push_byte!(*value),
-                Value::Double(value) => push_double!(value),
-            }
-            SemVar::Padding(value) => match value {
-                Value::Byte(value) => pad!(*value),
-                Value::Double(value) => pad!(*value),
-            }
-            SemVar::Instruction(instr) => push_byte!(instr.value),
-            SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes),
-            SemVar::LabelReference(pointer) => {
-                self.label_references.push((self.bytecode.len(), *pointer));
-                push_double!(0u16);
-            }
-            SemVar::BlockOpen(_) => {
-                self.block_stack.push(self.bytecode.len());
-                push_double!(0u16);
-            }
-            SemVar::BlockClose(_) => {
-                let bytecode_address = self.block_stack.pop().unwrap();
-                // TODO: If greater than u16, print a warning.
-                let address_value = self.bytecode.len() as u16;
-                self.replace_address_in_bytecode(bytecode_address, address_value);
-            }
-            _ => (),
-        };
-    }
-
-    fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) {
-        let range = bytecode_address..bytecode_address+2;
-        self.bytecode[range].clone_from_slice(&address_value.to_be_bytes());
-    }
-}
diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs
deleted file mode 100644
index cb6a435..0000000
--- a/src/translators/semantic_parser.rs
+++ /dev/null
@@ -1,245 +0,0 @@
-use crate::*;
-
-use std::collections::HashMap;
-use std::path::PathBuf;
-
-use SyntacticTokenVariant as SynVar;
-use SemanticTokenVariant as SemVar;
-use SemanticParseError as SemErr;
-
-
-pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
-    let semantic_parser = SemanticParser::from_source_code(source_code, path);
-    semantic_parser.parse()
-}
-
-
-/// Translate syntactic tokens into semantic tokens.
-struct SemanticParser {
-    labels: HashMap<String, Definition>,
-    macros: HashMap<String, Definition>,
-    syntactic_tokens: Vec<SyntacticToken>,
-    /// Index of the current outer token.
-    current_outer_index: usize,
-}
-
-impl SemanticParser {
-    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
-        let mut labels = HashMap::new();
-        let mut macros = HashMap::new();
-        let mut syntactic_tokens = Vec::new();
-
-        let parser = SyntacticParser::from_source_code(source_code, path);
-        for syntactic_token in parser {
-            let definition = Definition::new(syntactic_token.source.clone());
-            match &syntactic_token.variant {
-                SynVar::LabelDefinition(name) => {
-                    let _ = labels.try_insert(name.to_owned(), definition);
-                },
-                SynVar::MacroDefinition(name) => {
-                    let _ = macros.try_insert(name.to_owned(), definition);
-                },
-                _ => (),
-            }
-            syntactic_tokens.push(syntactic_token);
-        }
-
-        Self {
-            labels,
-            macros,
-            syntactic_tokens,
-            current_outer_index: 0,
-        }
-    }
-
-    /// Parse syntactic tokens as semantic tokens.
-    pub fn parse(mut self) -> Vec<SemanticToken> {
-        let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens);
-        let mut syntactic = syntactic_tokens.into_iter();
-        let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false);
-
-        // Insert real label definition pointers into label reference tokens.
-        for definition in self.labels.values_mut() {
-            if let Some(definition_pointer) = definition.pointer {
-                // Insert definition pointer into reference tokens.
-                for reference_pointer in &definition.references {
-                    let reference_token = &mut semantic_tokens[*reference_pointer];
-                    reference_token.variant = SemVar::LabelReference(definition_pointer);
-                }
-                // Insert reference pointers into definition token.
-                let definition_token = &mut semantic_tokens[definition_pointer];
-                if let SemVar::LabelDefinition(ref mut def) = definition_token.variant {
-                    def.references = std::mem::take(&mut definition.references);
-                } else { unreachable!() }
-                // Insert definition pointer into reference tokens inside macros.
-                for (outer, inner) in &definition.deep_references {
-                    let macro_token = &mut semantic_tokens[*outer];
-                    if let SemVar::MacroDefinition(ref mut def) = macro_token.variant {
-                        let reference_token = &mut def.body_tokens[*inner];
-                        reference_token.variant = SemVar::LabelReference(definition_pointer);
-                    } else { unreachable!() }
-                }
-                // TODO: Record deep references in macro and label definitions?
-            }
-        }
-
-        return semantic_tokens;
-    }
-
-    fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken>
-        where I: Iterator<Item = SyntacticToken>
-    {
-        let mut semantic_tokens: Vec<SemanticToken> = Vec::new();
-        let mut block_stack: Vec<usize> = Vec::new();
-
-        while let Some(syntactic_token) = parser.next() {
-            let current_index = semantic_tokens.len();
-            if !in_macro {
-                self.current_outer_index = current_index;
-            }
-
-            let semantic_token_variant = match syntactic_token.variant {
-                SynVar::LabelDefinition(name) => {
-                    if in_macro {
-                        SemVar::Error(SemErr::LabelDefinitionInMacroDefinition)
-                    } else if let Some(definition) = self.macros.get(&name) {
-                        let source = definition.source.clone();
-                        SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                    } else if let Some(definition) = self.labels.get_mut(&name) {
-                        if definition.pointer.is_some() {
-                            let source = definition.source.clone();
-                            SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                        } else {
-                            definition.pointer = Some(current_index);
-                            let references = Vec::new();
-                            SemVar::LabelDefinition(LabelDefinition { name, references })
-                        }
-                    } else {
-                        unreachable!()
-                    }
-                }
-                SynVar::MacroDefinition(name) => {
-                    if in_macro {
-                        SemVar::Error(SemErr::MacroDefinitionInMacroDefinition)
-                    } else if let Some(definition) = self.labels.get(&name) {
-                        let source = definition.source.clone();
-                        SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                    } else if let Some(definition) = self.macros.get_mut(&name) {
-                        if definition.pointer.is_some() {
-                            let source = definition.source.clone();
-                            SemVar::Error(SemErr::RedefinedSymbol((name, source)))
-                        } else {
-                            definition.pointer = Some(current_index);
-                            let references = Vec::new();
-                            let body_tokens = self.pull_semantic_tokens(parser, true);
-                            SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens })
-                        }
-                    } else {
-                        unreachable!()
-                    }
-                }
-                SynVar::MacroDefinitionTerminator => if in_macro {
-                    break;
-                } else {
-                    SemVar::Error(SemErr::StrayMacroTerminator)
-                }
-                SynVar::Literal(value) => {
-                    SemVar::Literal(value)
-                }
-                SynVar::Padding(value) => {
-                    SemVar::Padding(value)
-                }
-                SynVar::Instruction(instr) => {
-                    SemVar::Instruction(instr)
-                }
-                SynVar::Comment(comment) => {
-                    SemVar::Comment(comment)
-                }
-                SynVar::String(bytes) => {
-                    SemVar::String(bytes)
-                }
-                SynVar::BlockOpen => {
-                    block_stack.push(current_index);
-                    SemVar::BlockOpen(0)
-                }
-                SynVar::BlockClose => {
-                    if let Some(pointer) = block_stack.pop() {
-                        let open = &mut semantic_tokens[pointer];
-                        open.variant = SemVar::BlockOpen(current_index);
-                        SemVar::BlockClose(pointer)
-                    } else {
-                        SemVar::Error(SemErr::StrayBlockClose)
-                    }
-                }
-                SynVar::MarkOpen => {
-                    SemVar::MarkOpen
-                }
-                SynVar::MarkClose => {
-                    SemVar::MarkClose
-                }
-                SynVar::Symbol(name) => {
-                    if let Some(definition) = self.labels.get_mut(&name) {
-                        if in_macro {
-                            let pointer = (self.current_outer_index, current_index);
-                            definition.deep_references.push(pointer);
-                        } else {
-                            definition.references.push(current_index);
-                        }
-                        SemVar::LabelReference(0)
-                    } else if let Some(definition) = self.macros.get_mut(&name) {
-                        if let Some(pointer) = definition.pointer {
-                            if !in_macro { definition.references.push(current_index); }
-                            SemVar::MacroInvocation(pointer)
-                        } else {
-                            let source = definition.source.clone();
-                            SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source)))
-                        }
-                    } else {
-                        SemVar::Error(SemErr::UndefinedSymbol(name))
-                    }
-                }
-                SynVar::Error(syntax_err) => {
-                    SemVar::Error(SemErr::SyntaxError(syntax_err))
-                }
-            };
-
-            let semantic_token = SemanticToken {
-                source: syntactic_token.source,
-                bytecode: BytecodeSpan::default(),
-                variant: semantic_token_variant,
-            };
-            semantic_tokens.push(semantic_token);
-        }
-
-        if in_macro {
-            //TODO: UnterminatedMacroDefinition
-        }
-
-        // Replace each unclosed BlockOpen token with an error.
-        for block_pointer in block_stack {
-            semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock);
-        }
-
-        return semantic_tokens;
-    }
-}
-
-
-struct Definition {
-    pub source: SourceSpan,
-    pub pointer: Option<usize>,
-    pub references: Vec<usize>,
-    /// (macro index, label reference index)
-    pub deep_references: Vec<(usize, usize)>,
-}
-
-impl Definition {
-    pub fn new(source: SourceSpan) -> Self {
-        Self {
-            source,
-            pointer: None,
-            references: Vec::new(),
-            deep_references: Vec::new(),
-        }
-    }
-}
diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs
deleted file mode 100644
index d30facd..0000000
--- a/src/translators/symbols_generator.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-use crate::*;
-
-use SemanticTokenVariant as SemVar;
-
-
-pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String {
-    let mut symbols = String::new();
-
-    for token in semantic_tokens {
-        if let SemVar::LabelDefinition(definition) = &token.variant {
-            let address = token.bytecode.location.address;
-            if address > 0xffff { break; }
-            let name = &definition.name;
-            let location = token.source.location();
-            symbols.push_str(&format!("{address:04x} {name} {location}\n"));
-        }
-    }
-
-    return symbols;
-}
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
deleted file mode 100644
index 8f0850b..0000000
--- a/src/translators/syntactic_parser.rs
+++ /dev/null
@@ -1,117 +0,0 @@
-use crate::*;
-
-use std::path::PathBuf;
-
-
-/// Translate raw source code characters into syntactic tokens.
-pub struct SyntacticParser {
-    tokeniser: Tokeniser,
-    /// The name of the most recently parsed label.
-    label: String,
-}
-
-
-impl SyntacticParser {
-    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
-        let mut tokeniser = Tokeniser::new(source_code, path);
-        tokeniser.add_delimiters(&['@','&','%',';','[',']','{','}','(','"','\'','#','~']);
-        tokeniser.add_terminators(&[':']);
-        Self { tokeniser, label: String::new() }
-    }
-}
-
-
-impl Iterator for SyntacticParser {
-    type Item = SyntacticToken;
-
-    /// Sequentially parse tokens from the source code.
-    fn next(&mut self) -> Option<SyntacticToken> {
-        use SyntacticTokenVariant as SynVar;
-        use SyntacticParseError as SynErr;
-        let t = &mut self.tokeniser;
-
-        t.drop_whitespace();
-        t.mark_start_position();
-
-        let variant = match t.eat_char()? {
-            '@' => {
-                self.label = t.eat_token();
-                SynVar::LabelDefinition(self.label.clone())
-            }
-            '&' => {
-                let token = t.eat_token();
-                SynVar::LabelDefinition(format!("{}/{token}", self.label))
-            }
-            '%' => SynVar::MacroDefinition(t.eat_token()),
-            ';' => SynVar::MacroDefinitionTerminator,
-            '[' => SynVar::MarkOpen,
-            ']' => SynVar::MarkClose,
-            '{' => SynVar::BlockOpen,
-            '}' => SynVar::BlockClose,
-            '(' => match t.eat_to_delimiter(')') {
-                Some(string) => SynVar::Comment(string),
-                None => SynVar::Error(SynErr::UnterminatedComment),
-            }
-            '\'' => match t.eat_to_delimiter('\'') {
-                Some(string) => SynVar::String(string.as_bytes().to_vec()),
-                None => SynVar::Error(SynErr::UnterminatedRawString),
-            }
-            '"' => match t.eat_to_delimiter('"') {
-                Some(string) => {
-                    let mut bytes = string.as_bytes().to_vec();
-                    bytes.push(0x00);
-                    SynVar::String(bytes)
-                }
-                None => SynVar::Error(SynErr::UnterminatedNullString),
-            }
-            '#' => {
-                let token = t.eat_token();
-                match token.parse::<Value>() {
-                    Ok(value) => SynVar::Padding(value),
-                    Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
-                }
-            },
-            '~' => {
-                let token = t.eat_token();
-                let symbol = format!("{}/{token}", self.label);
-                SynVar::Symbol(symbol)
-            }
-            ':' => SynVar::Symbol(String::from(':')),
-            c => {
-                let token = format!("{c}{}", t.eat_token());
-                match token.parse::<Value>() {
-                    Ok(value) => SynVar::Literal(value),
-                    Err(_) => match token.parse::<Instruction>() {
-                        Ok(instruction) => SynVar::Instruction(instruction),
-                        Err(_) => SynVar::Symbol(token),
-                    }
-                }
-            }
-        };
-
-        // Parse source path comments.
-        if let SynVar::Comment(comment) = &variant {
-            // Check if the comment fills the entire line.
-            if t.start_position.column == 0 && t.end_of_line() {
-                if let Some(path) = comment.strip_prefix(": ") {
-                    t.source_path = Some(PathBuf::from(path.trim()));
-                    t.embedded_first_line = t.start_position.line + 1;
-                }
-            }
-        }
-
-        let source = t.get_source_span();
-        Some( SyntacticToken { source, variant } )
-    }
-}
-
-
-#[derive(Debug)]
-pub enum ParseError {
-    InvalidExtension,
-    NotFound,
-    NotReadable,
-    IsADirectory,
-    InvalidUtf8,
-    Unknown,
-}