diff options
Diffstat (limited to 'src/translators')
-rw-r--r-- | src/translators/bytecode_generator.rs | 131 | ||||
-rw-r--r-- | src/translators/semantic_parser.rs | 245 | ||||
-rw-r--r-- | src/translators/symbols_generator.rs | 28 | ||||
-rw-r--r-- | src/translators/syntactic_parser.rs | 247 |
4 files changed, 651 insertions, 0 deletions
// ===== src/translators/bytecode_generator.rs (new file) =====

use crate::*;

use SemanticTokenVariant as SemVar;


/// Translate a slice of semantic tokens into a flat bytecode program.
///
/// The tokens are also mutated in place: each token's `bytecode` field is
/// filled in with the bytes and location generated for it.
pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> {
    let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens);
    generator.generate()
}


/// Translate semantic tokens into bytecode.
struct BytecodeGenerator<'a> {
    semantic_tokens: &'a mut [SemanticToken],
    // Bytecode addresses of the placeholders for currently-open blocks,
    // innermost last.
    block_stack: Vec<usize>,
    // Bytecode generated so far.
    bytecode: Vec<u8>,
    /// (address in bytecode, label definition token index)
    label_references: Vec<(usize, usize)>,
}

impl<'a> BytecodeGenerator<'a> {
    pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self {
        Self {
            semantic_tokens,
            block_stack: Vec::new(),
            bytecode: Vec::new(),
            label_references: Vec::new(),
        }
    }

    /// Generate bytecode for every token, then patch label references and
    /// trim trailing zero padding.
    pub fn generate(mut self) -> Vec<u8> {
        for i in 0..self.semantic_tokens.len() {
            let address = self.bytecode.len();
            self.generate_bytecode_for_token(i, None);
            // Record which bytes this token produced, and where.
            self.semantic_tokens[i].bytecode = BytecodeSpan {
                bytes: self.bytecode[address..].to_vec(),
                location: BytecodeLocation {
                    address,
                    length: self.bytecode.len().saturating_sub(address),
                }
            };
        }

        // Replace blank label references in bytecode with real label addresses.
        // The layer of indirection is necessary because the iteration borrows
        // self immutably.
        let mut insertions: Vec<(usize, u16)> = Vec::new();
        for (bytecode_address, token_pointer) in &self.label_references {
            let label_token = &self.semantic_tokens[*token_pointer];
            // TODO: If greater than u16, print a warning.
            let address_value = label_token.bytecode.location.address as u16;
            insertions.push((*bytecode_address, address_value));
        }
        for (bytecode_address, address_value) in insertions {
            self.replace_address_in_bytecode(bytecode_address, address_value);
        }

        // Strip trailing null bytes from the bytecode.
        let mut length = self.bytecode.len();
        for (i, byte) in self.bytecode.iter().enumerate().rev() {
            match *byte == 0 {
                true => length = i,
                false => break,
            };
        }
        self.bytecode.truncate(length);

        return self.bytecode;
    }

    /// Append the bytecode for a single token.
    ///
    /// When `macro_pointer` is `Some`, `pointer` indexes into the body
    /// tokens of the macro definition at that index; otherwise it indexes
    /// the top-level token slice.
    fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
        macro_rules! push_byte {
            ($byte:expr) => { self.bytecode.push($byte) }; }
        macro_rules! push_double {
            ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; }
        macro_rules! pad {
            ($len:expr) => { for _ in 0..$len { push_byte!(0); } } }

        let semantic_token = if let Some(macro_pointer) = macro_pointer {
            let macro_definition = &self.semantic_tokens[macro_pointer];
            if let SemVar::MacroDefinition(def) = &macro_definition.variant {
                &def.body_tokens[pointer]
            } else { unreachable!() }
        } else {
            &self.semantic_tokens[pointer]
        };
        match &semantic_token.variant {
            SemVar::MacroInvocation(pointer) => {
                let macro_definition = &self.semantic_tokens[*pointer];
                if let SemVar::MacroDefinition(def) = &macro_definition.variant {
                    let length = def.body_tokens.len();
                    let macro_pointer = Some(*pointer);
                    for body_pointer in 0..length {
                        // Recurse, generate bytecode for each macro body token.
                        self.generate_bytecode_for_token(body_pointer, macro_pointer);
                    }
                } else { unreachable!() }
            }
            SemVar::Literal(value) => match value {
                Value::Byte(value) => push_byte!(*value),
                Value::Double(value) => push_double!(value),
            }
            SemVar::Padding(value) => match value {
                Value::Byte(value) => pad!(*value),
                Value::Double(value) => pad!(*value),
            }
            SemVar::Instruction(instr) => push_byte!(instr.value),
            SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes),
            SemVar::LabelReference(pointer) => {
                // Reserve a two-byte placeholder; patched in generate().
                self.label_references.push((self.bytecode.len(), *pointer));
                push_double!(0u16);
            }
            SemVar::BlockOpen(_) => {
                // Reserve a placeholder for the matching close address.
                self.block_stack.push(self.bytecode.len());
                push_double!(0u16);
            }
            SemVar::BlockClose(_) => {
                let bytecode_address = self.block_stack.pop().unwrap();
                // TODO: If greater than u16, print a warning.
                let address_value = self.bytecode.len() as u16;
                self.replace_address_in_bytecode(bytecode_address, address_value);
            }
            _ => (),
        };
    }

    /// Overwrite the two placeholder bytes at `bytecode_address` with a
    /// big-endian address value.
    fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) {
        let range = bytecode_address..bytecode_address+2;
        self.bytecode[range].clone_from_slice(&address_value.to_be_bytes());
    }
}


// ===== src/translators/semantic_parser.rs (new file) =====

use crate::*;

use std::collections::HashMap;
use std::path::PathBuf;

use SyntacticTokenVariant as SynVar;
use SemanticTokenVariant as SemVar;
use SemanticParseError as SemErr;


/// Parse source code into semantic tokens, resolving labels and macros.
pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
    let semantic_parser = SemanticParser::from_source_code(source_code, path);
    semantic_parser.parse()
}


/// Translate syntactic tokens into semantic tokens.
+struct SemanticParser { + labels: HashMap<String, Definition>, + macros: HashMap<String, Definition>, + syntactic_tokens: Vec<SyntacticToken>, + /// Index of the current outer token. + current_outer_index: usize, +} + +impl SemanticParser { + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + let mut labels = HashMap::new(); + let mut macros = HashMap::new(); + let mut syntactic_tokens = Vec::new(); + + let parser = SyntacticParser::from_source_code(source_code, path); + for syntactic_token in parser { + let definition = Definition::new(syntactic_token.source.clone()); + match &syntactic_token.variant { + SynVar::LabelDefinition(name) => { + let _ = labels.try_insert(name.to_owned(), definition); + }, + SynVar::MacroDefinition(name) => { + let _ = macros.try_insert(name.to_owned(), definition); + }, + _ => (), + } + syntactic_tokens.push(syntactic_token); + } + + Self { + labels, + macros, + syntactic_tokens, + current_outer_index: 0, + } + } + + /// Parse syntactic tokens as semantic tokens. + pub fn parse(mut self) -> Vec<SemanticToken> { + let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens); + let mut syntactic = syntactic_tokens.into_iter(); + let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false); + + // Insert real label definition pointers into label reference tokens. + for definition in self.labels.values_mut() { + if let Some(definition_pointer) = definition.pointer { + // Insert definition pointer into reference tokens. + for reference_pointer in &definition.references { + let reference_token = &mut semantic_tokens[*reference_pointer]; + reference_token.variant = SemVar::LabelReference(definition_pointer); + } + // Insert reference pointers into definition token. 
+ let definition_token = &mut semantic_tokens[definition_pointer]; + if let SemVar::LabelDefinition(ref mut def) = definition_token.variant { + def.references = std::mem::take(&mut definition.references); + } else { unreachable!() } + // Insert definition pointer into reference tokens inside macros. + for (outer, inner) in &definition.deep_references { + let macro_token = &mut semantic_tokens[*outer]; + if let SemVar::MacroDefinition(ref mut def) = macro_token.variant { + let reference_token = &mut def.body_tokens[*inner]; + reference_token.variant = SemVar::LabelReference(definition_pointer); + } else { unreachable!() } + } + // TODO: Record deep references in macro and label definitions? + } + } + + return semantic_tokens; + } + + fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken> + where I: Iterator<Item = SyntacticToken> + { + let mut semantic_tokens: Vec<SemanticToken> = Vec::new(); + let mut block_stack: Vec<usize> = Vec::new(); + + while let Some(syntactic_token) = parser.next() { + let current_index = semantic_tokens.len(); + if !in_macro { + self.current_outer_index = current_index; + } + + let semantic_token_variant = match syntactic_token.variant { + SynVar::LabelDefinition(name) => { + if in_macro { + SemVar::Error(SemErr::LabelDefinitionInMacroDefinition) + } else if let Some(definition) = self.macros.get(&name) { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else if let Some(definition) = self.labels.get_mut(&name) { + if definition.pointer.is_some() { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else { + definition.pointer = Some(current_index); + let references = Vec::new(); + SemVar::LabelDefinition(LabelDefinition { name, references }) + } + } else { + unreachable!() + } + } + SynVar::MacroDefinition(name) => { + if in_macro { + SemVar::Error(SemErr::MacroDefinitionInMacroDefinition) + } else 
if let Some(definition) = self.labels.get(&name) { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else if let Some(definition) = self.macros.get_mut(&name) { + if definition.pointer.is_some() { + let source = definition.source.clone(); + SemVar::Error(SemErr::RedefinedSymbol((name, source))) + } else { + definition.pointer = Some(current_index); + let references = Vec::new(); + let body_tokens = self.pull_semantic_tokens(parser, true); + SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens }) + } + } else { + unreachable!() + } + } + SynVar::MacroDefinitionTerminator => if in_macro { + break; + } else { + SemVar::Error(SemErr::StrayMacroTerminator) + } + SynVar::Literal(value) => { + SemVar::Literal(value) + } + SynVar::Padding(value) => { + SemVar::Padding(value) + } + SynVar::Instruction(instr) => { + SemVar::Instruction(instr) + } + SynVar::Comment(comment) => { + SemVar::Comment(comment) + } + SynVar::String(bytes) => { + SemVar::String(bytes) + } + SynVar::BlockOpen => { + block_stack.push(current_index); + SemVar::BlockOpen(0) + } + SynVar::BlockClose => { + if let Some(pointer) = block_stack.pop() { + let open = &mut semantic_tokens[pointer]; + open.variant = SemVar::BlockOpen(current_index); + SemVar::BlockClose(pointer) + } else { + SemVar::Error(SemErr::StrayBlockClose) + } + } + SynVar::MarkOpen => { + SemVar::MarkOpen + } + SynVar::MarkClose => { + SemVar::MarkClose + } + SynVar::Symbol(name) => { + if let Some(definition) = self.labels.get_mut(&name) { + if in_macro { + let pointer = (self.current_outer_index, current_index); + definition.deep_references.push(pointer); + } else { + definition.references.push(current_index); + } + SemVar::LabelReference(0) + } else if let Some(definition) = self.macros.get_mut(&name) { + if let Some(pointer) = definition.pointer { + if !in_macro { definition.references.push(current_index); } + SemVar::MacroInvocation(pointer) + } else { + let 
source = definition.source.clone(); + SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source))) + } + } else { + SemVar::Error(SemErr::UndefinedSymbol(name)) + } + } + SynVar::Error(syntax_err) => { + SemVar::Error(SemErr::SyntaxError(syntax_err)) + } + }; + + let semantic_token = SemanticToken { + source: syntactic_token.source, + bytecode: BytecodeSpan::default(), + variant: semantic_token_variant, + }; + semantic_tokens.push(semantic_token); + } + + if in_macro { + //TODO: UnterminatedMacroDefinition + } + + // Replace each unclosed BlockOpen token with an error. + for block_pointer in block_stack { + semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock); + } + + return semantic_tokens; + } +} + + +struct Definition { + pub source: SourceSpan, + pub pointer: Option<usize>, + pub references: Vec<usize>, + /// (macro index, label reference index) + pub deep_references: Vec<(usize, usize)>, +} + +impl Definition { + pub fn new(source: SourceSpan) -> Self { + Self { + source, + pointer: None, + references: Vec::new(), + deep_references: Vec::new(), + } + } +} diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs new file mode 100644 index 0000000..06bbaa8 --- /dev/null +++ b/src/translators/symbols_generator.rs @@ -0,0 +1,28 @@ +use crate::*; + +use SemanticTokenVariant as SemVar; + + +pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String { + let mut symbols = String::new(); + + for token in semantic_tokens { + if let SemVar::LabelDefinition(definition) = &token.variant { + let address = token.bytecode.location.address; + if address > 0xffff { break; } + let name = &definition.name; + let path = match &token.source.in_source { + Some(source) => &source.path, + None => &token.source.in_merged.path, + }; + if let Some(path) = path { + let path = path.as_os_str().to_string_lossy(); + symbols.push_str(&format!("{address:04x} {name} {path}\n")); + } else { + 
symbols.push_str(&format!("{address:04x} {name}\n")); + } + } + } + + return symbols; +} diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs new file mode 100644 index 0000000..7279daf --- /dev/null +++ b/src/translators/syntactic_parser.rs @@ -0,0 +1,247 @@ +use crate::*; + +use std::path::PathBuf; + + +/// Translate raw source code characters into syntactic tokens. +pub struct SyntacticParser { + /// Path of file from which the source was read. + path: Option<PathBuf>, + /// Path of the original source file. + source_path: Option<PathBuf>, + /// Position of the next character to be read. + position: Position, + /// Previous value of the position field. + prev_position: Position, + /// Line where the embedded source file begins. + source_line_start: usize, + /// Characters waiting to be parsed, in reverse order. + chars: Vec<char>, + /// The token currently being parsed. + token_source_string: String, + /// The name of the most recently parsed label. + label: String, +} + + +impl SyntacticParser { + /// Parse source code. + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + Self { + path: path.map(|p| p.into()), + source_path: None, + position: Position { line: 0, column: 0 }, + prev_position: Position { line: 0, column: 0 }, + source_line_start: 0, + chars: source_code.chars().rev().collect(), + token_source_string: String::new(), + label: String::new(), + } + } + + /// Return the next character, keeping it on the queue. + fn peek_char(&self) -> Option<char> { + self.chars.last().copied() + } + + /// Return the next character, removing it from the queue. + fn eat_char(&mut self) -> Option<char> { + let option = self.chars.pop(); + if let Some(c) = option { + self.prev_position = self.position; + self.position.advance(c); + self.token_source_string.push(c); + } + return option; + } + + /// Remove the next character from the queue. 
+ fn drop_char(&mut self) { + if let Some(c) = self.chars.pop() { + self.prev_position = self.position; + self.position.advance(c); + } + } + + /// Remove leading whitespace. + fn drop_whitespace(&mut self) { + while let Some(c) = self.peek_char() { + match c.is_whitespace() { + true => self.drop_char(), + false => break, + } + } + } + + /// Remove a full token from the queue. + fn eat_token(&mut self) -> String { + const DELIMITERS: [char; 13] = + ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~']; + let mut token = String::new(); + while let Some(peek) = self.peek_char() { + if peek.is_whitespace() || DELIMITERS.contains(&peek) { + break; + } + let c = self.eat_char().unwrap(); + token.push(c); + if c == ':' { + break; + } + } + token + } + + /// Return all characters until the delimiter, removing all returned + /// characters and the delimiter from the queue. Returns None if end + /// of source is reached before delimiter is found. + fn eat_to_delim(&mut self, delim: char) -> Option<String> { + let mut token = String::new(); + while let Some(c) = self.eat_char() { + self.token_source_string.push(c); + match c == delim { + true => return Some(token), + false => token.push(c), + } + } + return None; + } + + fn is_line_empty(&self) -> bool { + for c in self.chars.iter().rev() { + if *c == '\n' { + return true; + } + if !c.is_whitespace() { + return false + } + } + return false; + } +} + + +impl Iterator for SyntacticParser { + type Item = SyntacticToken; + + /// Sequentially parse tokens from the source code. + fn next(&mut self) -> Option<SyntacticToken> { + use SyntacticTokenVariant as SynVar; + use SyntacticParseError as SynErr; + + self.drop_whitespace(); + let start = self.position; + + let variant = match self.eat_char()? 
{ + '@' => { + self.label = self.eat_token(); + SynVar::LabelDefinition(self.label.clone()) + } + '&' => { + let token = self.eat_token(); + let sublabel = format!("{}/{token}", self.label); + SynVar::LabelDefinition(sublabel) + } + '%' => SynVar::MacroDefinition(self.eat_token()), + ';' => SynVar::MacroDefinitionTerminator, + '[' => SynVar::MarkOpen, + ']' => SynVar::MarkClose, + '{' => SynVar::BlockOpen, + '}' => SynVar::BlockClose, + '(' => match self.eat_to_delim(')') { + Some(string) => SynVar::Comment(string), + None => SynVar::Error(SynErr::UnterminatedComment), + } + '\'' => match self.eat_to_delim('\'') { + Some(string) => SynVar::String(string.as_bytes().to_vec()), + None => SynVar::Error(SynErr::UnterminatedRawString), + } + '"' => match self.eat_to_delim('"') { + Some(string) => { + let mut bytes = string.as_bytes().to_vec(); + bytes.push(0x00); + SynVar::String(bytes) + } + None => SynVar::Error(SynErr::UnterminatedNullString), + } + '#' => { + let token = self.eat_token(); + match token.parse::<Value>() { + Ok(value) => SynVar::Padding(value), + Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)), + } + }, + '~' => { + let token = self.eat_token(); + let symbol = format!("{}/{token}", self.label); + SynVar::Symbol(symbol) + } + ':' => SynVar::Symbol(String::from(':')), + c => { + let token = format!("{c}{}", self.eat_token()); + match token.parse::<Value>() { + Ok(value) => SynVar::Literal(value), + Err(_) => match token.parse::<Instruction>() { + Ok(instruction) => SynVar::Instruction(instruction), + Err(_) => SynVar::Symbol(token), + } + } + } + }; + + // Parse source path comments. + if let SynVar::Comment(comment) = &variant { + // Check that the comment fills the entire line. + if start.column == 0 && self.is_line_empty() { + if let Some(path) = comment.strip_prefix(": ") { + self.source_path = Some(PathBuf::from(path.trim())); + self.source_line_start = start.line + 1; + } + } + } + + // Find location in current merged file. 
+ let in_merged = SourceLocation { + path: self.path.to_owned(), + start, + end: self.prev_position, + }; + + // Find location in original source file. + let in_source = if start.line >= self.source_line_start { + match &self.source_path { + Some(path) => { + let offset = self.source_line_start; + Some( SourceLocation { + path: Some(path.to_owned()), + start: Position { + line: in_merged.start.line.saturating_sub(offset), + column: in_merged.start.column, + }, + end: Position { + line: in_merged.end.line.saturating_sub(offset), + column: in_merged.end.column, + } + }) + } + None => None, + } + } else { + None + }; + + let string = std::mem::take(&mut self.token_source_string); + let source = SourceSpan { string, in_merged, in_source }; + Some( SyntacticToken { source, variant } ) + } +} + + +#[derive(Debug)] +pub enum ParseError { + InvalidExtension, + NotFound, + NotReadable, + IsADirectory, + InvalidUtf8, + Unknown, +} |