author    Ben Bridle <bridle.benjamin@gmail.com>  2024-10-28 19:52:29 +1300
committer Ben Bridle <bridle.benjamin@gmail.com>  2024-10-28 19:52:47 +1300
commit    f4027cae775e3c9c237675f9df35a744d54f3f2e (patch)
tree      733fa3af9e1bd44d61dd83983a2da86cb75c53e9 /src/translators
parent    16ee0e9e8dce2c88acc88ba5ffd97e013624fa5e (diff)
download  bedrock-asm-f4027cae775e3c9c237675f9df35a744d54f3f2e.zip
Rewrite assembler
This is an almost complete rewrite of the assembler from the ground up, with a different parsing strategy and a whole new symbol resolution mechanism for automatically including library files. The assembly syntax has also been slightly modified: padding tokens are now prefixed with '#' instead of '$', and a block-style anonymous-label syntax has been added, using the '{' and '}' characters.
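As an illustration, a hypothetical fragment in the new syntax (the label name and string are invented, not taken from this commit):

    @data            ( define the label 'data' )
    #0004            ( pad the output with four zero bytes )
    { 'hello' }      ( anonymous block: '{' assembles to the address of the matching '}' )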
Diffstat (limited to 'src/translators')
-rw-r--r--  src/translators/bytecode_generator.rs  131
-rw-r--r--  src/translators/semantic_parser.rs     245
-rw-r--r--  src/translators/symbols_generator.rs    28
-rw-r--r--  src/translators/syntactic_parser.rs    247
4 files changed, 651 insertions, 0 deletions
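Taken together, the four translators form a pipeline from source text to output bytecode and symbols. A minimal sketch of how a caller might drive them, using the public functions added below (the file name is hypothetical; the actual driver lives outside src/translators):

    let source = std::fs::read_to_string("main.asm").unwrap();
    let mut semantic_tokens = generate_semantic_tokens(&source, Some("main.asm"));
    let bytecode = generate_bytecode(&mut semantic_tokens);
    let symbols = generate_symbols_file(&semantic_tokens);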
diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs
new file mode 100644
index 0000000..956aca5
--- /dev/null
+++ b/src/translators/bytecode_generator.rs
@@ -0,0 +1,131 @@
+use crate::*;
+
+use SemanticTokenVariant as SemVar;
+
+
+pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> {
+ let generator = BytecodeGenerator::from_semantic_tokens(semantic_tokens);
+ generator.generate()
+}
+
+
+/// Translate semantic tokens into bytecode.
+struct BytecodeGenerator<'a> {
+ semantic_tokens: &'a mut [SemanticToken],
+ block_stack: Vec<usize>,
+ bytecode: Vec<u8>,
+ /// (address in bytecode, label definition token index)
+ label_references: Vec<(usize, usize)>,
+}
+
+impl<'a> BytecodeGenerator<'a> {
+ pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self {
+ Self {
+ semantic_tokens,
+ block_stack: Vec::new(),
+ bytecode: Vec::new(),
+ label_references: Vec::new(),
+ }
+ }
+
+ pub fn generate(mut self) -> Vec<u8> {
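+ // First pass: emit bytecode for every token, recording the span of
+ // output bytes that each token produced.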
+ for i in 0..self.semantic_tokens.len() {
+ let address = self.bytecode.len();
+ self.generate_bytecode_for_token(i, None);
+ self.semantic_tokens[i].bytecode = BytecodeSpan {
+ bytes: self.bytecode[address..].to_vec(),
+ location: BytecodeLocation {
+ address,
+ length: self.bytecode.len().saturating_sub(address),
+ }
+ };
+ }
+
+ // Replace placeholder label references in the bytecode with the real
+ // label addresses. The intermediate vector is necessary because
+ // iterating over self.label_references borrows self immutably, so
+ // self.bytecode cannot be modified until the loop has finished.
+ let mut insertions: Vec<(usize, u16)> = Vec::new();
+ for (bytecode_address, token_pointer) in &self.label_references {
+ let label_token = &self.semantic_tokens[*token_pointer];
+ // TODO: If greater than u16, print a warning.
+ let address_value = label_token.bytecode.location.address as u16;
+ insertions.push((*bytecode_address, address_value));
+ }
+ for (bytecode_address, address_value) in insertions {
+ self.replace_address_in_bytecode(bytecode_address, address_value);
+ }
+
+ // Strip trailing null bytes from the bytecode.
+ let mut length = self.bytecode.len();
+ for (i, byte) in self.bytecode.iter().enumerate().rev() {
+ match *byte == 0 {
+ true => length = i,
+ false => break,
+ };
+ }
+ self.bytecode.truncate(length);
+
+ return self.bytecode;
+ }
+
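+ /// Emit bytecode for a single semantic token. When `macro_pointer` is
+ /// Some, `pointer` indexes into that macro definition's body tokens
+ /// rather than into the top-level token list.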
+ fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
+ macro_rules! push_byte {
+ ($byte:expr) => { self.bytecode.push($byte) }; }
+ macro_rules! push_double {
+ ($double:expr) => { self.bytecode.extend_from_slice(&$double.to_be_bytes()) }; }
+ macro_rules! pad {
+ ($len:expr) => { for _ in 0..$len { push_byte!(0); } } }
+
+ let semantic_token = if let Some(macro_pointer) = macro_pointer {
+ let macro_definition = &self.semantic_tokens[macro_pointer];
+ if let SemVar::MacroDefinition(def) = &macro_definition.variant {
+ &def.body_tokens[pointer]
+ } else { unreachable!() }
+ } else {
+ &self.semantic_tokens[pointer]
+ };
+ match &semantic_token.variant {
+ SemVar::MacroInvocation(pointer) => {
+ let macro_definition = &self.semantic_tokens[*pointer];
+ if let SemVar::MacroDefinition(def) = &macro_definition.variant {
+ let length = def.body_tokens.len();
+ let macro_pointer = Some(*pointer);
+ for body_pointer in 0..length {
+ // Recurse, generate bytecode for each macro body token.
+ self.generate_bytecode_for_token(body_pointer, macro_pointer);
+ }
+ } else { unreachable!() }
+ }
+ SemVar::Literal(value) => match value {
+ Value::Byte(value) => push_byte!(*value),
+ Value::Double(value) => push_double!(value),
+ }
+ SemVar::Padding(value) => match value {
+ Value::Byte(value) => pad!(*value),
+ Value::Double(value) => pad!(*value),
+ }
+ SemVar::Instruction(instr) => push_byte!(instr.value),
+ SemVar::String(bytes) => self.bytecode.extend_from_slice(&bytes),
+ SemVar::LabelReference(pointer) => {
+ self.label_references.push((self.bytecode.len(), *pointer));
+ push_double!(0u16);
+ }
+ SemVar::BlockOpen(_) => {
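+ // Emit a placeholder double; the matching BlockClose token will
+ // backpatch it with the address of the end of the block.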
+ self.block_stack.push(self.bytecode.len());
+ push_double!(0u16);
+ }
+ SemVar::BlockClose(_) => {
+ let bytecode_address = self.block_stack.pop().unwrap();
+ // TODO: If greater than u16, print a warning.
+ let address_value = self.bytecode.len() as u16;
+ self.replace_address_in_bytecode(bytecode_address, address_value);
+ }
+ _ => (),
+ };
+ }
+
+ fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) {
+ let range = bytecode_address..bytecode_address+2;
+ self.bytecode[range].clone_from_slice(&address_value.to_be_bytes());
+ }
+}
diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs
new file mode 100644
index 0000000..cb6a435
--- /dev/null
+++ b/src/translators/semantic_parser.rs
@@ -0,0 +1,245 @@
+use crate::*;
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use SyntacticTokenVariant as SynVar;
+use SemanticTokenVariant as SemVar;
+use SemanticParseError as SemErr;
+
+
+pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
+ let semantic_parser = SemanticParser::from_source_code(source_code, path);
+ semantic_parser.parse()
+}
+
+
+/// Translate syntactic tokens into semantic tokens.
+struct SemanticParser {
+ labels: HashMap<String, Definition>,
+ macros: HashMap<String, Definition>,
+ syntactic_tokens: Vec<SyntacticToken>,
+ /// Index of the current outer token.
+ current_outer_index: usize,
+}
+
+impl SemanticParser {
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ let mut labels = HashMap::new();
+ let mut macros = HashMap::new();
+ let mut syntactic_tokens = Vec::new();
+
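+ // Pre-scan every token so that each symbol's definition is known
+ // before the main semantic pass; this is what allows forward
+ // references to labels.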
+ let parser = SyntacticParser::from_source_code(source_code, path);
+ for syntactic_token in parser {
+ let definition = Definition::new(syntactic_token.source.clone());
+ match &syntactic_token.variant {
+ SynVar::LabelDefinition(name) => {
+ // Keep the first definition of each name; redefinitions are
+ // reported as errors during the semantic pass.
+ labels.entry(name.to_owned()).or_insert(definition);
+ },
+ SynVar::MacroDefinition(name) => {
+ macros.entry(name.to_owned()).or_insert(definition);
+ },
+ _ => (),
+ }
+ syntactic_tokens.push(syntactic_token);
+ }
+
+ Self {
+ labels,
+ macros,
+ syntactic_tokens,
+ current_outer_index: 0,
+ }
+ }
+
+ /// Parse syntactic tokens as semantic tokens.
+ pub fn parse(mut self) -> Vec<SemanticToken> {
+ let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens);
+ let mut syntactic = syntactic_tokens.into_iter();
+ let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false);
+
+ // Insert real label definition pointers into label reference tokens.
+ for definition in self.labels.values_mut() {
+ if let Some(definition_pointer) = definition.pointer {
+ // Insert definition pointer into reference tokens.
+ for reference_pointer in &definition.references {
+ let reference_token = &mut semantic_tokens[*reference_pointer];
+ reference_token.variant = SemVar::LabelReference(definition_pointer);
+ }
+ // Insert reference pointers into definition token.
+ let definition_token = &mut semantic_tokens[definition_pointer];
+ if let SemVar::LabelDefinition(ref mut def) = definition_token.variant {
+ def.references = std::mem::take(&mut definition.references);
+ } else { unreachable!() }
+ // Insert definition pointer into reference tokens inside macros.
+ for (outer, inner) in &definition.deep_references {
+ let macro_token = &mut semantic_tokens[*outer];
+ if let SemVar::MacroDefinition(ref mut def) = macro_token.variant {
+ let reference_token = &mut def.body_tokens[*inner];
+ reference_token.variant = SemVar::LabelReference(definition_pointer);
+ } else { unreachable!() }
+ }
+ // TODO: Record deep references in macro and label definitions?
+ }
+ }
+
+ return semantic_tokens;
+ }
+
+ fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken>
+ where I: Iterator<Item = SyntacticToken>
+ {
+ let mut semantic_tokens: Vec<SemanticToken> = Vec::new();
+ let mut block_stack: Vec<usize> = Vec::new();
+
+ while let Some(syntactic_token) = parser.next() {
+ let current_index = semantic_tokens.len();
+ if !in_macro {
+ self.current_outer_index = current_index;
+ }
+
+ let semantic_token_variant = match syntactic_token.variant {
+ SynVar::LabelDefinition(name) => {
+ if in_macro {
+ SemVar::Error(SemErr::LabelDefinitionInMacroDefinition)
+ } else if let Some(definition) = self.macros.get(&name) {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else if let Some(definition) = self.labels.get_mut(&name) {
+ if definition.pointer.is_some() {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else {
+ definition.pointer = Some(current_index);
+ let references = Vec::new();
+ SemVar::LabelDefinition(LabelDefinition { name, references })
+ }
+ } else {
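+ // The pre-scan in from_source_code recorded every label definition.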
+ unreachable!()
+ }
+ }
+ SynVar::MacroDefinition(name) => {
+ if in_macro {
+ SemVar::Error(SemErr::MacroDefinitionInMacroDefinition)
+ } else if let Some(definition) = self.labels.get(&name) {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else if let Some(definition) = self.macros.get_mut(&name) {
+ if definition.pointer.is_some() {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::RedefinedSymbol((name, source)))
+ } else {
+ definition.pointer = Some(current_index);
+ let references = Vec::new();
+ let body_tokens = self.pull_semantic_tokens(parser, true);
+ SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens })
+ }
+ } else {
+ unreachable!()
+ }
+ }
+ SynVar::MacroDefinitionTerminator => if in_macro {
+ break;
+ } else {
+ SemVar::Error(SemErr::StrayMacroTerminator)
+ }
+ SynVar::Literal(value) => {
+ SemVar::Literal(value)
+ }
+ SynVar::Padding(value) => {
+ SemVar::Padding(value)
+ }
+ SynVar::Instruction(instr) => {
+ SemVar::Instruction(instr)
+ }
+ SynVar::Comment(comment) => {
+ SemVar::Comment(comment)
+ }
+ SynVar::String(bytes) => {
+ SemVar::String(bytes)
+ }
+ SynVar::BlockOpen => {
+ block_stack.push(current_index);
+ SemVar::BlockOpen(0)
+ }
+ SynVar::BlockClose => {
+ if let Some(pointer) = block_stack.pop() {
+ let open = &mut semantic_tokens[pointer];
+ open.variant = SemVar::BlockOpen(current_index);
+ SemVar::BlockClose(pointer)
+ } else {
+ SemVar::Error(SemErr::StrayBlockClose)
+ }
+ }
+ SynVar::MarkOpen => {
+ SemVar::MarkOpen
+ }
+ SynVar::MarkClose => {
+ SemVar::MarkClose
+ }
+ SynVar::Symbol(name) => {
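+ // A bare symbol is either a reference to a label or an
+ // invocation of a macro.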
+ if let Some(definition) = self.labels.get_mut(&name) {
+ if in_macro {
+ let pointer = (self.current_outer_index, current_index);
+ definition.deep_references.push(pointer);
+ } else {
+ definition.references.push(current_index);
+ }
+ SemVar::LabelReference(0)
+ } else if let Some(definition) = self.macros.get_mut(&name) {
+ if let Some(pointer) = definition.pointer {
+ if !in_macro { definition.references.push(current_index); }
+ SemVar::MacroInvocation(pointer)
+ } else {
+ let source = definition.source.clone();
+ SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source)))
+ }
+ } else {
+ SemVar::Error(SemErr::UndefinedSymbol(name))
+ }
+ }
+ SynVar::Error(syntax_err) => {
+ SemVar::Error(SemErr::SyntaxError(syntax_err))
+ }
+ };
+
+ let semantic_token = SemanticToken {
+ source: syntactic_token.source,
+ bytecode: BytecodeSpan::default(),
+ variant: semantic_token_variant,
+ };
+ semantic_tokens.push(semantic_token);
+ }
+
+ if in_macro {
+ //TODO: UnterminatedMacroDefinition
+ }
+
+ // Replace each unclosed BlockOpen token with an error.
+ for block_pointer in block_stack {
+ semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock);
+ }
+
+ return semantic_tokens;
+ }
+}
+
+
+struct Definition {
+ pub source: SourceSpan,
+ pub pointer: Option<usize>,
+ pub references: Vec<usize>,
+ /// (macro index, label reference index)
+ pub deep_references: Vec<(usize, usize)>,
+}
+
+impl Definition {
+ pub fn new(source: SourceSpan) -> Self {
+ Self {
+ source,
+ pointer: None,
+ references: Vec::new(),
+ deep_references: Vec::new(),
+ }
+ }
+}
diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs
new file mode 100644
index 0000000..06bbaa8
--- /dev/null
+++ b/src/translators/symbols_generator.rs
@@ -0,0 +1,28 @@
+use crate::*;
+
+use SemanticTokenVariant as SemVar;
+
+
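+/// Generate the contents of a symbols file: one "address name path" line
+/// per label definition, with the address written in hexadecimal.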
+pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String {
+ let mut symbols = String::new();
+
+ for token in semantic_tokens {
+ if let SemVar::LabelDefinition(definition) = &token.variant {
+ let address = token.bytecode.location.address;
+ if address > 0xffff { break; }
+ let name = &definition.name;
+ let path = match &token.source.in_source {
+ Some(source) => &source.path,
+ None => &token.source.in_merged.path,
+ };
+ if let Some(path) = path {
+ let path = path.as_os_str().to_string_lossy();
+ symbols.push_str(&format!("{address:04x} {name} {path}\n"));
+ } else {
+ symbols.push_str(&format!("{address:04x} {name}\n"));
+ }
+ }
+ }
+
+ return symbols;
+}
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
new file mode 100644
index 0000000..7279daf
--- /dev/null
+++ b/src/translators/syntactic_parser.rs
@@ -0,0 +1,247 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+/// Translate raw source code characters into syntactic tokens.
+pub struct SyntacticParser {
+ /// Path of the merged file from which the source was read.
+ path: Option<PathBuf>,
+ /// Path of the original source file, as recorded by a source path comment.
+ source_path: Option<PathBuf>,
+ /// Position of the next character to be read.
+ position: Position,
+ /// Previous value of the position field.
+ prev_position: Position,
+ /// Line where the embedded source file begins.
+ source_line_start: usize,
+ /// Characters waiting to be parsed, in reverse order.
+ chars: Vec<char>,
+ /// Source text consumed so far for the token currently being parsed.
+ token_source_string: String,
+ /// The name of the most recently parsed label.
+ label: String,
+}
+
+
+impl SyntacticParser {
+ /// Parse source code.
+ pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
+ Self {
+ path: path.map(|p| p.into()),
+ source_path: None,
+ position: Position { line: 0, column: 0 },
+ prev_position: Position { line: 0, column: 0 },
+ source_line_start: 0,
+ chars: source_code.chars().rev().collect(),
+ token_source_string: String::new(),
+ label: String::new(),
+ }
+ }
+
+ /// Return the next character, keeping it on the queue.
+ fn peek_char(&self) -> Option<char> {
+ self.chars.last().copied()
+ }
+
+ /// Return the next character, removing it from the queue.
+ fn eat_char(&mut self) -> Option<char> {
+ let option = self.chars.pop();
+ if let Some(c) = option {
+ self.prev_position = self.position;
+ self.position.advance(c);
+ self.token_source_string.push(c);
+ }
+ return option;
+ }
+
+ /// Remove the next character from the queue.
+ fn drop_char(&mut self) {
+ if let Some(c) = self.chars.pop() {
+ self.prev_position = self.position;
+ self.position.advance(c);
+ }
+ }
+
+ /// Remove leading whitespace.
+ fn drop_whitespace(&mut self) {
+ while let Some(c) = self.peek_char() {
+ match c.is_whitespace() {
+ true => self.drop_char(),
+ false => break,
+ }
+ }
+ }
+
+ /// Remove a full token from the queue.
+ fn eat_token(&mut self) -> String {
+ const DELIMITERS: [char; 13] =
+ ['@', '&', '%', ';', '[', ']', '{', '}', '(', '"', '\'', '#', '~'];
+ let mut token = String::new();
+ while let Some(peek) = self.peek_char() {
+ if peek.is_whitespace() || DELIMITERS.contains(&peek) {
+ break;
+ }
+ let c = self.eat_char().unwrap();
+ token.push(c);
+ if c == ':' {
+ break;
+ }
+ }
+ token
+ }
+
+ /// Return all characters until the delimiter, removing all returned
+ /// characters and the delimiter from the queue. Returns None if end
+ /// of source is reached before delimiter is found.
+ fn eat_to_delim(&mut self, delim: char) -> Option<String> {
+ let mut token = String::new();
+ while let Some(c) = self.eat_char() {
+ match c == delim {
+ true => return Some(token),
+ false => token.push(c),
+ }
+ }
+ return None;
+ }
+
+ fn is_line_empty(&self) -> bool {
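+ // `chars` is stored in reverse order, so iterating from the back
+ // scans forwards through the remaining source.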
+ for c in self.chars.iter().rev() {
+ if *c == '\n' {
+ return true;
+ }
+ if !c.is_whitespace() {
+ return false
+ }
+ }
+ return false;
+ }
+}
+
+
+impl Iterator for SyntacticParser {
+ type Item = SyntacticToken;
+
+ /// Sequentially parse tokens from the source code.
+ fn next(&mut self) -> Option<SyntacticToken> {
+ use SyntacticTokenVariant as SynVar;
+ use SyntacticParseError as SynErr;
+
+ self.drop_whitespace();
+ let start = self.position;
+
+ let variant = match self.eat_char()? {
+ '@' => {
+ self.label = self.eat_token();
+ SynVar::LabelDefinition(self.label.clone())
+ }
+ '&' => {
+ let token = self.eat_token();
+ let sublabel = format!("{}/{token}", self.label);
+ SynVar::LabelDefinition(sublabel)
+ }
+ '%' => SynVar::MacroDefinition(self.eat_token()),
+ ';' => SynVar::MacroDefinitionTerminator,
+ '[' => SynVar::MarkOpen,
+ ']' => SynVar::MarkClose,
+ '{' => SynVar::BlockOpen,
+ '}' => SynVar::BlockClose,
+ '(' => match self.eat_to_delim(')') {
+ Some(string) => SynVar::Comment(string),
+ None => SynVar::Error(SynErr::UnterminatedComment),
+ }
+ '\'' => match self.eat_to_delim('\'') {
+ Some(string) => SynVar::String(string.as_bytes().to_vec()),
+ None => SynVar::Error(SynErr::UnterminatedRawString),
+ }
+ '"' => match self.eat_to_delim('"') {
+ Some(string) => {
+ let mut bytes = string.as_bytes().to_vec();
+ bytes.push(0x00);
+ SynVar::String(bytes)
+ }
+ None => SynVar::Error(SynErr::UnterminatedNullString),
+ }
+ '#' => {
+ let token = self.eat_token();
+ match token.parse::<Value>() {
+ Ok(value) => SynVar::Padding(value),
+ Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
+ }
+ },
+ '~' => {
+ let token = self.eat_token();
+ let symbol = format!("{}/{token}", self.label);
+ SynVar::Symbol(symbol)
+ }
+ ':' => SynVar::Symbol(String::from(':')),
+ c => {
+ let token = format!("{c}{}", self.eat_token());
+ match token.parse::<Value>() {
+ Ok(value) => SynVar::Literal(value),
+ Err(_) => match token.parse::<Instruction>() {
+ Ok(instruction) => SynVar::Instruction(instruction),
+ Err(_) => SynVar::Symbol(token),
+ }
+ }
+ }
+ };
+
+ // Parse source path comments.
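+ // A full-line comment of the form "(: some/path)" records the file
+ // from which the following merged lines were originally taken.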
+ if let SynVar::Comment(comment) = &variant {
+ // Check that the comment fills the entire line.
+ if start.column == 0 && self.is_line_empty() {
+ if let Some(path) = comment.strip_prefix(": ") {
+ self.source_path = Some(PathBuf::from(path.trim()));
+ self.source_line_start = start.line + 1;
+ }
+ }
+ }
+
+ // Find location in current merged file.
+ let in_merged = SourceLocation {
+ path: self.path.to_owned(),
+ start,
+ end: self.prev_position,
+ };
+
+ // Find location in original source file.
+ let in_source = if start.line >= self.source_line_start {
+ match &self.source_path {
+ Some(path) => {
+ let offset = self.source_line_start;
+ Some( SourceLocation {
+ path: Some(path.to_owned()),
+ start: Position {
+ line: in_merged.start.line.saturating_sub(offset),
+ column: in_merged.start.column,
+ },
+ end: Position {
+ line: in_merged.end.line.saturating_sub(offset),
+ column: in_merged.end.column,
+ }
+ })
+ }
+ None => None,
+ }
+ } else {
+ None
+ };
+
+ let string = std::mem::take(&mut self.token_source_string);
+ let source = SourceSpan { string, in_merged, in_source };
+ Some( SyntacticToken { source, variant } )
+ }
+}
+
+
+#[derive(Debug)]
+pub enum ParseError {
+ InvalidExtension,
+ NotFound,
+ NotReadable,
+ IsADirectory,
+ InvalidUtf8,
+ Unknown,
+}