summary | refs | log | tree | commit | diff
path: root/src/translators
diff options
context:
space:
mode:
author Ben Bridle <bridle.benjamin@gmail.com> 2025-02-14 09:36:52 +1300
committer Ben Bridle <ben@derelict.engineering> 2025-03-18 12:23:27 +1300
commit 7d4dd52b8cfc865ae1b975ca3b6a3e72a812ebb9 (patch)
tree 14ca9fa0ddcdd8c5155ddeaac241cd4f55486b6e /src/translators
parent f69a8f8c312ded212446082682bcabba8e3a9c9f (diff)
download bedrock-asm-7d4dd52b8cfc865ae1b975ca3b6a3e72a812ebb9.zip
Rewrite library
Diffstat (limited to 'src/translators')
-rw-r--r-- src/translators/bytecode_generator.rs 131
-rw-r--r-- src/translators/semantic_parser.rs 245
-rw-r--r-- src/translators/symbols_generator.rs 20
-rw-r--r-- src/translators/syntactic_parser.rs 117
4 files changed, 0 insertions, 513 deletions
diff --git a/src/translators/bytecode_generator.rs b/src/translators/bytecode_generator.rs
deleted file mode 100644
index 956aca5..0000000
--- a/src/translators/bytecode_generator.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-use crate::*;
-
-use SemanticTokenVariant as SemVar;
-
-
-/// Build a `BytecodeGenerator` over `semantic_tokens`, run it, and return the
-/// bytecode it produces.
-pub fn generate_bytecode(semantic_tokens: &mut [SemanticToken]) -> Vec<u8> {
-    BytecodeGenerator::from_semantic_tokens(semantic_tokens).generate()
-}
-
-
-/// Translate semantic tokens into bytecode.
-struct BytecodeGenerator<'a> {
- /// Tokens being translated. `generate` writes the resulting bytecode span
- /// into the `bytecode` field of each top-level token.
- semantic_tokens: &'a mut [SemanticToken],
- /// Bytecode addresses of the two-byte placeholders emitted for blocks
- /// that have been opened but not yet closed.
- block_stack: Vec<usize>,
- /// The bytecode generated so far.
- bytecode: Vec<u8>,
- /// (address in bytecode, label definition token index)
- label_references: Vec<(usize, usize)>,
-}
-
-impl<'a> BytecodeGenerator<'a> {
-    /// Create a generator over `semantic_tokens` with no bytecode emitted yet.
-    pub fn from_semantic_tokens(semantic_tokens: &'a mut [SemanticToken]) -> Self {
-        let block_stack = Vec::new();
-        let bytecode = Vec::new();
-        let label_references = Vec::new();
-        Self { semantic_tokens, block_stack, bytecode, label_references }
-    }
-
-    /// Generate bytecode for every token in order, store on each token the
-    /// span of bytecode it produced, patch label references with the
-    /// addresses of their definitions, and return the bytecode with trailing
-    /// null bytes removed.
-    pub fn generate(mut self) -> Vec<u8> {
-        let token_count = self.semantic_tokens.len();
-        for index in 0..token_count {
-            let address = self.bytecode.len();
-            self.generate_bytecode_for_token(index, None);
-            let bytes = self.bytecode[address..].to_vec();
-            let length = self.bytecode.len().saturating_sub(address);
-            let location = BytecodeLocation { address, length };
-            self.semantic_tokens[index].bytecode = BytecodeSpan { bytes, location };
-        }
-
-        // Replace blank label references in bytecode with real label
-        // addresses. The patches are collected first so that no borrow of
-        // the token list is live while the bytecode is being modified.
-        let patches: Vec<(usize, u16)> = self.label_references.iter()
-            .map(|&(site, definition_index)| {
-                let definition = &self.semantic_tokens[definition_index];
-                // TODO: If greater than u16, print a warning.
-                (site, definition.bytecode.location.address as u16)
-            })
-            .collect();
-        for (site, target) in patches {
-            self.replace_address_in_bytecode(site, target);
-        }
-
-        // Strip trailing null bytes from the bytecode: keep everything up to
-        // and including the last non-zero byte, or nothing if all are zero.
-        let retained = self.bytecode.iter()
-            .rposition(|&byte| byte != 0)
-            .map_or(0, |last| last + 1);
-        self.bytecode.truncate(retained);
-
-        self.bytecode
-    }
-
-    /// Append the bytecode for a single token. `pointer` indexes the
-    /// top-level token list, or the body of the macro definition found at
-    /// `macro_pointer` when that is `Some`.
-    fn generate_bytecode_for_token(&mut self, pointer: usize, macro_pointer: Option<usize>) {
-        let token = match macro_pointer {
-            Some(definition_index) => match &self.semantic_tokens[definition_index].variant {
-                SemVar::MacroDefinition(definition) => &definition.body_tokens[pointer],
-                _ => unreachable!(),
-            },
-            None => &self.semantic_tokens[pointer],
-        };
-
-        match &token.variant {
-            SemVar::MacroInvocation(definition_index) => {
-                let definition_index = *definition_index;
-                let body_length = match &self.semantic_tokens[definition_index].variant {
-                    SemVar::MacroDefinition(definition) => definition.body_tokens.len(),
-                    _ => unreachable!(),
-                };
-                // Recurse, generate bytecode for each macro body token.
-                for body_index in 0..body_length {
-                    self.generate_bytecode_for_token(body_index, Some(definition_index));
-                }
-            }
-            SemVar::Literal(Value::Byte(byte)) => self.bytecode.push(*byte),
-            SemVar::Literal(Value::Double(double)) => {
-                self.bytecode.extend_from_slice(&double.to_be_bytes())
-            }
-            // Padding emits as many zero bytes as the padding value.
-            SemVar::Padding(Value::Byte(count)) => {
-                self.bytecode.extend((0..*count).map(|_| 0u8))
-            }
-            SemVar::Padding(Value::Double(count)) => {
-                self.bytecode.extend((0..*count).map(|_| 0u8))
-            }
-            SemVar::Instruction(instr) => self.bytecode.push(instr.value),
-            SemVar::String(bytes) => self.bytecode.extend_from_slice(bytes),
-            SemVar::LabelReference(definition_index) => {
-                // Emit a blank two-byte address and remember where it is so
-                // that `generate` can fill in the label's real address.
-                let site = self.bytecode.len();
-                self.label_references.push((site, *definition_index));
-                self.bytecode.extend_from_slice(&0u16.to_be_bytes());
-            }
-            SemVar::BlockOpen(_) => {
-                // Emit a blank two-byte address, patched when the block closes.
-                let site = self.bytecode.len();
-                self.block_stack.push(site);
-                self.bytecode.extend_from_slice(&0u16.to_be_bytes());
-            }
-            SemVar::BlockClose(_) => {
-                let site = self.block_stack.pop().unwrap();
-                // TODO: If greater than u16, print a warning.
-                let target = self.bytecode.len() as u16;
-                self.replace_address_in_bytecode(site, target);
-            }
-            _ => (),
-        }
-    }
-
-    /// Overwrite the two bytes starting at `bytecode_address` with
-    /// `address_value` in big-endian order.
-    fn replace_address_in_bytecode(&mut self, bytecode_address: usize, address_value: u16) {
-        let encoded = address_value.to_be_bytes();
-        self.bytecode[bytecode_address..bytecode_address + 2].copy_from_slice(&encoded);
-    }
-}
diff --git a/src/translators/semantic_parser.rs b/src/translators/semantic_parser.rs
deleted file mode 100644
index cb6a435..0000000
--- a/src/translators/semantic_parser.rs
+++ /dev/null
@@ -1,245 +0,0 @@
-use crate::*;
-
-use std::collections::HashMap;
-use std::path::PathBuf;
-
-use SyntacticTokenVariant as SynVar;
-use SemanticTokenVariant as SemVar;
-use SemanticParseError as SemErr;
-
-
-/// Build a `SemanticParser` from `source_code` and `path`, run it, and return
-/// the semantic tokens it produces.
-pub fn generate_semantic_tokens<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Vec<SemanticToken> {
-    SemanticParser::from_source_code(source_code, path).parse()
-}
-
-
-/// Translate syntactic tokens into semantic tokens.
-struct SemanticParser {
- /// Label definitions found in the source, keyed by label name.
- labels: HashMap<String, Definition>,
- /// Macro definitions found in the source, keyed by macro name.
- macros: HashMap<String, Definition>,
- /// Every syntactic token read from the source, in source order.
- syntactic_tokens: Vec<SyntacticToken>,
- /// Index of the current outer token.
- current_outer_index: usize,
-}
-
-impl SemanticParser {
- /// Tokenise `source_code` and record a `Definition` for every label and
- /// macro definition token before any semantic parsing takes place, so
- /// that symbols can be recognised ahead of their definition.
- pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
- let mut labels = HashMap::new();
- let mut macros = HashMap::new();
- let mut syntactic_tokens = Vec::new();
-
- let parser = SyntacticParser::from_source_code(source_code, path);
- for syntactic_token in parser {
- let definition = Definition::new(syntactic_token.source.clone());
- match &syntactic_token.variant {
- // `try_insert` leaves an existing entry untouched, so the first
- // definition of each name is the one recorded. The result is
- // discarded here.
- SynVar::LabelDefinition(name) => {
- let _ = labels.try_insert(name.to_owned(), definition);
- },
- SynVar::MacroDefinition(name) => {
- let _ = macros.try_insert(name.to_owned(), definition);
- },
- _ => (),
- }
- syntactic_tokens.push(syntactic_token);
- }
-
- Self {
- labels,
- macros,
- syntactic_tokens,
- current_outer_index: 0,
- }
- }
-
- /// Parse syntactic tokens as semantic tokens.
- ///
- /// After the tokens have been converted, every label whose definition
- /// token was accepted has its token index written into the tokens that
- /// reference it, both at the top level and inside macro bodies.
- pub fn parse(mut self) -> Vec<SemanticToken> {
- let syntactic_tokens = std::mem::take(&mut self.syntactic_tokens);
- let mut syntactic = syntactic_tokens.into_iter();
- let mut semantic_tokens = self.pull_semantic_tokens(&mut syntactic, false);
-
- // Insert real label definition pointers into label reference tokens.
- // NOTE(review): labels whose `pointer` is still `None` are skipped, so
- // references to them keep the placeholder `LabelReference(0)`.
- for definition in self.labels.values_mut() {
- if let Some(definition_pointer) = definition.pointer {
- // Insert definition pointer into reference tokens.
- for reference_pointer in &definition.references {
- let reference_token = &mut semantic_tokens[*reference_pointer];
- reference_token.variant = SemVar::LabelReference(definition_pointer);
- }
- // Insert reference pointers into definition token.
- let definition_token = &mut semantic_tokens[definition_pointer];
- if let SemVar::LabelDefinition(ref mut def) = definition_token.variant {
- def.references = std::mem::take(&mut definition.references);
- } else { unreachable!() }
- // Insert definition pointer into reference tokens inside macros.
- for (outer, inner) in &definition.deep_references {
- let macro_token = &mut semantic_tokens[*outer];
- if let SemVar::MacroDefinition(ref mut def) = macro_token.variant {
- let reference_token = &mut def.body_tokens[*inner];
- reference_token.variant = SemVar::LabelReference(definition_pointer);
- } else { unreachable!() }
- }
- // TODO: Record deep references in macro and label definitions?
- }
- }
-
- return semantic_tokens;
- }
-
- /// Convert syntactic tokens pulled from `parser` into semantic tokens.
- ///
- /// Runs until `parser` is exhausted or, when `in_macro` is true, until a
- /// macro definition terminator is reached. An accepted macro definition
- /// recurses with `in_macro` set to collect its body from the same
- /// iterator. Indices recorded here refer to positions in the returned
- /// vector.
- fn pull_semantic_tokens<I>(&mut self, parser: &mut I, in_macro: bool) -> Vec<SemanticToken>
- where I: Iterator<Item = SyntacticToken>
- {
- let mut semantic_tokens: Vec<SemanticToken> = Vec::new();
- // Indices of BlockOpen tokens that have not yet been closed.
- let mut block_stack: Vec<usize> = Vec::new();
-
- while let Some(syntactic_token) = parser.next() {
- // Index the token produced by this iteration will have once pushed.
- let current_index = semantic_tokens.len();
- // Only updated at the top level, so inside a macro body this still
- // holds the index of the enclosing macro definition token.
- if !in_macro {
- self.current_outer_index = current_index;
- }
-
- let semantic_token_variant = match syntactic_token.variant {
- SynVar::LabelDefinition(name) => {
- if in_macro {
- SemVar::Error(SemErr::LabelDefinitionInMacroDefinition)
- } else if let Some(definition) = self.macros.get(&name) {
- let source = definition.source.clone();
- SemVar::Error(SemErr::RedefinedSymbol((name, source)))
- } else if let Some(definition) = self.labels.get_mut(&name) {
- if definition.pointer.is_some() {
- let source = definition.source.clone();
- SemVar::Error(SemErr::RedefinedSymbol((name, source)))
- } else {
- definition.pointer = Some(current_index);
- let references = Vec::new();
- SemVar::LabelDefinition(LabelDefinition { name, references })
- }
- } else {
- // Every label definition name was registered by
- // `from_source_code`.
- unreachable!()
- }
- }
- SynVar::MacroDefinition(name) => {
- // NOTE(review): in the error branches below the macro body is
- // not pulled from `parser`, so the tokens that follow are
- // parsed at the current level.
- if in_macro {
- SemVar::Error(SemErr::MacroDefinitionInMacroDefinition)
- } else if let Some(definition) = self.labels.get(&name) {
- let source = definition.source.clone();
- SemVar::Error(SemErr::RedefinedSymbol((name, source)))
- } else if let Some(definition) = self.macros.get_mut(&name) {
- if definition.pointer.is_some() {
- let source = definition.source.clone();
- SemVar::Error(SemErr::RedefinedSymbol((name, source)))
- } else {
- definition.pointer = Some(current_index);
- let references = Vec::new();
- let body_tokens = self.pull_semantic_tokens(parser, true);
- SemVar::MacroDefinition(MacroDefinition { name, references, body_tokens })
- }
- } else {
- // Every macro definition name was registered by
- // `from_source_code`.
- unreachable!()
- }
- }
- // Inside a macro body the terminator ends the body and produces no
- // semantic token of its own.
- SynVar::MacroDefinitionTerminator => if in_macro {
- break;
- } else {
- SemVar::Error(SemErr::StrayMacroTerminator)
- }
- SynVar::Literal(value) => {
- SemVar::Literal(value)
- }
- SynVar::Padding(value) => {
- SemVar::Padding(value)
- }
- SynVar::Instruction(instr) => {
- SemVar::Instruction(instr)
- }
- SynVar::Comment(comment) => {
- SemVar::Comment(comment)
- }
- SynVar::String(bytes) => {
- SemVar::String(bytes)
- }
- SynVar::BlockOpen => {
- // Placeholder index, replaced when the matching close is found.
- block_stack.push(current_index);
- SemVar::BlockOpen(0)
- }
- SynVar::BlockClose => {
- // Link the open and close tokens to each other by index.
- if let Some(pointer) = block_stack.pop() {
- let open = &mut semantic_tokens[pointer];
- open.variant = SemVar::BlockOpen(current_index);
- SemVar::BlockClose(pointer)
- } else {
- SemVar::Error(SemErr::StrayBlockClose)
- }
- }
- SynVar::MarkOpen => {
- SemVar::MarkOpen
- }
- SynVar::MarkClose => {
- SemVar::MarkClose
- }
- SynVar::Symbol(name) => {
- if let Some(definition) = self.labels.get_mut(&name) {
- if in_macro {
- // (enclosing macro definition index, index within its body)
- let pointer = (self.current_outer_index, current_index);
- definition.deep_references.push(pointer);
- } else {
- definition.references.push(current_index);
- }
- // Placeholder index; `parse` writes the real definition
- // index once all tokens have been produced.
- SemVar::LabelReference(0)
- } else if let Some(definition) = self.macros.get_mut(&name) {
- if let Some(pointer) = definition.pointer {
- if !in_macro { definition.references.push(current_index); }
- SemVar::MacroInvocation(pointer)
- } else {
- let source = definition.source.clone();
- SemVar::Error(SemErr::MacroInvocationBeforeDefinition((name, source)))
- }
- } else {
- SemVar::Error(SemErr::UndefinedSymbol(name))
- }
- }
- SynVar::Error(syntax_err) => {
- SemVar::Error(SemErr::SyntaxError(syntax_err))
- }
- };
-
- let semantic_token = SemanticToken {
- source: syntactic_token.source,
- bytecode: BytecodeSpan::default(),
- variant: semantic_token_variant,
- };
- semantic_tokens.push(semantic_token);
- }
-
- if in_macro {
- //TODO: UnterminatedMacroDefinition
- }
-
- // Replace each unclosed BlockOpen token with an error.
- for block_pointer in block_stack {
- semantic_tokens[block_pointer].variant = SemVar::Error(SemErr::UnclosedBlock);
- }
-
- return semantic_tokens;
- }
-}
-
-
-/// Bookkeeping kept for one label or macro name during semantic parsing.
-struct Definition {
- /// Source span of the syntactic token that defined the name.
- pub source: SourceSpan,
- /// Index of the definition's semantic token, set once it has been parsed.
- pub pointer: Option<usize>,
- /// Indices of top-level semantic tokens that reference the name.
- pub references: Vec<usize>,
- /// (macro index, label reference index)
- pub deep_references: Vec<(usize, usize)>,
-}
-
-impl Definition {
-    /// Start tracking a definition located at `source`, with no token index
-    /// and no references recorded yet.
-    pub fn new(source: SourceSpan) -> Self {
-        let pointer = None;
-        let references = Vec::new();
-        let deep_references = Vec::new();
-        Self { source, pointer, references, deep_references }
-    }
-}
diff --git a/src/translators/symbols_generator.rs b/src/translators/symbols_generator.rs
deleted file mode 100644
index d30facd..0000000
--- a/src/translators/symbols_generator.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-use crate::*;
-
-use SemanticTokenVariant as SemVar;
-
-
-/// Build the text of a symbols file: one line per label definition, giving
-/// the label's bytecode address in hexadecimal, its name, and its source
-/// location.
-pub fn generate_symbols_file(semantic_tokens: &[SemanticToken]) -> String {
-    let mut output = String::new();
-
-    for token in semantic_tokens {
-        let definition = match &token.variant {
-            SemVar::LabelDefinition(definition) => definition,
-            _ => continue,
-        };
-        let address = token.bytecode.location.address;
-        // Stop at the first label whose address does not fit in 16 bits.
-        if address > 0xffff { break; }
-        let name = &definition.name;
-        let location = token.source.location();
-        let line = format!("{address:04x} {name} {location}\n");
-        output.push_str(&line);
-    }
-
-    output
-}
diff --git a/src/translators/syntactic_parser.rs b/src/translators/syntactic_parser.rs
deleted file mode 100644
index 8f0850b..0000000
--- a/src/translators/syntactic_parser.rs
+++ /dev/null
@@ -1,117 +0,0 @@
-use crate::*;
-
-use std::path::PathBuf;
-
-
-/// Translate raw source code characters into syntactic tokens.
-pub struct SyntacticParser {
- /// Tokeniser that reads characters and tokens from the source code.
- tokeniser: Tokeniser,
- /// The name of the most recently parsed `@` label. It is used as the
- /// prefix for `&` sub-label definitions and `~` sub-label references.
- label: String,
-}
-
-
-impl SyntacticParser {
-    /// Create a parser over `source_code`, registering the delimiter and
-    /// terminator characters with the tokeniser and starting with an empty
-    /// current label.
-    pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self {
-        const DELIMITERS: [char; 13] = ['@','&','%',';','[',']','{','}','(','"','\'','#','~'];
-        const TERMINATORS: [char; 1] = [':'];
-
-        let mut tokeniser = Tokeniser::new(source_code, path);
-        tokeniser.add_delimiters(&DELIMITERS);
-        tokeniser.add_terminators(&TERMINATORS);
-        let label = String::new();
-        Self { tokeniser, label }
-    }
-}
-
-
-impl Iterator for SyntacticParser {
- type Item = SyntacticToken;
-
- /// Sequentially parse tokens from the source code.
- ///
- /// The first character after any whitespace selects the token kind.
- /// Returns `None` once `eat_char` has no more characters to give.
- fn next(&mut self) -> Option<SyntacticToken> {
- use SyntacticTokenVariant as SynVar;
- use SyntacticParseError as SynErr;
- let t = &mut self.tokeniser;
-
- t.drop_whitespace();
- t.mark_start_position();
-
- let variant = match t.eat_char()? {
- // Label definition; also becomes the current label for `&` and `~`.
- '@' => {
- self.label = t.eat_token();
- SynVar::LabelDefinition(self.label.clone())
- }
- // Sub-label definition, named `<current label>/<token>`.
- '&' => {
- let token = t.eat_token();
- SynVar::LabelDefinition(format!("{}/{token}", self.label))
- }
- '%' => SynVar::MacroDefinition(t.eat_token()),
- ';' => SynVar::MacroDefinitionTerminator,
- '[' => SynVar::MarkOpen,
- ']' => SynVar::MarkClose,
- '{' => SynVar::BlockOpen,
- '}' => SynVar::BlockClose,
- '(' => match t.eat_to_delimiter(')') {
- Some(string) => SynVar::Comment(string),
- None => SynVar::Error(SynErr::UnterminatedComment),
- }
- // Raw string: the bytes of the text as written.
- '\'' => match t.eat_to_delimiter('\'') {
- Some(string) => SynVar::String(string.as_bytes().to_vec()),
- None => SynVar::Error(SynErr::UnterminatedRawString),
- }
- // Null-terminated string: the bytes of the text plus a trailing 0x00.
- '"' => match t.eat_to_delimiter('"') {
- Some(string) => {
- let mut bytes = string.as_bytes().to_vec();
- bytes.push(0x00);
- SynVar::String(bytes)
- }
- None => SynVar::Error(SynErr::UnterminatedNullString),
- }
- '#' => {
- let token = t.eat_token();
- match token.parse::<Value>() {
- Ok(value) => SynVar::Padding(value),
- Err(_) => SynVar::Error(SynErr::InvalidPaddingValue(token)),
- }
- },
- // Reference to a sub-label of the current label.
- '~' => {
- let token = t.eat_token();
- let symbol = format!("{}/{token}", self.label);
- SynVar::Symbol(symbol)
- }
- ':' => SynVar::Symbol(String::from(':')),
- // Anything else: try a literal value first, then an instruction,
- // and otherwise treat the token as a symbol.
- c => {
- let token = format!("{c}{}", t.eat_token());
- match token.parse::<Value>() {
- Ok(value) => SynVar::Literal(value),
- Err(_) => match token.parse::<Instruction>() {
- Ok(instruction) => SynVar::Instruction(instruction),
- Err(_) => SynVar::Symbol(token),
- }
- }
- }
- };
-
- // Parse source path comments.
- if let SynVar::Comment(comment) = &variant {
- // Check if the comment fills the entire line.
- if t.start_position.column == 0 && t.end_of_line() {
- // A comment starting with ": " carries a source path; store it
- // on the tokeniser along with the line number following the
- // comment.
- if let Some(path) = comment.strip_prefix(": ") {
- t.source_path = Some(PathBuf::from(path.trim()));
- t.embedded_first_line = t.start_position.line + 1;
- }
- }
- }
-
- let source = t.get_source_span();
- Some( SyntacticToken { source, variant } )
- }
-}
-
-
-// NOTE(review): not used anywhere in this file. The variant names suggest
-// failures when locating or reading a source file — confirm against callers.
-#[derive(Debug)]
-pub enum ParseError {
- InvalidExtension,
- NotFound,
- NotReadable,
- IsADirectory,
- InvalidUtf8,
- Unknown,
-}