From 1ecee352f5844b0809d7ae66df52e34f42b44c8e Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Thu, 6 Mar 2025 20:33:27 +1300 Subject: Rewrite entire assembler The language is now more general, the code is better structured, error reporting is more detailed, and many new language features have been implemented: - conditional blocks - first-class strings - more expression operators - binary literals - negative values - invocations in constant expressions --- src/tokens/assembler.rs | 162 ------------------------------ src/tokens/bytecode.rs | 49 --------- src/tokens/expression.rs | 78 --------------- src/tokens/mod.rs | 19 ---- src/tokens/packed_binary_literal.rs | 57 ----------- src/tokens/semantic.rs | 192 ------------------------------------ src/tokens/syntactic.rs | 84 ---------------- src/tokens/tracked.rs | 47 --------- src/tokens/tracked_integer.rs | 14 --- 9 files changed, 702 deletions(-) delete mode 100644 src/tokens/assembler.rs delete mode 100644 src/tokens/bytecode.rs delete mode 100644 src/tokens/expression.rs delete mode 100644 src/tokens/mod.rs delete mode 100644 src/tokens/packed_binary_literal.rs delete mode 100644 src/tokens/semantic.rs delete mode 100644 src/tokens/syntactic.rs delete mode 100644 src/tokens/tracked.rs delete mode 100644 src/tokens/tracked_integer.rs (limited to 'src/tokens') diff --git a/src/tokens/assembler.rs b/src/tokens/assembler.rs deleted file mode 100644 index 048062b..0000000 --- a/src/tokens/assembler.rs +++ /dev/null @@ -1,162 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub enum AssembledToken { - Word(AssembledWord), - LabelDefinition(LabelDefinition), - PinnedAddress(PinnedAddress), - Error(AssemblerError), -} - -#[derive(Clone)] -pub struct AssembledWord { - pub source: SourceSpan, - pub value: usize, - pub bits: usize, - pub fields: Vec, - pub errors: Vec, -} - -impl AssembledWord { - pub fn count(&self) -> usize { - // If there is at least one field, and all fields have empty string - // values, then count will be zero. Else count will be at least one. - let mut count = 0; - let mut all_strings = !self.fields.is_empty(); - for field in &self.fields { - if let IntegerArgument::String(string) = &field.value { - count = std::cmp::max(count, string.chars.len()); - } else { - all_strings = false; - } - } - if !all_strings { - count = std::cmp::max(count, 1); - } - return count; - } -} - -#[derive(Clone)] -pub struct AssembledField { - pub source: SourceSpan, - pub value: IntegerArgument, - /// Length of field in bits - pub bits: usize, - /// Distance to left-shift field in value - pub shift: usize, -} - -#[derive(Clone)] -pub struct AssembledExpression { - pub source: SourceSpan, - pub tokens: Vec, -} - -#[derive(Clone)] -pub enum AssembledExpressionToken { - Integer(TrackedInteger), - LabelReference(Tracked), - Operator(Operator), - Expression(Box), -} - -#[derive(Clone)] -pub enum Argument { - Integer(IntegerArgument), - Block(Vec), -} - -#[derive(Clone)] -pub enum IntegerArgument { - LabelReference(Tracked), - Integer(TrackedInteger), - Expression(AssembledExpression), - String(TrackedString), -} - -#[derive(Clone)] -pub struct AssemblerError { - pub source: SourceSpan, - pub variant: AssemblerErrorVariant, -} - -#[derive(Clone, Debug)] -pub enum AssemblerErrorVariant { - DefinitionNotFound(String), - NotAnInteger, - NotABlock, - IntegerInBlock, - StringInExpression, - /// expected, received - IncorrectArgumentCount(usize, usize), - /// expected, received, index - IncorrectArgumentType(ArgumentVariant, ArgumentVariant), -} - -// ------------------------------------------------------------------------ // - -macro_rules! indent { - ($indent:expr => $($tokens:tt)*) => {{ - for _ in 0..$indent { print!(" "); } - println!($($tokens)*); - }}; -} - -pub fn print_assembled_tokens(tokens: &[AssembledToken]) { - for token in tokens { - match token { - AssembledToken::LabelDefinition(definition) => { - println!("LABEL {}", definition.name) - } - AssembledToken::PinnedAddress(address) => { - println!("PINNED {}", address.address) - } - AssembledToken::Word(word) => { - println!("WORD {:b}", word.value); - for field in &word.fields { - print!(" FIELD ({} << {}) ", field.bits, field.shift); - match &field.value { - IntegerArgument::LabelReference(name) => { - println!("LABEL '{name}'"); - } - IntegerArgument::Integer(integer) => { - println!("INTEGER '{}'", integer.value); - } - IntegerArgument::String(string) => { - println!("STRING {string}"); - } - IntegerArgument::Expression(expr) => { - println!("EXPRESSION"); - print_assembled_expression(2, expr); - } - } - } - } - AssembledToken::Error(error) => { - println!("ERROR {:?}", error.variant) - } - } - } -} - -fn print_assembled_expression(indent: usize, expr: &AssembledExpression) { - for token in &expr.tokens { - match token { - AssembledExpressionToken::Integer(integer) => { - indent!(indent => "INTEGER {}", integer.value) - } - AssembledExpressionToken::LabelReference(name) => { - indent!(indent => "LABEL '{name}'") - } - AssembledExpressionToken::Operator(operator) => { - indent!(indent => "OPERATOR {operator:?}") - } - AssembledExpressionToken::Expression(expr) => { - indent!(indent => "EXPRESSION"); - print_assembled_expression(indent+1, expr); - } - } - } -} diff --git a/src/tokens/bytecode.rs b/src/tokens/bytecode.rs deleted file mode 100644 index 9ac340e..0000000 --- a/src/tokens/bytecode.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::*; - - -pub struct Bytecode { - pub words: Vec, - pub errors: Vec, -} - -#[derive(Clone, Copy)] -pub struct Word { - pub bits: usize, - pub value: usize, -} - -impl std::fmt::Display for Word { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - for i in (0..self.bits).rev() { - let is_first_bit = i+1 == self.bits; - if !is_first_bit && (i+1) % 4 == 0 { - write!(f, "_")?; - } - match (self.value >> i) & 1 { - 0 => write!(f, "0")?, - _ => write!(f, "1")?, - } - } - if self.bits == 0 { - write!(f, "0")?; - } - return Ok(()); - } -} - -pub struct BytecodeError { - pub source: SourceSpan, - pub variant: BytecodeErrorVariant, -} - -pub enum BytecodeErrorVariant { - DefinitionNotFound(String), - DuplicateLabelDefinition(String), - /// pin, real - PinnedAddressBacktrack(usize, usize), - /// expected, received - ValueTooLarge(usize, usize), - StackUnderflow, - MultipleReturnValues, - NoReturnValue, -} diff --git a/src/tokens/expression.rs b/src/tokens/expression.rs deleted file mode 100644 index 1d8a336..0000000 --- a/src/tokens/expression.rs +++ /dev/null @@ -1,78 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct Expression { - pub source: SourceSpan, - pub tokens: Vec, -} - -#[derive(Clone)] -pub struct ExpressionToken { - pub source: SourceSpan, - pub variant: ExpressionTokenVariant, -} - -#[derive(Clone)] -pub enum ExpressionTokenVariant { - Invocation(String), - Literal(isize), - Operator(Operator), - Error(ExpressionParseError), -} - -#[derive(Clone, Copy, Debug)] -pub enum Operator { - Equal, - NotEqual, - LessThan, - GreaterThan, - LessThanEqual, - GreaterThanEqual, - Add, - Subtract, - LeftShift, - RightShift, - And, - Or, - Xor, - Not, -} - -#[derive(Clone)] -pub enum ExpressionParseError { - InvalidHexadecimalLiteral(String), -} - -impl std::fmt::Debug for Expression { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - for (i, token) in self.tokens.iter().enumerate() { - let string = match &token.variant { - ExpressionTokenVariant::Invocation(name) => name, - ExpressionTokenVariant::Literal(value) => &value.to_string(), - ExpressionTokenVariant::Operator(operator) => match operator { - Operator::Equal => "=", - Operator::NotEqual => "!=", - Operator::LessThan => "<", - Operator::GreaterThan => ">", - Operator::LessThanEqual => "<=", - Operator::GreaterThanEqual => ">=", - Operator::Add => "+", - Operator::Subtract => "-", - Operator::LeftShift => "<<", - Operator::RightShift => ">>", - Operator::And => "&", - Operator::Or => "|", - Operator::Xor => "^", - Operator::Not => "~", - } - ExpressionTokenVariant::Error(_) => "", - }; - match i { - 0 => write!(f, "{string}")?, - _ => write!(f, " {string}")?, - } - } - return Ok(()); - } -} diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs deleted file mode 100644 index 53ccc6e..0000000 --- a/src/tokens/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -mod expression; -mod packed_binary_literal; -mod tracked_integer; -mod tracked; - -pub use expression::*; -pub use packed_binary_literal::*; -pub use tracked_integer::*; -pub use tracked::*; - -mod syntactic; -mod semantic; -mod assembler; -mod bytecode; - -pub use syntactic::*; -pub use semantic::*; -pub use assembler::*; -pub use bytecode::*; diff --git a/src/tokens/packed_binary_literal.rs b/src/tokens/packed_binary_literal.rs deleted file mode 100644 index a2720b7..0000000 --- a/src/tokens/packed_binary_literal.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::*; - - -pub struct PackedBinaryLiteral { - pub source: SourceSpan, - pub value: usize, - pub bits: usize, - pub fields: Vec, - pub errors: Vec, -} - -pub struct BitField { - pub name: char, - pub source: SourceSpan, - /// Length of field in bits - pub bits: usize, - /// Distance to left-shift field in value - pub shift: usize, -} - -pub struct PackedBinaryLiteralParseError { - pub source: SourceSpan, - pub variant: PackedBinaryLiteralParseErrorVariant, -} - -pub enum PackedBinaryLiteralParseErrorVariant { - DuplicateFieldName(char), - InvalidCharacter(char), -} - - -impl std::fmt::Display for PackedBinaryLiteral { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - if self.value == 0 { - write!(f, "0")?; - } else { - let bitcount = (self.value.ilog2() + 1) as usize; - 'bit: for i in (0..bitcount).rev() { - let is_first_bit = i+1 == bitcount; - if !is_first_bit && (i+1) % 4 == 0 { - write!(f, "_")?; - } - for field in &self.fields { - if i <= field.bits + field.shift - 1 && i >= field.shift { - write!(f, "{}", field.name)?; - continue 'bit; - } - } - match (self.value >> i) & 1 { - 0 => write!(f, "0")?, - _ => write!(f, "1")?, - } - } - } - return Ok(()); - } -} diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs deleted file mode 100644 index 225cd6b..0000000 --- a/src/tokens/semantic.rs +++ /dev/null @@ -1,192 +0,0 @@ -use crate::*; - -use indexmap::IndexMap; - - -/// The entire semantic program, ready to generate bytecode. -pub struct SemanticProgram { - pub macro_definitions: IndexMap, - pub label_definitions: IndexMap, - pub body: Vec, -} - -/// A symbol definition. -pub struct MacroDefinition { - pub source: SourceSpan, - pub arguments: Vec, - pub value: Value, - pub errors: Vec, -} - -pub struct ArgumentDefinition { - pub name: String, - pub source: SourceSpan, - pub variant: ArgumentVariant, -} - -#[derive(PartialEq, Clone, Copy, Debug)] -pub enum ArgumentVariant { - Integer, - Block, -} - -pub struct ArgumentInvocation { - pub source: SourceSpan, - pub value: Value, -} - -pub enum Value { - Integer(Integer), - Block(Vec), - Invocation(Invocation), -} - -pub enum Integer { - Literal(TrackedInteger), - String(TrackedString), - Expression(Expression), - LabelReference(Tracked), -} - -pub enum SemanticToken { - Word(PackedBinaryLiteral), - Invocation(Invocation), - LabelDefinition(LabelDefinition), - PinnedAddress(PinnedAddress), - Error(SemanticParseError), -} - -pub struct Invocation { - pub name: String, - pub source: SourceSpan, - pub arguments: Vec, - pub errors: Vec, -} - -#[derive(Clone)] -pub struct LabelDefinition { - pub source: SourceSpan, - pub name: String, -} - -#[derive(Clone)] -pub struct PinnedAddress { - pub source: SourceSpan, - pub address: usize, -} - -pub struct SemanticParseError { - pub source: SourceSpan, - pub variant: SemanticParseErrorVariant, -} - -pub enum SemanticParseErrorVariant { - UnterminatedMacroDefinition(String), - UnterminatedBlock, - InvalidToken, -} - - -impl std::fmt::Display for ArgumentVariant { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - match self { - ArgumentVariant::Integer => write!(f, "integer"), - ArgumentVariant::Block => write!(f, "block"), - } - } -} - -// ------------------------------------------------------------------------ // - -macro_rules! indent { - ($indent:expr => $($tokens:tt)*) => {{ - for _ in 0..$indent { print!(" "); } - println!($($tokens)*); - }}; -} - -impl SemanticProgram { - pub fn print_definitions(&self) { - for (name, definition) in &self.macro_definitions { - let variant = match &definition.value { - Value::Integer(_) => "INTEGER", - Value::Block(_) => "BLOCK", - Value::Invocation(_) => "INVOCATION", - }; - println!("DEFINE {variant} '{name}'"); - for argument in &definition.arguments { - self.print_argument_definition(argument); - } - match &definition.value { - Value::Integer(integer) => - self.print_integer(1, integer), - Value::Block(block) => - self.print_block(1, block), - Value::Invocation(invocation) => - indent!(1 => "INVOCATION '{}'", invocation.name), - }; - println!(); - } - - println!("LABELS"); - for (name, _) in &self.label_definitions { - println!(" @{name}"); - } - println!(); - - self.print_block(0, &self.body); - } - - fn print_argument_definition(&self, argument: &ArgumentDefinition) { - let variant = match argument.variant { - ArgumentVariant::Integer => "INTEGER", - ArgumentVariant::Block => "BLOCK", - }; - println!(" ARGUMENT {variant} '{}'", argument.name); - } - - fn print_integer(&self, indent: usize, integer: &Integer) { - match &integer { - Integer::Literal(value) => - indent!(indent => "LITERAL {value}"), - Integer::Expression(expr) => - indent!(indent => "EXPRESSION [{expr:?}]"), - Integer::String(string) => - indent!(indent => "STRING '{string}'"), - Integer::LabelReference(name) => - indent!(indent => "LABEL REFERENCE '{name}'"), - } - } - - fn print_block(&self, indent: usize, block: &[SemanticToken]) { - indent!(indent => "BLOCK"); - for semantic_token in block { - match &semantic_token { - SemanticToken::Word(word) => - indent!(indent+1 => "WORD #{word}"), - SemanticToken::Invocation(invocation) => - self.print_invocation(indent+1, invocation), - SemanticToken::LabelDefinition(definition) => - indent!(indent+1 => "LABEL DEFINITION @{}", definition.name), - SemanticToken::PinnedAddress(addr) => - indent!(indent+1 => "PINNED ADDRESS {}", addr.address), - SemanticToken::Error(_) => - indent!(indent+1 => "ERROR"), - } - } - } - - fn print_invocation(&self, indent: usize, invocation: &Invocation) { - indent!(indent => "INVOCATION '{}'", invocation.name); - for argument in &invocation.arguments { - match &argument.value { - Value::Integer(integer) => - self.print_integer(indent+1, integer), - Value::Block(block) => - self.print_block(indent+1, block), - Value::Invocation(invocation) => - self.print_invocation(indent+1, invocation), - }; - } - } -} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs deleted file mode 100644 index 780c950..0000000 --- a/src/tokens/syntactic.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::*; - - -pub struct SyntacticToken { - pub source: SourceSpan, - pub variant: SyntacticTokenVariant, -} - -pub enum SyntacticTokenVariant { - LabelDefinition(String), - MacroDefinition(String), - MacroDefinitionTerminator, - - IntegerLiteral(isize), - PackedBinaryLiteral(PackedBinaryLiteral), - PinnedAddress(usize), - - Expression(Expression), - - String(TrackedString), - - BlockOpen, - BlockClose, - Separator, - - Symbol(String), - - Error(SyntacticParseError), -} - -#[derive(Clone)] -pub struct TrackedString { - pub source: SourceSpan, - pub string: String, - pub chars: Vec>, -} - -impl std::fmt::Display for TrackedString { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - self.string.fmt(f) - } -} - -#[derive(Debug)] -pub enum SyntacticParseError { - InvalidHexadecimalLiteral(String), - InvalidDecimalLiteral(String), - InvalidSymbolIdentifier(String), - UnterminatedComment, - UnterminatedString, - UnterminatedExpression, - LabelInMacroDefinition, -} - - -impl std::fmt::Debug for SyntacticToken { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - use SyntacticTokenVariant::*; - let start = &self.source.in_merged; - let name = match &self.variant { - LabelDefinition(name) => format!("LabelDefinition({name})"), - MacroDefinition(name) => format!("MacroDefinition({name})"), - MacroDefinitionTerminator => format!("MacroDefinitionTerminator"), - - IntegerLiteral(value) => format!("IntegerLiteral({value})"), - PackedBinaryLiteral(pbl) => format!("PackedBinaryLiteral({pbl})"), - PinnedAddress(value) => format!("PinnedAddress({value})"), - - Expression(expr) => format!("Expression({expr:?})"), - - String(string) => format!("String('{string}')"), - - BlockOpen => format!("BlockOpen"), - BlockClose => format!("BlockClose"), - Separator => format!("Separator"), - - Symbol(name) => format!("Symbol({name})"), - - Error(error) => format!("Error({error:?})"), - }; - - write!(f, "{start} {name}") - } -} diff --git a/src/tokens/tracked.rs b/src/tokens/tracked.rs deleted file mode 100644 index ea37047..0000000 --- a/src/tokens/tracked.rs +++ /dev/null @@ -1,47 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct Tracked { - pub source: SourceSpan, - pub value: T, -} - -impl Tracked { - pub fn from(value: T, source: SourceSpan) -> Self { - Self { source, value } - } -} - -impl std::ops::Deref for Tracked { - type Target = T; - fn deref(&self) -> &T { - &self.value - } -} - -impl std::ops::DerefMut for Tracked { - fn deref_mut(&mut self) -> &mut T { - &mut self.value - } -} - -impl std::fmt::Display for Tracked { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", self.value) - } -} - -impl std::fmt::Debug for Tracked { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{:?}", self.value) - } -} - -impl PartialEq for Tracked { - fn eq(&self, other: &Tracked) -> bool { - self.value.eq(&other.value) - } -} - -impl Eq for Tracked {} diff --git a/src/tokens/tracked_integer.rs b/src/tokens/tracked_integer.rs deleted file mode 100644 index fa55f09..0000000 --- a/src/tokens/tracked_integer.rs +++ /dev/null @@ -1,14 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct TrackedInteger { - pub source: SourceSpan, - pub value: isize, -} - -impl std::fmt::Display for TrackedInteger { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{}", self.value) - } -} -- cgit v1.2.3-70-g09d2