diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-02-28 13:23:20 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-02-28 13:34:23 +1300 |
commit | dba769e13ca5029643c6068e53fa34ae0fea8421 (patch) | |
tree | 47b45ecddaf08bcef19de29ad65206c34af85f53 | |
parent | 1a810d036195395c182f6cd6e011b8fb868d9872 (diff) | |
download | torque-asm-dba769e13ca5029643c6068e53fa34ae0fea8421.zip |
Implement string literals
String literals are treated as integer values. If a string is passed as
an integer argument to a packed binary literal, the packed binary
literal is invoked once for every character in the string, with each
character being passed to the packed binary literal as its Unicode
character value. Strings cannot be used inside expressions.
-rw-r--r-- | src/parsers/assembler.rs | 14 | ||||
-rw-r--r-- | src/parsers/bytecode.rs | 78 | ||||
-rw-r--r-- | src/parsers/semantic.rs | 7 | ||||
-rw-r--r-- | src/parsers/syntactic.rs | 25 | ||||
-rw-r--r-- | src/report.rs | 8 | ||||
-rw-r--r-- | src/tokens/assembler.rs | 28 | ||||
-rw-r--r-- | src/tokens/semantic.rs | 3 | ||||
-rw-r--r-- | src/tokens/syntactic.rs | 18 | ||||
-rw-r--r-- | src/tokens/tracked.rs | 4 |
9 files changed, 151 insertions, 34 deletions
diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs index eb180e3..61e1a84 100644 --- a/src/parsers/assembler.rs +++ b/src/parsers/assembler.rs @@ -115,7 +115,7 @@ impl<'a> Environment<'a> { } else if let Some(definition) = self.macro_definitions.get(name) { self.reify_value(&definition.value) } else if let Some(label) = self.label_definitions.get(name) { - let name = Tracked::from(self.tag_label_name(&label.name), &source); + let name = Tracked::from(self.tag_label_name(&label.name), source); Ok(Argument::Integer(IntegerArgument::LabelReference(name))) } else { let variant = ErrVar::DefinitionNotFound(name.to_string()); @@ -142,9 +142,12 @@ impl<'a> Environment<'a> { IntegerArgument::Expression(expr) } Integer::LabelReference(name) => { - let name = Tracked::from(self.tag_label_name(name), &name.source); + let name = Tracked::from(self.tag_label_name(name), name.source.clone()); IntegerArgument::LabelReference(name) } + Integer::String(string) => { + IntegerArgument::String(string.clone()) + } }; Ok(Argument::Integer(value)) } @@ -238,7 +241,7 @@ impl<'a> Environment<'a> { if received != expected { return err!(ErrVar::IncorrectArgumentCount(expected, received)); } - let name = Tracked::from(self.tag_label_name(&label.name), &label.source); + let name = Tracked::from(self.tag_label_name(&label.name), label.source.clone()); Ok(Argument::Integer(IntegerArgument::LabelReference(name))) } else { err!(ErrVar::DefinitionNotFound(invocation.name.to_string())) @@ -270,6 +273,11 @@ impl<'a> Environment<'a> { IntegerArgument::Expression(expr) => { AssembledExpressionToken::Expression(Box::new(expr)) }, + IntegerArgument::String(string) => { + let source = string.source.clone(); + let variant = AssemblerErrorVariant::StringInExpression; + return Err(AssemblerError { source, variant }) + } } } ExprVar::Error(_) => continue, diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs index ec19d9f..6cdfd3a 100644 --- a/src/parsers/bytecode.rs +++ 
b/src/parsers/bytecode.rs @@ -25,28 +25,7 @@ impl<'a> BytecodeGenerator<'a> { for token in self.tokens { match token { AssembledToken::Word(assembled_word) => { - let mut value = assembled_word.value; - for field in &assembled_word.fields { - let (field_value, source) = match &field.value { - IntegerArgument::Expression(expr) => - (self.resolve_expression(expr), expr.source.clone()), - IntegerArgument::LabelReference(name) => - (self.resolve_label_reference(name), name.source.clone()), - IntegerArgument::Integer(integer) => - (integer.value, integer.source.clone()), - }; - let bitcount = match field_value { - 0 => 0, - _ => (field_value.ilog2() + 1) as usize, - }; - if field.bits < bitcount { - let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount); - self.errors.push(BytecodeError { source, variant }); - } else { - value |= (field_value << field.shift) as usize; - } - } - self.words.push(Word { bits: assembled_word.bits, value }); + self.assemble_word(assembled_word); } AssembledToken::PinnedAddress(pinned) => { if self.words.len() > pinned.address { @@ -74,7 +53,7 @@ impl<'a> BytecodeGenerator<'a> { for token in self.tokens { match token { AssembledToken::LabelDefinition(definition) => { - let address = Tracked::from(i, &definition.source); + let address = Tracked::from(i, definition.source.clone()); if let Some(_) = self.addresses.insert(definition.name.clone(), address) { let name = definition.name.clone(); let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name); @@ -82,8 +61,8 @@ impl<'a> BytecodeGenerator<'a> { self.errors.push(BytecodeError { source, variant }); } } - AssembledToken::Word(_) => { - i += 1; + AssembledToken::Word(word) => { + i += word.count(); } AssembledToken::PinnedAddress(pinned) => { i = pinned.address; @@ -158,4 +137,53 @@ impl<'a> BytecodeGenerator<'a> { 0 } } + + fn assemble_word(&mut self, assembled_word: &AssembledWord) { + let mut field_values = Vec::new(); + for field in &assembled_word.fields { + match 
&field.value { + IntegerArgument::Expression(expr) => { + let source = expr.source.clone(); + let value = self.resolve_expression(expr); + field_values.push(vec![Tracked::from(value, source)]) + } + IntegerArgument::LabelReference(name) => { + let source = name.source.clone(); + let value = self.resolve_label_reference(name); + field_values.push(vec![Tracked::from(value, source)]) + } + IntegerArgument::Integer(integer) => { + let source = integer.source.clone(); + let value = integer.value; + field_values.push(vec![Tracked::from(value, source)]) + } + IntegerArgument::String(string) => { + let values = string.chars.iter() + .map(|c| Tracked::from(c.value as isize, c.source.clone())) + .collect(); + field_values.push(values); + } + }; + } + for i in 0..assembled_word.count() { + let mut value = assembled_word.value; + for (f, field) in assembled_word.fields.iter().enumerate() { + let (field_value, source) = match field_values[f].get(i) { + Some(tracked) => (tracked.value, Some(tracked.source.clone())), + None => (0, None), + }; + let bitcount = match field_value { + 0 => 0, + _ => (field_value.ilog2() + 1) as usize, + }; + if field.bits < bitcount { + let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount); + self.errors.push(BytecodeError { source: source.unwrap(), variant }); + } else { + value |= (field_value << field.shift) as usize; + } + } + self.words.push(Word { bits: assembled_word.bits, value }); + } + } } diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs index a58fb5f..00cfc80 100644 --- a/src/parsers/semantic.rs +++ b/src/parsers/semantic.rs @@ -74,6 +74,9 @@ impl SemanticParser { let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse(); self.body.push(SemanticToken::Invocation(invocation)); } + SynVar::PackedBinaryLiteral(pbl) => { + self.body.push(SemanticToken::Word(pbl)); + } _ => { let variant = SemanticParseErrorVariant::InvalidToken; let error = SemanticParseError { source: syn.source, 
variant }; @@ -283,6 +286,10 @@ impl<'a> InvocationParser<'a> { let value = Value::Integer(Integer::Literal(integer)); Some(ArgumentInvocation { source, value }) } + SynVar::String(string) => { + let value = Value::Integer(Integer::String(string)); + Some(ArgumentInvocation { source, value }) + } SynVar::Expression(expr) => { let value = Value::Integer(Integer::Expression(expr)); Some(ArgumentInvocation { source, value }) diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs index 37f8e6c..f3fcec1 100644 --- a/src/parsers/syntactic.rs +++ b/src/parsers/syntactic.rs @@ -77,6 +77,21 @@ impl SyntacticParser { None => SynVar::Error(SynErr::UnterminatedExpression), } } + '"' => { + t.mark_child(); + match t.eat_to_delimiter('"') { + Some(string) => { + let child = t.subtokenise(); + t.mark_end(); + let chars = parse_tracked_chars(child); + let tracked_string = TrackedString { + source: t.get_source(), string, chars, + }; + SynVar::String(tracked_string) + } + None => SynVar::Error(SynErr::UnterminatedString), + } + } '(' => match t.eat_to_delimiter(')') { Some(string) => { // Check if the comment fills the entire line. 
@@ -145,3 +160,13 @@ impl SyntacticParser { return self.tokens; } } + + +fn parse_tracked_chars(mut t: Tokeniser) -> Vec<Tracked<char>> { + let mut output = Vec::new(); + while let Some(c) = t.eat_char() { + output.push(Tracked::from(c, t.get_source())); + t.mark_start(); + } + return output; +} diff --git a/src/report.rs b/src/report.rs index 2acdddc..a88de4f 100644 --- a/src/report.rs +++ b/src/report.rs @@ -61,8 +61,12 @@ pub fn report_syntactic_errors(syntactic_tokens: &[SyntacticToken], source_code: let message = format!("Unterminated comment"); report_source_error!(&context, &message); } + SyntacticParseError::UnterminatedString => { + let message = format!("Unterminated string"); + report_source_error!(&context, &message); + } SyntacticParseError::UnterminatedExpression => { - let message = format!("Unterminated constant expression"); + let message = format!("Unterminated assembler expression"); report_source_error!(&context, &message); } SyntacticParseError::LabelInMacroDefinition => { @@ -184,6 +188,8 @@ fn report_assembler_error(error: &AssemblerError, source_code: &str) { format!("Value of type integer was expected here"), AssemblerErrorVariant::IntegerInBlock => format!("Integer in block"), + AssemblerErrorVariant::StringInExpression => + format!("Expressions cannot contain strings"), AssemblerErrorVariant::IncorrectArgumentCount(expected, received) => format!("Expected {expected} arguments, but received {received} instead"), AssemblerErrorVariant::IncorrectArgumentType(expected, received) => diff --git a/src/tokens/assembler.rs b/src/tokens/assembler.rs index 04ecd38..048062b 100644 --- a/src/tokens/assembler.rs +++ b/src/tokens/assembler.rs @@ -18,6 +18,26 @@ pub struct AssembledWord { pub errors: Vec<AssemblerError>, } +impl AssembledWord { + pub fn count(&self) -> usize { + // If there is at least one field, and all fields have empty string + // values, then count will be zero. Else count will be at least one. 
+ let mut count = 0; + let mut all_strings = !self.fields.is_empty(); + for field in &self.fields { + if let IntegerArgument::String(string) = &field.value { + count = std::cmp::max(count, string.chars.len()); + } else { + all_strings = false; + } + } + if !all_strings { + count = std::cmp::max(count, 1); + } + return count; + } +} + #[derive(Clone)] pub struct AssembledField { pub source: SourceSpan, @@ -53,6 +73,7 @@ pub enum IntegerArgument { LabelReference(Tracked<String>), Integer(TrackedInteger), Expression(AssembledExpression), + String(TrackedString), } #[derive(Clone)] @@ -67,6 +88,7 @@ pub enum AssemblerErrorVariant { NotAnInteger, NotABlock, IntegerInBlock, + StringInExpression, /// expected, received IncorrectArgumentCount(usize, usize), /// expected, received, index @@ -83,9 +105,6 @@ macro_rules! indent { } pub fn print_assembled_tokens(tokens: &[AssembledToken]) { - println!(); - println!("--------------------------------------------------------------"); - println!(); for token in tokens { match token { AssembledToken::LabelDefinition(definition) => { @@ -105,6 +124,9 @@ pub fn print_assembled_tokens(tokens: &[AssembledToken]) { IntegerArgument::Integer(integer) => { println!("INTEGER '{}'", integer.value); } + IntegerArgument::String(string) => { + println!("STRING {string}"); + } IntegerArgument::Expression(expr) => { println!("EXPRESSION"); print_assembled_expression(2, expr); diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs index d61ad8e..225cd6b 100644 --- a/src/tokens/semantic.rs +++ b/src/tokens/semantic.rs @@ -43,6 +43,7 @@ pub enum Value { pub enum Integer { Literal(TrackedInteger), + String(TrackedString), Expression(Expression), LabelReference(Tracked<String>), } @@ -150,6 +151,8 @@ impl SemanticProgram { indent!(indent => "LITERAL {value}"), Integer::Expression(expr) => indent!(indent => "EXPRESSION [{expr:?}]"), + Integer::String(string) => + indent!(indent => "STRING '{string}'"), Integer::LabelReference(name) => 
indent!(indent => "LABEL REFERENCE '{name}'"), } diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs index eb33806..780c950 100644 --- a/src/tokens/syntactic.rs +++ b/src/tokens/syntactic.rs @@ -17,6 +17,8 @@ pub enum SyntacticTokenVariant { Expression(Expression), + String(TrackedString), + BlockOpen, BlockClose, Separator, @@ -26,12 +28,26 @@ pub enum SyntacticTokenVariant { Error(SyntacticParseError), } +#[derive(Clone)] +pub struct TrackedString { + pub source: SourceSpan, + pub string: String, + pub chars: Vec<Tracked<char>>, +} + +impl std::fmt::Display for TrackedString { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + self.string.fmt(f) + } +} + #[derive(Debug)] pub enum SyntacticParseError { InvalidHexadecimalLiteral(String), InvalidDecimalLiteral(String), InvalidSymbolIdentifier(String), UnterminatedComment, + UnterminatedString, UnterminatedExpression, LabelInMacroDefinition, } @@ -52,6 +68,8 @@ impl std::fmt::Debug for SyntacticToken { Expression(expr) => format!("Expression({expr:?})"), + String(string) => format!("String('{string}')"), + BlockOpen => format!("BlockOpen"), BlockClose => format!("BlockClose"), Separator => format!("Separator"), diff --git a/src/tokens/tracked.rs b/src/tokens/tracked.rs index 049c8f8..ea37047 100644 --- a/src/tokens/tracked.rs +++ b/src/tokens/tracked.rs @@ -8,8 +8,8 @@ pub struct Tracked<T> { } impl<T> Tracked<T> { - pub fn from(value: T, source: &SourceSpan) -> Self { - Self { source: source.clone(), value } + pub fn from(value: T, source: SourceSpan) -> Self { + Self { source, value } } } |