diff options
author | Ben Bridle <ben@derelict.engineering> | 2025-04-26 13:00:51 +1200 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-04-26 13:00:51 +1200 |
commit | e5447e2568e24db9a5218bbe452b856266ca39ae (patch) | |
tree | e5425bf49fb7ff93b8c46a80b8742a85ecf19fe4 /src/stages | |
parent | 83e5107684a37aa825d626c19a2af7fd6bfc231a (diff) | |
download | torque-asm-e5447e2568e24db9a5218bbe452b856266ca39ae.zip |
Implement first-class string literals
This feature promotes strings to a first-class type in the language.
If a string is passed to an invocation via the new string-type argument,
the string will be passed as a whole value. String arguments can still
be passed to an invocation via an integer-type argument, in which case
they'll be broken apart into individual characters with the macro being
invoked once per character.
String-type macro arguments are declared by wrapping the argument name in double quotes, like "name".
Diffstat (limited to 'src/stages')
-rw-r--r-- | src/stages/compiler.rs | 16 | ||||
-rw-r--r-- | src/stages/intermediate.rs | 145 | ||||
-rw-r--r-- | src/stages/intermediate_tokens.rs | 4 | ||||
-rw-r--r-- | src/stages/semantic.rs | 32 | ||||
-rw-r--r-- | src/stages/semantic_tokens.rs | 38 | ||||
-rw-r--r-- | src/stages/syntactic_tokens.rs | 1 |
6 files changed, 179 insertions, 57 deletions
diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs index 44b7660..9d16bf0 100644 --- a/src/stages/compiler.rs +++ b/src/stages/compiler.rs @@ -92,6 +92,9 @@ impl SymbolParser { MacroDefinitionBody::Integer(integer) => { self.parse_integer_token(&integer, &integer.source) } + MacroDefinitionBody::String(string) => { + self.parse_string_token(&string, &string.source) + } MacroDefinitionBody::Invocation(invocation) => { self.parse_invocation(&invocation, &invocation.source) } @@ -142,10 +145,12 @@ impl SymbolParser { InvocationArgument::BlockToken(block) => { self.parse_block_token(block, &source); } + InvocationArgument::StringToken(string) => { + self.parse_string_token(string, &source); + }, InvocationArgument::Invocation(invocation) => { self.parse_invocation(invocation, &source); } - InvocationArgument::String(_) => (), } } } @@ -197,4 +202,13 @@ impl SymbolParser { IntegerToken::IntegerLiteral(_) => (), } } + + fn parse_string_token(&mut self, token: &StringToken, source: &SourceSpan) { + match &token { + StringToken::Invocation(invocation) => { + self.parse_invocation(&invocation, source) + } + StringToken::StringLiteral(_) => (), + } + } } diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs index c28426c..c4bb74d 100644 --- a/src/stages/intermediate.rs +++ b/src/stages/intermediate.rs @@ -84,6 +84,13 @@ impl IntermediateParser { ArgumentType::Block => { IntermediateValue::Block(Vec::new()) } + ArgumentType::String => { + let string = String::new(); + let chars = Vec::new(); + let literal = StringLiteral { string, chars }; + let tracked = Tracked::from(literal, null.clone()); + IntermediateValue::String(tracked) + } }; let tracked = Tracked::from(value, null.clone()); arguments.insert(argument.name.clone(), tracked); @@ -173,6 +180,11 @@ impl<'a> Environment<'a> { let value = IntermediateValue::Block(tokens); Some(Tracked::from(value, source.clone())) } + MacroDefinitionBody::String(string) => { + let string = 
self.parse_string_token(string, &source)?; + let integer = IntermediateValue::String(string); + Some(Tracked::from(integer, source.clone())) + } } } @@ -279,10 +291,21 @@ impl<'a> Environment<'a> { } } + fn parse_string_token(&mut self, string: &StringToken, source: &SourceSpan) -> Option<Tracked<StringLiteral>> { + match string { + StringToken::StringLiteral(literal) => { + Some(Tracked::from(literal.clone(), source.clone())) + } + StringToken::Invocation(invocation) => { + self.parse_string_invocation(&invocation, source) + } + } + } + fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { match self.parse_invocation(invocation, source)?.value { IntermediateValue::Integer(integer) => Some(integer), - IntermediateValue::Block(_) => { + IntermediateValue::Block(_) | IntermediateValue::String(_) => { let error = IntermediateError::ExpectedInteger; self.errors.push(Tracked::from(error, source.clone())); None @@ -293,7 +316,7 @@ impl<'a> Environment<'a> { fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> { match self.parse_invocation(invocation, source)?.value { IntermediateValue::Block(tokens) => Some(tokens), - IntermediateValue::Integer(_) => { + IntermediateValue::Integer(_) | IntermediateValue::String(_) => { let error = IntermediateError::ExpectedBlock; self.errors.push(Tracked::from(error, source.clone())); None @@ -301,9 +324,21 @@ impl<'a> Environment<'a> { } } + fn parse_string_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<StringLiteral>> { + match self.parse_invocation(invocation, source)?.value { + IntermediateValue::String(literal) => Some(literal), + IntermediateValue::Integer(_) | IntermediateValue::Block(_) => { + let error = IntermediateError::ExpectedString; + self.errors.push(Tracked::from(error, source.clone())); + None + } + } + } + fn parse_invocation(&mut self, 
invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { let received_count = invocation.arguments.len(); if let Some(argument) = self.arguments.get(&invocation.name) { + // This invocation is a macro argument if received_count != 0 { let error = IntermediateError::IncorrectArgumentCount(0, received_count); self.errors.push(Tracked::from(error, source.clone())); @@ -312,6 +347,7 @@ impl<'a> Environment<'a> { Some(argument.clone()) } } else if let Some(label_name) = self.label_names.get(&invocation.name) { + // This invocation is a label reference if received_count != 0 { let error = IntermediateError::IncorrectArgumentCount(0, received_count); self.errors.push(Tracked::from(error, source.clone())); @@ -335,48 +371,9 @@ impl<'a> Environment<'a> { // Gather and type-check the provided arguments. let mut arguments = Vec::new(); for (i, argument) in invocation.arguments.iter().enumerate() { - let received_type = match &argument.value { - InvocationArgument::String(string) => { - let mut values = Vec::new(); - for c in &string.chars { - let integer = IntermediateInteger::Integer(**c); - let tracked = Tracked::from(integer, c.source.clone()); - values.push(IntermediateValue::Integer(tracked)); - } - arguments.push(RepeatedArgument::List(values)); - ArgumentType::Integer - } - InvocationArgument::IntegerToken(integer) => { - let tracked = self.parse_integer_token(&integer, &argument.source)?; - let value = IntermediateValue::Integer(tracked); - arguments.push(RepeatedArgument::Loop(value)); - ArgumentType::Integer - } - InvocationArgument::BlockToken(block) => { - let tokens = self.parse_block_token(&block, &argument.source); - let value = IntermediateValue::Block(tokens); - arguments.push(RepeatedArgument::Loop(value)); - ArgumentType::Block - } - InvocationArgument::Invocation(invocation) => { - let value = self.parse_invocation(&invocation, &argument.source)?; - let received_type = match &value.value { - IntermediateValue::Integer(_) => 
ArgumentType::Integer, - IntermediateValue::Block(_) => ArgumentType::Block, - }; - arguments.push(RepeatedArgument::Loop(value.value)); - received_type - } - }; - let expected_type = match received_type { - ArgumentType::Integer => ArgumentType::Block, - ArgumentType::Block => ArgumentType::Integer, - }; - if definition.arguments[i].variant != received_type { - let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); - self.errors.push(Tracked::from(error, argument.source.clone())); - return None; - } + let expected_type = definition.arguments[i].variant; + let received_value = self.parse_invocation_argument(argument, expected_type)?; + arguments.push(received_value); } // Invoke the invocation multiple times. let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1); @@ -407,6 +404,7 @@ impl<'a> Environment<'a> { unreachable!("Uncaught duplicate macro argument name '{name}'"); }; } + // Invoke the macro once. let mut env = Environment { label_names: &self.label_names, macro_names: &self.macro_names, @@ -415,16 +413,19 @@ impl<'a> Environment<'a> { errors: &mut self.errors, id: next_id!(), }; + // Save the result of this macro invocation. values.push(env.parse_macro_definition_body(&definition.body, source)?); } if values.len() == 1 { + // If the macro was invoked once, return the value. values.pop() } else { - // Flatten all values into a list of block tokens. + // If the macro was invoked multiple times, create a list of + // block tokens from the returned values. 
let mut block = Vec::new(); for value in values { match value.value { - IntermediateValue::Integer(_) => { + IntermediateValue::Integer(_) | IntermediateValue::String(_) => { let error = IntermediateError::ExpectedBlock; self.errors.push(Tracked::from(error, value.source)); return None; @@ -447,6 +448,58 @@ impl<'a> Environment<'a> { } } + fn parse_invocation_argument(&mut self, argument: &Tracked<InvocationArgument>, expected_type: ArgumentType) -> Option<RepeatedArgument> { + let source = &argument.source; + let (received_value, received_type) = match &argument.value { + InvocationArgument::StringToken(string) => { + let string = self.parse_string_token(string, source)?; + self.parse_invocation_string_argument(string, expected_type)? + } + InvocationArgument::IntegerToken(integer) => { + let tracked = self.parse_integer_token(&integer, &argument.source)?; + let value = IntermediateValue::Integer(tracked); + (RepeatedArgument::Loop(value), ArgumentType::Integer) + } + InvocationArgument::BlockToken(block) => { + let tokens = self.parse_block_token(&block, &argument.source); + let value = IntermediateValue::Block(tokens); + (RepeatedArgument::Loop(value), ArgumentType::Block) + } + InvocationArgument::Invocation(invocation) => { + let value = self.parse_invocation(&invocation, &argument.source)?; + match value.value { + IntermediateValue::Integer(_) => + (RepeatedArgument::Loop(value.value), ArgumentType::Integer), + IntermediateValue::Block(_) => + (RepeatedArgument::Loop(value.value), ArgumentType::Block), + IntermediateValue::String(string) => + self.parse_invocation_string_argument(string, expected_type)? 
+ } + } + }; + if expected_type != received_type { + let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); + self.errors.push(Tracked::from(error, argument.source.clone())); + return None; + } + return Some(received_value); + } + + fn parse_invocation_string_argument(&mut self, string: Tracked<StringLiteral>, expected_type: ArgumentType) -> Option<(RepeatedArgument, ArgumentType)> { + if let ArgumentType::Integer = expected_type { + let mut values = Vec::new(); + for c in &string.chars { + let integer = IntermediateInteger::Integer(**c); + let tracked = Tracked::from(integer, c.source.clone()); + values.push(IntermediateValue::Integer(tracked)); + } + Some((RepeatedArgument::List(values), ArgumentType::Integer)) + } else { + let value = IntermediateValue::String(string); + Some((RepeatedArgument::Loop(value), ArgumentType::String)) + } + } + fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { let mut intermediate = Vec::new(); let mut error = false; diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs index a09581e..4c4a345 100644 --- a/src/stages/intermediate_tokens.rs +++ b/src/stages/intermediate_tokens.rs @@ -47,6 +47,7 @@ pub enum IntermediateExpressionToken { pub enum IntermediateValue { Integer(Tracked<IntermediateInteger>), Block(Vec<Tracked<IntermediateToken>>), + String(Tracked<StringLiteral>), } pub enum RepeatedArgument { @@ -66,6 +67,7 @@ impl RepeatedArgument { pub enum IntermediateError { ExpectedInteger, ExpectedBlock, + ExpectedString, ListExhausted, LabelReferenceInConditionPredicate, LabelDefinitionInConditionBody, @@ -91,6 +93,8 @@ fn report_intermediate_error(error: &Tracked<IntermediateError>, source_code: &s "An integer value was expected here", IntermediateError::ExpectedBlock => "A block value was expected here", + IntermediateError::ExpectedString => + "A string value was expected here", 
IntermediateError::ListExhausted => "This string is shorter than another string passed to the same invocation", IntermediateError::LabelReferenceInConditionPredicate => diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs index ec2f704..6cd83f8 100644 --- a/src/stages/semantic.rs +++ b/src/stages/semantic.rs @@ -144,6 +144,11 @@ impl SemanticParser { MacroDefinitionBody::Block(mut tokens) => { block_tokens.append(&mut tokens); } + MacroDefinitionBody::String(string) => { + let error = SemanticError::ExpectedBlock(location); + let tracked = Tracked::from(error, string.source); + self.errors.push(tracked); + } MacroDefinitionBody::Invocation(invocation) => { // Convert invocation to a block invocation. let token = BlockToken::Invocation(invocation.value); @@ -178,10 +183,10 @@ impl SemanticParser { let tracked = Tracked::from(token, source); Some(MacroDefinitionBody::Integer(tracked)) } - SyntacticToken::StringLiteral(_) => { - let error = SemanticError::MisplacedStringLiteral; - self.errors.push(Tracked::from(error, source)); - None + SyntacticToken::StringLiteral(value) => { + let token = StringToken::StringLiteral(value); + let tracked = Tracked::from(token, source); + Some(MacroDefinitionBody::String(tracked)) } SyntacticToken::WordTemplate(word_template) => { let token = BlockToken::WordTemplate(word_template); @@ -260,6 +265,11 @@ impl SemanticParser { self.errors.push(Tracked::from(error, token.source)); None } + MacroDefinitionBody::String(string) => { + let error = SemanticError::ExpectedInteger(location); + self.errors.push(Tracked::from(error, string.source)); + None + } } } @@ -281,6 +291,11 @@ impl SemanticParser { self.errors.push(Tracked::from(error, integer.source)); None } + MacroDefinitionBody::String(string) => { + let error = SemanticError::ExpectedBlock(location); + self.errors.push(Tracked::from(error, string.source)); + None + } } } @@ -357,7 +372,8 @@ impl SemanticParser { let source = token.source; match token.value { 
SyntacticToken::StringLiteral(string_literal) => { - let argument = InvocationArgument::String(string_literal); + let string = StringToken::StringLiteral(string_literal); + let argument = InvocationArgument::StringToken(string); Some(Tracked::from(argument, source)) } SyntacticToken::IntegerLiteral(value) => { @@ -430,6 +446,12 @@ impl SemanticParser { } } } + SyntacticToken::StringLiteral(string) => { + let variant = ArgumentType::String; + let name = string.string; + let definition = ArgumentDefinition { name, variant }; + return Some(Tracked::from(definition, source)); + } _ => (), }; let error = SemanticError::InvalidArgumentDefinition; diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs index dfbea1a..bdbc0f9 100644 --- a/src/stages/semantic_tokens.rs +++ b/src/stages/semantic_tokens.rs @@ -17,10 +17,11 @@ pub struct ArgumentDefinition { pub variant: ArgumentType, } -#[derive(PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum ArgumentType { Integer, Block, + String, } impl std::fmt::Display for ArgumentType { @@ -28,6 +29,7 @@ impl std::fmt::Display for ArgumentType { match self { ArgumentType::Integer => write!(f, "an integer"), ArgumentType::Block => write!(f, "a block"), + ArgumentType::String => write!(f, "a string"), } } } @@ -35,6 +37,7 @@ impl std::fmt::Display for ArgumentType { pub enum MacroDefinitionBody { Integer(Tracked<IntegerToken>), Block(Vec<Tracked<BlockToken>>), + String(Tracked<StringToken>), Invocation(Tracked<Invocation>), } @@ -68,15 +71,20 @@ pub enum BlockToken { Invocation(Invocation), } +pub enum StringToken { + StringLiteral(StringLiteral), + Invocation(Invocation), +} + pub struct Invocation { pub name: String, pub arguments: Vec<Tracked<InvocationArgument>>, } pub enum InvocationArgument { - String(StringLiteral), IntegerToken(IntegerToken), BlockToken(BlockToken), + StringToken(StringToken), Invocation(Invocation), } @@ -88,6 +96,7 @@ pub enum SemanticError { ExpectedInteger(SemanticLocation), 
ExpectedBlock(SemanticLocation), + ExpectedString(SemanticLocation), InvalidArgumentDefinition, InvalidInvocationArgument, @@ -153,9 +162,11 @@ fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { &format!("An integer value was expected {location}"), SemanticError::ExpectedBlock(location) => &format!("A block value was expected {location}"), + SemanticError::ExpectedString(location) => + &format!("A string value was expected {location}"), SemanticError::InvalidArgumentDefinition => - "Argument definitions must be in the form 'name' or '{{name}}'", + "Argument definitions must be in the form name, {name}, or \"name\"", SemanticError::InvalidInvocationArgument => "This token cannot be used in an invocation argument", @@ -185,6 +196,9 @@ pub fn print_semantic_token(i: usize, token: &SemanticToken) { MacroDefinitionBody::Block(tokens) => { print_block(i+1, tokens); } + MacroDefinitionBody::String(string) => { + print_string_token(i+1, string); + } MacroDefinitionBody::Invocation(invocation) => { print_invocation(i+1, invocation); } @@ -202,6 +216,9 @@ fn print_argument_definition(i: usize, argument: &ArgumentDefinition) { ArgumentType::Block => { indent!(i, "Argument({}, block)", argument.name) } + ArgumentType::String => { + indent!(i, "Argument({}, string)", argument.name) + } } } @@ -249,8 +266,8 @@ fn print_invocation(i: usize, invocation: &Invocation) { fn print_invocation_argument(i: usize, argument: &InvocationArgument) { match &argument { - InvocationArgument::String(string_literal) => { - indent!(i, "String({string_literal})") + InvocationArgument::StringToken(string) => { + print_string_token(i, string) } InvocationArgument::IntegerToken(integer) => { print_integer_token(i, integer) @@ -278,6 +295,17 @@ fn print_integer_token(i: usize, integer: &IntegerToken) { } } +fn print_string_token(i: usize, string: &StringToken) { + match string { + StringToken::StringLiteral(string_literal) => { + indent!(i, "String({string_literal})") + } + 
StringToken::Invocation(invocation) => { + print_invocation(i, invocation) + } + } +} + fn print_expression(i: usize, expression: &Expression) { indent!(i, "Expression"); for token in &expression.tokens { diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs index 5bfa0be..5a0ac9e 100644 --- a/src/stages/syntactic_tokens.rs +++ b/src/stages/syntactic_tokens.rs @@ -23,6 +23,7 @@ pub struct SyntacticMacroDefinition { pub body: Vec<Tracked<SyntacticToken>>, } +#[derive(Clone)] pub struct StringLiteral { pub string: String, pub chars: Vec<Tracked<isize>>, |