summaryrefslogtreecommitdiff
path: root/src/stages/syntactic_tokens.rs
diff options
context:
space:
mode:
author: Ben Bridle <ben@derelict.engineering>, 2025-03-06 20:33:27 +1300
committer: Ben Bridle <ben@derelict.engineering>, 2025-03-11 16:59:26 +1300
commit: 1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch)
tree: 472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 /src/stages/syntactic_tokens.rs
parent: f2ed89083f5326a7a6f0a1720033d3388aa431fb (diff)
download: torque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip
Rewrite entire assembler
The language is now more general, the code is better structured, error reporting is more detailed, and many new language features have been implemented: conditional blocks, first-class strings, more expression operators, binary literals, negative values, and invocations in constant expressions.
Diffstat (limited to 'src/stages/syntactic_tokens.rs')
-rw-r--r--src/stages/syntactic_tokens.rs160
1 files changed, 160 insertions, 0 deletions
diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs
new file mode 100644
index 0000000..eabf34b
--- /dev/null
+++ b/src/stages/syntactic_tokens.rs
@@ -0,0 +1,160 @@
+use crate::*;
+
+pub enum SyntacticToken { // One token of the syntactic stage; container variants hold further tracked tokens.
+ LabelDefinition(ScopedSymbol), // Definition of a label, named by a local or global symbol.
+ MacroDefinition(SyntacticMacroDefinition), // Macro definition: a tracked name plus its tokens.
+ // Literal values.
+ IntegerLiteral(isize), // Integer literal, held as a signed pointer-sized value.
+ StringLiteral(StringLiteral), // String literal; see the `StringLiteral` struct.
+ WordTemplate(WordTemplate), // Word template; `WordTemplate` is declared outside this file.
+ // Containers of nested tokens.
+ BlockLiteral(Vec<Tracked<SyntacticToken>>), // Tokens nested inside a block.
+ Expression(Vec<Tracked<SyntacticToken>>), // Tokens nested inside an expression.
+ // Symbols.
+ Symbol(ScopedSymbol), // A scoped symbol token.
+ // Payload-free marker tokens. NOTE(review): their source syntax is not visible in this file.
+ Separator,
+ Condition,
+ Pin,
+}
+
+pub struct SyntacticMacroDefinition { // Payload of `SyntacticToken::MacroDefinition`.
+ pub name: Tracked<String>, // Macro name, wrapped in `Tracked`.
+ pub tokens: Vec<Tracked<SyntacticToken>>, // Tokens of the definition; `print_syntactic_token` prints them as children.
+}
+
+pub struct StringLiteral { // Payload of `SyntacticToken::StringLiteral`.
+ pub string: String, // The text that the `Display` impl formats.
+ pub chars: Vec<Tracked<isize>>, // Tracked integer values; presumably one per character of `string` -- TODO confirm.
+}
+
+/// Formats a string literal by delegating to the `fmt` of its `string` field,
+/// handing the caller's formatter through unchanged.
+impl std::fmt::Display for StringLiteral {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Only the text is shown; the `chars` field takes no part in display.
+        let Self { string, .. } = self;
+        string.fmt(f)
+    }
+}
+
+pub enum ScopedSymbol { // A symbol name tagged with its scope.
+ Local(String), // Local name; displayed with a leading `~`.
+ Global(String), // Global name; displayed as-is.
+}
+
+/// Writes a local symbol as `~name` and a global symbol as the bare `name`.
+impl std::fmt::Display for ScopedSymbol {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Local(name) => {
+                // The scope marker comes first, then the name itself.
+                f.write_str("~")?;
+                f.write_str(name)
+            }
+            Self::Global(name) => f.write_str(name),
+        }
+    }
+}
+
+
+pub enum SyntacticError { // Errors that `report_syntactic_error` turns into messages, one per variant.
+ UnterminatedBlock, // This group: a construct was opened but never terminated.
+ UnterminatedExpression,
+ UnterminatedComment,
+ UnterminatedCharacterLiteral,
+ UnterminatedStringLiteral,
+ UnterminatedMacroDefinition(String), // Carries the macro name shown in the message.
+ // A terminator was found while no matching construct was in progress.
+ UnmatchedBlockTerminator,
+ UnmatchedExpressionTerminator,
+ UnmatchedCommentTerminator,
+ UnmatchedMacroTerminator,
+ // A character literal did not contain exactly one character.
+ ExpectedSingleCharacter,
+ // Problems inside a word; the `char` is the offending field name or character.
+ DuplicateFieldNameInWord(char),
+ InvalidCharacterInWord(char),
+ // Text that is not a valid literal in the named base; the `String` is the offending text.
+ InvalidDecimalLiteral(String),
+ InvalidHexadecimalLiteral(String),
+ InvalidBinaryLiteral(String),
+}
+
+
+pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) {
+ for error in errors {
+ report_syntactic_error(error, source_code);
+ }
+}
+
+fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SyntacticError::UnterminatedBlock =>
+ "Block was not terminated, add a '}}' character to terminate",
+ SyntacticError::UnterminatedExpression =>
+ "Expression was not terminated, add a ']' character to terminate",
+ SyntacticError::UnterminatedComment =>
+ "Comment was not terminated, add a ')' character to terminate",
+ SyntacticError::UnterminatedCharacterLiteral =>
+ "Character was not terminated, add a ' character to terminate",
+ SyntacticError::UnterminatedStringLiteral =>
+ "String was not terminated, add a '\"' character to terminate",
+ SyntacticError::UnterminatedMacroDefinition(name) =>
+ &format!("The '{name}' macro definition was not terminated, add a ';' character to terminate"),
+
+ SyntacticError::UnmatchedBlockTerminator =>
+ "Attempted to terminate a block, but no block was in progress",
+ SyntacticError::UnmatchedExpressionTerminator =>
+ "Attempted to terminate an expression, but no expression was in progress",
+ SyntacticError::UnmatchedCommentTerminator =>
+ "Attempted to terminate a comment, but no comment was in progress",
+ SyntacticError::UnmatchedMacroTerminator =>
+ "Attempted to terminate a macro definition, but no macro definition was in progress",
+
+ SyntacticError::ExpectedSingleCharacter =>
+ "A character literal must contain exactly one character",
+
+ SyntacticError::DuplicateFieldNameInWord(name) =>
+ &format!("The field '{name}' has already been used in this word"),
+ SyntacticError::InvalidCharacterInWord(c) =>
+ &format!("The character '{c}' cannot be used in a word"),
+
+ SyntacticError::InvalidDecimalLiteral(string) =>
+ &format!("The string '{string}' is not a valid decimal literal"),
+ SyntacticError::InvalidHexadecimalLiteral(string) =>
+ &format!("The string '{string}' is not a valid hexadecimal literal"),
+ SyntacticError::InvalidBinaryLiteral(string) =>
+ &format!("The string '{string}' is not a valid binary literal"),
+ };
+
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Emits one `indent!` line describing `token` at level `i`, then recurses into
+/// any nested tokens (macro bodies, blocks, expressions) at level `i + 1`.
+///
+/// `indent!` is a project macro; presumably it prints its message indented by
+/// the given level -- confirm against its definition.
+pub fn print_syntactic_token(i: usize, token: &SyntacticToken) {
+    /// Prints each child one level deeper than `parent_level`.
+    fn print_children(parent_level: usize, children: &[Tracked<SyntacticToken>]) {
+        children
+            .iter()
+            .for_each(|child| print_syntactic_token(parent_level + 1, child));
+    }
+
+    match token {
+        SyntacticToken::LabelDefinition(symbol) => indent!(i, "LabelDefinition({symbol})"),
+        SyntacticToken::MacroDefinition(definition) => {
+            indent!(i, "MacroDefinition({})", definition.name);
+            print_children(i, &definition.tokens);
+        }
+
+        SyntacticToken::IntegerLiteral(value) => indent!(i, "IntegerLiteral({value})"),
+        SyntacticToken::StringLiteral(literal) => indent!(i, "StringLiteral({literal})"),
+        SyntacticToken::WordTemplate(template) => indent!(i, "WordTemplate({template})"),
+
+        SyntacticToken::BlockLiteral(tokens) => {
+            indent!(i, "BlockLiteral");
+            print_children(i, tokens);
+        }
+        SyntacticToken::Expression(tokens) => {
+            indent!(i, "Expression");
+            print_children(i, tokens);
+        }
+
+        SyntacticToken::Symbol(symbol) => indent!(i, "Symbol({symbol})"),
+
+        SyntacticToken::Separator => indent!(i, "Separator"),
+        SyntacticToken::Condition => indent!(i, "Condition"),
+        SyntacticToken::Pin => indent!(i, "Pin"),
+    }
+}