From 360a13ad170a8eb52cf125749129168581ace7fe Mon Sep 17 00:00:00 2001
From: Ben Bridle <bridle.benjamin@gmail.com>
Date: Sun, 26 Nov 2023 10:49:01 +1300
Subject: Implement null-terminated strings

Double-quoted strings are automatically terminated by null bytes, with
single-quoted strings maintaining the prior non-terminated string
behaviour.
---
 src/tokenizer.rs | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 03e09e3..2476112 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,6 +1,13 @@
 use std::mem::take;
 use crate::*;
 
+#[derive(PartialEq)]
+enum StringLiteral { // Which kind of string literal, if any, the tokeniser is currently inside.
+    None, // Not inside a string literal; characters are tokenised normally.
+    Raw, // Inside a single-quoted string; the closing quote emits no extra token.
+    NullTerminated, // Inside a double-quoted string; the closing quote emits a ByteLiteral(0).
+}
+
 pub struct TokenIterator {
     /// The characters that comprise the program souce code.
     chars: Vec<char>,
@@ -13,8 +20,8 @@ pub struct TokenIterator {
     skip_whitespace: bool,
     /// The name of the most recently defined label.
     label: String,
-    /// If true, each individual character is tokenised as a ByteLiteral
-    parse_string_literal: bool,
+    /// If not None, each individual character will be tokenised as a ByteLiteral.
+    parse_string_literal: StringLiteral,
 
 
     /// The address of the first character of the current token.
@@ -37,7 +44,7 @@ impl TokenIterator {
             i: 0,
             addr: CharAddress::zero(),
             skip_whitespace: true,
-            parse_string_literal: false,
+            parse_string_literal: StringLiteral::None,
             label: String::new(),
             start: CharAddress::zero(),
             end: CharAddress::zero(),
@@ -65,7 +72,9 @@ impl TokenIterator {
     /// Mark the current character as being the first character of a new token.
     fn mark_start(&mut self, c:char) {
         if c == '"' {
-            self.parse_string_literal = true;
+            self.parse_string_literal = StringLiteral::NullTerminated;
+        } else if c == '\'' {
+            self.parse_string_literal = StringLiteral::Raw;
         } else {
             self.start=self.addr;
             self.end=self.addr;
@@ -89,9 +98,19 @@ impl Iterator for TokenIterator {
         while let Some(c) = self.chars.get(self.i) {
             let c = *c;
             // Parse individual characters from a string literal
-            if self.parse_string_literal {
-                if c == '"' {
-                    self.parse_string_literal=false;
+            if self.parse_string_literal != StringLiteral::None {
+                if c == '"' && self.parse_string_literal == StringLiteral::NullTerminated  {
+                    self.parse_string_literal = StringLiteral::None;
+                    let token = SyntacticToken {
+                        r#type: SyntacticTokenType::ByteLiteral(0),
+                        source_location: SourceLocation {
+                            source: c.to_string(), start:self.addr, end:self.addr },
+                        error: None,
+                    };
+                    self.next(c);
+                    return Some(token);
+                } else if c == '\'' && self.parse_string_literal == StringLiteral::Raw  {
+                    self.parse_string_literal = StringLiteral::None;
                     self.next(c);
                     continue
                 } else {
-- 
cgit v1.2.3-70-g09d2