use std::mem::take;
use crate::*;
/// The string-literal mode of the tokeniser: either not inside a string, or
/// inside one of the two kinds of string literal.
#[derive(PartialEq)]
enum StringLiteral {
    /// Not currently inside a string literal.
    None,
    /// Inside a raw string ('...'); each character is emitted as a ByteLiteral
    /// token, and the closing quote emits nothing.
    Raw,
    /// Inside a null-terminated string ("..."); each character is emitted as a
    /// ByteLiteral token, and the closing quote emits a zero byte.
    NullTerminated,
}
pub struct TokenIterator {
    /// The characters that comprise the program source code.
    chars: Vec<char>,
    /// The index of the next character to read.
    i: usize,
    /// The line/column address of the next character to read.
    addr: CharAddress,
    /// If true, skip over any whitespace characters. If false, stop reading
    /// when a whitespace character is encountered.
    skip_whitespace: bool,
    /// The name of the most recently defined label, used to scope sublabels,
    /// sublabel references, and macro definitions.
    label: String,
    /// If not None, each individual character will be tokenised as a ByteLiteral.
    parse_string_literal: StringLiteral,
    /// The address of the first character of the current token.
    start: CharAddress,
    /// The address of the final character of the current token.
    end: CharAddress,
    /// The entire current token, including the prefix character.
    source: String,
    /// The first character of the current token.
    prefix: char,
    /// The second and remaining characters of the current token.
    suffix: String,
}
impl TokenIterator {
    /// Construct a token iterator over a string of program source code.
    pub fn from_str(source_code: &str) -> Self {
        Self {
            chars: source_code.chars().collect(),
            i: 0,
            addr: CharAddress::zero(),
            start: CharAddress::zero(),
            end: CharAddress::zero(),
            skip_whitespace: true,
            parse_string_literal: StringLiteral::None,
            label: String::new(),
            source: String::new(),
            suffix: String::new(),
            prefix: ' ',
        }
    }
    /// Collect a character into the current token and advance past it.
    fn push(&mut self, c: char) {
        self.end = self.addr;
        self.suffix.push(c);
        self.source.push(c);
        self.next(c);
    }
    /// Advance the read position by one character, keeping the line/column
    /// address in step (a newline moves to column zero of the next line).
    fn next(&mut self, c: char) {
        self.i += 1;
        if c == '\n' {
            self.addr.line += 1;
            self.addr.column = 0;
        } else {
            self.addr.column += 1;
        }
    }
    /// Begin a new token at the current character. An opening quote instead
    /// switches the iterator into the matching string-literal mode without
    /// starting an ordinary token.
    fn mark_start(&mut self, c: char) {
        match c {
            '"' => self.parse_string_literal = StringLiteral::NullTerminated,
            '\'' => self.parse_string_literal = StringLiteral::Raw,
            _ => {
                self.start = self.addr;
                self.end = self.addr;
                self.prefix = c;
                self.source.push(c);
                self.skip_whitespace = false;
            }
        }
        self.next(c);
    }
}
impl Iterator for TokenIterator {
    type Item = SyntacticToken;
    /// Produce the next syntactic token from the source code, together with
    /// its source location, or None once the entire source has been consumed.
    fn next(&mut self) -> Option<SyntacticToken> {
        // Initialise values before reading the next token
        let mut is_comment = false;
        self.skip_whitespace = true;
        // Iterate over source characters until a full token is read
        while let Some(c) = self.chars.get(self.i) {
            let c = *c;
            // Parse individual characters from a string literal
            if self.parse_string_literal != StringLiteral::None {
                if c == '"' && self.parse_string_literal == StringLiteral::NullTerminated {
                    // The closing double quote also emits the null terminator byte.
                    self.parse_string_literal = StringLiteral::None;
                    let token = SyntacticToken {
                        r#type: SyntacticTokenType::ByteLiteral(0),
                        source_location: SourceLocation {
                            source: c.to_string(), start:self.addr, end:self.addr },
                        error: None,
                    };
                    self.next(c);
                    return Some(token);
                } else if c == '\'' && self.parse_string_literal == StringLiteral::Raw {
                    // The closing single quote emits no token of its own.
                    self.parse_string_literal = StringLiteral::None;
                    self.next(c);
                    continue
                } else {
                    // Each character of the string becomes its own ByteLiteral
                    // token. Build the token BEFORE advancing, so the recorded
                    // location points at this character and not the one after
                    // it (previously self.next ran first, skewing the address
                    // by one character — unlike the '"' branch above).
                    // NOTE(review): `c as u8` truncates characters above U+00FF.
                    let token = SyntacticToken {
                        r#type: SyntacticTokenType::ByteLiteral(c as u8),
                        source_location: SourceLocation {
                            source: c.to_string(), start:self.addr, end:self.addr },
                        error: None,
                    };
                    self.next(c);
                    return Some(token);
                }
            }
            // Intercept comments
            if is_comment {
                self.push(c); if c == ')' { break } else { continue }; }
            else if self.skip_whitespace && c == '(' {
                is_comment = true; self.mark_start(c); continue }
            // Allow a semicolon at the end of a token to be handled as a separate token
            if self.source.len() > 0 && c == ';' { break }
            // Handle the current character
            match (is_whitespace(c), self.skip_whitespace) {
                (true, true) => self.next(c),        // c is the expected leading whitespace
                (false, true) => self.mark_start(c), // c is the first character of the token
                (false, false) => self.push(c),      // c is a character of the token
                (true, false) => break,              // c is trailing whitespace
            }
            // Allow literal values to be chained to the end of the previous token
            if self.source.len() > 0 && c == ':' { break }
        }
        // If no source characters were grabbed then we have read through the entire source file
        if self.source.len() == 0 { return None; }
        // Allow handling macro terminators and symbols of length 1 in the match expression
        if self.suffix.len() == 0 { self.prefix = '\0'; }
        // Consume the collected characters to be used in the match expression
        let full = take(&mut self.source);
        let suffix = take(&mut self.suffix);
        let mut error = None;
        // A padding value must parse as a short; fall back to zero and record
        // the error on the token rather than aborting tokenisation.
        let mut parse_padding_value = |v| {
            parse_short(v).or_else(|| {
                error = Some(Error::InvalidPaddingValue); Some(0)
            }).unwrap()
        };
        // The prefix character (sigil) determines how the token is interpreted.
        let r#type = match self.prefix {
            '(' => { SyntacticTokenType::Comment }
            '@' => { SyntacticTokenType::LabelDefinition({self.label=suffix.clone(); suffix}) }
            '&' => { SyntacticTokenType::LabelDefinition(format!("{}/{}", self.label, suffix)) }
            '$' => { SyntacticTokenType::Padding(parse_padding_value(&suffix)) }
            '~' => { SyntacticTokenType::Reference(format!("{}/{}", self.label, suffix)) }
            // A '~' directly after '%' scopes the macro name to the current label.
            '%' => if let Some(("", sublabel)) = suffix.split_once("~") {
                SyntacticTokenType::MacroDefinition(format!("{}/{}", self.label, sublabel))
            } else {
                SyntacticTokenType::MacroDefinition(suffix)
            }
            _ => {
                if ";" == &full { SyntacticTokenType::MacroDefinitionTerminator }
                else if let Some(value) = parse_byte_lit(&full) { SyntacticTokenType::ByteLiteral(value) }
                else if let Some(value) = parse_short_lit(&full) { SyntacticTokenType::ShortLiteral(value) }
                else if let Some(value) = parse_instruction(&full) { SyntacticTokenType::Instruction(value) }
                else { SyntacticTokenType::Reference(full.clone()) }
            }
        };
        Some(SyntacticToken {
            r#type,
            source_location:SourceLocation::new(full,self.start,self.end),
            error,
        })
    }
}
/// Interpret a token as a byte literal: exactly two hexadecimal digits.
fn parse_byte_lit(token: &str) -> Option<u8> {
    if token.len() == 2 {
        u8::from_str_radix(token, 16).ok()
    } else {
        None
    }
}
/// Interpret a token as a short literal: exactly four hexadecimal digits.
fn parse_short_lit(token: &str) -> Option<u16> {
    if token.len() == 4 {
        u16::from_str_radix(token, 16).ok()
    } else {
        None
    }
}
/// Interpret a token as a short value: one to four hexadecimal digits.
fn parse_short(token: &str) -> Option<u16> {
    if (1..=4).contains(&token.len()) {
        u16::from_str_radix(token, 16).ok()
    } else {
        None
    }
}
/// Test whether a character separates tokens. Square brackets and the
/// comment-opening parenthesis act as separators in addition to ordinary
/// whitespace; note that ')' is deliberately NOT a separator.
fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\r' | '[' | ']' | '(')
}
/// Translate an instruction mnemonic into its one-byte op-code, or None if
/// the token is not a recognised mnemonic.
///
/// The table is laid out as a grid: each row is one base operation (low five
/// bits, 0x00..=0x1F) and each column adds a combination of mnemonic-suffix
/// flag bits — '*' sets 0x20, ':' sets 0x40, and 'r' sets 0x80. The flow
/// control rows deviate from this pattern: some of their flag slots hold
/// distinct operations (e.g. JSR at 0x21 occupies the slot that would be
/// "JMP*").
/// NOTE(review): the semantic meaning of each flag bit ('*', ':', 'r') is not
/// visible from this file — confirm against the instruction-set documentation.
fn parse_instruction(token: &str) -> Option<u8> {
Some(match token {
// Flow control operators
"HLT"=>0x00,"DBG" =>0x20,"NOP" =>0x80, // The remaining op-codes in this line are unclaimed
"JMP"=>0x01,"JSR" =>0x21,"JMP:" =>0x41,"JSR:" =>0x61,"JMPr" =>0x81,"JSRr" =>0xA1,"JMPr:" =>0xC1,"JSRr:" =>0xE1,
"JCN"=>0x02,"JSN" =>0x22,"JCN:" =>0x42,"JSN:" =>0x62,"JCNr" =>0x82,"JSNr" =>0xA2,"JCNr:" =>0xC2,"JSNr:" =>0xE2,
"JKN"=>0x03,"JKN*"=>0x23,"JKN:" =>0x43,"JKN*:"=>0x63,"JKNr" =>0x83,"JKNr*"=>0xA3,"JKNr:" =>0xC3,"JKNr*:"=>0xE3,
// Relational operators
"EQU"=>0x04,"EQU*"=>0x24,"EQU:"=>0x44,"EQU*:"=>0x64,"EQUr"=>0x84,"EQUr*"=>0xA4,"EQUr:"=>0xC4,"EQUr*:"=>0xE4,
"NKQ"=>0x05,"NKQ*"=>0x25,"NKQ:"=>0x45,"NKQ*:"=>0x65,"NKQr"=>0x85,"NKQr*"=>0xA5,"NKQr:"=>0xC5,"NKQr*:"=>0xE5,
"LTH"=>0x06,"LTH*"=>0x26,"LTH:"=>0x46,"LTH*:"=>0x66,"LTHr"=>0x86,"LTHr*"=>0xA6,"LTHr:"=>0xC6,"LTHr*:"=>0xE6,
"GTH"=>0x07,"GTH*"=>0x27,"GTH:"=>0x47,"GTH*:"=>0x67,"GTHr"=>0x87,"GTHr*"=>0xA7,"GTHr:"=>0xC7,"GTHr*:"=>0xE7,
// Memory operators
"LDA"=>0x08,"LDA*"=>0x28,"LDA:"=>0x48,"LDA*:"=>0x68,"LDAr"=>0x88,"LDAr*"=>0xA8,"LDAr:"=>0xC8,"LDAr*:"=>0xE8,
"STA"=>0x09,"STA*"=>0x29,"STA:"=>0x49,"STA*:"=>0x69,"STAr"=>0x89,"STAr*"=>0xA9,"STAr:"=>0xC9,"STAr*:"=>0xE9,
"LKA"=>0x0A,"LKA*"=>0x2A,"LKA:"=>0x4A,"LKA*:"=>0x6A,"LKAr"=>0x8A,"LKAr*"=>0xAA,"LKAr:"=>0xCA,"LKAr*:"=>0xEA,
"SKA"=>0x0B,"SKA*"=>0x2B,"SKA:"=>0x4B,"SKA*:"=>0x6B,"SKAr"=>0x8B,"SKAr*"=>0xAB,"SKAr:"=>0xCB,"SKAr*:"=>0xEB,
// Device operators
"LDD"=>0x0C,"LDD*"=>0x2C,"LDD:"=>0x4C,"LDD*:"=>0x6C,"LDDr"=>0x8C,"LDDr*"=>0xAC,"LDDr:"=>0xCC,"LDDr*:"=>0xEC,
"STD"=>0x0D,"STD*"=>0x2D,"STD:"=>0x4D,"STD*:"=>0x6D,"STDr"=>0x8D,"STDr*"=>0xAD,"STDr:"=>0xCD,"STDr*:"=>0xED,
"SHF"=>0x0E,"SHF*"=>0x2E,"SHF:"=>0x4E,"SHF*:"=>0x6E,"SHFr"=>0x8E,"SHFr*"=>0xAE,"SHFr:"=>0xCE,"SHFr*:"=>0xEE,
"SHC"=>0x0F,"SHC*"=>0x2F,"SHC:"=>0x4F,"SHC*:"=>0x6F,"SHCr"=>0x8F,"SHCr*"=>0xAF,"SHCr:"=>0xCF,"SHCr*:"=>0xEF,
//
"PSH"=>0x10,"PSH*"=>0x30,"PSH:"=>0x50,"PSH*:"=>0x70,"PSHr"=>0x90,"PSHr*"=>0xB0,"PSHr:"=>0xD0,"PSHr*:"=>0xF0,
"PSK"=>0x11,"PSK*"=>0x31,"PSK:"=>0x51,"PSK*:"=>0x71,"PSKr"=>0x91,"PSKr*"=>0xB1,"PSKr:"=>0xD1,"PSKr*:"=>0xF1,
"POP"=>0x12,"POP*"=>0x32,"POP:"=>0x52,"POP*:"=>0x72,"POPr"=>0x92,"POPr*"=>0xB2,"POPr:"=>0xD2,"POPr*:"=>0xF2,
"SPL"=>0x13,"SPL*"=>0x33,"SPL:"=>0x53,"SPL*:"=>0x73,"SPLr"=>0x93,"SPLr*"=>0xB3,"SPLr:"=>0xD3,"SPLr*:"=>0xF3,
// Stack operators
"DUP"=>0x14,"DUP*"=>0x34,"DUP:"=>0x54,"DUP*:"=>0x74,"DUPr"=>0x94,"DUPr*"=>0xB4,"DUPr:"=>0xD4,"DUPr*:"=>0xF4,
"OVR"=>0x15,"OVR*"=>0x35,"OVR:"=>0x55,"OVR*:"=>0x75,"OVRr"=>0x95,"OVRr*"=>0xB5,"OVRr:"=>0xD5,"OVRr*:"=>0xF5,
"SWP"=>0x16,"SWP*"=>0x36,"SWP:"=>0x56,"SWP*:"=>0x76,"SWPr"=>0x96,"SWPr*"=>0xB6,"SWPr:"=>0xD6,"SWPr*:"=>0xF6,
"ROT"=>0x17,"ROT*"=>0x37,"ROT:"=>0x57,"ROT*:"=>0x77,"ROTr"=>0x97,"ROTr*"=>0xB7,"ROTr:"=>0xD7,"ROTr*:"=>0xF7,
// Arithmetic operators
"ADD"=>0x18,"ADD*"=>0x38,"ADD:"=>0x58,"ADD*:"=>0x78,"ADDr"=>0x98,"ADDr*"=>0xB8,"ADDr:"=>0xD8,"ADDr*:"=>0xF8,
"SUB"=>0x19,"SUB*"=>0x39,"SUB:"=>0x59,"SUB*:"=>0x79,"SUBr"=>0x99,"SUBr*"=>0xB9,"SUBr:"=>0xD9,"SUBr*:"=>0xF9,
"INC"=>0x1A,"INC*"=>0x3A,"INC:"=>0x5A,"INC*:"=>0x7A,"INCr"=>0x9A,"INCr*"=>0xBA,"INCr:"=>0xDA,"INCr*:"=>0xFA,
"DEC"=>0x1B,"DEC*"=>0x3B,"DEC:"=>0x5B,"DEC*:"=>0x7B,"DECr"=>0x9B,"DECr*"=>0xBB,"DECr:"=>0xDB,"DECr*:"=>0xFB,
// Logical operators
"AND"=>0x1C,"AND*"=>0x3C,"AND:"=>0x5C,"AND*:"=>0x7C,"ANDr"=>0x9C,"ANDr*"=>0xBC,"ANDr:"=>0xDC,"ANDr*:"=>0xFC,
"NOT"=>0x1D,"NOT*"=>0x3D,"NOT:"=>0x5D,"NOT*:"=>0x7D,"NOTr"=>0x9D,"NOTr*"=>0xBD,"NOTr:"=>0xDD,"NOTr*:"=>0xFD,
"IOR"=>0x1E,"IOR*"=>0x3E,"IOR:"=>0x5E,"IOR*:"=>0x7E,"IORr"=>0x9E,"IORr*"=>0xBE,"IORr:"=>0xDE,"IORr*:"=>0xFE,
"XOR"=>0x1F,"XOR*"=>0x3F,"XOR:"=>0x5F,"XOR*:"=>0x7F,"XORr"=>0x9F,"XORr*"=>0xBF,"XORr:"=>0xDF,"XORr*:"=>0xFF,
_ => return None,
})
}