use crate::*;
use assembler::Tokeniser;
/// Tokenise `source_code` into a list of syntactic tokens, optionally
/// associating the text with a file path for source tracking.
///
/// Returns every token on success, or every recorded error on failure.
pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
    let tokeniser = Tokeniser::new(source_code, path);
    parse_syntactic_from_tokeniser(tokeniser)
}
/// Core tokenising loop: consume characters from `t` and build a flat list
/// of syntactic tokens, recursing into bracketed constructs (blocks,
/// expressions, macro bodies) by tokenising child spans.
///
/// Errors are collected rather than aborting at the first failure; if any
/// were recorded, the full list is returned as `Err`.
fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
// Characters that terminate a plain token; each is handled by its own match
// arm below.
// NOTE(review): '?' is matched as SyntacticToken::Condition further down but
// is not registered as a delimiter here — confirm this is intentional.
t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']);
let mut tokens = Vec::new();
let mut errors = Vec::new();
// Record an error (tracked against a source span) and `continue` to the next
// token. The one-argument form uses the tokeniser's current span.
macro_rules! err {
($error:expr) => {{
err!($error, t.get_source());
}};
($error:expr, $source:expr) => {{
errors.push(Tracked::from($error, $source));
continue;
}};
}
// Predicate for `track_until`: true once the matching close character is
// reached, counting nested open/close pairs so inner pairs don't terminate
// the span early.
macro_rules! is_matching {
($open:expr, $close:expr) => {{
let mut depth = 1;
move |t: &mut Tokeniser| {
match t.eat_char() {
Some($open) => { depth += 1; false }
Some($close) => { depth -= 1; depth == 0 }
_ => false,
}
}}
};
}
// Predicate for `track_until`: true at the first occurrence of the close
// character (no nesting).
macro_rules! is_any {
($close:expr) => {
|t: &mut Tokeniser| { t.eat_char() == Some($close) }
};
}
loop {
t.eat_whitespace();
t.mark_start();
// End of input: stop tokenising.
let Some(c) = t.eat_char() else { break };
let token = match c {
// String literal: everything up to the closing double quote, tokenised
// as a child span and converted character-by-character.
'"' => {
let source = t.get_source();
t.mark_child();
if let Some(_) = t.track_until(is_any!('"')) {
let child = t.tokenise_child_span();
SyntacticToken::StringLiteral(parse_string_literal(child))
} else {
err!(SyntacticError::UnterminatedStringLiteral, source);
}
}
// Character literal: exactly one character between single quotes,
// yielding its integer value.
'\'' => {
let source = t.get_source();
if let Some(string) = t.track_until(is_any!('\'')) {
let mut chars: Vec<char> = string.chars().collect();
if chars.len() == 1 {
let value = parse_char(chars.pop().unwrap());
SyntacticToken::IntegerLiteral(value)
} else {
t.mark_end();
err!(SyntacticError::ExpectedSingleCharacter, t.get_source());
}
} else {
err!(SyntacticError::UnterminatedCharacterLiteral, source);
}
}
// Block literal: `{ ... }` with nesting, recursively tokenised.
'{' => {
let source = t.get_source();
t.mark_child();
if let Some(_) = t.track_until(is_matching!('{','}')) {
let child = t.tokenise_child_span();
match parse_syntactic_from_tokeniser(child) {
Ok(tokens) => SyntacticToken::BlockLiteral(tokens),
Err(mut parse_errors) => {
// Surface child errors and move on to the next token.
errors.append(&mut parse_errors);
continue;
}
}
} else {
err!(SyntacticError::UnterminatedBlock, source);
}
}
// Expression: `[ ... ]` with nesting, recursively tokenised.
'[' => {
let source = t.get_source();
t.mark_child();
if let Some(_) = t.track_until(is_matching!('[',']')) {
let child = t.tokenise_child_span();
match parse_syntactic_from_tokeniser(child) {
Ok(tokens) => SyntacticToken::Expression(tokens),
Err(mut parse_errors) => {
errors.append(&mut parse_errors);
continue;
}
}
} else {
err!(SyntacticError::UnterminatedExpression, source);
}
}
// Comment: `( ... )` with nesting; produces no token.
'(' => {
let source = t.get_source();
if let Some(string) = t.track_until(is_matching!('(',')')) {
// Check if the comment fills the entire line.
if t.start.position.column == 0 && t.end_of_line() {
// A full-line `(: path)` comment records an embedded source
// path and the line it starts on — presumably so later
// diagnostics can refer to the embedded file; confirm.
if let Some(path) = string.strip_prefix(": ") {
t.embedded_path = Some(PathBuf::from(path.trim()));
t.embedded_first_line = t.start.position.line + 1;
}
}
continue;
} else {
err!(SyntacticError::UnterminatedComment, source);
}
}
// Macro definition: `%name body ;` — the name is the token following
// '%', the body runs until the terminating ';' and is recursively
// tokenised.
'%' => {
let name = t.eat_token();
let source = t.get_source();
t.mark_child();
if let Some(_) = t.track_until(is_any!(';')) {
let child = t.tokenise_child_span();
match parse_syntactic_from_tokeniser(child) {
Ok(body) => {
let name = Tracked::from(name, source);
let definition = SyntacticMacroDefinition { name, body };
SyntacticToken::MacroDefinition(definition)
}
Err(mut parse_errors) => {
errors.append(&mut parse_errors);
continue;
}
}
} else {
err!(SyntacticError::UnterminatedMacroDefinition(name), source);
}
}
// Stray terminators: each close character without a matching opener.
'}' => err!(SyntacticError::UnmatchedBlockTerminator),
']' => err!(SyntacticError::UnmatchedExpressionTerminator),
')' => err!(SyntacticError::UnmatchedCommentTerminator),
';' => err!(SyntacticError::UnmatchedMacroTerminator),
// Sigil-prefixed names and single-character tokens.
'@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())),
'&' => SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())),
'~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())),
':' => SyntacticToken::Separator,
'|' => SyntacticToken::Pin,
'?' => SyntacticToken::Condition,
// Word template: '#' followed by a token of bit literals and fields.
'#' => {
t.mark_child();
t.eat_token();
let child = t.tokenise_child_span();
match parse_word_template(child) {
Ok(word_template) => SyntacticToken::WordTemplate(word_template),
Err(mut parse_errors) => {
errors.append(&mut parse_errors);
continue;
}
}
},
// Anything else: an integer literal (with optional '-' sign and
// 0x/0b/0o radix prefix) or, failing that, a global symbol.
c => {
// Reassemble the full token from the first character plus the rest.
let token = format!("{c}{}", t.eat_token());
let (stripped, neg) = match token.strip_prefix('-') {
Some(stripped) => (stripped, true),
None => (token.as_str(), false),
};
if let Some(hex_string) = stripped.strip_prefix("0x") {
let hex_string = hex_string.to_string();
match parse_integer_literal(&hex_string, 16, neg) {
Ok(value) => SyntacticToken::IntegerLiteral(value),
Err(_) => err!(SyntacticError::InvalidHexadecimalLiteral(hex_string)),
}
} else if let Some(binary_string) = stripped.strip_prefix("0b") {
let binary_string = binary_string.to_string();
match parse_integer_literal(&binary_string, 2, neg) {
Ok(value) => SyntacticToken::IntegerLiteral(value),
Err(_) => err!(SyntacticError::InvalidBinaryLiteral(binary_string)),
}
} else if let Some(octal_string) = stripped.strip_prefix("0o") {
let octal_string = octal_string.to_string();
match parse_integer_literal(&octal_string, 8, neg) {
Ok(value) => SyntacticToken::IntegerLiteral(value),
Err(_) => err!(SyntacticError::InvalidOctalLiteral(octal_string)),
}
} else {
let decimal_string = stripped.to_string();
match parse_integer_literal(&decimal_string, 10, neg) {
Ok(value) => SyntacticToken::IntegerLiteral(value),
// Err(true): valid digits but out of range — a genuine error.
Err(true) => err!(SyntacticError::InvalidDecimalLiteral(decimal_string)),
// Err(false): not numeric at all — fall back to a symbol.
Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)),
}
}
}
};
t.mark_end();
tokens.push(Tracked::from(token, t.get_source()))
}
match errors.is_empty() {
true => Ok(tokens),
false => Err(errors),
}
}
/// Parse an integer literal in the given radix, ignoring `_` digit
/// separators. `neg` indicates the token carried a leading minus sign; the
/// returned value is negated accordingly.
///
/// The error `bool` distinguishes two failure modes for the caller:
/// - `Err(false)`: the token is not a valid integer in this radix at all
///   (a decimal caller falls back to treating it as a symbol),
/// - `Err(true)`: the digits are valid but the value does not fit in `isize`.
fn parse_integer_literal(token: &str, radix: u32, neg: bool) -> Result<isize, bool> {
    let magnitude = match usize::from_str_radix(&token.replace('_', ""), radix) {
        Ok(magnitude) => magnitude,
        Err(_) => return Err(false),
    };
    if neg {
        // A negative literal may reach one past isize::MAX: the magnitude of
        // isize::MIN is isize::MAX + 1. Negating after try_from would wrongly
        // reject isize::MIN, so check the magnitude and negate with wrapping
        // (wrapping_neg maps the MIN-magnitude bit pattern to isize::MIN).
        if magnitude <= isize::MAX as usize + 1 {
            Ok((magnitude as isize).wrapping_neg())
        } else {
            Err(true)
        }
    } else {
        isize::try_from(magnitude).map_err(|_| true)
    }
}
/// Convert the contents of a string literal span into a `StringLiteral`,
/// pairing the raw text with per-character integer values, each tracked
/// against its own source span.
fn parse_string_literal(mut t: Tokeniser) -> StringLiteral {
    let mut string = String::new();
    let mut chars = Vec::new();
    loop {
        let Some(c) = t.eat_char() else { break };
        string.push(c);
        // Track this character against the span just consumed.
        chars.push(Tracked::from(parse_char(c), t.get_source()));
        // Begin a fresh span for the next character.
        t.mark_start();
    }
    StringLiteral { string, chars }
}
/// Map a character to its Unicode code point as an integer value.
fn parse_char(c: char) -> isize {
    u32::from(c) as isize
}
/// Parse a word template token (the text following '#') into a `WordTemplate`.
///
/// Each character contributes one bit to the word, left to right:
/// - '0' and '1' are literal bits,
/// - an alphabetic character names a bit field; a run of the same character
///   extends that field,
/// - '_' is ignored entirely (a visual separator).
///
/// Fields record their width and their shift from the least-significant bit.
/// Reusing a field name or using any other character is an error; all errors
/// are collected and returned together.
fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> {
let mut value = 0; // Value of the whole word template.
let mut value_width = 0; // Bit width of the whole word template.
let mut field_width = 0; // Width of the current bit field.
let mut field_name = '\0'; // Name of the current bit field.
let mut fields: Vec<Tracked<BitField>> = Vec::new();
let mut errors: Vec<Tracked<SyntacticError>> = Vec::new();
// Commit the field currently being accumulated, rejecting duplicate names.
// The field is pushed with shift 0; its true shift is accumulated afterwards
// (one increment per subsequent bit, plus the final correction below).
macro_rules! push_field {
() => {
if fields.iter().any(|f| f.name == field_name) {
let error = SyntacticError::DuplicateFieldNameInWord(field_name);
errors.push(Tracked::from(error, t.get_source()));
} else {
let field = BitField { name: field_name, width: field_width, shift: 0};
fields.push(Tracked::from(field, t.get_source()));
}
};
}
while let Some(c) = t.eat_char() {
// Ignore underscores.
if c == '_' {
t.mark.undo();
continue;
}
// Add a bit to the value.
value <<= 1;
value_width += 1;
// Every already-committed field moves one bit further from the LSB.
for field in &mut fields {
field.shift += 1;
}
// Extend the current field.
if c == field_name {
field_width += 1;
continue;
}
// Commit the current field.
if field_width > 0 {
t.mark_end_prev();
push_field!();
field_width = 0;
field_name = '\0';
}
// Parse bit literals.
if c == '0' {
continue;
}
if c == '1' {
value |= 1;
continue;
}
t.mark_start_prev();
if c.is_alphabetic() {
// Start a new one-bit field named after this character.
field_name = c;
field_width = 1;
continue;
} else {
t.mark_end();
let error = SyntacticError::InvalidCharacterInWord(c);
errors.push(Tracked::from(error, t.get_source()));
}
}
// Commit the final field.
// A field is committed one bit late (the bit that triggered its commit was
// already added, yet it was pushed with shift 0), so every committed field's
// shift is one short; correct for that here. This runs before the final
// in-progress field is pushed, which genuinely has shift 0.
for field in &mut fields {
field.shift += 1;
}
if field_width > 0 {
t.mark_end();
push_field!();
}
match errors.is_empty() {
true => Ok(WordTemplate { value, width: value_width, fields }),
false => Err(errors),
}
}