use crate::*; use assembler::Tokeniser; pub fn parse_syntactic>(source_code: &str, path: Option

) -> Result>, Vec>> { parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path)) } fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result>, Vec>> { t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']); let mut tokens = Vec::new(); let mut errors = Vec::new(); macro_rules! err { ($error:expr) => {{ err!($error, t.get_source()); }}; ($error:expr, $source:expr) => {{ errors.push(Tracked::from($error, $source)); continue; }}; } macro_rules! is_matching { ($open:expr, $close:expr) => {{ let mut depth = 1; move |t: &mut Tokeniser| { match t.eat_char() { Some($open) => { depth += 1; false } Some($close) => { depth -= 1; depth == 0 } _ => false, } }} }; } macro_rules! is_any { ($close:expr) => { |t: &mut Tokeniser| { t.eat_char() == Some($close) } }; } loop { t.eat_whitespace(); t.mark_start(); let Some(c) = t.eat_char() else { break }; let token = match c { '"' => { let source = t.get_source(); t.mark_child(); if let Some(_) = t.track_until(is_any!('"')) { let child = t.tokenise_child_span(); SyntacticToken::StringLiteral(parse_string_literal(child)) } else { err!(SyntacticError::UnterminatedStringLiteral, source); } } '\'' => { let source = t.get_source(); if let Some(string) = t.track_until(is_any!('\'')) { let mut chars: Vec = string.chars().collect(); if chars.len() == 1 { let value = parse_char(chars.pop().unwrap()); SyntacticToken::IntegerLiteral(value) } else { t.mark_end(); err!(SyntacticError::ExpectedSingleCharacter, t.get_source()); } } else { err!(SyntacticError::UnterminatedCharacterLiteral, source); } } '{' => { let source = t.get_source(); t.mark_child(); if let Some(_) = t.track_until(is_matching!('{','}')) { let child = t.tokenise_child_span(); match parse_syntactic_from_tokeniser(child) { Ok(tokens) => SyntacticToken::BlockLiteral(tokens), Err(mut parse_errors) => { errors.append(&mut parse_errors); continue; } } } else { err!(SyntacticError::UnterminatedBlock, source); } } '[' => { let source = t.get_source(); t.mark_child(); if let Some(_) = t.track_until(is_matching!('[',']')) { let child = t.tokenise_child_span(); match parse_syntactic_from_tokeniser(child) { Ok(tokens) => SyntacticToken::Expression(tokens), Err(mut parse_errors) => { errors.append(&mut parse_errors); continue; } } } else { err!(SyntacticError::UnterminatedExpression, source); } } '(' => { let source = t.get_source(); if let Some(string) = t.track_until(is_matching!('(',')')) { // Check if the comment fills the entire line. if t.start.position.column == 0 && t.end_of_line() { if let Some(path) = string.strip_prefix(": ") { t.embedded_path = Some(PathBuf::from(path.trim())); t.embedded_first_line = t.start.position.line + 1; } } continue; } else { err!(SyntacticError::UnterminatedComment, source); } } '%' => { let name = t.eat_token(); let source = t.get_source(); t.mark_child(); if let Some(_) = t.track_until(is_any!(';')) { let child = t.tokenise_child_span(); match parse_syntactic_from_tokeniser(child) { Ok(body) => { let name = Tracked::from(name, source); let definition = SyntacticMacroDefinition { name, body }; SyntacticToken::MacroDefinition(definition) } Err(mut parse_errors) => { errors.append(&mut parse_errors); continue; } } } else { err!(SyntacticError::UnterminatedMacroDefinition(name), source); } } '}' => err!(SyntacticError::UnmatchedBlockTerminator), ']' => err!(SyntacticError::UnmatchedExpressionTerminator), ')' => err!(SyntacticError::UnmatchedCommentTerminator), ';' => err!(SyntacticError::UnmatchedMacroTerminator), '@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())), '&' => SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())), '~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())), ':' => SyntacticToken::Separator, '|' => SyntacticToken::Pin, '?' => SyntacticToken::Condition, '#' => { t.mark_child(); t.eat_token(); let child = t.tokenise_child_span(); match parse_word_template(child) { Ok(word_template) => SyntacticToken::WordTemplate(word_template), Err(mut parse_errors) => { errors.append(&mut parse_errors); continue; } } }, c => { let token = format!("{c}{}", t.eat_token()); if let Some(hex_string) = token.strip_prefix("0x") { let hex_string = hex_string.to_string(); match parse_integer_literal(&hex_string, 16) { Ok(value) => SyntacticToken::IntegerLiteral(value), Err(_) => err!(SyntacticError::InvalidHexadecimalLiteral(hex_string)), } } else if let Some(binary_string) = token.strip_prefix("0b") { let binary_string = binary_string.to_string(); match parse_integer_literal(&binary_string, 2) { Ok(value) => SyntacticToken::IntegerLiteral(value), Err(_) => err!(SyntacticError::InvalidBinaryLiteral(binary_string)), } } else { match parse_integer_literal(&token, 10) { Ok(value) => SyntacticToken::IntegerLiteral(value), Err(true) => err!(SyntacticError::InvalidDecimalLiteral(token)), Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)), } } } }; t.mark_end(); tokens.push(Tracked::from(token, t.get_source())) } match errors.is_empty() { true => Ok(tokens), false => Err(errors), } } fn parse_integer_literal(token: &str, radix: u32) -> Result { match usize::from_str_radix(&token.replace('_', ""), radix) { Ok(value) => match isize::try_from(value) { Ok(value) => Ok(value), Err(_) => Err(true), } Err(_) => Err(false), } } fn parse_string_literal(mut t: Tokeniser) -> StringLiteral { let mut string = String::new(); let mut chars = Vec::new(); while let Some(c) = t.eat_char() { string.push(c); chars.push(Tracked::from(parse_char(c), t.get_source())); t.mark_start(); } StringLiteral { string, chars } } fn parse_char(c: char) -> isize { c as u32 as isize } fn parse_word_template(mut t: Tokeniser) -> Result>> { let mut value = 0; // Value of the whole word template. let mut value_width = 0; // Bit width of the whole word template. let mut field_width = 0; // Width of the current bit field. let mut field_name = '\0'; // Name of the current bit field. let mut fields: Vec> = Vec::new(); let mut errors: Vec> = Vec::new(); macro_rules! push_field { () => { if fields.iter().any(|f| f.name == field_name) { let error = SyntacticError::DuplicateFieldNameInWord(field_name); errors.push(Tracked::from(error, t.get_source())); } else { let field = BitField { name: field_name, width: field_width, shift: 0}; fields.push(Tracked::from(field, t.get_source())); } }; } while let Some(c) = t.eat_char() { // Ignore underscores. if c == '_' { t.mark.undo(); continue; } // Add a bit to the value; value <<= 1; value_width += 1; for field in &mut fields { field.shift += 1; } // Extend the current field. if c == field_name { field_width += 1; continue; } // Commit the current field. if field_width > 0 { t.mark_end_prev(); push_field!(); field_width = 0; field_name = '\0'; } // Parse bit literals. if c == '0' { continue; } if c == '1' { value |= 1; continue; } t.mark_start_prev(); if c.is_alphabetic() { field_name = c; field_width = 1; continue; } else { t.mark_end(); let error = SyntacticError::InvalidCharacterInWord(c); errors.push(Tracked::from(error, t.get_source())); } } // Commit the final field. for field in &mut fields { field.shift += 1; } if field_width > 0 { t.mark_end(); push_field!(); } match errors.is_empty() { true => Ok(WordTemplate { value, width: value_width, fields }), false => Err(errors), } }