author | Ben Bridle <ben@derelict.engineering> | 2025-03-06 20:33:27 +1300 |
---|---|---|
committer | Ben Bridle <ben@derelict.engineering> | 2025-03-11 16:59:26 +1300 |
commit | 1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch) | |
tree | 472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 /src/stages/syntactic.rs | |
parent | f2ed89083f5326a7a6f0a1720033d3388aa431fb (diff) | |
download | torque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip |
Rewrite entire assembler
The language is now more general, the code is better structured, error
reporting is more detailed, and many new language features have
been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
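
As a rough illustration of the new entry point this commit adds (see `parse_syntactic` in the diff below), the sketch that follows drives the syntactic stage on a short source string. The function name and signature come from `src/stages/syntactic.rs`; the module path, the wrapper function, the `example.tq` name, and the sample source text are assumptions made here for illustration, not part of the commit.

```rust
use crate::stages::syntactic::parse_syntactic;

// Hypothetical caller: tokenise a snippet that exercises a label definition,
// a word template, a binary literal, and a bracketed expression, per the
// feature list above.
fn show_token_count() {
    let source = "@start #1100_xxxx [ 0b1010 ]";
    match parse_syntactic(source, Some("example.tq")) {
        Ok(tokens) => println!("parsed {} top-level tokens", tokens.len()),
        Err(errors) => eprintln!("{} syntax errors reported", errors.len()),
    }
}
```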
Diffstat (limited to 'src/stages/syntactic.rs')
-rw-r--r-- | src/stages/syntactic.rs | 323 |
1 file changed, 323 insertions, 0 deletions
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..2e7f959
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,323 @@
+use crate::*;
+
+use assembler::Tokeniser;
+
+
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path))
+}
+
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']);
+    let mut tokens = Vec::new();
+    let mut errors = Vec::new();
+
+    macro_rules! push_err {
+        ($error:expr) => {{
+            push_err!($error, t.get_source());
+        }};
+        ($error:expr, $source:expr) => {{
+            errors.push(Tracked::from($error, $source));
+            continue;
+        }};
+    }
+
+    loop {
+        t.eat_whitespace();
+        t.mark_start();
+        let Some(c) = t.eat_char() else { break };
+        let token = match c {
+            '"' => {
+                let source = t.get_source();
+                t.mark_child();
+                let is_any_close = |t: &mut Tokeniser| {
+                    t.eat_char() == Some('"')
+                };
+                if let Some(_) = t.track_until(is_any_close) {
+                    let child = t.tokenise_child_span();
+                    SyntacticToken::StringLiteral(parse_string_literal(child))
+                } else {
+                    push_err!(SyntacticError::UnterminatedStringLiteral, source);
+                }
+            }
+            '\'' => {
+                let source = t.get_source();
+                let is_any_close = |t: &mut Tokeniser| {
+                    t.eat_char() == Some('\'')
+                };
+                if let Some(string) = t.track_until(is_any_close) {
+                    let mut chars: Vec<char> = string.chars().collect();
+                    if chars.len() == 1 {
+                        let value = parse_char(chars.pop().unwrap());
+                        SyntacticToken::IntegerLiteral(value)
+                    } else {
+                        t.mark_end();
+                        push_err!(SyntacticError::ExpectedSingleCharacter, t.get_source());
+                    }
+                } else {
+                    push_err!(SyntacticError::UnterminatedCharacterLiteral, source);
+                }
+            }
+
+            '{' => {
+                let source = t.get_source();
+                t.mark_child();
+                let mut depth = 1;
+                let is_matching_close = |t: &mut Tokeniser| {
+                    match t.eat_char() {
+                        Some('{') => { depth += 1; false }
+                        Some('}') => { depth -= 1; depth == 0 }
+                        _ => false,
+                    }
+                };
+                if let Some(_) = t.track_until(is_matching_close) {
+                    let child = t.tokenise_child_span();
+                    match parse_syntactic_from_tokeniser(child) {
+                        Ok(tokens) => SyntacticToken::BlockLiteral(tokens),
+                        Err(mut parse_errors) => {
+                            errors.append(&mut parse_errors);
+                            continue;
+                        }
+                    }
+                } else {
+                    push_err!(SyntacticError::UnterminatedBlock, source);
+                }
+            }
+            '[' => {
+                let source = t.get_source();
+                t.mark_child();
+                let mut depth = 1;
+                let is_matching_close = |t: &mut Tokeniser| {
+                    match t.eat_char() {
+                        Some('[') => { depth += 1; false }
+                        Some(']') => { depth -= 1; depth == 0 }
+                        _ => false,
+                    }
+                };
+                if let Some(_) = t.track_until(is_matching_close) {
+                    let child = t.tokenise_child_span();
+                    match parse_syntactic_from_tokeniser(child) {
+                        Ok(tokens) => SyntacticToken::Expression(tokens),
+                        Err(mut parse_errors) => {
+                            errors.append(&mut parse_errors);
+                            continue;
+                        }
+                    }
+                } else {
+                    push_err!(SyntacticError::UnterminatedExpression, source);
+                }
+            }
+            '(' => {
+                let source = t.get_source();
+                let mut depth = 1;
+                let is_matching_close = |t: &mut Tokeniser| {
+                    match t.eat_char() {
+                        Some('(') => { depth += 1; false }
+                        Some(')') => { depth -= 1; depth == 0 }
+                        _ => false,
+                    }
+                };
+                if let Some(string) = t.track_until(is_matching_close) {
+                    // Check if the comment fills the entire line.
+                    if t.start.position.column == 0 && t.end_of_line() {
+                        if let Some(path) = string.strip_prefix(": ") {
+                            t.embedded_path = Some(PathBuf::from(path.trim()));
+                            t.embedded_first_line = t.start.position.line + 1;
+                        }
+                    }
+                    continue;
+                } else {
+                    push_err!(SyntacticError::UnterminatedComment, source);
+                }
+            }
+            '%' => {
+                let name = t.eat_token();
+                let source = t.get_source();
+                t.mark_child();
+                let is_any_close = |t: &mut Tokeniser| t.eat_char() == Some(';');
+                if let Some(_) = t.track_until(is_any_close) {
+                    let child = t.tokenise_child_span();
+                    match parse_syntactic_from_tokeniser(child) {
+                        Ok(tokens) => {
+                            let name = Tracked::from(name, source);
+                            let def = SyntacticMacroDefinition { name, tokens };
+                            SyntacticToken::MacroDefinition(def)
+                        }
+                        Err(mut parse_errors) => {
+                            errors.append(&mut parse_errors);
+                            continue;
+                        }
+                    }
+                } else {
+                    push_err!(SyntacticError::UnterminatedMacroDefinition(name), source);
+                }
+            }
+
+            '}' => push_err!(SyntacticError::UnmatchedBlockTerminator),
+            ']' => push_err!(SyntacticError::UnmatchedExpressionTerminator),
+            ')' => push_err!(SyntacticError::UnmatchedCommentTerminator),
+            ';' => push_err!(SyntacticError::UnmatchedMacroTerminator),
+
+            '@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())),
+            '&' => SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())),
+            '~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())),
+            ':' => SyntacticToken::Separator,
+            '|' => SyntacticToken::Pin,
+            '?' => SyntacticToken::Condition,
+
+            '#' => {
+                t.mark_child();
+                t.eat_token();
+                let child = t.tokenise_child_span();
+                match parse_word_template(child) {
+                    Ok(word_template) => SyntacticToken::WordTemplate(word_template),
+                    Err(mut parse_errors) => {
+                        errors.append(&mut parse_errors);
+                        continue;
+                    }
+                }
+            },
+
+            c => {
+                let token = format!("{c}{}", t.eat_token());
+                if let Some(hex_string) = token.strip_prefix("0x") {
+                    match parse_integer_literal(hex_string, 16) {
+                        Ok(value) => SyntacticToken::IntegerLiteral(value),
+                        Err(_) => push_err!(SyntacticError::InvalidHexadecimalLiteral(token)),
+                    }
+                } else if let Some(binary_string) = token.strip_prefix("0b") {
+                    match parse_integer_literal(binary_string, 2) {
+                        Ok(value) => SyntacticToken::IntegerLiteral(value),
+                        Err(_) => push_err!(SyntacticError::InvalidBinaryLiteral(token)),
+                    }
+                } else {
+                    match parse_integer_literal(&token, 10) {
+                        Ok(value) => SyntacticToken::IntegerLiteral(value),
+                        Err(true) => push_err!(SyntacticError::InvalidDecimalLiteral(token)),
+                        Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)),
+                    }
+                }
+            }
+        };
+
+        t.mark_end();
+        tokens.push(Tracked::from(token, t.get_source()))
+    }
+    match errors.is_empty() {
+        true => Ok(tokens),
+        false => Err(errors),
+    }
+}
+
+
+fn parse_integer_literal(token: &str, radix: u32) -> Result<isize, bool> {
+    match usize::from_str_radix(&token.replace('_', ""), radix) {
+        Ok(value) => match isize::try_from(value) {
+            Ok(value) => Ok(value),
+            Err(_) => Err(true),
+        }
+        Err(_) => Err(false),
+    }
+}
+
+
+fn parse_string_literal(mut t: Tokeniser) -> StringLiteral {
+    let mut string = String::new();
+    let mut chars = Vec::new();
+
+    while let Some(c) = t.eat_char() {
+        string.push(c);
+        chars.push(Tracked::from(parse_char(c), t.get_source()));
+        t.mark_start();
+    }
+    StringLiteral { string, chars }
+}
+
+fn parse_char(c: char) -> isize {
+    c as u32 as isize
+}
+
+
+fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> {
+    let mut value = 0;          // Value of the whole word template.
+    let mut value_width = 0;    // Bit width of the whole word template.
+    let mut field_width = 0;    // Width of the current bit field.
+    let mut field_name = '\0';  // Name of the current bit field.
+    let mut fields: Vec<Tracked<BitField>> = Vec::new();
+    let mut errors: Vec<Tracked<SyntacticError>> = Vec::new();
+
+    macro_rules! push_field {
+        () => {
+            if fields.iter().any(|f| f.name == field_name) {
+                let error = SyntacticError::DuplicateFieldNameInWord(field_name);
+                errors.push(Tracked::from(error, t.get_source()));
+            } else {
+                let field = BitField { name: field_name, width: field_width, shift: 0 };
+                fields.push(Tracked::from(field, t.get_source()));
+            }
+        };
+    }
+
+    while let Some(c) = t.eat_char() {
+        // Ignore underscores.
+        if c == '_' {
+            t.mark.undo();
+            continue;
+        }
+
+        // Add a bit to the value.
+        value <<= 1;
+        value_width += 1;
+        for field in &mut fields {
+            field.shift += 1;
+        }
+
+        // Extend the current field.
+        if c == field_name {
+            field_width += 1;
+            continue;
+        }
+
+        // Commit the current field.
+        if field_width > 0 {
+            t.mark_end_prev();
+            push_field!();
+            field_width = 0;
+            field_name = '\0';
+        }
+
+        // Parse bit literals.
+        if c == '0' {
+            continue;
+        }
+        if c == '1' {
+            value |= 1;
+            continue;
+        }
+
+        t.mark_start_prev();
+        if c.is_alphabetic() {
+            field_name = c;
+            field_width = 1;
+            continue;
+        } else {
+            t.mark_end();
+            let error = SyntacticError::InvalidCharacterInWord(c);
+            errors.push(Tracked::from(error, t.get_source()));
+        }
+    }
+
+    // Commit the final field.
+    for field in &mut fields {
+        field.shift += 1;
+    }
+    if field_width > 0 {
+        t.mark_end();
+        push_field!();
+    }
+
+    match errors.is_empty() {
+        true => Ok(WordTemplate { value, width: value_width, fields }),
+        false => Err(errors),
+    }
+}
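
The most intricate part of the new stage is the bit-field bookkeeping in `parse_word_template`: every non-underscore character widens the word by one bit and shifts the already-committed fields left, `0`/`1` set literal bits, and a run of a repeated letter becomes a named field recorded with its width and its shift from the least significant bit. The sketch below is a simplified re-implementation of that walk over a plain string, with the `Tokeniser`, `Tracked`, and error plumbing stripped out; the helper name `word_template_fields` and the `(name, width, shift)` tuple representation are assumptions made here for illustration, not part of the commit.

```rust
/// Simplified mirror of the parse_word_template walk above.
/// Returns the literal value, the total bit width, and (name, width, shift)
/// for each named field.
fn word_template_fields(template: &str) -> (usize, u32, Vec<(char, u32, u32)>) {
    let mut value = 0usize;
    let mut value_width = 0u32;
    let mut field_width = 0u32;
    let mut field_name = '\0';
    let mut fields: Vec<(char, u32, u32)> = Vec::new();

    for c in template.chars() {
        if c == '_' {
            continue; // Underscores are purely visual separators.
        }
        value <<= 1;
        value_width += 1;
        for field in &mut fields {
            field.2 += 1; // Every new bit shifts earlier fields left by one.
        }
        if c == field_name {
            field_width += 1; // Extend the current field.
            continue;
        }
        if field_width > 0 {
            fields.push((field_name, field_width, 0)); // Commit the current field.
            field_width = 0;
            field_name = '\0';
        }
        match c {
            '0' => {}
            '1' => value |= 1,
            c if c.is_alphabetic() => {
                field_name = c;
                field_width = 1;
            }
            _ => {} // The real parser reports InvalidCharacterInWord here.
        }
    }
    // Commit the final field; earlier fields gain one last shift to make room.
    for field in &mut fields {
        field.2 += 1;
    }
    if field_width > 0 {
        fields.push((field_name, field_width, 0));
    }
    (value, value_width, fields)
}

fn main() {
    // A template such as #1100_xxyy should yield value 0b1100_0000, width 8,
    // field x (width 2, shift 2) and field y (width 2, shift 0).
    let (value, width, fields) = word_template_fields("1100_xxyy");
    println!("value={value:#010b} width={width} fields={fields:?}");
}
```

Deferring each field's shift until later bits arrive is what lets the parser emit fields in source order while still computing their positions from the right-hand end of the word.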