summaryrefslogtreecommitdiff
path: root/src/stages/syntactic.rs
diff options
context:
space:
mode:
authorBen Bridle <ben@derelict.engineering>2025-03-06 20:33:27 +1300
committerBen Bridle <ben@derelict.engineering>2025-03-11 16:59:26 +1300
commit1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch)
tree472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 /src/stages/syntactic.rs
parentf2ed89083f5326a7a6f0a1720033d3388aa431fb (diff)
downloadtorque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip
Rewrite entire assembler
The language is now more general, the code is better structured, error reporting is more detailed, and many new language features have been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
Diffstat (limited to 'src/stages/syntactic.rs')
-rw-r--r--src/stages/syntactic.rs323
1 files changed, 323 insertions, 0 deletions
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..2e7f959
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,323 @@
+use crate::*;
+
+use assembler::Tokeniser;
+
+
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path))
+}
+
/// Core tokenising loop: consume characters from the tokeniser and produce
/// syntactic tokens, recursing into bracketed spans (blocks, expressions,
/// macro bodies) by tokenising each child span with this same function.
/// All errors are accumulated and returned together rather than aborting
/// on the first one.
fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
    // Every structural character acts as a token boundary of its own.
    t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']);
    let mut tokens = Vec::new();
    let mut errors = Vec::new();

    // Record an error against a source span and skip to the next token.
    // The one-argument form uses the tokeniser's current span; note that
    // both forms `continue` the enclosing `loop`.
    macro_rules! push_err {
        ($error:expr) => {{
            push_err!($error, t.get_source());
        }};
        ($error:expr, $source:expr) => {{
            errors.push(Tracked::from($error, $source));
            continue;
        }};
    }

    loop {
        // Each iteration parses exactly one token or records one error.
        t.eat_whitespace();
        t.mark_start();
        let Some(c) = t.eat_char() else { break };
        let token = match c {
            // String literal: the span runs up to the next '"' (no escape
            // sequences are recognised here).
            '"' => {
                let source = t.get_source();
                t.mark_child();
                let is_any_close = |t: &mut Tokeniser| {
                    t.eat_char() == Some('"')
                };
                if let Some(_) = t.track_until(is_any_close) {
                    let child = t.tokenise_child_span();
                    SyntacticToken::StringLiteral(parse_string_literal(child))
                } else {
                    // End of input reached without a closing quote.
                    push_err!(SyntacticError::UnterminatedStringLiteral, source);
                }
            }
            // Character literal: must contain exactly one character between
            // the quotes; its code point becomes an integer literal.
            '\'' => {
                let source = t.get_source();
                let is_any_close = |t: &mut Tokeniser| {
                    t.eat_char() == Some('\'')
                };
                if let Some(string) = t.track_until(is_any_close) {
                    let mut chars: Vec<char> = string.chars().collect();
                    if chars.len() == 1 {
                        let value = parse_char(chars.pop().unwrap());
                        SyntacticToken::IntegerLiteral(value)
                    } else {
                        t.mark_end();
                        push_err!(SyntacticError::ExpectedSingleCharacter, t.get_source());
                    }
                } else {
                    push_err!(SyntacticError::UnterminatedCharacterLiteral, source);
                }
            }

            // Block literal: `{ ... }` with nesting tracked so that matched
            // inner braces do not close the outer block. The body is
            // tokenised recursively.
            '{' => {
                let source = t.get_source();
                t.mark_child();
                let mut depth = 1;
                let is_matching_close = |t: &mut Tokeniser| {
                    match t.eat_char() {
                        Some('{') => { depth += 1; false }
                        Some('}') => { depth -= 1; depth == 0 }
                        _ => false,
                    }
                };
                if let Some(_) = t.track_until(is_matching_close) {
                    let child = t.tokenise_child_span();
                    match parse_syntactic_from_tokeniser(child) {
                        Ok(tokens) => SyntacticToken::BlockLiteral(tokens),
                        Err(mut parse_errors) => {
                            // Surface the child's errors as our own.
                            errors.append(&mut parse_errors);
                            continue;
                        }
                    }
                } else {
                    push_err!(SyntacticError::UnterminatedBlock, source);
                }
            }
            // Expression: `[ ... ]`, same nesting-aware recursion as blocks.
            '[' => {
                let source = t.get_source();
                t.mark_child();
                let mut depth = 1;
                let is_matching_close = |t: &mut Tokeniser| {
                    match t.eat_char() {
                        Some('[') => { depth += 1; false }
                        Some(']') => { depth -= 1; depth == 0 }
                        _ => false,
                    }
                };
                if let Some(_) = t.track_until(is_matching_close) {
                    let child = t.tokenise_child_span();
                    match parse_syntactic_from_tokeniser(child) {
                        Ok(tokens) => SyntacticToken::Expression(tokens),
                        Err(mut parse_errors) => {
                            errors.append(&mut parse_errors);
                            continue;
                        }
                    }
                } else {
                    push_err!(SyntacticError::UnterminatedExpression, source);
                }
            }
            // Comment: `( ... )` with nesting; the content is discarded
            // (no token is produced) unless it is a whole-line `(: path)`
            // directive, which records an embedded source path and first
            // line number on the tokeniser for later error attribution.
            '(' => {
                let source = t.get_source();
                let mut depth = 1;
                let is_matching_close = |t: &mut Tokeniser| {
                    match t.eat_char() {
                        Some('(') => { depth += 1; false }
                        Some(')') => { depth -= 1; depth == 0 }
                        _ => false,
                    }
                };
                if let Some(string) = t.track_until(is_matching_close) {
                    // Check if the comment fills the entire line.
                    if t.start.position.column == 0 && t.end_of_line() {
                        if let Some(path) = string.strip_prefix(": ") {
                            t.embedded_path = Some(PathBuf::from(path.trim()));
                            // Embedded content starts on the line after the directive.
                            t.embedded_first_line = t.start.position.line + 1;
                        }
                    }
                    continue;
                } else {
                    push_err!(SyntacticError::UnterminatedComment, source);
                }
            }
            // Macro definition: `%name ... ;` — the name is the token after
            // '%', the body (up to the ';') is tokenised recursively.
            '%' => {
                let name = t.eat_token();
                let source = t.get_source();
                t.mark_child();
                let is_any_close = |t: &mut Tokeniser| t.eat_char() == Some(';');
                if let Some(_) = t.track_until(is_any_close) {
                    let child = t.tokenise_child_span();
                    match parse_syntactic_from_tokeniser(child) {
                        Ok(tokens) => {
                            let name = Tracked::from(name, source);
                            let def = SyntacticMacroDefinition { name, tokens };
                            SyntacticToken::MacroDefinition(def)
                        }
                        Err(mut parse_errors) => {
                            errors.append(&mut parse_errors);
                            continue;
                        }
                    }
                } else {
                    push_err!(SyntacticError::UnterminatedMacroDefinition(name), source);
                }
            }

            // Closing delimiters reached here were never opened.
            '}' => push_err!(SyntacticError::UnmatchedBlockTerminator),
            ']' => push_err!(SyntacticError::UnmatchedExpressionTerminator),
            ')' => push_err!(SyntacticError::UnmatchedCommentTerminator),
            ';' => push_err!(SyntacticError::UnmatchedMacroTerminator),

            // Single-character sigils and their attached token, if any.
            '@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())),
            '&' => SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())),
            '~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())),
            ':' => SyntacticToken::Separator,
            '|' => SyntacticToken::Pin,
            // NOTE(review): '?' is not in the delimiter list above — confirm
            // that a bare '?' is always tokenised on its own here.
            '?' => SyntacticToken::Condition,

            // Word template: `#` followed by a bit pattern, e.g. `#01aa_bb10`.
            '#' => {
                t.mark_child();
                t.eat_token();
                let child = t.tokenise_child_span();
                match parse_word_template(child) {
                    Ok(word_template) => SyntacticToken::WordTemplate(word_template),
                    Err(mut parse_errors) => {
                        errors.append(&mut parse_errors);
                        continue;
                    }
                }
            },

            // Anything else: an integer literal (hex `0x…`, binary `0b…`,
            // or decimal), falling back to a global symbol when the token
            // is not numeric at all.
            c => {
                let token = format!("{c}{}", t.eat_token());
                if let Some(hex_string) = token.strip_prefix("0x") {
                    match parse_integer_literal(hex_string, 16) {
                        Ok(value) => SyntacticToken::IntegerLiteral(value),
                        Err(_) => push_err!(SyntacticError::InvalidHexadecimalLiteral(token)),
                    }
                } else if let Some(binary_string) = token.strip_prefix("0b") {
                    match parse_integer_literal(binary_string, 2) {
                        Ok(value) => SyntacticToken::IntegerLiteral(value),
                        Err(_) => push_err!(SyntacticError::InvalidBinaryLiteral(token)),
                    }
                } else {
                    match parse_integer_literal(&token, 10) {
                        // Err(true): numeric but out of range — report it.
                        // Err(false): not numeric — treat as a symbol.
                        Ok(value) => SyntacticToken::IntegerLiteral(value),
                        Err(true) => push_err!(SyntacticError::InvalidDecimalLiteral(token)),
                        Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)),
                    }
                }
            }
        };

        t.mark_end();
        tokens.push(Tracked::from(token, t.get_source()))
    }
    match errors.is_empty() {
        true => Ok(tokens),
        false => Err(errors),
    }
}
+
+
/// Parse an unsigned integer literal in the given radix, ignoring any `_`
/// digit separators.
///
/// Returns `Err(true)` when the token parses as a `usize` but does not fit
/// in an `isize`, and `Err(false)` otherwise (not numeric in this radix, or
/// too large even for a `usize`). The caller uses this flag to distinguish
/// "invalid number" from "not a number at all".
fn parse_integer_literal(token: &str, radix: u32) -> Result<isize, bool> {
    let digits = token.replace('_', "");
    let magnitude = usize::from_str_radix(&digits, radix).map_err(|_| false)?;
    isize::try_from(magnitude).map_err(|_| true)
}
+
+
+fn parse_string_literal(mut t: Tokeniser) -> StringLiteral {
+ let mut string = String::new();
+ let mut chars = Vec::new();
+
+ while let Some(c) = t.eat_char() {
+ string.push(c);
+ chars.push(Tracked::from(parse_char(c), t.get_source()));
+ t.mark_start();
+ }
+ StringLiteral { string, chars }
+}
+
/// Convert a character to its Unicode code point, widened to the assembler's
/// signed integer value type.
fn parse_char(c: char) -> isize {
    let codepoint = u32::from(c);
    codepoint as isize
}
+
+
/// Parse a word template bit pattern (the text after `#`), e.g. `01aa_bb10`:
/// `0`/`1` are literal bits, a run of one letter defines a named bit field,
/// and `_` is a purely visual separator. Produces the literal value, the
/// total bit width, and a list of named fields with their widths and shifts.
fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> {
    let mut value = 0; // Value of the whole word template.
    let mut value_width = 0; // Bit width of the whole word template.
    let mut field_width = 0; // Width of the current bit field.
    let mut field_name = '\0'; // Name of the current bit field.
    let mut fields: Vec<Tracked<BitField>> = Vec::new();
    let mut errors: Vec<Tracked<SyntacticError>> = Vec::new();

    // Commit the field currently being built, rejecting a name that has
    // already been used in this word. Committed fields start at shift 0;
    // the per-bit loop below advances them as further bits arrive.
    macro_rules! push_field {
        () => {
            if fields.iter().any(|f| f.name == field_name) {
                let error = SyntacticError::DuplicateFieldNameInWord(field_name);
                errors.push(Tracked::from(error, t.get_source()));
            } else {
                let field = BitField { name: field_name, width: field_width, shift: 0};
                fields.push(Tracked::from(field, t.get_source()));
            }
        };
    }

    while let Some(c) = t.eat_char() {
        // Ignore underscores.
        if c == '_' {
            t.mark.undo();
            continue;
        }

        // Add a bit to the value, and shift every committed field left past it.
        value <<= 1;
        value_width += 1;
        for field in &mut fields {
            field.shift += 1;
        }

        // Extend the current field.
        if c == field_name {
            field_width += 1;
            continue;
        }

        // Commit the current field (the current character starts something new).
        if field_width > 0 {
            t.mark_end_prev();
            push_field!();
            field_width = 0;
            field_name = '\0';
        }

        // Parse bit literals.
        if c == '0' {
            continue;
        }
        if c == '1' {
            value |= 1;
            continue;
        }

        t.mark_start_prev();
        if c.is_alphabetic() {
            // Start a new named field.
            field_name = c;
            field_width = 1;
        } else {
            t.mark_end();
            let error = SyntacticError::InvalidCharacterInWord(c);
            errors.push(Tracked::from(error, t.get_source()));
        }
    }

    // Fields committed inside the loop were pushed *after* that iteration's
    // shift increment, so each one is exactly one shift short; add the
    // missing increment here. The final field, committed below, has no bits
    // after it and correctly keeps shift 0.
    for field in &mut fields {
        field.shift += 1;
    }
    // Commit the final field.
    if field_width > 0 {
        t.mark_end();
        push_field!();
    }

    match errors.is_empty() {
        true => Ok(WordTemplate { value, width: value_width, fields }),
        false => Err(errors),
    }
}