summary refs log tree commit diff
path: root/src/stages/syntactic.rs
diff options
context:
space:
mode:
author Ben Bridle <bridle.benjamin@gmail.com> 2025-02-14 09:36:52 +1300
committer Ben Bridle <ben@derelict.engineering> 2025-03-18 12:23:27 +1300
commit 7d4dd52b8cfc865ae1b975ca3b6a3e72a812ebb9 (patch)
tree 14ca9fa0ddcdd8c5155ddeaac241cd4f55486b6e /src/stages/syntactic.rs
parent f69a8f8c312ded212446082682bcabba8e3a9c9f (diff)
download bedrock-asm-7d4dd52b8cfc865ae1b975ca3b6a3e72a812ebb9.zip
Rewrite library
Diffstat (limited to 'src/stages/syntactic.rs')
-rw-r--r-- src/stages/syntactic.rs 185
1 files changed, 185 insertions, 0 deletions
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..c680700
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,185 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+/// Parses `source_code` into a list of tracked syntactic tokens.
+///
+/// A `Tokeniser` is constructed from the source text and the optional `path`,
+/// and is then handed to `parse_syntactic_from_tokeniser`, whose result is
+/// returned unchanged: the tokens on success, or the collected errors.
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let tokeniser = Tokeniser::new(source_code, path);
+    parse_syntactic_from_tokeniser(tokeniser)
+}
+
+/// Consumes the tokeniser `t`, producing one tracked token per syntactic
+/// element it yields.
+///
+/// Errors are accumulated instead of aborting: an element that fails to parse
+/// is recorded and skipped, and scanning carries on with the next element.
+/// Once the input is exhausted, `{` / `}` tokens are checked for balance.
+/// Returns `Ok` with the tokens only if no error was recorded, otherwise `Err`
+/// with every recorded error.
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    t.add_delimiters(&['@','&','%',';','{','}','(',')','[',']','#','~','"','\'']);
+    t.add_terminators(&[':']);
+    let mut parsed = Vec::new();
+    let mut errors = Vec::new();
+    // Name of the most recent `@` label; it prefixes `&` and `~` names.
+    let mut label_name = String::new();
+
+    // Records an error (at the tokeniser's current source unless one is
+    // supplied) and skips to the next iteration of the scanning loop.
+    macro_rules! err {
+        ($error:expr) => {{
+            err!($error, t.get_source());
+        }};
+        ($error:expr, $source:expr) => {{
+            errors.push(Tracked::from($error, $source));
+            continue;
+        }};
+    }
+
+    // Builds the closure passed to `track_until`: it eats one character and
+    // reports whether that character was `$close`.
+    macro_rules! stops_at {
+        ($close:expr) => {
+            |t: &mut Tokeniser| { t.eat_char() == Some($close) }
+        };
+    }
+
+    loop {
+        t.eat_whitespace();
+        t.mark_start();
+        let Some(first) = t.eat_char() else { break };
+        let token = match first {
+            '"' => {
+                let source = t.get_source();
+                let Some(text) = t.track_until(stops_at!('"')) else {
+                    err!(SyntacticError::UnterminatedNullString, source)
+                };
+                // Double-quoted strings get a trailing zero byte.
+                let mut bytes = text.into_bytes();
+                bytes.push(0x00);
+                SyntacticToken::String(bytes)
+            }
+            '\'' => {
+                let source = t.get_source();
+                let Some(text) = t.track_until(stops_at!('\'')) else {
+                    err!(SyntacticError::UnterminatedRawString, source)
+                };
+                SyntacticToken::String(text.into_bytes())
+            }
+            '(' => {
+                let source = t.get_source();
+                let Some(text) = t.track_until(stops_at!(')')) else {
+                    err!(SyntacticError::UnterminatedComment, source)
+                };
+                // A comment that starts at column 0, for which
+                // `t.end_of_line()` holds, and whose text begins with ": "
+                // sets the embedded path and first line on the tokeniser
+                // and emits no token.
+                let fills_line = t.start.position.column == 0 && t.end_of_line();
+                if fills_line {
+                    if let Some(path) = text.strip_prefix(": ") {
+                        t.embedded_path = Some(PathBuf::from(path.trim()));
+                        t.embedded_first_line = t.start.position.line + 1;
+                        continue;
+                    }
+                }
+                SyntacticToken::Comment(text)
+            }
+            '%' => {
+                let name = t.eat_token();
+                let source = t.get_source();
+                t.mark_child();
+                if t.track_until(stops_at!(';')).is_none() {
+                    err!(SyntacticError::UnterminatedMacroDefinition, source);
+                }
+                // The macro body is parsed by a child tokeniser over the
+                // span tracked since `mark_child`.
+                let child = t.tokenise_child_span();
+                match parse_body_from_tokeniser(child) {
+                    Ok(body) => {
+                        let name = Tracked::from(name, source);
+                        SyntacticToken::MacroDefinition(SyntacticMacroDefinition { name, body })
+                    }
+                    Err(mut body_errors) => {
+                        errors.append(&mut body_errors);
+                        continue;
+                    }
+                }
+            }
+            '{' => SyntacticToken::BlockOpen,
+            '}' => SyntacticToken::BlockClose,
+            // Square brackets produce no token.
+            '[' | ']' => continue,
+
+            ')' => err!(SyntacticError::UnmatchedCommentTerminator),
+            ';' => err!(SyntacticError::UnmatchedMacroTerminator),
+
+            '@' => {
+                label_name = t.eat_token();
+                SyntacticToken::LabelDefinition(label_name.clone())
+            }
+            '&' => {
+                let name = t.eat_token();
+                SyntacticToken::LabelDefinition(format!("{label_name}/{name}"))
+            }
+            '~' => {
+                let name = t.eat_token();
+                SyntacticToken::Invocation(format!("{label_name}/{name}"))
+            }
+            '#' => {
+                let text = t.eat_token();
+                let Ok(value) = text.parse::<Value>() else {
+                    err!(SyntacticError::InvalidPaddingValue)
+                };
+                SyntacticToken::Padding(value)
+            }
+            other => {
+                // Try a value first, then an instruction; anything else is
+                // treated as an invocation.
+                let text = format!("{other}{}", t.eat_token());
+                if let Ok(value) = text.parse::<Value>() {
+                    SyntacticToken::RawValue(value)
+                } else if let Ok(instruction) = text.parse::<Instruction>() {
+                    SyntacticToken::Instruction(instruction)
+                } else {
+                    SyntacticToken::Invocation(text)
+                }
+            }
+        };
+
+        t.mark_end();
+        let source = t.get_source();
+        parsed.push(Tracked::from(token, source));
+    }
+
+    check_block_balance(&parsed, &mut errors);
+
+    if errors.is_empty() {
+        Ok(parsed)
+    } else {
+        Err(errors)
+    }
+}
+
+/// Checks that every block open matches a block close.
+///
+/// Appends an `UnmatchedBlockTerminator` error for each `BlockClose` that has
+/// no open block to close, then an `UnterminatedBlock` error for each
+/// `BlockOpen` still open at the end, in the order they were opened.
+fn check_block_balance(tokens: &[Tracked<SyntacticToken>], errors: &mut Vec<Tracked<SyntacticError>>) {
+    let mut open = Vec::new();
+    for token in tokens {
+        match &token.value {
+            SyntacticToken::BlockOpen => open.push(token.source.clone()),
+            SyntacticToken::BlockClose => {
+                if open.pop().is_none() {
+                    let error = SyntacticError::UnmatchedBlockTerminator;
+                    errors.push(Tracked::from(error, token.source.clone()));
+                }
+            }
+            _ => (),
+        }
+    }
+    errors.extend(
+        open.into_iter()
+            .map(|source| Tracked::from(SyntacticError::UnterminatedBlock, source)),
+    );
+}
+
+
+/// Parses the body of a macro definition from the tokeniser `t`.
+///
+/// The input is first parsed with `parse_syntactic_from_tokeniser`; if that
+/// fails, its errors are returned as-is. Otherwise every label definition and
+/// macro definition token is replaced by an error, and all other tokens are
+/// kept. Returns `Ok` with the kept tokens only if no token was rejected.
+fn parse_body_from_tokeniser(t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let mut accepted = Vec::new();
+    let mut rejected = Vec::new();
+
+    for token in parse_syntactic_from_tokeniser(t)? {
+        // Decide whether this kind of token is forbidden inside a macro body.
+        let violation = match &token.value {
+            SyntacticToken::LabelDefinition(_) => {
+                Some(SyntacticError::LabelDefinitionInMacroDefinition)
+            }
+            SyntacticToken::MacroDefinition(_) => {
+                Some(SyntacticError::MacroDefinitionInMacroDefinition)
+            }
+            _ => None,
+        };
+        match violation {
+            Some(error) => rejected.push(Tracked::from(error, token.source)),
+            None => accepted.push(token),
+        }
+    }
+
+    if rejected.is_empty() {
+        Ok(accepted)
+    } else {
+        Err(rejected)
+    }
+}