diff options
Diffstat (limited to 'src/stages/syntactic.rs')
-rw-r--r-- | src/stages/syntactic.rs | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..6453ae0 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,220 @@ +use crate::*; + +use std::path::PathBuf; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path), "") +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + t.add_delimiters(&['(',')','[',']','{','}',';']); + t.add_terminators(&[':']); + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + let mut label_name = label_name.to_string(); + + macro_rules! err { + ($error:expr) => {{ + err!($error, t.get_source()); + }}; + ($error:expr, $source:expr) => {{ + errors.push(Tracked::from($error, $source)); + continue; + }}; + } + + macro_rules! check_name { + ($name:expr) => {{ + check_name!($name, t.get_source()); + }}; + ($name:expr, $source:expr) => { + if $name.chars().count() > 63 { + let error = SyntacticError::InvalidIdentifier($name.clone()); + errors.push(Tracked::from(error, $source.clone())); + } + }; + } + + // Eat characters until the end character is found. + macro_rules! is_any_end { + ($end:expr) => { + |t: &mut Tokeniser| { + t.eat_char() == Some($end) + } + }; + } + + // Eat characters until the end character is found without a preceding back-slash. + macro_rules! is_plain_end { + ($end:expr) => { + |t: &mut Tokeniser| { + t.eat_if(concat!('\\', $end)).is_some() || t.eat_char() == Some($end) + } + }; + } + + loop { + // Eat leading whitespace. + while let Some(c) = t.peek_char() { + match [' ', '\n', '\r', '\t'].contains(&c) { + true => t.eat_char(), + false => break, + }; + } + t.mark_start(); + let Some(c) = t.eat_char() else { break }; + let token = match c { + '"' => { + let source = t.get_source(); + match t.track_until(is_plain_end!('"')) { + Some(string) => { + let mut bytes = string.into_bytes(); + bytes.push(0x00); + SyntacticToken::String(bytes) + } + None => err!(SyntacticError::UnterminatedNullString, source), + } + } + '\'' => { + let source = t.get_source(); + match t.track_until(is_plain_end!('\'')) { + Some(string) => SyntacticToken::String(string.into_bytes()), + None => err!(SyntacticError::UnterminatedRawString, source), + } + } + '(' => { + let source = t.get_source(); + if let Some(string) = t.track_until(is_any_end!(')')) { + // Check if the comment fills the entire line. + if t.start.position.column == 0 && t.end_of_line() { + if let Some(path) = string.strip_prefix(": ") { + t.embedded_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start.position.line + 1; + continue; + } + } + SyntacticToken::Comment(string) + } else { + err!(SyntacticError::UnterminatedComment, source) + } + } + ')' => err!(SyntacticError::UnmatchedCommentTerminator), + '%' => { + let name = t.eat_token(); + let source = t.get_source(); + check_name!(name, source); + t.mark_child(); + if let Some(_) = t.track_until(is_any_end!(';')) { + let child = t.tokenise_child_span(); + match parse_body_from_tokeniser(child, &label_name) { + Ok(body) => { + let name = Tracked::from(name, source); + let definition = SyntacticMacroDefinition { name, body }; + SyntacticToken::MacroDefinition(definition) + } + Err(mut err) => { + errors.append(&mut err); + continue; + } + } + } else { + err!(SyntacticError::UnterminatedMacroDefinition, source); + } + } + ';' => err!(SyntacticError::UnmatchedMacroTerminator), + '{' => SyntacticToken::BlockOpen, + '}' => SyntacticToken::BlockClose, + '['|']' => continue, + '@' => { + label_name = t.eat_token(); + check_name!(label_name); + SyntacticToken::LabelDefinition(label_name.clone()) + } + '&' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::LabelDefinition(name) + } + '~' => { + let name = format!("{label_name}/{}", t.eat_token()); + check_name!(name); + SyntacticToken::Symbol(name) + } + '#' => { + let token = t.eat_token(); + match token.parse::<Value>() { + Ok(value) => SyntacticToken::Pad(value), + Err(_) => err!(SyntacticError::InvalidPadValue), + } + }, + ':' => { + SyntacticToken::Symbol(String::from(':')) + } + c => { + let token = format!("{c}{}", t.eat_token()); + if let Ok(value) = token.parse::<Value>() { + SyntacticToken::Literal(value) + } else if let Ok(instruction) = token.parse::<Instruction>() { + SyntacticToken::Instruction(instruction) + } else { + check_name!(token); + SyntacticToken::Symbol(token) + } + } + }; + + t.mark_end(); + let source = t.get_source(); + tokens.push(Tracked::from(token, source)); + } + + // Check that every block open matches a block close. + let mut stack = Vec::new(); + for token in &tokens { + match &token.value { + SyntacticToken::BlockOpen => stack.push(token.source.clone()), + SyntacticToken::BlockClose => if let None = stack.pop() { + let error = SyntacticError::UnmatchedBlockTerminator; + errors.push(Tracked::from(error, token.source.clone())); + } + _ => (), + } + } + for source in stack { + let error = SyntacticError::UnterminatedBlock; + errors.push(Tracked::from(error, source)); + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} + + +fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + for token in parse_syntactic_from_tokeniser(t, label_name)? { + match token.value { + SyntacticToken::LabelDefinition(_) => { + let error = SyntacticError::LabelDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + SyntacticToken::MacroDefinition(_) => { + let error = SyntacticError::MacroDefinitionInMacroDefinition; + errors.push(Tracked::from(error, token.source)); + continue; + } + _ => tokens.push(token), + }; + } + + match errors.is_empty() { + true => Ok(tokens), + false => Err(errors), + } +} |