Diffstat (limited to 'src/stages/syntactic.rs')
-rw-r--r-- | src/stages/syntactic.rs | 185 |
1 file changed, 185 insertions, 0 deletions
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..c680700
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,185 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path))
+}
+
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    t.add_delimiters(&['@','&','%',';','{','}','(',')','[',']','#','~','"','\'']);
+    t.add_terminators(&[':']);
+    let mut tokens = Vec::new();
+    let mut errors = Vec::new();
+    let mut label_name = String::new();
+
+    macro_rules! err {
+        ($error:expr) => {{
+            err!($error, t.get_source());
+        }};
+        ($error:expr, $source:expr) => {{
+            errors.push(Tracked::from($error, $source));
+            continue;
+        }};
+    }
+
+    macro_rules! is_any {
+        ($close:expr) => {
+            |t: &mut Tokeniser| { t.eat_char() == Some($close) }
+        };
+    }
+
+    loop {
+        t.eat_whitespace();
+        t.mark_start();
+        let Some(c) = t.eat_char() else { break };
+        let token = match c {
+            '"' => {
+                let source = t.get_source();
+                match t.track_until(is_any!('"')) {
+                    Some(string) => {
+                        let mut bytes = string.into_bytes();
+                        bytes.push(0x00);
+                        SyntacticToken::String(bytes)
+                    }
+                    None => err!(SyntacticError::UnterminatedNullString, source),
+                }
+            }
+            '\'' => {
+                let source = t.get_source();
+                match t.track_until(is_any!('\'')) {
+                    Some(string) => SyntacticToken::String(string.into_bytes()),
+                    None => err!(SyntacticError::UnterminatedRawString, source),
+                }
+            }
+            '(' => {
+                let source = t.get_source();
+                if let Some(string) = t.track_until(is_any!(')')) {
+                    // Check if the comment fills the entire line.
+                    if t.start.position.column == 0 && t.end_of_line() {
+                        if let Some(path) = string.strip_prefix(": ") {
+                            t.embedded_path = Some(PathBuf::from(path.trim()));
+                            t.embedded_first_line = t.start.position.line + 1;
+                            continue;
+                        }
+                    }
+                    SyntacticToken::Comment(string)
+                } else {
+                    err!(SyntacticError::UnterminatedComment, source)
+                }
+            }
+            '%' => {
+                let name = t.eat_token();
+                let source = t.get_source();
+                t.mark_child();
+                if let Some(_) = t.track_until(is_any!(';')) {
+                    let child = t.tokenise_child_span();
+                    match parse_body_from_tokeniser(child) {
+                        Ok(body) => {
+                            let name = Tracked::from(name, source);
+                            let definition = SyntacticMacroDefinition { name, body };
+                            SyntacticToken::MacroDefinition(definition)
+                        }
+                        Err(mut err) => {
+                            errors.append(&mut err);
+                            continue;
+                        }
+                    }
+                } else {
+                    err!(SyntacticError::UnterminatedMacroDefinition, source);
+                }
+            }
+            '{' => SyntacticToken::BlockOpen,
+            '}' => SyntacticToken::BlockClose,
+            '[' => continue,
+            ']' => continue,
+
+            ')' => err!(SyntacticError::UnmatchedCommentTerminator),
+            ';' => err!(SyntacticError::UnmatchedMacroTerminator),
+
+            '@' => {
+                label_name = t.eat_token();
+                SyntacticToken::LabelDefinition(label_name.clone())
+            }
+            '&' => {
+                let name = t.eat_token();
+                SyntacticToken::LabelDefinition(format!("{label_name}/{name}"))
+            }
+            '~' => {
+                let name = t.eat_token();
+                SyntacticToken::Invocation(format!("{label_name}/{name}"))
+            }
+            '#' => {
+                let token = t.eat_token();
+                match token.parse::<Value>() {
+                    Ok(value) => SyntacticToken::Padding(value),
+                    Err(_) => err!(SyntacticError::InvalidPaddingValue),
+                }
+            },
+            c => {
+                let token = format!("{c}{}", t.eat_token());
+                match token.parse::<Value>() {
+                    Ok(value) => SyntacticToken::RawValue(value),
+                    Err(_) => match token.parse::<Instruction>() {
+                        Ok(instruction) => SyntacticToken::Instruction(instruction),
+                        Err(_) => SyntacticToken::Invocation(token),
+                    }
+                }
+            }
+        };
+
+        t.mark_end();
+        let source = t.get_source();
+        tokens.push(Tracked::from(token, source));
+    }
+
+    // Check that every block open matches a block close.
+    let mut stack = Vec::new();
+    for token in &tokens {
+        match &token.value {
+            SyntacticToken::BlockOpen => stack.push(token.source.clone()),
+            SyntacticToken::BlockClose => if let None = stack.pop() {
+                let error = SyntacticError::UnmatchedBlockTerminator;
+                errors.push(Tracked::from(error, token.source.clone()));
+            }
+            _ => (),
+        }
+    }
+    for source in stack {
+        let error = SyntacticError::UnterminatedBlock;
+        errors.push(Tracked::from(error, source));
+    }
+
+    match errors.is_empty() {
+        true => Ok(tokens),
+        false => Err(errors),
+    }
+}
+
+
+fn parse_body_from_tokeniser(t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let mut tokens = Vec::new();
+    let mut errors = Vec::new();
+
+    for token in parse_syntactic_from_tokeniser(t)? {
+        match token.value {
+            SyntacticToken::LabelDefinition(_) => {
+                let error = SyntacticError::LabelDefinitionInMacroDefinition;
+                errors.push(Tracked::from(error, token.source));
+                continue;
+            }
+            SyntacticToken::MacroDefinition(_) => {
+                let error = SyntacticError::MacroDefinitionInMacroDefinition;
+                errors.push(Tracked::from(error, token.source));
+                continue;
+            }
+            _ => tokens.push(token),
+        };
+    }
+
+    match errors.is_empty() {
+        true => Ok(tokens),
+        false => Err(errors),
+    }
+}
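For context, a minimal sketch of how the new entry point might be driven from elsewhere in the crate. This is not taken from the commit: it assumes parse_syntactic is reachable via the crate root, that Tracked exposes public value and source fields (as the block-matching check above relies on), and that the error and span types implement Debug; the function name and path string are arbitrary.

    use crate::*;

    // Hypothetical driver (assumed names): parse a source string and report
    // every syntactic error before giving up.
    fn parse_or_report(source_code: &str) -> Option<Vec<Tracked<SyntacticToken>>> {
        match parse_syntactic(source_code, Some("example.asm")) {
            Ok(tokens) => Some(tokens),
            Err(errors) => {
                for error in &errors {
                    // Each Tracked value carries the source span it was read from.
                    eprintln!("{:?} at {:?}", error.value, error.source);
                }
                None
            }
        }
    }

Note that parse_syntactic collects every error it can find into the Err variant rather than stopping at the first one, so a caller can report them all in a single pass.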