diff options
author | Ben Bridle <bridle.benjamin@gmail.com> | 2025-02-14 09:34:48 +1300 |
---|---|---|
committer | Ben Bridle <bridle.benjamin@gmail.com> | 2025-02-14 09:34:48 +1300 |
commit | e39505931b05be321ee2b04c41a9739f00c19208 (patch) | |
tree | 529a4162ed2d34a7c283a928136808bd20750563 | |
parent | 1995f8a8f2cb5ea810afc173fe8dfa2f5355f684 (diff) | |
download | torque-asm-e39505931b05be321ee2b04c41a9739f00c19208.zip |
Implement semantic parsing
-rw-r--r-- | Cargo.lock | 5 | ||||
-rw-r--r-- | Cargo.toml | 4 | ||||
-rw-r--r-- | src/environment.rs | 39 | ||||
-rw-r--r-- | src/main.rs | 17 | ||||
-rw-r--r-- | src/parsers/semantic.rs | 287 | ||||
-rw-r--r-- | src/tokens/semantic.rs | 64 |
6 files changed, 368 insertions, 48 deletions
@@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "ansi" @@ -45,11 +45,12 @@ dependencies = [ ] [[package]] -name = "torque-assembler" +name = "torque-asm" version = "0.1.0" dependencies = [ "assembler", "log 1.1.2", + "paste", "switchboard", ] @@ -1,5 +1,5 @@ [package] -name = "torque-assembler" +name = "torque-asm" version = "0.1.0" edition = "2021" @@ -8,3 +8,5 @@ assembler = { path = "/home/ben/Libraries/assembler" } # assembler = { git = "git://benbridle.com/assembler", tag = "v1.0.0" } log = { git = "git://benbridle.com/log", tag = "v1.1.2" } switchboard = { git = "git://benbridle.com/switchboard", tag = "v1.0.0" } + +paste = "1.0.15" diff --git a/src/environment.rs b/src/environment.rs index d88e1b5..2bb3f5b 100644 --- a/src/environment.rs +++ b/src/environment.rs @@ -1,4 +1,5 @@ use crate::*; +use semantic::*; use std::collections::HashMap; @@ -32,32 +33,24 @@ pub struct Scope { } impl Scope { - pub fn get_integer(&self, name: &str, environment: &Environment) -> Result<usize, ()> { - use IntegerDefinition as IntDef; - if let Some(definition) = self.definitions.get(name) { - if let Definition::Integer(integer) = definition { - match integer { - IntDef::Literal(value) => return Ok(*value), - IntDef::ConstantExpression(expr) => match expr.evaluate(environment) { - Ok(_) | Err(_) => todo!(), - }, - }; - } - } - return Err(()); + pub fn get_integer(&self, _name: &str, _environment: &Environment) -> Result<usize, ()> { + todo!() + // use semantic::IntegerDefinition as IntDef; + + // if let Some(Definition { variant, ..}) = self.definitions.get(name) { + // if let IntegerDefinition::Integer(integer) = definition { + // match integer { + // IntDef::Literal(value) => return Ok(*value), + // IntDef::ConstantExpression(expr) => match expr.evaluate(environment) { + // Ok(_) | Err(_) => todo!(), + // }, + // }; + // } + // } + // return Err(()); } pub fn get_block(&self, _name: &str, _environment: &Environment) -> Result<usize, ()> { todo!() } } - -pub enum Definition { - Integer(IntegerDefinition), - Block, -} - -pub enum IntegerDefinition { - Literal(usize), - ConstantExpression(ConstantExpression), -} diff --git a/src/main.rs b/src/main.rs index 32f4f5d..342057d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ pub use environment::*; pub use parsers::*; pub use tokens::*; + pub use assembler::*; use log::{info, fatal}; use switchboard::{Switchboard, SwitchQuery}; @@ -85,6 +86,9 @@ fn main() { error.report(); std::process::exit(1); } + + compiler.resolver.unused().report(); + let merged_source = compiler.get_compiled_source().unwrap_or_else( |error| { error.report(); std::process::exit(1); } ); @@ -94,23 +98,14 @@ fn main() { // ----------------------------------------------------------------------- - - // // TODO: Remove this block - // let code = &compiler.resolver.source_units[0].source_unit.main.source_code; - // let parser = SyntacticParser::from_source_code(code, Some("<main>")); - // println!(); - // for t in parser { - // println!("{t:?}"); - // } - // Parse syntactic tokens from merged source code. let path = Some("<merged source>"); let parser = SyntacticParser::from_source_code(&merged_source, path); let syntactic_tokens: Vec<_> = parser.collect(); report_syntactic_errors(&syntactic_tokens, &merged_source); - // let mut semantic_parser = SemanticParser::new(syntactic_tokens); - // semantic_parser.parse(); + let program = ProgramParser::new(syntactic_tokens).parse(); + program.print_definitions(); } diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs index 6576b44..44861e1 100644 --- a/src/parsers/semantic.rs +++ b/src/parsers/semantic.rs @@ -1,2 +1,289 @@ use crate::*; +use syntactic as syn; +use syn::TokenVariant as SynVar; +use semantic::*; + +use std::collections::VecDeque; + + +macro_rules! fn_is_syn_variant { + ($name:ident, $variant:ty) => { paste::paste! { + fn [< is_ $name >](token: &syn::Token) -> bool { + match token.variant { $variant => true, _ => false, } + } } }; } +fn_is_syn_variant!(block_open, syn::TokenVariant::BlockOpen); +fn_is_syn_variant!(block_close, syn::TokenVariant::BlockClose); +fn_is_syn_variant!(separator, syn::TokenVariant::Separator); +fn_is_syn_variant!(terminator, syn::TokenVariant::MacroDefinitionTerminator); + + +pub struct Tokens { + tokens: VecDeque<syn::Token>, +} + +impl Tokens { + pub fn new<T: Into<VecDeque<syn::Token>>>(tokens: T) -> Self { + Self { tokens: tokens.into() } + } + + pub fn pop(&mut self) -> Option<syn::Token> { + self.tokens.pop_front() + } + + pub fn pop_if(&mut self, predicate: fn(&syn::Token) -> bool) -> Option<syn::Token> { + match predicate(self.tokens.front()?) { + true => self.tokens.pop_front(), + false => None, + } + } + + pub fn unpop(&mut self, token: syn::Token) { + self.tokens.push_front(token); + } + + /// Pull tokens until the predicate returns true, otherwise return Err. + pub fn pull_until(&mut self, predicate: fn(&syn::Token) -> bool) -> Result<Self, ()> { + let mut output = VecDeque::new(); + while let Some(token) = self.tokens.pop_front() { + match predicate(&token) { + true => return Ok(Self::new(output)), + false => output.push_back(token), + }; + } + return Err(()); + } + + pub fn take(&mut self) -> Self { + Self { tokens: std::mem::take(&mut self.tokens) } + } + + pub fn len(&self) -> usize { + self.tokens.len() + } +} + + +pub struct ProgramParser { + tokens: Tokens, + definitions: Vec<Definition>, + invocations: Vec<Invocation>, + errors: Vec<ParseError>, +} + +impl ProgramParser { + pub fn new(syntactic_tokens: Vec<syn::Token>) -> Self { + Self { + tokens: Tokens::new(syntactic_tokens), + definitions: Vec::new(), + invocations: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(mut self) -> Program { + while let Some(syn) = self.tokens.pop() { + if let SynVar::MacroDefinition(name) = syn.variant { + // Collect all tokens up to the next definition terminator. + let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else { + let variant = ParseErrorVariant::UnterminatedMacroDefinition; + self.errors.push(ParseError { source: syn.source, variant}); + break; + }; + // Parse macro definition arguments. + match DefinitionParser::new(name, syn.source, definition_tokens).parse() { + Ok(definition) => self.definitions.push(definition), + Err(errors) => self.errors.extend(errors), + }; + } + } + + Program { + definitions: self.definitions, + invocations: self.invocations, + errors: self.errors, + } + } +} + + +pub struct DefinitionParser { + name: String, + source: SourceSpan, + tokens: Tokens, + arguments: Vec<ArgumentDefinition>, + errors: Vec<ParseError>, +} + +impl DefinitionParser { + pub fn new(name: String, source: SourceSpan, tokens: Tokens) -> Self { + Self { + name, + tokens, + source, + arguments: Vec::new(), + errors: Vec::new(), + } + } + + pub fn parse(mut self) -> Result<Definition, Vec<ParseError>> { + while let Some(definition) = self.parse_argument_definition() { + self.arguments.push(definition) + } + if self.errors.is_empty() { + let variant = self.parse_body(); + Ok(Definition { + name: self.name, + source: self.source, + arguments: self.arguments, + variant, + }) + } else { + Err(self.errors) + } + } + + fn parse_argument_definition(&mut self) -> Option<ArgumentDefinition> { + // Only continue if the first token is a separator. + self.tokens.pop_if(is_separator)?; + + // Pop argument tokens. + let is_block = match self.tokens.pop_if(is_block_open) { + Some(_) => true, + None => false, + }; + let token = self.tokens.pop(); + if is_block { + self.tokens.pop_if(is_block_close); + } + // Parse argument token. + let token = token?; + let source = token.source; + if let SynVar::Symbol(name) = token.variant { + let variant = ArgumentDefinitionVariant::Integer; + Some(ArgumentDefinition { name, source, variant }) + } else { + let name = self.name.clone(); + let variant = ParseErrorVariant::InvalidArgumentDefinition(name); + self.errors.push(ParseError { source, variant}); + None + } + } + + fn parse_body(&mut self) -> DefinitionVariant { + // Attempt to parse an IntegerDefinition. + if self.tokens.len() == 1 { + let token = self.tokens.pop().unwrap(); + match token.variant { + SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => { + return DefinitionVariant::Integer(IntegerDefinition { + source: token.source, + variant: IntegerDefinitionVariant::Literal(value), + }); + } + SynVar::ConstantExpression(expr) => { + return DefinitionVariant::Integer(IntegerDefinition { + source: token.source, + variant: IntegerDefinitionVariant::Constant(expr), + }); + } + SynVar::Symbol(name) => { + return DefinitionVariant::Reference(ReferenceDefinition { + source: token.source, + name, + }); + } + _ => (), + } + self.tokens.unpop(token); + } + + // Parse the remaining tokens as a BlockDefinition. + let block = BlockParser::new(self.tokens.take()).parse(); + return DefinitionVariant::Block(block); + } +} + + +/// Parse an entire block, excluding delimiters. +pub struct BlockParser { + tokens: Tokens, + block_tokens: Vec<BlockToken>, + errors: Vec<ParseError>, +} + +impl BlockParser { + pub fn new(tokens: Tokens) -> Self { + Self { tokens, block_tokens: Vec::new(), errors: Vec::new() } + } + + pub fn parse(mut self) -> BlockDefinition { + while let Some(token) = self.tokens.pop() { + let source = token.source; + match token.variant { + SynVar::Symbol(name) => { + let mut arguments = Vec::new(); + while let Some(argument) = self.parse_invocation_argument() { + arguments.push(argument); + } + let invocation = Invocation { name, arguments }; + let variant = BlockTokenVariant::Invocation(invocation); + let block_token = BlockToken { source, variant }; + self.block_tokens.push(block_token); + } + SynVar::PackedBinaryLiteral(pbl) => { + let variant = BlockTokenVariant::Word(pbl); + let block_token = BlockToken { source, variant }; + self.block_tokens.push(block_token); + } + _ => { + let variant = ParseErrorVariant::InvalidToken; + self.errors.push(ParseError { source, variant }) + } + } + } + BlockDefinition { tokens: self.block_tokens, errors: self.errors } + } + + fn parse_invocation_argument(&mut self) -> Option<DefinitionVariant> { + // Only continue if the first token is a separator. + self.tokens.pop_if(is_separator)?; + + if let Some(block_open) = self.tokens.pop_if(is_block_open) { + let source = block_open.source; + if let Ok(block_tokens) = self.tokens.pull_until(is_block_close) { + let block = BlockParser::new(block_tokens).parse(); + Some(DefinitionVariant::Block(block)) + } else { + let variant = ParseErrorVariant::UnterminatedBlockDefinition; + self.errors.push(ParseError { source, variant }); + None + } + } else { + let token = self.tokens.pop()?; + let source = token.source; + match token.variant { + SynVar::Symbol(name) => { + let reference = ReferenceDefinition { source, name }; + Some(DefinitionVariant::Reference(reference)) + } + SynVar::DecimalLiteral(value) | SynVar::HexadecimalLiteral(value) => { + let variant = IntegerDefinitionVariant::Literal(value); + let integer = IntegerDefinition { source, variant }; + Some(DefinitionVariant::Integer(integer)) + } + SynVar::ConstantExpression(expr) => { + let variant = IntegerDefinitionVariant::Constant(expr); + let integer = IntegerDefinition { source, variant }; + Some(DefinitionVariant::Integer(integer)) + } + _ => { + let variant = ParseErrorVariant::InvalidToken; + self.errors.push(ParseError { source, variant }); + None + } + } + + } + } +} diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs index a665b9a..66db7b2 100644 --- a/src/tokens/semantic.rs +++ b/src/tokens/semantic.rs @@ -5,6 +5,7 @@ use crate::*; pub struct Program { pub definitions: Vec<Definition>, pub invocations: Vec<Invocation>, + pub errors: Vec<ParseError>, } /// A symbol definition. @@ -28,7 +29,8 @@ pub enum ArgumentDefinitionVariant { pub enum DefinitionVariant { Integer(IntegerDefinition), - Block(Vec<BlockToken>), + Block(BlockDefinition), + Reference(ReferenceDefinition), } pub struct IntegerDefinition { @@ -39,7 +41,11 @@ pub struct IntegerDefinition { pub enum IntegerDefinitionVariant { Literal(usize), Constant(ConstantExpression), - Reference(String), +} + +pub struct BlockDefinition { + pub tokens: Vec<BlockToken>, + pub errors: Vec<ParseError>, } pub struct BlockToken { @@ -53,20 +59,56 @@ pub enum BlockTokenVariant { Word(PackedBinaryLiteral), } +/// References aren't necessarily an integer or a block +pub struct ReferenceDefinition { + pub source: SourceSpan, + pub name: String, +} + pub struct Invocation { pub name: String, - pub source: SourceSpan, - pub arguments: Vec<InvocationArgument>, + pub arguments: Vec<DefinitionVariant>, } -pub struct InvocationArgument { +pub struct ParseError { pub source: SourceSpan, - pub variant: InvocationArgumentVariant, + pub variant: ParseErrorVariant, +} + +pub enum ParseErrorVariant { + UnterminatedMacroDefinition, + UnterminatedBlockDefinition, + /// Name of the macro. + InvalidArgumentDefinition(String), + InvalidToken, } -pub enum InvocationArgumentVariant { - BlockLiteral(Vec<BlockToken>), - IntegerLiteral(usize), - ConstantExpression(ConstantExpression), - Reference(String), + +// ------------------------------------------------------------------------ // + +impl Program { + pub fn print_definitions(&self) { + for definition in &self.definitions { + let variant = match definition.variant { + DefinitionVariant::Integer(_) => "integer", + DefinitionVariant::Block(_) => "block", + DefinitionVariant::Reference(_) => "reference", + }; + println!("DEFINE {} ({variant})", definition.name); + for argument in &definition.arguments { + let variant = match argument.variant { + ArgumentDefinitionVariant::Integer => "integer", + ArgumentDefinitionVariant::Block => "block", + }; + println!(" ARGUMENT {} ({variant})", argument.name); + } + let variant = match &definition.variant { + DefinitionVariant::Integer(integer) => todo!(), + DefinitionVariant::Block(block) => todo!(), + DefinitionVariant::Reference(reference) => todo!() + }; + + println!(); + } + } } |