diff options
author | Ben Bridle <bridle.benjamin@gmail.com> | 2025-02-11 14:00:20 +1300 |
---|---|---|
committer | Ben Bridle <bridle.benjamin@gmail.com> | 2025-02-11 14:00:20 +1300 |
commit | 2b4e522b12a7eb87e91cd1cdc56064ee429a5212 (patch) | |
tree | 9f302e18b7e664502dad32d8d33d44e24a01c677 | |
download | torque-asm-2b4e522b12a7eb87e91cd1cdc56064ee429a5212.zip |
Initial commit
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Cargo.lock | 59 | ||||
-rw-r--r-- | Cargo.toml | 10 | ||||
-rw-r--r-- | src/compiler.rs | 144 | ||||
-rw-r--r-- | src/environment.rs | 63 | ||||
-rw-r--r-- | src/main.rs | 178 | ||||
-rw-r--r-- | src/parsers/constant_expression.rs | 52 | ||||
-rw-r--r-- | src/parsers/mod.rs | 11 | ||||
-rw-r--r-- | src/parsers/packed_binary_literal.rs | 80 | ||||
-rw-r--r-- | src/parsers/semantic.rs | 21 | ||||
-rw-r--r-- | src/parsers/syntactic.rs | 108 | ||||
-rw-r--r-- | src/tokens/constant_expression.rs | 134 | ||||
-rw-r--r-- | src/tokens/mod.rs | 11 | ||||
-rw-r--r-- | src/tokens/packed_binary_literal.rs | 60 | ||||
-rw-r--r-- | src/tokens/semantic.rs | 71 | ||||
-rw-r--r-- | src/tokens/syntactic.rs | 66 |
16 files changed, 1069 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..d9ebcc1 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,59 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ansi" +version = "1.0.0" +source = "git+git://benbridle.com/ansi?tag=v1.0.0#81d47867c2c97a9ae1d1c8fdfcd42c582410ad2a" + +[[package]] +name = "assembler" +version = "1.1.0" +dependencies = [ + "ansi", + "log 1.1.2", + "vagabond", +] + +[[package]] +name = "log" +version = "1.1.1" +source = "git+git://benbridle.com/log?tag=v1.1.1#930f3d0e2b82df1243f423c092a38546ea7533c3" + +[[package]] +name = "log" +version = "1.1.2" +source = "git+git://benbridle.com/log?tag=v1.1.2#3d5d1f7a19436151ba1dd52a2b50664969d90db6" +dependencies = [ + "ansi", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "switchboard" +version = "1.0.0" +source = "git+git://benbridle.com/switchboard?tag=v1.0.0#ea70fa89659e5cf1a9d4ca6ea31fb67f7a2cc633" +dependencies = [ + "log 1.1.1", + "paste", +] + +[[package]] +name = "torque-assembler" +version = "0.1.0" +dependencies = [ + "assembler", + "log 1.1.2", + "switchboard", +] + +[[package]] +name = "vagabond" +version = "1.0.1" +source = "git+git://benbridle.com/vagabond?tag=v1.0.1#08f3153fea62ea81a42438347eeee058f5bec199" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..44aa1a3 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "torque-assembler" +version = "0.1.0" +edition = "2021" + +[dependencies] +assembler = { path = "/home/ben/Libraries/assembler" } +# assembler = { git = "git://benbridle.com/assembler", tag = "v1.0.0" } +log = { git = 
"git://benbridle.com/log", tag = "v1.1.2" } +switchboard = { git = "git://benbridle.com/switchboard", tag = "v1.0.0" } diff --git a/src/compiler.rs b/src/compiler.rs new file mode 100644 index 0000000..068c6d5 --- /dev/null +++ b/src/compiler.rs @@ -0,0 +1,144 @@ +use crate::*; + + +/// Compiles multiple source code files into one. +pub struct Compiler { + pub source_path: PathBuf, + pub resolver: Resolver, +} + +impl Compiler { + pub fn from_string<P: AsRef<Path>>(source_code: String, path: P) -> Self { + let source_unit = SourceUnit::from_string(source_code, &path, parse_symbols); + Self { + source_path: path.as_ref().to_path_buf(), + resolver: Resolver::new(source_unit) + } + } + + pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, FileError> { + let source_unit = SourceUnit::from_path(&path, None, parse_symbols)?; + Ok(Self { + source_path: path.as_ref().to_path_buf(), + resolver: Resolver::new(source_unit) + }) + } + + /// Find library files descending from the parent directory. + pub fn include_libs_from_parent(&mut self, ext: &str) { + if let Some(parent_path) = self.source_path.parent() { + let parent_path = parent_path.to_owned(); + self.include_libs_from_path(&parent_path, ext); + } + } + + /// Find library files at or descending from a path. + pub fn include_libs_from_path(&mut self, path: &Path, ext: &str) { + let libraries = gather_from_path(path, Some(ext), parse_symbols); + self.resolver.add_library_source_units(libraries); + self.resolver.resolve(); + } + + /// Find library files from a PATH-style environment variable. 
+ pub fn include_libs_from_path_variable(&mut self, name: &str, ext: &str) { + let libraries = gather_from_path_variable(name, Some(ext), parse_symbols); + self.resolver.add_library_source_units(libraries); + self.resolver.resolve(); + } + + pub fn error(&self) -> Option<ResolverError> { + self.resolver.error() + } + + pub fn get_compiled_source(&self) -> Result<String, MergeError> { + self.resolver.get_merged_source_code(push_source_code) + } +} + + +/// Parse all symbols from a source code string. +fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> { + use SyntacticTokenVariant as SynVar; + use DefinitionType::*; + use SymbolRole::*; + let mut symbols = Vec::new(); + let mut macro_name: Option<String> = None; + let mut parse_arg_list = false; // true if parsing macro argument list + let mut after_separator = false; // true if prev token was separator + + macro_rules! push { + ($name:expr, $source:expr, $role:expr) => { + symbols.push(Symbol { + name: $name, + source: $source, + role: $role, + namespace: match &macro_name { + Some(name) => vec![name.to_owned()], + None => vec![], + } + }) + } + } + + for token in SyntacticParser::from_source_code(&source_code, path) { + match token.variant { + SynVar::MacroDefinition(name) => { + push!(name.clone(), token.source, Definition(MustPrecedeReference)); + macro_name = Some(name); + parse_arg_list = true; + } + SynVar::MacroDefinitionTerminator => { + macro_name = None; + } + SynVar::LabelDefinition(name) => { + push!(name.clone(), token.source, Definition(CanFollowReference)); + } + SynVar::Symbol(name) => if parse_arg_list && after_separator { + push!(name, token.source, Definition(MustPrecedeReference)); + } else { + parse_arg_list = false; + push!(name, token.source, Reference); + } + SynVar::Separator => { + after_separator = true; + continue; + } + SynVar::BlockOpen | SynVar::BlockClose => { + continue; + } + SynVar::PackedBinaryLiteral(pbl) => { + for field in pbl.fields { + 
push!(field.name.to_string(), field.source, Reference) + } + } + SynVar::ConstantExpression(expr) => { + use ConstantExpressionTokenVariant as TokenVar; + for token in expr.tokens { + if let TokenVar::SymbolReference(name) = token.variant { + push!(name, token.source, Reference); + } + } + } + _ => () + }; + after_separator = false; + } + return symbols; +} + +/// Push source code to a source compilation string. +fn push_source_code(compilation: &mut String, source_file: &SourceFile) { + // Skip blank files. + let source_code = &source_file.source_code; + if source_code.chars().all(|c| c.is_whitespace()) { return; } + // Ensure that the previous section is followed by two newline characters. + if !compilation.is_empty() { + if !compilation.ends_with('\n') { compilation.push('\n'); } + if !compilation.ends_with("\n\n") { compilation.push('\n'); } + } + // Push a path comment and the source code. + let path_str = source_file.path.as_os_str().to_string_lossy(); + let path_comment = format!("(: {path_str} )\n"); + compilation.push_str(&path_comment); + compilation.push_str(&source_code); +} diff --git a/src/environment.rs b/src/environment.rs new file mode 100644 index 0000000..006b45b --- /dev/null +++ b/src/environment.rs @@ -0,0 +1,63 @@ +use crate::*; + +use std::collections::HashMap; + + +pub struct Environment { + pub scopes: Vec<Scope>, +} + +impl Environment { + pub fn get_integer(&self, name: &str) -> Result<usize, ()> { + for scope in self.scopes.iter().rev() { + if let Ok(value) = scope.get_integer(name, &self) { + return Ok(value); + } + } + return Err(()); + } + + pub fn get_block(&self, name: &str) -> Result<usize, ()> { + for scope in self.scopes.iter().rev() { + if let Ok(value) = scope.get_block(name, &self) { + return Ok(value); + } + } + return Err(()); + } +} + +pub struct Scope { + pub definitions: HashMap<String, Definition>, +} + +impl Scope { + pub fn get_integer(&self, name: &str, environment: &Environment) -> Result<usize, ()> { + use 
IntegerDefinition as IntDef; + if let Some(definition) = self.definitions.get(name) { + if let Definition::Integer(integer) = definition { + match integer { + IntDef::Literal(value) => return Ok(*value), + IntDef::ConstantExpression(expr) => match expr.evaluate(environment) { + Ok(_) | Err(_) => todo!(), + }, + }; + } + } + return Err(()); + } + + pub fn get_block(&self, _name: &str, _environment: &Environment) -> Result<usize, ()> { + todo!() + } +} + +pub enum Definition { + Integer(IntegerDefinition), + Block(BlockLiteral), +} + +pub enum IntegerDefinition { + Literal(usize), + ConstantExpression(ConstantExpression), +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..0086496 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,178 @@ +mod compiler; +mod environment; +mod parsers; +mod tokens; + +pub use compiler::*; +pub use environment::*; +pub use parsers::*; +pub use tokens::*; + +pub use assembler::*; +use log::{info, fatal}; +use switchboard::{Switchboard, SwitchQuery}; + +use std::io::{Read, Write}; + + +fn print_version() -> ! 
{ + let version = env!("CARGO_PKG_VERSION"); + eprintln!("torque assembler, version {version}"); + eprintln!("written by ben bridle"); + std::process::exit(0); +} + +fn main() { + let mut args = Switchboard::from_env(); + if args.named("version").as_bool() { + print_version(); + } + if args.named("verbose").short('v').as_bool() { + log::set_log_level(log::LogLevel::Info); + } + let source_path = args.positional("source").as_path_opt().map( + |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); + let destination_path = args.positional("destination").as_path_opt(); + let extension = args.named("ext").default("tq").as_string(); + + let no_libs = args.named("no-libs").as_bool(); + let no_project_libs = args.named("no-project-libs").as_bool(); + let no_environment_libs = args.named("no-env-libs").as_bool(); + + let print_tree = args.named("tree").as_bool(); + let dry_run = args.named("dry-run").short('n').as_bool(); + let only_resolve = args.named("resolve").as_bool(); + let _export_symbols = args.named("symbols").as_bool(); + + // ----------------------------------------------------------------------- + + let mut compiler = if let Some(path) = &source_path { + info!("Reading program source from {path:?}"); + Compiler::from_path(path).unwrap_or_else(|err| match err { + FileError::InvalidExtension => fatal!( + "File {path:?} has invalid extension, must be '.{extension}'"), + FileError::NotFound => fatal!( + "File {path:?} was not found"), + FileError::InvalidUtf8 => fatal!( + "File {path:?} does not contain valid UTF-8 text"), + FileError::NotReadable => fatal!( + "File {path:?} is not readable"), + FileError::IsADirectory => fatal!( + "File {path:?} is a directory"), + FileError::Unknown => fatal!( + "Unknown error while attempting to read from {path:?}") + }) + } else { + let mut source_code = String::new(); + info!("Reading program source from standard input"); + if let Err(err) = std::io::stdin().read_to_string(&mut source_code) { + fatal!("Could not 
read from standard input\n{err:?}"); + } + Compiler::from_string(source_code, "<standard input>") + }; + if compiler.error().is_some() && !no_libs && !no_project_libs { + compiler.include_libs_from_parent(&extension); + } + if compiler.error().is_some() && !no_libs && !no_environment_libs { + compiler.include_libs_from_path_variable("TORQUE_LIBS", &extension); + } + + if print_tree { + compiler.resolver.hierarchy().report() + } + + if let Some(error) = compiler.error() { + error.report(); + std::process::exit(1); + } + let merged_source = compiler.get_compiled_source().unwrap_or_else( + |error| { error.report(); std::process::exit(1); } + ); + if only_resolve && !dry_run { + write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); + } + + // ----------------------------------------------------------------------- + + + // // TODO: Remove this block + // let code = &compiler.resolver.source_units[0].source_unit.main.source_code; + // let parser = SyntacticParser::from_source_code(code, Some("<main>")); + // println!(); + // for t in parser { + // println!("{t:?}"); + // } + + // Parse syntactic tokens from merged source code. + let path = Some("<merged source>"); + let parser = SyntacticParser::from_source_code(&merged_source, path); + let syntactic_tokens: Vec<_> = parser.collect(); + report_syntactic_errors(&syntactic_tokens, &merged_source); + + // let mut semantic_parser = SemanticParser::new(syntactic_tokens); + // semantic_parser.parse(); +} + + +fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! 
{ + match path { + Some(path) => match std::fs::write(path, bytes) { + Ok(_) => info!("Wrote output to path {:?}", path.as_ref()), + Err(err) => fatal!("Could not write to path {:?}\n{err:?}", path.as_ref()), + } + None => match std::io::stdout().write_all(bytes) { + Ok(_) => info!("Wrote output to standard output"), + Err(err) => fatal!("Could not write to standard output\n{err:?}"), + } + } + std::process::exit(0); +} + + +fn report_syntactic_errors(syntactic_tokens: &[SyntacticToken], source_code: &str) { + for token in syntactic_tokens { + let context = Context { source_code: &source_code, source: &token.source }; + match &token.variant { + SyntacticTokenVariant::ConstantExpression(expr) => for t in &expr.tokens { + let context = Context { source_code: &source_code, source: &t.source }; + if let ConstantExpressionTokenVariant::Error(err) = &t.variant { + let ConstantExpressionParseError::InvalidHexadecimalLiteral(hex) = err; + let message = format!("Invalid hexadecimal literal {hex:?} in constant expression"); + report_source_issue(LogLevel::Error, &context, &message); + } + } + SyntacticTokenVariant::PackedBinaryLiteral(pbl) => for e in &pbl.errors { + let context = Context { source_code: &source_code, source: &e.source }; + match e.variant { + PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => { + let message = format!("Duplicate field name {name:?} in packed binary literal"); + report_source_issue(LogLevel::Error, &context, &message); + } + PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => { + let message = format!("Invalid character {c:?} in packed binary literal"); + report_source_issue(LogLevel::Error, &context, &message); + } + } + } + SyntacticTokenVariant::Error(err) => match err { + SyntacticParseError::InvalidHexadecimalLiteral(hex) => { + let message = format!("Invalid hexadecimal literal {hex:?}"); + report_source_issue(LogLevel::Error, &context, &message); + } + SyntacticParseError::InvalidSymbolIdentifier(name) => { + 
let message = format!("Invalid identifier {name:?}"); + report_source_issue(LogLevel::Error, &context, &message); + } + SyntacticParseError::UnterminatedComment => { + let message = format!("Unterminated comment"); + report_source_issue(LogLevel::Error, &context, &message); + } + SyntacticParseError::UnterminatedConstantExpression => { + let message = format!("Unterminated constant expression"); + report_source_issue(LogLevel::Error, &context, &message); + } + } + _ => (), + } + } +} diff --git a/src/parsers/constant_expression.rs b/src/parsers/constant_expression.rs new file mode 100644 index 0000000..78dc697 --- /dev/null +++ b/src/parsers/constant_expression.rs @@ -0,0 +1,52 @@ +use crate::*; + + +pub fn parse_constant_expression(string: &str, parent: &Tokeniser) -> ConstantExpression { + use ConstantExpressionTokenVariant as TokenVar; + use ConstantExpressionParseError as ParseError; + + let mut tokens = Vec::new(); + let mut t = Tokeniser::new_child(string, parent); + t.position.to_next_char(); // skip opening delimiter + + loop { + t.drop_whitespace(); + t.mark_start_position(); + let token = t.eat_token(); + if token.is_empty() { + break; + } + + let variant = match token.as_str() { + "=" => TokenVar::Operator(Operator::Equal), + "!" 
=> TokenVar::Operator(Operator::NotEqual), + "<" => TokenVar::Operator(Operator::LessThan), + ">" => TokenVar::Operator(Operator::GreaterThan), + "+" => TokenVar::Operator(Operator::Add), + "-" => TokenVar::Operator(Operator::Subtract), + "<<" => TokenVar::Operator(Operator::LeftShift), + ">>" => TokenVar::Operator(Operator::RightShift), + "&" => TokenVar::Operator(Operator::And), + "|" => TokenVar::Operator(Operator::Or), + "^" => TokenVar::Operator(Operator::Xor), + "~" => TokenVar::Operator(Operator::Not), + _ => if let Some(stripped) = token.strip_prefix("0x") { + match usize::from_str_radix(stripped, 16) { + Ok(value) => TokenVar::IntegerLiteral(value), + Err(_) => TokenVar::Error( + ParseError::InvalidHexadecimalLiteral(stripped.to_string())), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(value) => TokenVar::IntegerLiteral(value), + Err(_) => TokenVar::SymbolReference(token.to_string()), + } + } + }; + + let source = t.mark_end_position(); + tokens.push(ConstantExpressionToken { source, variant }); + } + + return ConstantExpression { tokens }; +} diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs new file mode 100644 index 0000000..91765a9 --- /dev/null +++ b/src/parsers/mod.rs @@ -0,0 +1,11 @@ +mod constant_expression; +pub use constant_expression::*; + +mod packed_binary_literal; +pub use packed_binary_literal::*; + +mod syntactic; +pub use syntactic::*; + +mod semantic; +pub use semantic::*; diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs new file mode 100644 index 0000000..9704fc4 --- /dev/null +++ b/src/parsers/packed_binary_literal.rs @@ -0,0 +1,80 @@ +use crate::*; + + +pub fn parse_packed_binary_literal(string: &str, parent: &Tokeniser) -> PackedBinaryLiteral { + use PackedBinaryLiteralParseError as ParseError; + use PackedBinaryLiteralParseErrorVariant as ParseErrorVar; + + let mut value = 0; + let mut bits = 0; + let mut name = '\0'; + let mut fields: Vec<BitField> = Vec::new(); + let mut 
errors: Vec<ParseError> = Vec::new(); + + macro_rules! push_field { + ($source:expr) => { + if fields.iter().any(|f| f.name == name) { + let variant = ParseErrorVar::DuplicateFieldName(name); + errors.push(ParseError { source: $source, variant }); + } else { + fields.push(BitField { name, source: $source, bits, shift: 0 }); + } + }; + } + + let mut t = Tokeniser::new_child(string, parent); + t.position.to_next_char(); // skip opening hash character + + while let Some(c) = t.eat_char() { + // Ignore underscores. + if c == '_' { + t.prev_position = t.prev_prev_position; + continue; + } + + // Add a bit to the value; + value <<= 1; + for field in &mut fields { + field.shift += 1; + } + + // Extend the current field. + if c == name { + bits += 1; + continue; + } + + // Commit the current field. + if bits > 0 { + push_field!(t.mark_prev_end_position()); + bits = 0; + name = '\0'; + } + + // Parse bit literals. + if c == '0' { + continue; + } + if c == '1' { + value |= 1; + continue; + } + + t.mark_prev_start_position(); + if c.is_alphabetic() { + name = c; + bits = 1; + continue; + } else { + let source = t.mark_end_position(); + errors.push(ParseError { source, variant: ParseErrorVar::InvalidCharacter(c) }); + } + } + + // Commit the final field. 
+ if bits > 0 { + push_field!(t.mark_end_position()); + } + + PackedBinaryLiteral { value, fields, errors } +} diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs new file mode 100644 index 0000000..94ed70c --- /dev/null +++ b/src/parsers/semantic.rs @@ -0,0 +1,21 @@ +use crate::*; + + +pub struct SemanticParser { + pub syntactic_tokens: Vec<SyntacticToken>, + pub semantic_tokens: Vec<SemanticToken>, +} + + +impl SemanticParser { + pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self { + Self { + syntactic_tokens, + semantic_tokens: Vec::new(), + } + } + + pub fn parse(&mut self) { + todo!() + } +} diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs new file mode 100644 index 0000000..443e47e --- /dev/null +++ b/src/parsers/syntactic.rs @@ -0,0 +1,108 @@ +use crate::*; + + +pub struct SyntacticParser { + tokeniser: Tokeniser, + /// The name of the most recently parsed label. + label_name: String, + /// The name of the macro being parsed. + macro_name: Option<String>, +} + +impl SyntacticParser { + pub fn from_source_code<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { + let mut tokeniser = Tokeniser::new(source_code, path); + tokeniser.add_delimiters(&['@','&','%',';',':','{','}','(','[','#','~']); + Self { + tokeniser, + label_name: String::new(), + macro_name: None, + } + } +} + + +impl Iterator for SyntacticParser { + type Item = SyntacticToken; + + /// Sequentially parse tokens from the source code. + fn next(&mut self) -> Option<SyntacticToken> { + use SyntacticTokenVariant as SynVar; + use SyntacticParseError as SynErr; + let t = &mut self.tokeniser; + + t.drop_whitespace(); + t.mark_start_position(); + + let variant = match t.eat_char()? 
{ + '@' => { + self.label_name = t.eat_token(); + SynVar::LabelDefinition(self.label_name.clone()) + } + '&' => { + let token = t.eat_token(); + SynVar::LabelDefinition(format!("{}/{token}", self.label_name)) + } + '%' => { + let macro_name = t.eat_token(); + self.macro_name = Some(macro_name.clone()); + SynVar::MacroDefinition(macro_name) + } + ';' => { + self.macro_name = None; + SynVar::MacroDefinitionTerminator + } + '[' => match t.eat_to_delimiter(']') { + Some(string) => { + let constant = ConstantExpression::from_str(&string, t); + SynVar::ConstantExpression(constant) + } + None => SynVar::Error(SynErr::UnterminatedConstantExpression), + } + '{' => SynVar::BlockOpen, + '}' => SynVar::BlockClose, + '(' => match t.eat_to_delimiter(')') { + Some(string) => SynVar::Comment(string), + None => SynVar::Error(SynErr::UnterminatedComment), + } + '#' => { + let token = t.eat_token(); + let pbl = PackedBinaryLiteral::from_str(&token, t); + SynVar::PackedBinaryLiteral(pbl) + }, + '~' => { + let token = t.eat_token(); + SynVar::Symbol(format!("{}/{token}", self.label_name)) + } + ':' => SynVar::Separator, + c => { + let token = format!("{c}{}", t.eat_token()); + if let Some(hex_string) = token.strip_prefix("0x") { + match usize::from_str_radix(hex_string, 16) { + Ok(hex) => SynVar::HexadecimalLiteral(hex), + Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), + } + } else { + match usize::from_str_radix(&token, 10) { + Ok(value) => SynVar::DecimalLiteral(value), + Err(_) => SynVar::Symbol(token), + } + } + } + }; + + // Parse source path comments. + if let SynVar::Comment(comment) = &variant { + // Check if the comment fills the entire line. 
+ if t.start_position.column == 0 && t.end_of_line() { + if let Some(path) = comment.strip_prefix(": ") { + t.source_path = Some(PathBuf::from(path.trim())); + t.embedded_first_line = t.start_position.line + 1; + } + } + } + + let source = t.mark_end_position(); + Some( SyntacticToken { source, variant } ) + } +} diff --git a/src/tokens/constant_expression.rs b/src/tokens/constant_expression.rs new file mode 100644 index 0000000..e4aa099 --- /dev/null +++ b/src/tokens/constant_expression.rs @@ -0,0 +1,134 @@ +use crate::*; + + +pub struct ConstantExpression { + pub tokens: Vec<ConstantExpressionToken>, +} + +impl ConstantExpression { + pub fn from_str(string: &str, tokeniser: &Tokeniser) -> Self { + parse_constant_expression(string, tokeniser) + } +} + +pub struct ConstantExpressionToken { + pub source: SourceSpan, + pub variant: ConstantExpressionTokenVariant, +} + +pub enum ConstantExpressionTokenVariant { + SymbolReference(String), + IntegerLiteral(usize), + Operator(Operator), + Error(ConstantExpressionParseError), +} + +pub enum Operator { + Equal, + NotEqual, + LessThan, + GreaterThan, + Add, + Subtract, + LeftShift, + RightShift, + And, + Or, + Xor, + Not, +} + +pub enum ConstantExpressionParseError { + InvalidHexadecimalLiteral(String), +} + + +impl ConstantExpression { + pub fn evaluate(&self, environment: &Environment) -> Result<usize, ConstantExpressionEvaluationError> { + use ConstantExpressionTokenVariant as Token; + use ConstantExpressionEvaluationError as EvalErr; + + let mut stack = Vec::new(); + macro_rules! push { + ($value:expr) => { stack.push($value) }; + } + macro_rules! pop { + ($name:ident) => { let $name = match stack.pop() { + Some(value) => value, + None => return Err(EvalErr::StackUnderflow), + }; }; + } + macro_rules! 
truth { + ($bool:expr) => { match $bool { true => 1, false => 0 } }; + } + + for token in &self.tokens { + match &token.variant { + Token::IntegerLiteral(value) => push!(*value), + Token::SymbolReference(name) => match environment.get_integer(name) { + Ok(value) => push!(value), + Err(_) => todo!(), + } + Token::Operator(operator) => match operator { + Operator::Equal => { pop!(b); pop!(a); push!(truth!(a==b)) }, + Operator::NotEqual => { pop!(b); pop!(a); push!(truth!(a!=b)) }, + Operator::LessThan => { pop!(b); pop!(a); push!(truth!(a < b)) }, + Operator::GreaterThan => { pop!(b); pop!(a); push!(truth!(a > b)) }, + Operator::Add => { pop!(b); pop!(a); push!(a + b) }, + Operator::Subtract => { pop!(b); pop!(a); push!(a - b) }, + Operator::LeftShift => { pop!(b); pop!(a); push!(a << b) }, + Operator::RightShift => { pop!(b); pop!(a); push!(a >> b) }, + Operator::And => { pop!(b); pop!(a); push!(a & b) }, + Operator::Or => { pop!(b); pop!(a); push!(a | b) }, + Operator::Xor => { pop!(b); pop!(a); push!(a ^ b) }, + Operator::Not => { pop!(a); push!(!a) }, + } + Token::Error(_) => (), + } + } + match stack.len() { + 0 => Err(EvalErr::NoReturnValue), + 1 => Ok(stack[0]), + _ => Err(EvalErr::MultipleReturnValues), + } + } +} + +pub enum ConstantExpressionEvaluationError { + StackUnderflow, + MultipleReturnValues, + NoReturnValue, +} + + +impl std::fmt::Debug for ConstantExpression { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + use ConstantExpressionTokenVariant as TokenVar; + for (i, token) in self.tokens.iter().enumerate() { + let string = match &token.variant { + TokenVar::SymbolReference(name) => name, + TokenVar::IntegerLiteral(value) => &value.to_string(), + TokenVar::Operator(operator) => match operator { + Operator::Equal => "=", + Operator::NotEqual => "!", + Operator::LessThan => "<", + Operator::GreaterThan => ">", + Operator::Add => "+", + Operator::Subtract => "-", + Operator::LeftShift => "<<", + Operator::RightShift => 
">>", + Operator::And => "&", + Operator::Or => "|", + Operator::Xor => "^", + Operator::Not => "~", + } + TokenVar::Error(_) => "<error>", + }; + match i { + 0 => write!(f, "{string}")?, + _ => write!(f, " {string}")?, + } + } + return Ok(()); + } +} diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs new file mode 100644 index 0000000..65f361c --- /dev/null +++ b/src/tokens/mod.rs @@ -0,0 +1,11 @@ +mod syntactic; +pub use syntactic::*; + +mod semantic; +pub use semantic::*; + +mod constant_expression; +pub use constant_expression::*; + +mod packed_binary_literal; +pub use packed_binary_literal::*; diff --git a/src/tokens/packed_binary_literal.rs b/src/tokens/packed_binary_literal.rs new file mode 100644 index 0000000..1252398 --- /dev/null +++ b/src/tokens/packed_binary_literal.rs @@ -0,0 +1,60 @@ +use crate::*; + + +pub struct PackedBinaryLiteral { + pub value: usize, + pub fields: Vec<BitField>, + pub errors: Vec<PackedBinaryLiteralParseError>, +} + +impl PackedBinaryLiteral { + pub fn from_str(string: &str, parent: &Tokeniser) -> Self { + parse_packed_binary_literal(string, parent) + } +} + +pub struct BitField { + pub name: char, + pub source: SourceSpan, + /// Length of field in bits + pub bits: usize, + /// Distance to left-shift field in value + pub shift: usize, +} + +pub struct PackedBinaryLiteralParseError { + pub source: SourceSpan, + pub variant: PackedBinaryLiteralParseErrorVariant, +} + +pub enum PackedBinaryLiteralParseErrorVariant { + DuplicateFieldName(char), + InvalidCharacter(char), +} + + +impl std::fmt::Display for PackedBinaryLiteral { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + if self.value == 0 { + write!(f, "0")?; + } else { + let bitcount = (self.value.ilog2() + 1) as usize; + 'bit: for i in (0..bitcount).rev() { + if (i+1) % 4 == 0 { + write!(f, "_")?; + } + for field in &self.fields { + if i <= field.bits + field.shift - 1 && i >= field.shift { + write!(f, "{}", field.name)?; + continue 'bit; + } + } 
+ match (self.value >> i) & 1 { + 0 => write!(f, "0")?, + _ => write!(f, "1")?, + } + } + } + return Ok(()); + } +} diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs new file mode 100644 index 0000000..ed53685 --- /dev/null +++ b/src/tokens/semantic.rs @@ -0,0 +1,71 @@ +use crate::*; + + +pub enum SemanticToken { + MacroDefinition, + Invocation, +} + +pub struct Invocation { + pub name: String, + pub bytecode: BytecodeSpan, + pub arguments: Vec<InvocationArgument>, +} + +pub struct BlockLiteral { + pub tokens: Vec<BlockToken>, +} + +pub struct BlockToken { + pub source: SourceSpan, + pub bytecode: BytecodeSpan, + pub variant: BlockTokenVariant, +} + +pub enum BlockTokenVariant { + Invocation(Invocation), + Word(PackedBinaryLiteral), +} + +pub struct MacroDefinition { + pub name: String, + pub arguments: Vec<DefinitionArgument>, + pub body: BlockLiteral, +} + +// -------------------------------------------------------------------------- // + +pub struct SemanticParseError { + pub source: SourceSpan, + pub variant: SemanticParseErrorVariant, +} + +pub enum SemanticParseErrorVariant { + +} + +// -------------------------------------------------------------------------- // + +pub struct DefinitionArgument { + pub name: String, + pub source: SourceSpan, + pub variant: DefinitionArgumentVariant, +} + +pub enum DefinitionArgumentVariant { + Integer, + Block, +} + +pub struct InvocationArgument { + pub source: SourceSpan, + pub variant: InvocationArgumentVariant, +} + +pub enum InvocationArgumentVariant { + BlockLiteral(BlockLiteral), + IntegerLiteral(usize), + ConstantExpression(ConstantExpression), + Invocation(Invocation), +} + diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs new file mode 100644 index 0000000..000d178 --- /dev/null +++ b/src/tokens/syntactic.rs @@ -0,0 +1,66 @@ +use crate::*; + + +pub struct SyntacticToken { + pub source: SourceSpan, + pub variant: SyntacticTokenVariant, +} + +pub enum SyntacticTokenVariant { + 
LabelDefinition(String), + MacroDefinition(String), + MacroDefinitionTerminator, + + DecimalLiteral(usize), + HexadecimalLiteral(usize), + PackedBinaryLiteral(PackedBinaryLiteral), + + Comment(String), + ConstantExpression(ConstantExpression), + + BlockOpen, + BlockClose, + Separator, + + Symbol(String), + + Error(SyntacticParseError), +} + +#[derive(Debug)] +pub enum SyntacticParseError { + InvalidHexadecimalLiteral(String), + InvalidSymbolIdentifier(String), + UnterminatedComment, + UnterminatedConstantExpression, +} + + +impl std::fmt::Debug for SyntacticToken { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + use SyntacticTokenVariant::*; + let start = &self.source.in_merged; + let name = match &self.variant { + LabelDefinition(name) => format!("LabelDefinition({name})"), + MacroDefinition(name) => format!("MacroDefinition({name})"), + MacroDefinitionTerminator => format!("MacroDefinitionTerminator"), + + DecimalLiteral(value) => format!("DecimalLiteral({value})"), + HexadecimalLiteral(value) => format!("HexadecimalLiteral(0x{value:x})"), + PackedBinaryLiteral(pbl) => format!("PackedBinaryLiteral({pbl})"), + + Comment(_) => format!("Comment"), + ConstantExpression(expr) => format!("ConstantExpression({expr:?})"), + + BlockOpen => format!("BlockOpen"), + BlockClose => format!("BlockClose"), + Separator => format!("Separator"), + + Symbol(name) => format!("Symbol({name})"), + + Error(error) => format!("Error({error:?})"), + }; + + write!(f, "{start} {name}") + } +} |