diff options
| author | Ben Bridle <ben@derelict.engineering> | 2025-03-06 20:33:27 +1300 | 
|---|---|---|
| committer | Ben Bridle <ben@derelict.engineering> | 2025-03-11 16:59:26 +1300 | 
| commit | 1ecee352f5844b0809d7ae66df52e34f42b44c8e (patch) | |
| tree | 472b6fd57ff7f64ac3f8cd676cbe7a113ba01f05 /src | |
| parent | f2ed89083f5326a7a6f0a1720033d3388aa431fb (diff) | |
| download | torque-asm-1ecee352f5844b0809d7ae66df52e34f42b44c8e.zip | |
Rewrite entire assembler
The language is now more general, the code is better structured, error
reporting is more detailed, and many new language features have
been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
Diffstat (limited to 'src')
37 files changed, 2942 insertions, 2234 deletions
| diff --git a/src/bin/tq.rs b/src/bin/tq.rs index f22bd14..d1e51f3 100644 --- a/src/bin/tq.rs +++ b/src/bin/tq.rs @@ -1,43 +1,100 @@  use torque_asm::*; +use assembler::FileError;  use log::{info, fatal}; -use switchboard::{Switchboard, SwitchQuery}; +use switchboard::*;  use std::io::{Read, Write}; -use std::str::FromStr; +use std::path::Path; -fn print_version() -> ! { -    let version = env!("CARGO_PKG_VERSION"); -    eprintln!("torque assembler, version {version}"); -    eprintln!("written by ben bridle"); -    std::process::exit(0); -} -  fn main() {      let mut args = Switchboard::from_env(); -    if args.named("version").as_bool() { -        print_version(); -    } -    if args.named("verbose").short('v').as_bool() { -        log::set_log_level(log::LogLevel::Info); -    } -    let source_path = args.positional("source").as_path_opt().map( +    args.positional("source"); +    args.positional("destination"); +    args.positional("extension").default("tq"); +    args.named("no-libs"); +    args.named("no-project-libs"); +    args.named("no-env-libs"); +    args.named("format").default("debug"); +    args.named("width"); +    args.named("dry-run").short('n'); +    args.named("tree"); +    args.named("help").short('h'); +    args.named("version"); +    args.named("verbose").short('v'); +    args.raise_errors(); + +    let source_path     = args.get("source").as_path_opt().map(          |p| p.canonicalize().unwrap_or_else(|e| fatal!("{p:?}: {e:?}"))); -    let destination_path = args.positional("destination").as_path_opt(); -    let extension = args.named("ext").default("tq").as_string(); +    let destination     = args.get("destination").as_path_opt(); +    let extension       = args.get("extension").as_string(); +    let no_libs         = args.get("no-libs").as_bool(); +    let no_project_libs = args.get("no-project-libs").as_bool(); +    let no_env_libs     = args.get("no-env-libs").as_bool(); +    let format          = 
Format::from_str(&args.get("format").as_string()); +    let width           = args.get("width").as_u32_opt(); +    let dry_run         = args.get("dry-run").as_bool(); +    let print_tree      = args.get("tree").as_bool(); +    let print_help      = args.get("help").as_bool(); +    let print_version   = args.get("version").as_bool(); +    let verbose         = args.get("verbose").as_bool(); -    let no_libs = args.named("no-libs").as_bool(); -    let no_project_libs = args.named("no-project-libs").as_bool(); -    let no_environment_libs = args.named("no-env-libs").as_bool(); +    if verbose { log::set_log_level(log::LogLevel::Info) } +    if print_version { +        let version = env!("CARGO_PKG_VERSION"); +        eprintln!("torque assembler, version {version}"); +        eprintln!("written by ben bridle"); +        std::process::exit(0); +    } +    if print_help { +        eprintln!("\ +Usage: tq [source] [destination] -    let format = args.named("format").default("debug").as_string(); -    let print_tree = args.named("tree").as_bool(); -    let dry_run = args.named("dry-run").short('n').as_bool(); +Torque multi-assembler, see http://benbridle.com/torque for documentation. -    let Ok(format) = Format::from_str(format.as_str()) else { -        fatal!("Unknown format '{format}', expected 'debug', 'inhx', 'inhx32', 'raw', or 'source'. 
"); -    }; +Arguments: +  [source]               Path to a source file to assemble +  [destination]          Path to which output will be written +  [extension]            File extension to identify library files (default is 'tq') + +Switches: +  --format=<fmt>         Format to apply to assembled bytecode (default is 'debug') +  --width=<width>        Force a fixed width for all assembled words +  --no-project-libs      Don't search for libraries in the source parent folder +  --no-env-libs          Don't search for libraries in the TORQUE_LIBS path variable +  --no-libs              Combination of --no-project-libs and --no-env-libs +  --tree                 Display a tree visualisation of all included library files +  --dry-run        (-n)  Assemble and show errors only, don't write any output +  --help           (-h)  Prints help +  --verbose,       (-v)  Print additional debug information +  --version              Print the assembler version and exit + +Environment variables: +  TORQUE_LIBS +    A list of colon-separated paths which will be searched to find +    Torque source code files to use as libraries when assembling a +    Torque program. If a library file resolves an unresolved symbol +    in the program being assembled, the library file will be merged +    into the program. + +Output formats: +  <debug> +    Print assembled words as human-readable binary literals. +  <inhx> +    Original 8-bit Intel hex format. +  <inhx32> +    Modified 16-bit Intel hex format used by Microchip. +  <raw> +    Assembled words are converted to big-endian bytestrings and concatenated. +    Each word is padded to the nearest byte. Words must all be the same width. +  <source> +    Print the source file before assembly, with symbols resolved. + +Created by Ben Bridle. 
+        "); +        std::process::exit(0); +    }      // ----------------------------------------------------------------------- @@ -68,14 +125,13 @@ fn main() {      if compiler.error().is_some() && !no_libs && !no_project_libs {          compiler.include_libs_from_parent(&extension);      } -    if compiler.error().is_some() && !no_libs && !no_environment_libs { +    if compiler.error().is_some() && !no_libs && !no_env_libs {          compiler.include_libs_from_path_variable("TORQUE_LIBS", &extension);      }      if print_tree {          compiler.resolver.hierarchy().report()      } -      if let Some(error) = compiler.error() {          error.report();          std::process::exit(1); @@ -85,54 +141,58 @@ fn main() {          error.report();          std::process::exit(1);      }); -    if format == Format::Source && !dry_run { -        write_bytes_and_exit(merged_source.as_bytes(), destination_path.as_ref()); + +    if !dry_run && format == Format::Source { +        write_bytes_and_exit(merged_source.as_bytes(), destination.as_ref());      }      // ----------------------------------------------------------------------- -    // Parse syntactic tokens from merged source code.      
let path = Some("<merged source>"); -    let syntactic_tokens = SyntacticParser::new(&merged_source, path).parse(); -    report_syntactic_errors(&syntactic_tokens, &merged_source); +    let syntactic = match parse_syntactic(&merged_source, path) { +        Ok(tokens) => tokens, +        Err(errors) => { +            report_syntactic_errors(&errors, &merged_source); +            std::process::exit(1); +        } +    }; -    let program = SemanticParser::new(syntactic_tokens).parse(); -    report_semantic_errors(&program, &merged_source); +    let semantic = match parse_semantic(syntactic) { +        Ok(tokens) => tokens, +        Err(errors) => { +            report_semantic_errors(&errors, &merged_source); +            std::process::exit(1); +        } +    }; -    // program.print_definitions(); -    let assembled_tokens = program.assemble(); -    report_assembler_errors(&assembled_tokens, &merged_source); +    let intermediate = match parse_intermediate(semantic) { +        Ok(tokens) => tokens, +        Err(errors) => { +            report_intermediate_errors(&errors, &merged_source); +            std::process::exit(1); +        } +    }; + +    let segments = match parse_bytecode(intermediate, width) { +        Ok(segments) => segments, +        Err(errors) => { +            report_bytecode_errors(&errors, &merged_source); +            std::process::exit(1); +        } +    }; -    let bytecode = BytecodeGenerator::new(&assembled_tokens).generate(); -    report_bytecode_errors(&bytecode, &merged_source);      if !dry_run { -        match format { -            Format::Debug => { -                let mut output = String::new(); -                for word in &bytecode.words { -                    output.push_str(&word.to_string()); -                    output.push('\n'); -                } -                write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); -            } -            Format::Inhx => { -                let output = 
format_inhx(&bytecode.words); -                write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); -            } -            Format::Inhx32 => { -                let output = format_inhx32(&bytecode.words); -                write_bytes_and_exit(output.as_bytes(), destination_path.as_ref()); -            } -            Format::Raw => { -                let mut output = Vec::new(); -                for word in &bytecode.words { -                    let value = word.value as u16; -                    output.extend(value.to_be_bytes()); -                } -                write_bytes_and_exit(&output, destination_path.as_ref()); -            } -            Format::Source => unreachable!(), +        let result = match format { +            Format::Debug => format_debug(&segments), +            Format::Inhx => format_inhx(&segments), +            Format::Inhx32 => format_inhx32(&segments), +            Format::Raw => format_raw(&segments, width), +            Format::Source => unreachable!("Source output is handled before merged assembly"), +        }; +        match result { +            Ok(bytes) => write_bytes_and_exit(&bytes, destination.as_ref()), +            Err(error) => report_format_error(&error, format, &merged_source),          }      }  } @@ -151,26 +211,3 @@ fn write_bytes_and_exit<P: AsRef<Path>>(bytes: &[u8], path: Option<&P>) -> ! 
{      }      std::process::exit(0);  } - -#[derive(PartialEq)] -enum Format { -    Debug, -    Inhx, -    Inhx32, -    Raw, -    Source, -} - -impl FromStr for Format { -    type Err = (); -    fn from_str(string: &str) -> Result<Self, ()> { -        match string { -            "debug" => Ok(Self::Debug), -            "inhx" => Ok(Self::Inhx), -            "inhx32" => Ok(Self::Inhx32), -            "raw" => Ok(Self::Raw), -            "source" => Ok(Self::Source), -            _ => Err(()), -        } -    } -} diff --git a/src/compiler.rs b/src/compiler.rs index 10f1433..c0caae0 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1,5 +1,9 @@  use crate::*; +use assembler::*; +use assembler::DefinitionType::*; +use assembler::SymbolRole::*; +  /// Compiles multiple source code files into one.  pub struct Compiler { @@ -50,82 +54,184 @@ impl Compiler {          self.resolver.error()      } -    pub fn get_compiled_source(&self) -> Result<String, MergeError> { +    pub fn get_compiled_source(&mut self) -> Result<String, MergeError> { +        self.resolver.calculate_hierarchy();          self.resolver.get_merged_source_code(push_source_code)      }  }  /// Parse all symbols from a source code string. 
-fn parse_symbols(source_code: &str, path: Option<&Path>) -> Vec<Symbol> { -    use SyntacticTokenVariant as SynVar; -    use DefinitionType::*; -    use SymbolRole::*; -    let mut symbols = Vec::new(); -    let mut macro_name: Option<String> = None; -    let mut parse_arg_list = false;   // true if parsing macro argument list -    let mut after_separator = false;  // true if prev token was separator +fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> { +    let syntactic = match parse_syntactic(source_code, path) { +        Ok(syntactic) => syntactic, +        Err(_errors) => return None, +    }; +    let semantic = match parse_semantic(syntactic) { +        Ok(semantic) => semantic, +        Err(_errors) => return None, +    }; +    Some(SymbolParser::new().parse(&semantic)) +} -    macro_rules! push { -        ($name:expr, $source:expr, $role:expr) => { -            symbols.push(Symbol { -                name: $name, -                source: $source, -                role: $role, -                namespace: match ¯o_name { -                    Some(name) => vec![name.to_owned()], -                    None => vec![], -                } -            }) + +// Extract symbol definitions from a list of semantic tokens. 
+pub struct SymbolParser { +    pub macro_name: Option<String>, +    pub symbols: Vec<Symbol>, +} + +impl SymbolParser { +    pub fn new() -> Self { +        Self { +            macro_name: None, +            symbols: Vec::new(),          }      } -    let syntactic_tokens = SyntacticParser::new(&source_code, path).parse(); -    for token in syntactic_tokens { -        match token.variant { -            SynVar::MacroDefinition(name) => { -                push!(name.clone(), token.source, Definition(MustPrecedeReference)); -                macro_name = Some(name); -                parse_arg_list = true; +    fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) { +        let name = name.to_string(); +        let namespace = match &self.macro_name { +            Some(macro_name) => vec![macro_name.to_owned()], +            None => vec![], +        }; +        let source = source.to_owned(); +        self.symbols.push(Symbol { name, namespace, source, role }); + +    } + +    pub fn parse(mut self, semantic: &[Tracked<SemanticToken>]) -> Vec<Symbol> { +        for token in semantic { +            let source = &token.source; +            match &token.value { +                SemanticToken::MacroDefinition(definition) => { +                    // Record macro definition. 
+                    self.record_symbol( +                        &definition.name, +                        &definition.name.source, +                        Definition(MustPrecedeReference), +                    ); +                    self.macro_name = Some(definition.name.to_string()); + +                    for argument in &definition.arguments { +                        self.record_symbol( +                            &argument.name, +                            &argument.source, +                            Definition(MustPrecedeReference), +                        ); +                    } +                    match &definition.body { +                        MacroDefinitionBody::Integer(integer) => { +                            self.parse_integer_token(&integer, &integer.source) +                        } +                        MacroDefinitionBody::Invocation(invocation) => { +                            self.parse_invocation(&invocation, &invocation.source) +                        } +                        MacroDefinitionBody::Block(tokens) => { +                            for token in tokens { +                                self.parse_block_token(&token, &token.source); +                            } +                        } +                    } +                    self.macro_name = None; +                } +                SemanticToken::BlockToken(token) => { +                    self.parse_block_token(token, &source); +                }              } -            SynVar::MacroDefinitionTerminator => { -                macro_name = None; +        } +        return self.symbols; +    } + +    fn parse_expression(&mut self, expression: &Expression, _source: &SourceSpan) { +        for token in &expression.tokens { +            let source = &token.source; +            match &token.value { +                ExpressionToken::IntegerToken(integer) => { +                    self.parse_integer_token(integer, source); +                } +           
     ExpressionToken::Invocation(invocation) => { +                    self.parse_invocation(invocation, source); +                } +                ExpressionToken::Operator(_) => (),              } -            SynVar::LabelDefinition(name) => { -                push!(name.clone(), token.source, Definition(CanFollowReference)); +        } +    } + +    fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) { +        self.record_symbol( +            &invocation.name, +            &source, +            Reference, +        ); + +        for argument in &invocation.arguments { +            let source = &argument.source; +            match &argument.value { +                InvocationArgument::IntegerToken(integer) => { +                    self.parse_integer_token(integer, &source); +                } +                InvocationArgument::BlockToken(block) => { +                    self.parse_block_token(block, &source); +                } +                InvocationArgument::Invocation(invocation) => { +                    self.parse_invocation(invocation, &source); +                } +                InvocationArgument::String(_) => (),              } -            SynVar::Symbol(name) => if parse_arg_list && after_separator { -                push!(name, token.source, Definition(MustPrecedeReference)); -            } else { -                parse_arg_list = false; -                push!(name, token.source, Reference); +        } +    } + +    fn parse_block_token(&mut self, token: &BlockToken, source: &SourceSpan) { +        match token { +            BlockToken::LabelDefinition(name) => { +                self.record_symbol( +                    &name, +                    &source, +                    Definition(CanFollowReference), +                );              } -            SynVar::Separator => { -                after_separator = true; -                continue; +            BlockToken::PinnedAddress(integer) => { +                
self.parse_integer_token(integer, &integer.source);              } -            SynVar::BlockOpen | SynVar::BlockClose => { -                continue; +            BlockToken::ConditionalBlock(condition) => { +                self.parse_integer_token(&condition.predicate, &condition.predicate.source); +                self.parse_block_token(&condition.body, &condition.body.source);              } -            SynVar::PackedBinaryLiteral(pbl) => { -                for field in pbl.fields { -                    push!(field.name.to_string(), field.source, Reference) +            BlockToken::WordTemplate(word_template) => { +                for field in &word_template.fields { +                    self.record_symbol( +                        &field.name.to_string(), +                        &field.source, +                        Reference, +                    );                  }              } -            SynVar::Expression(expr) => { -                for token in expr.tokens { -                    if let ExpressionTokenVariant::Invocation(name) = token.variant { -                        push!(name, token.source, Reference); -                    } +            BlockToken::Block(tokens) => { +                for token in tokens { +                    self.parse_block_token(token, &token.source);                  }              } -            _ => () -        }; -        after_separator = false; +            BlockToken::Invocation(invocation) => { +                self.parse_invocation(invocation, source); +            } +        } +    } + +    fn parse_integer_token(&mut self, token: &IntegerToken, source: &SourceSpan) { +        match &token { +            IntegerToken::Expression(expression) => { +                self.parse_expression(&expression, source) +            } +            IntegerToken::Invocation(invocation) => { +                self.parse_invocation(&invocation, source) +            } +            IntegerToken::IntegerLiteral(_) => (), +        }    
  } -    return symbols;  } +  /// Push source code to a source compilation string.  fn push_source_code(compilation: &mut String, source_file: &SourceFile) {      // Skip blank files. diff --git a/src/formats/debug.rs b/src/formats/debug.rs new file mode 100644 index 0000000..23fd34f --- /dev/null +++ b/src/formats/debug.rs @@ -0,0 +1,18 @@ +use crate::*; + + +pub fn format_debug(segments: &[Segment]) -> Result<Vec<u8>, FormatError> { +    let mut output = String::new(); +    for segment in segments { +        // Find maximum width of all words in the segment. +        let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0); +        let address = &segment.address; +        output.push_str(&format!("SEGMENT: 0x{address:>04x}\n")); +        for word in &segment.words { +            let string = word.to_string(); +            let w = width as usize; +            output.push_str(&format!("  {string:>w$}\n")); +        } +    } +    return Ok(output.as_bytes().to_vec()); +} diff --git a/src/formats/inhx.rs b/src/formats/inhx.rs index e83e870..fc4791b 100644 --- a/src/formats/inhx.rs +++ b/src/formats/inhx.rs @@ -1,10 +1,15 @@  use crate::*; -pub fn format_inhx(words: &[Word]) -> String { +pub fn format_inhx(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {      let mut records = Vec::new(); -    for (i, chunk) in words.chunks(16).enumerate() { -        records.push(data_record(chunk, (i * 16) as u16)); +    let mut address; +    for segment in segments { +        address = segment.address; +        for chunk in segment.words.chunks(16) { +            records.push(data_record(chunk, address)?); +            address += 16; +        }      }      records.push(terminating_record()); @@ -12,21 +17,24 @@ pub fn format_inhx(words: &[Word]) -> String {      for record in records {          output.push_str(&record.to_string());      } -    return output; +    return Ok(output.as_bytes().to_vec());  } -fn data_record(words: &[Word], 
address: u16) -> InhxRecord { +fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> { +    let Ok(address) = u16::try_from(address) else { +        return Err(FormatError::AddressTooLarge(u16::MAX as usize, address)); +    };      let mut record = InhxRecord::new();      record.byte((words.len()) as u8);      record.be_double(address);      record.byte(0x00);      for word in words { -        match word.bits <= 8 { -            true => record.byte(word.value as u8), -            false => panic!("Word '{word}' has more than 8 bits."), -        }; +        if word.value.width > 8 { +            return Err(FormatError::WordTooWide(8, word.width, word.source.clone())); +        } +        record.byte(word.value.value as u8);      } -    return record; +    return Ok(record);  }  fn terminating_record() -> InhxRecord { diff --git a/src/formats/inhx32.rs b/src/formats/inhx32.rs index fd7fd7b..8febeae 100644 --- a/src/formats/inhx32.rs +++ b/src/formats/inhx32.rs @@ -1,11 +1,19 @@  use crate::*; -pub fn format_inhx32(words: &[Word]) -> String { +pub fn format_inhx32(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {      let mut records = Vec::new(); -    records.push(extended_linear_address(0x0000)); -    for (i, chunk) in words.chunks(8).enumerate() { -        records.push(data_record(chunk, (i * 8) as u16)); +    let mut address = 0; +    records.push(extended_linear_address(0)); +    for segment in segments { +        if (segment.address >> 16) != (address >> 16) { +            records.push(extended_linear_address(segment.address)); +        } +        address = segment.address; +        for chunk in segment.words.chunks(8) { +            records.push(data_record(chunk, address)?); +            address += 8; +        }      }      records.push(terminating_record()); @@ -13,24 +21,29 @@ pub fn format_inhx32(words: &[Word]) -> String {      for record in records {          output.push_str(&record.to_string());      } -    
return output; +    return Ok(output.as_bytes().to_vec());  } -fn data_record(words: &[Word], address: u16) -> InhxRecord { +fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> { +    let Ok(address) = u32::try_from(address * 2) else { +        return Err(FormatError::AddressTooLarge(u32::MAX as usize / 2, address)); +    }; +    let address = address as u16;      let mut record = InhxRecord::new();      record.byte((words.len() * 2) as u8); -    record.be_double(address * 2); +    record.be_double(address);      record.byte(0x00);      for word in words { -        match word.bits <= 16 { -            true => record.le_double(word.value as u16), -            false => panic!("Word '{word}' has more than 16 bits."), -        }; +        if word.value.width > 16 { +            return Err(FormatError::WordTooWide(16, word.width, word.source.clone())); +        } +        record.le_double(word.value.value as u16);      } -    return record; +    return Ok(record);  } -fn extended_linear_address(address: u16) -> InhxRecord { +fn extended_linear_address(address: usize) -> InhxRecord { +    let address = (address >> 16) as u16;      let mut record = InhxRecord::new();      record.byte(0x02);      record.be_double(0x0000); diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 82f19f1..132001a 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -1,8 +1,78 @@  mod inhx;  mod inhx32; +mod raw; +mod debug;  pub use inhx::*;  pub use inhx32::*; +pub use raw::*; +pub use debug::*; + +use crate::*; + +use log::*; + + +#[derive(Clone, Copy, PartialEq)] +pub enum Format { +    Debug, +    Inhx, +    Inhx32, +    Raw, +    Source, +} + +impl Format { +    pub fn from_str(string: &str) -> Self { +        match string { +            "debug" => Self::Debug, +            "inhx" => Self::Inhx, +            "inhx32" => Self::Inhx32, +            "raw" => Self::Raw, +            "source" => Self::Source, +            _ => fatal!("Unknown 
format '{string}', expected 'debug', 'inhx', 'inhx32', 'raw', or 'source'. "), +        } +    } +} + +impl std::fmt::Display for Format { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let string = match self { +            Self::Debug => "debug", +            Self::Inhx => "inhx", +            Self::Inhx32 => "inhx32", +            Self::Raw => "raw", +            Self::Source => "source", +        }; +        write!(f, "{string}") +    } +} + + +pub enum FormatError { +    /// (expected, received) +    AddressTooLarge(usize, usize), +    /// (expected, received) +    WordTooWide(u32, u32, SourceSpan), +    /// +    ExpectedFixedWidth, +} + +pub fn report_format_error(error: &FormatError, format: Format, source_code: &str) { +    match error { +        FormatError::AddressTooLarge(expected, received) => +            error!("The {format} format requires that addresses do not exceed {expected}, but the address {received} was reached"), +        FormatError::WordTooWide(expected, received, source) => { +            let message = format!("The {format} format requires that words are no wider than {expected} bits, but a {received} bit word was found"); +            let context = Context { source_code, source }; +            report_source_issue(LogLevel::Error, &context, &message); +        } +        FormatError::ExpectedFixedWidth => +            error!("The {format} format requires all words to be the same width"), +    } +    std::process::exit(1); +} +  pub struct InhxRecord { @@ -43,3 +113,20 @@ impl InhxRecord {          format!(":{output}{checksum:0>2X}\n")      }  } + + +pub fn calculate_fixed_width(segments: &[Segment]) -> Option<u32> { +    let mut width = None; +    for segment in segments { +        for word in &segment.words { +            let word_width = word.value.width; +            match width { +                Some(width) => if word_width != width { +                    return None; +                } +     
           None => width = Some(word_width), +            } +        } +    } +    return width.or(Some(0)); +} diff --git a/src/formats/raw.rs b/src/formats/raw.rs new file mode 100644 index 0000000..ecc6473 --- /dev/null +++ b/src/formats/raw.rs @@ -0,0 +1,29 @@ +use crate::*; + + +pub fn format_raw(segments: &[Segment], width: Option<u32>) -> Result<Vec<u8>, FormatError> { +    let Some(width) = width.or_else(|| calculate_fixed_width(&segments)) else { +        return Err(FormatError::ExpectedFixedWidth); +    }; + +    let mut address = 0; +    let bytes_per_word = ((width + 7) / 8) as usize; +    let mut bytes = Vec::new(); + +    for segment in segments { +        // Pad to the segment start address. +        let padding = segment.address.saturating_sub(address); +        bytes.resize(bytes.len() + (padding * bytes_per_word), 0); +        for word in &segment.words { +            // Decompose word value into bytes. +            let value = word.value.value; +            for i in (0..bytes_per_word).rev() { +                let byte = (value >> (i*8) & 0xff) as u8; +                bytes.push(byte); +            } +            address += 1; +        } +    } + +    return Ok(bytes); +} @@ -1,13 +1,14 @@ -mod compiler; -mod parsers; -mod report; -mod tokens; +mod stages; +mod types;  mod formats; +mod compiler; -pub use compiler::*; -pub use parsers::*; -pub use report::*; -pub use tokens::*; +pub use stages::*; +pub use types::*;  pub use formats::*; +pub use compiler::*; + +use assembler::{Context, Tracked, SourceSpan, report_source_issue}; +use log::LogLevel; -pub use assembler::*; +use std::path::{PathBuf}; diff --git a/src/parsers/assembler.rs b/src/parsers/assembler.rs deleted file mode 100644 index 61e1a84..0000000 --- a/src/parsers/assembler.rs +++ /dev/null @@ -1,290 +0,0 @@ -use crate::*; -use AssemblerErrorVariant as ErrVar; - -use indexmap::IndexMap; - - -static mut ID: usize = 0; -macro_rules! 
new_id { -    () => { unsafe { -        let id = ID; -        ID += 1; -        id -    }}; -} - - -impl SemanticProgram { -    pub fn assemble(&self) -> Vec<AssembledToken> { -        let environment = Environment { -            macro_definitions: &self.macro_definitions, -            label_definitions: &self.label_definitions, -            arguments: &IndexMap::new(), -            id: new_id!(), -        }; -        let mut assembled_tokens = Vec::new(); -        for token in &self.body { -            let tokens = environment.reify_semantic_token(token); -            assembled_tokens.extend(tokens); -        } -        return assembled_tokens; -    } -} - - -pub struct Environment<'a> { -    pub macro_definitions: &'a IndexMap<String, MacroDefinition>, -    pub label_definitions: &'a IndexMap<String, LabelDefinition>, -    pub arguments: &'a IndexMap<String, Argument>, -    pub id: usize, -} - -impl<'a> Environment<'a> { -    // This is only ever called for the highest level body tokens, never for invocations. 
-    fn reify_semantic_token(&self, token: &SemanticToken) -> Vec<AssembledToken> { -        let mut assembled_tokens = Vec::new(); -        match token { -            SemanticToken::Word(pbl) => { -                let word = self.reify_packed_binary_literal(pbl); -                assembled_tokens.push(AssembledToken::Word(word)); -            } -            SemanticToken::Invocation(invocation) => { -                match self.reify_invocation(invocation) { -                    Ok(argument) => match argument { -                        Argument::Block(block) => assembled_tokens.extend(block), -                        Argument::Integer(_) => { -                            let variant = AssemblerErrorVariant::NotABlock; -                            let source = invocation.source.clone(); -                            let error = AssemblerError { source, variant }; -                            assembled_tokens.push(AssembledToken::Error(error)) -                        } -                    } -                    Err(error) => assembled_tokens.push(AssembledToken::Error(error)), -                } -            } -            SemanticToken::LabelDefinition(definition) => { -                assembled_tokens.push(AssembledToken::LabelDefinition(definition.clone())); -            } -            SemanticToken::PinnedAddress(address) => { -                assembled_tokens.push(AssembledToken::PinnedAddress(address.clone())); -            } -            SemanticToken::Error(_) => (), -        } -        return assembled_tokens; -    } - -    fn reify_packed_binary_literal(&self, pbl: &PackedBinaryLiteral) -> AssembledWord { -        let mut assembled_fields = Vec::new(); -        let mut errors = Vec::new(); -        for field in &pbl.fields { -            let name = field.name.to_string(); -            match self.reify_integer_reference(&name, &field.source) { -                Ok(value) => assembled_fields.push( -                    AssembledField { -                        
source: field.source.clone(), -                        value, -                        bits: field.bits, -                        shift: field.shift, -                    } -                ), -                Err(error) => errors.push(error), -            }; -        } -        let source = pbl.source.clone(); -        let value = pbl.value; -        let bits = pbl.bits; -        AssembledWord { source, bits, fields: assembled_fields, value, errors } -    } - -    fn reify_integer_reference(&self, name: &str, source: &SourceSpan) -> Result<IntegerArgument, AssemblerError> { -        match self.reify_reference(name, source)? { -            Argument::Integer(integer) => Ok(integer), -            Argument::Block(_) => Err( -                AssemblerError { -                    source: source.clone(), -                    variant: ErrVar::NotAnInteger, -                } -            ), -        } -    } - -    fn reify_reference(&self, name: &str, source: &SourceSpan) -> Result<Argument, AssemblerError> { -        let source = source.clone(); -        if let Some(argument) = self.arguments.get(name) { -            Ok(argument.clone()) -        } else if let Some(definition) = self.macro_definitions.get(name) { -            self.reify_value(&definition.value) -        } else if let Some(label) = self.label_definitions.get(name) { -            let name = Tracked::from(self.tag_label_name(&label.name), source); -            Ok(Argument::Integer(IntegerArgument::LabelReference(name))) -        } else { -            let variant = ErrVar::DefinitionNotFound(name.to_string()); -            Err(AssemblerError { source, variant }) -        } -    } - -    fn tag_label_name(&self, name: &str) -> String { -        match name.contains(':') { -            true => format!("{name}:{}", self.id), -            false => name.to_string(), -        } -    } - -    fn reify_value(&self, value: &Value) -> Result<Argument, AssemblerError> { -        match value { -            
Value::Integer(integer) => { -                let value = match &integer { -                    Integer::Literal(integer) => { -                        IntegerArgument::Integer(integer.clone()) -                    } -                    Integer::Expression(expr) => { -                        let expr = self.reify_constant_expression(expr)?; -                        IntegerArgument::Expression(expr) -                    } -                    Integer::LabelReference(name) => { -                        let name = Tracked::from(self.tag_label_name(name), name.source.clone()); -                        IntegerArgument::LabelReference(name) -                    } -                    Integer::String(string) => { -                        IntegerArgument::String(string.clone()) -                    } -                }; -                Ok(Argument::Integer(value)) -            } -            Value::Block(block) => { -                let mut assembled_tokens = Vec::new(); -                for token in block { -                    match &token { -                        SemanticToken::Word(pbl) => { -                            let word = self.reify_packed_binary_literal(pbl); -                            assembled_tokens.push(AssembledToken::Word(word)); -                        } -                        SemanticToken::Invocation(invocation) => { -                            match self.reify_invocation(invocation)? 
{ -                                Argument::Block(block) => assembled_tokens.extend(block), -                                Argument::Integer(_) => { -                                    let source = invocation.source.clone(); -                                    let variant = AssemblerErrorVariant::IntegerInBlock; -                                    return Err(AssemblerError { source, variant}); -                                } -                            } -                        } -                        SemanticToken::LabelDefinition(definition) => { -                            let mut definition = definition.clone(); -                            definition.name.push_str(&format!(":{}", self.id)); -                            let token = AssembledToken::LabelDefinition(definition); -                            assembled_tokens.push(token); -                        } -                        SemanticToken::PinnedAddress(address) => { -                            let token = AssembledToken::PinnedAddress(address.to_owned()); -                            assembled_tokens.push(token); -                        } -                        SemanticToken::Error(_) => (), -                    } -                } -                Ok(Argument::Block(assembled_tokens)) -            } -            Value::Invocation(invocation) => { -                self.reify_invocation(invocation) -            } -        } -    } - -    fn reify_invocation(&self, invocation: &Invocation) -> Result<Argument, AssemblerError> { -        macro_rules! 
err { -            ($variant:expr) => { Err(AssemblerError { -                source: invocation.source.clone(), variant: $variant -            }) }; -        } -        if let Some(argument) = self.arguments.get(&invocation.name) { -            let expected = 0; -            let received = invocation.arguments.len(); -            if received != expected { -                return err!(ErrVar::IncorrectArgumentCount(expected, received)); -            } -            Ok(argument.clone()) -        } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { -            // Check that the correct number of arguments were provided. -            let received = invocation.arguments.len(); -            let expected = definition.arguments.len(); -            if received != expected { -                return err!(ErrVar::IncorrectArgumentCount(expected, received)); -            } -            let mut arguments = IndexMap::new(); -            for (i, argument) in invocation.arguments.iter().enumerate() { -                // Check that the correct types of arguments were provided. 
-                let arg_invocation = self.reify_value(&argument.value)?; -                let arg_invocation_type = match &arg_invocation { -                    Argument::Integer(_) => ArgumentVariant::Integer, -                    Argument::Block(_) => ArgumentVariant::Block, -                }; -                let arg_definition_type = definition.arguments[i].variant; -                if arg_invocation_type != arg_definition_type { -                    let variant = ErrVar::IncorrectArgumentType( -                        arg_definition_type, arg_invocation_type -                    ); -                    return Err(AssemblerError { source: argument.source.clone(), variant }); -                } -                let name = definition.arguments[i].name.clone(); -                arguments.insert(name, arg_invocation); -            } -            let environment = Environment { -                macro_definitions: &self.macro_definitions, -                label_definitions: &self.label_definitions, -                arguments: &arguments, -                id: new_id!(), -            }; -            environment.reify_value(&definition.value) -        } else if let Some(label) = self.label_definitions.get(&invocation.name) { -            let expected = 0; -            let received = invocation.arguments.len(); -            if received != expected { -                return err!(ErrVar::IncorrectArgumentCount(expected, received)); -            } -            let name = Tracked::from(self.tag_label_name(&label.name), label.source.clone()); -            Ok(Argument::Integer(IntegerArgument::LabelReference(name))) -        } else { -            err!(ErrVar::DefinitionNotFound(invocation.name.to_string())) -        } -    } - -    fn reify_constant_expression(&self, expr: &Expression) -> Result<AssembledExpression, AssemblerError> { -        use ExpressionTokenVariant as ExprVar; - -        let mut assembled_tokens = Vec::new(); -        for token in &expr.tokens { -        
    let assembled_token = match &token.variant { -                ExprVar::Literal(value) => { -                    let source = token.source.clone(); -                    let integer = TrackedInteger { source, value: *value }; -                    AssembledExpressionToken::Integer(integer) -                } -                ExprVar::Operator(operator) => { -                    AssembledExpressionToken::Operator(*operator) -                } -                ExprVar::Invocation(name) => { -                    match self.reify_integer_reference(&name, &token.source)? { -                        IntegerArgument::LabelReference(name) => { -                            AssembledExpressionToken::LabelReference(name) -                        } -                        IntegerArgument::Integer(integer) => { -                            AssembledExpressionToken::Integer(integer) -                        } -                        IntegerArgument::Expression(expr) => { -                            AssembledExpressionToken::Expression(Box::new(expr)) -                        }, -                        IntegerArgument::String(string) => { -                            let source = string.source.clone(); -                            let variant = AssemblerErrorVariant::StringInExpression; -                            return Err(AssemblerError { source, variant }) -                        } -                    } -                } -                ExprVar::Error(_) => continue, -            }; -            assembled_tokens.push(assembled_token); -        } -        Ok(AssembledExpression { source: expr.source.clone(), tokens: assembled_tokens }) -    } -} - diff --git a/src/parsers/bytecode.rs b/src/parsers/bytecode.rs deleted file mode 100644 index ed16e22..0000000 --- a/src/parsers/bytecode.rs +++ /dev/null @@ -1,191 +0,0 @@ -use crate::*; - -use std::collections::HashMap; - - -pub struct BytecodeGenerator<'a> { -    tokens: &'a [AssembledToken], -    addresses: 
HashMap<String, Tracked<usize>>, -    words: Vec<Word>, -    errors: Vec<BytecodeError>, -} - -impl<'a> BytecodeGenerator<'a> { -    pub fn new(tokens: &'a [AssembledToken]) -> Self { -        Self { -            tokens, -            addresses: HashMap::new(), -            words: Vec::new(), -            errors: Vec::new(), -        } -    } - -    pub fn generate(mut self) -> Bytecode { -        self.calculate_addresses(); -        for token in self.tokens { -            match token { -                AssembledToken::Word(assembled_word) => { -                    self.assemble_word(assembled_word); -                } -                AssembledToken::PinnedAddress(pinned) => { -                    if self.words.len() > pinned.address { -                        let variant = BytecodeErrorVariant::PinnedAddressBacktrack( -                            pinned.address, self.words.len()); -                        let source = pinned.source.clone(); -                        self.errors.push(BytecodeError { source, variant }); -                    } else { -                        self.words.resize(pinned.address, Word { bits: 0, value: 0}); -                    } -                } -                AssembledToken::LabelDefinition(_) => (), -                AssembledToken::Error(_) => (), -            } -        } - -        return Bytecode { -            words: self.words, -            errors: self.errors, -        } -    } - -    fn calculate_addresses(&mut self) { -        let mut i = 0; -        for token in self.tokens { -            match token { -                AssembledToken::LabelDefinition(definition) => { -                    let address = Tracked::from(i, definition.source.clone()); -                    if let Some(_) = self.addresses.insert(definition.name.clone(), address) { -                        let name = definition.name.clone(); -                        let variant = BytecodeErrorVariant::DuplicateLabelDefinition(name); -                        let 
source = definition.source.clone(); -                        self.errors.push(BytecodeError { source, variant }); -                    } -                } -                AssembledToken::Word(word) => { -                    i += word.count(); -                } -                AssembledToken::PinnedAddress(pinned) => { -                    i = pinned.address; -                } -                AssembledToken::Error(_) => (), -            } -        } -    } - -    fn resolve_expression(&mut self, expr: &AssembledExpression) -> isize { -        let mut stack = Vec::new(); -            macro_rules! push { -                ($value:expr) => { stack.push($value) }; -            } -            macro_rules! pop { -                ($name:ident) => { let $name = match stack.pop() { -                    Some(value) => value, -                    None => { -                        let variant = BytecodeErrorVariant::StackUnderflow; -                        self.errors.push(BytecodeError { source: expr.source.clone(), variant }); -                        return 0; -                    }, -                }; }; -            } -            macro_rules! 
truth { -                ($bool:expr) => { match $bool { true => 1, false => 0 } }; -            } - -            for token in &expr.tokens { -                match &token { -                    AssembledExpressionToken::Integer(value) => { -                        push!(value.value) -                    } -                    AssembledExpressionToken::LabelReference(name) => { -                        push!(self.resolve_label_reference(name)) -                    } -                    AssembledExpressionToken::Expression(expr) => { -                        push!(self.resolve_expression(expr)) -                    } -                    AssembledExpressionToken::Operator(operator) => match operator { -                        Operator::Equal            => { pop!(b); pop!(a); push!(truth!(a==b)) }, -                        Operator::NotEqual         => { pop!(b); pop!(a); push!(truth!(a!=b)) }, -                        Operator::LessThan         => { pop!(b); pop!(a); push!(truth!(a < b)) }, -                        Operator::GreaterThan      => { pop!(b); pop!(a); push!(truth!(a > b)) }, -                        Operator::LessThanEqual    => { pop!(b); pop!(a); push!(truth!(a <= b)) }, -                        Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) }, -                        Operator::Add              => { pop!(b); pop!(a); push!(a + b) }, -                        Operator::Subtract         => { pop!(b); pop!(a); push!(a - b) }, -                        Operator::LeftShift        => { pop!(b); pop!(a); push!(a << b) }, -                        Operator::RightShift       => { pop!(b); pop!(a); push!(a >> b) }, -                        Operator::And              => { pop!(b); pop!(a); push!(a & b) }, -                        Operator::Or               => { pop!(b); pop!(a); push!(a | b) }, -                        Operator::Xor              => { pop!(b); pop!(a); push!(a ^ b) }, -                        Operator::Not              => 
{          pop!(a); push!(!a) }, -                    } -                } -            } - -            let variant = match stack.len() { -                0 => BytecodeErrorVariant::NoReturnValue, -                1 => return stack[0], -                _ => BytecodeErrorVariant::MultipleReturnValues, -            }; -            self.errors.push(BytecodeError { source: expr.source.clone(), variant}); -            0 -    } - -    fn resolve_label_reference(&mut self, name: &Tracked<String>) -> isize { -        if let Some(address) = self.addresses.get(&name.value) { -            address.value as isize -        } else { -            let variant = BytecodeErrorVariant::DefinitionNotFound(name.value.clone()); -            self.errors.push(BytecodeError { source: name.source.clone(), variant }); -            0 -        } -    } - -    fn assemble_word(&mut self, assembled_word: &AssembledWord) { -        let mut field_values = Vec::new(); -        for field in &assembled_word.fields { -            match &field.value { -                IntegerArgument::Expression(expr) => { -                    let source = expr.source.clone(); -                    let value = self.resolve_expression(expr); -                    field_values.push(vec![Tracked::from(value, source)]) -                } -                IntegerArgument::LabelReference(name) => { -                    let source = name.source.clone(); -                    let value = self.resolve_label_reference(name); -                    field_values.push(vec![Tracked::from(value, source)]) -                } -                IntegerArgument::Integer(integer) => { -                    let source = integer.source.clone(); -                    let value = integer.value; -                    field_values.push(vec![Tracked::from(value, source)]) -                } -                IntegerArgument::String(string) => { -                    let values = string.chars.iter() -                        .map(|c| Tracked::from(c.value as 
isize, c.source.clone())) -                        .collect(); -                    field_values.push(values); -                } -            }; -        } -        for i in 0..assembled_word.count() { -            let mut value = assembled_word.value; -            for (f, field) in assembled_word.fields.iter().enumerate() { -                let (field_value, source) = match field_values[f].get(i) { -                    Some(tracked) => (tracked.value, Some(tracked.source.clone())), -                    None => (0, None), -                }; -                let bitcount = match field_value { -                    0 => 0, -                    _ => (field_value.ilog2() + 1) as usize, -                }; -                if field.bits < bitcount { -                    let variant = BytecodeErrorVariant::ValueTooLarge(field.bits, bitcount); -                    self.errors.push(BytecodeError { source: source.unwrap(), variant }); -                } else { -                    value |= (field_value << field.shift) as usize; -                } -            } -            self.words.push(Word { bits: assembled_word.bits, value }); -        } -    } -} diff --git a/src/parsers/expression.rs b/src/parsers/expression.rs deleted file mode 100644 index e938881..0000000 --- a/src/parsers/expression.rs +++ /dev/null @@ -1,52 +0,0 @@ -use crate::*; - - -pub fn parse_constant_expression(mut t: Tokeniser, source: SourceSpan) -> Expression { -    use ExpressionTokenVariant as TokenVar; -    use ExpressionParseError as ParseError; - -    let mut tokens = Vec::new(); - -    loop { -        t.eat_whitespace(); -        t.mark_start(); -        let token = t.eat_token(); -        if token.is_empty() { -            break; -        } - -        let variant = match token.as_str() { -            "=" => TokenVar::Operator(Operator::Equal), -            "!=" => TokenVar::Operator(Operator::NotEqual), -            "<" => TokenVar::Operator(Operator::LessThan), -            ">" => 
TokenVar::Operator(Operator::GreaterThan), -            "<=" => TokenVar::Operator(Operator::LessThanEqual), -            ">=" => TokenVar::Operator(Operator::GreaterThanEqual), -            "+" => TokenVar::Operator(Operator::Add), -            "-" => TokenVar::Operator(Operator::Subtract), -            "<<" => TokenVar::Operator(Operator::LeftShift), -            ">>" => TokenVar::Operator(Operator::RightShift), -            "&" => TokenVar::Operator(Operator::And), -            "|" => TokenVar::Operator(Operator::Or), -            "^" => TokenVar::Operator(Operator::Xor), -            "~" => TokenVar::Operator(Operator::Not), -            _ => if let Some(stripped) = token.strip_prefix("0x") { -                match usize::from_str_radix(stripped, 16) { -                    Ok(value) => TokenVar::Literal(value as isize), -                    Err(_) => TokenVar::Error( -                        ParseError::InvalidHexadecimalLiteral(stripped.to_string())), -                } -            } else { -                match usize::from_str_radix(&token, 10) { -                    Ok(value) => TokenVar::Literal(value as isize), -                    Err(_) => TokenVar::Invocation(token.to_string()), -                } -            } -        }; - -        let source = t.get_source(); -        tokens.push(ExpressionToken { source, variant }); -    } - -    return Expression { source, tokens }; -} diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs deleted file mode 100644 index da2c23a..0000000 --- a/src/parsers/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -mod expression; -mod packed_binary_literal; - -pub use expression::*; -pub use packed_binary_literal::*; - -mod syntactic; -mod semantic; -mod assembler; -mod bytecode; - -pub use syntactic::*; -pub use semantic::*; -pub use assembler::*; -pub use bytecode::*; diff --git a/src/parsers/packed_binary_literal.rs b/src/parsers/packed_binary_literal.rs deleted file mode 100644 index 18f8da7..0000000 --- 
a/src/parsers/packed_binary_literal.rs +++ /dev/null @@ -1,85 +0,0 @@ -use crate::*; - - -/// t is a Tokeniser over the characters of the PBL, excluding the leading hash. -pub fn parse_packed_binary_literal(mut t: Tokeniser, source: SourceSpan) -> PackedBinaryLiteral  { -    use PackedBinaryLiteralParseError as ParseError; -    use PackedBinaryLiteralParseErrorVariant as ParseErrorVar; - -    let mut value = 0; -    let mut bits = 0; -    let mut field_bits = 0; -    let mut name = '\0'; -    let mut fields: Vec<BitField> = Vec::new(); -    let mut errors: Vec<ParseError> = Vec::new(); - -    macro_rules! push_field { -        () => { -            if fields.iter().any(|f| f.name == name) { -                let variant = ParseErrorVar::DuplicateFieldName(name); -                errors.push(ParseError { source: t.get_source(), variant }); -            } else { -                fields.push(BitField { name, source: t.get_source(), bits: field_bits, shift: 0 }); -            } -        }; -    } - -    while let Some(c) = t.eat_char() { -        // Ignore underscores. -        if c == '_' { -            t.mark.undo(); -            continue; -        } - -        // Add a bit to the value; -        value <<= 1; -        bits += 1; -        for field in &mut fields { -            field.shift += 1; -        } - -        // Extend the current field. -        if c == name { -            field_bits += 1; -            continue; -        } - -        // Commit the current field. -        if field_bits > 0 { -            t.mark_end_prev(); -            push_field!(); -            field_bits = 0; -            name = '\0'; -        } - -        // Parse bit literals. 
-        if c == '0' { -            continue; -        } -        if c == '1' { -            value |= 1; -            continue; -        } - -        t.mark_start_prev(); -        if c.is_alphabetic() { -            name = c; -            field_bits = 1; -            continue; -        } else { -            let source = t.get_source(); -            let variant = ParseErrorVar::InvalidCharacter(c); -            errors.push(ParseError { source, variant }); -        } -    } - -    // Commit the final field. -    for field in &mut fields { -        field.shift += 1; -    } -    if field_bits > 0 { -        push_field!(); -    } - -    PackedBinaryLiteral { source, bits, value, fields, errors } -} diff --git a/src/parsers/semantic.rs b/src/parsers/semantic.rs deleted file mode 100644 index 00cfc80..0000000 --- a/src/parsers/semantic.rs +++ /dev/null @@ -1,352 +0,0 @@ -use crate::*; -use SyntacticTokenVariant as SynVar; - -use std::collections::VecDeque; - -use indexmap::IndexMap; - - -macro_rules! fn_is_syn_variant { -    ($name:ident, $variant:ty) => { paste::paste! { -        fn [< is_ $name >](token: &SyntacticToken) -> bool { -            match token.variant { $variant => true, _ => false, } -    } } }; } -fn_is_syn_variant!(block_open, SyntacticTokenVariant::BlockOpen); -fn_is_syn_variant!(block_close, SyntacticTokenVariant::BlockClose); -fn_is_syn_variant!(separator, SyntacticTokenVariant::Separator); -fn_is_syn_variant!(terminator, SyntacticTokenVariant::MacroDefinitionTerminator); - - -pub struct SemanticParser { -    tokens: Tokens, -    macro_definitions: IndexMap<String, MacroDefinition>, -    label_definitions: IndexMap<String, LabelDefinition>, -    body: Vec<SemanticToken>, -} - -impl SemanticParser { -    pub fn new(syntactic_tokens: Vec<SyntacticToken>) -> Self { -        // Gather all labels ahead of time. 
-        let mut label_definitions = IndexMap::new(); -        for token in &syntactic_tokens { -            if let SyntacticTokenVariant::LabelDefinition(name) = &token.variant { -                let definition = LabelDefinition { -                    source: token.source.clone(), -                    name: name.clone(), -                }; -                let None = label_definitions.insert(name.to_string(), definition) else { -                    unreachable!("Duplicate definition for label {name:?}"); -                }; -            } -        } -        Self { -            tokens: Tokens::new(syntactic_tokens), -            macro_definitions: IndexMap::new(), -            label_definitions, -            body: Vec::new(), -        } -    } - -    pub fn parse(mut self) -> SemanticProgram { -        while let Some(syn) = self.tokens.pop() { -            match syn.variant { -                SynVar::MacroDefinition(name) => { -                    let Ok(definition_tokens) = self.tokens.pull_until(is_terminator) else { -                        let variant = SemanticParseErrorVariant::UnterminatedMacroDefinition(name); -                        let error = SemanticParseError { source: syn.source, variant }; -                        self.body.push(SemanticToken::Error(error)); -                        break; -                    }; -                    let definition = MacroDefinitionParser::new(syn.source, definition_tokens).parse(); -                    let None = self.macro_definitions.insert(name.clone(), definition) else { -                        unreachable!("Duplicate definition for macro {name}"); -                    }; -                } -                SynVar::LabelDefinition(name) => { -                    let label_definition = LabelDefinition { source: syn.source, name }; -                    self.body.push(SemanticToken::LabelDefinition(label_definition)); -                } -                SynVar::PinnedAddress(address) => { -                    
let pinned_address = PinnedAddress { source: syn.source, address }; -                    self.body.push(SemanticToken::PinnedAddress(pinned_address)); -                } -                SynVar::Symbol(name) => { -                    let invocation = InvocationParser::new(name, syn.source, &mut self.tokens).parse(); -                    self.body.push(SemanticToken::Invocation(invocation)); -                } -                SynVar::PackedBinaryLiteral(pbl) => { -                    self.body.push(SemanticToken::Word(pbl)); -                } -                _ => { -                    let variant = SemanticParseErrorVariant::InvalidToken; -                    let error = SemanticParseError { source: syn.source, variant }; -                    self.body.push(SemanticToken::Error(error)); -                } -            } -        } - -        SemanticProgram { -            macro_definitions: self.macro_definitions, -            label_definitions: self.label_definitions, -            body: self.body, -        } -    } -} - - -pub struct MacroDefinitionParser { -    source: SourceSpan, -    tokens: Tokens, -    arguments: Vec<ArgumentDefinition>, -    errors: Vec<SemanticParseError>, -} - -impl MacroDefinitionParser { -    pub fn new(source: SourceSpan, tokens: Tokens) -> Self { -        Self { -            tokens, -            source, -            arguments: Vec::new(), -            errors: Vec::new(), -        } -    } - -    pub fn parse(mut self) -> MacroDefinition { -        while let Some(definition) = self.parse_argument_definition() { -            self.arguments.push(definition) -        } -        MacroDefinition { -            value: self.parse_body(), -            source: self.source, -            arguments: self.arguments, -            errors: self.errors, -        } -    } - -    fn parse_argument_definition(&mut self) -> Option<ArgumentDefinition> { -        // Only continue if the first token is a separator. 
-        self.tokens.pop_if(is_separator)?; - -        // Pop argument tokens. -        let is_block = match self.tokens.pop_if(is_block_open) { -            Some(_) => true, -            None => false, -        }; -        let token = self.tokens.pop(); -        if is_block { -            self.tokens.pop_if(is_block_close); -        } -        // Parse argument token. -        let token = token?; -        let source = token.source; -        if let SynVar::Symbol(name) = token.variant { -            let variant = match is_block { -                true => ArgumentVariant::Block, -                false => ArgumentVariant::Integer, -            }; -            Some(ArgumentDefinition { name, source, variant }) -        } else { -            let variant = SemanticParseErrorVariant::InvalidToken; -            self.errors.push(SemanticParseError { source, variant}); -            None -        } -    } - -    fn parse_body(&mut self) -> Value { -        // Attempt to parse an Integer. -        if self.tokens.len() == 1 { -            let token = self.tokens.pop().unwrap(); -            match token.variant { -                SynVar::IntegerLiteral(value) => { -                    let integer = TrackedInteger { source: token.source, value }; -                    return Value::Integer(Integer::Literal(integer)); -                } -                SynVar::Expression(expr) => { -                    return Value::Integer(Integer::Expression(expr)); -                } -                _ => (), -            } -            self.tokens.unpop(token); -        } -        // Parse a Block. -        let mut block = BlockParser::new(self.tokens.take()).parse(); -        // If the block contains a single invocation, unwrap it. 
-        if block.len() == 1 { -            match block.pop() { -                Some(SemanticToken::Invocation(invocation)) => return Value::Invocation(invocation), -                Some(other) => block.push(other), -                None => (), -            }; -        } -        return Value::Block(block); -    } -} - - -/// Parse an entire block, excluding delimiters. -pub struct BlockParser { -    tokens: Tokens, -    semantic_tokens: Vec<SemanticToken>, -} - -impl BlockParser { -    pub fn new(tokens: Tokens) -> Self { -        Self { tokens, semantic_tokens: Vec::new() } -    } - -    pub fn parse(mut self) -> Vec<SemanticToken> { -        while let Some(token) = self.tokens.pop() { -            let source = token.source; -            match token.variant { -                SynVar::Symbol(name) => { -                    let invocation = InvocationParser::new(name, source, &mut self.tokens).parse(); -                    self.semantic_tokens.push(SemanticToken::Invocation(invocation)); -                } -                SynVar::PackedBinaryLiteral(pbl) => { -                    self.semantic_tokens.push(SemanticToken::Word(pbl)); -                } -                SynVar::LabelDefinition(name) => { -                    let label_definition = LabelDefinition { source, name }; -                    self.semantic_tokens.push(SemanticToken::LabelDefinition(label_definition)); -                } -                _ => { -                    let variant = SemanticParseErrorVariant::InvalidToken; -                    let error = SemanticParseError { source, variant }; -                    self.semantic_tokens.push(SemanticToken::Error(error)); -                } -            } -        } -        return self.semantic_tokens; -    } -} - - -struct InvocationParser<'a> { -    name: String, -    source: SourceSpan, -    tokens: &'a mut Tokens, -    arguments: Vec<ArgumentInvocation>, -    errors: Vec<SemanticParseError>, -} - -impl<'a> InvocationParser<'a> { -    pub fn 
new(name: String, source: SourceSpan, tokens: &'a mut Tokens) -> Self { -        Self { name, source, tokens, arguments: Vec::new(), errors: Vec::new() } -    } - -    pub fn parse(mut self) -> Invocation { -        while let Some(argument) = self.parse_invocation_argument() { -            self.arguments.push(argument); -        } -        Invocation { -            name: self.name, -            source: self.source, -            arguments: self.arguments, -            errors: self.errors, -        } -    } - -    fn parse_invocation_argument(&mut self) -> Option<ArgumentInvocation> { -        // Only continue if the first token is a separator. -        self.tokens.pop_if(is_separator)?; - -        if let Some(block_open) = self.tokens.pop_if(is_block_open) { -            let source = block_open.source; -            let mut depth = 1; -            let is_matching_block_close = |token: &SyntacticToken| { -                match token.variant { -                    SyntacticTokenVariant::BlockOpen => { -                        depth += 1; false } -                    SyntacticTokenVariant::BlockClose => { -                        depth -= 1; depth == 0 } -                    _ => false, -                } -            }; -            if let Ok(block_tokens) = self.tokens.pull_until(is_matching_block_close) { -                let block = BlockParser::new(block_tokens).parse(); -                Some(ArgumentInvocation { source, value: Value::Block(block) }) -            } else { -                let variant = SemanticParseErrorVariant::UnterminatedBlock; -                self.errors.push(SemanticParseError { source, variant }); -                None -            } -        } else { -            let token = self.tokens.pop()?; -            let source = token.source; -            match token.variant { -                SynVar::Symbol(name) => { -                    let arguments = Vec::new(); -                    let errors = Vec::new(); -                    let invocation = 
Invocation { source: source.clone(), name, arguments, errors }; -                    let value = Value::Invocation(invocation); -                    Some(ArgumentInvocation { source, value }) -                } -                SynVar::IntegerLiteral(value) => { -                    let integer = TrackedInteger { source: source.clone(), value }; -                    let value = Value::Integer(Integer::Literal(integer)); -                    Some(ArgumentInvocation { source, value }) -                } -                SynVar::String(string) => { -                    let value = Value::Integer(Integer::String(string)); -                    Some(ArgumentInvocation { source, value }) -                } -                SynVar::Expression(expr) => { -                    let value = Value::Integer(Integer::Expression(expr)); -                    Some(ArgumentInvocation { source, value }) -                } -                _ => { -                    let variant = SemanticParseErrorVariant::InvalidToken; -                    self.errors.push(SemanticParseError { source, variant }); -                    None -                } -            } -        } -    } -} - - -pub struct Tokens { -    tokens: VecDeque<SyntacticToken>, -} - -impl Tokens { -    pub fn new<T: Into<VecDeque<SyntacticToken>>>(tokens: T) -> Self { -        Self { tokens: tokens.into() } -    } - -    pub fn pop(&mut self) -> Option<SyntacticToken> { -        self.tokens.pop_front() -    } - -    pub fn pop_if(&mut self, predicate: fn(&SyntacticToken) -> bool) -> Option<SyntacticToken> { -        match predicate(self.tokens.front()?) { -            true => self.tokens.pop_front(), -            false => None, -        } -    } - -    pub fn unpop(&mut self, token: SyntacticToken) { -        self.tokens.push_front(token); -    } - -    /// Pull tokens until the predicate returns true, otherwise return Err. 
-    pub fn pull_until(&mut self, mut predicate: impl FnMut(&SyntacticToken) -> bool) -> Result<Self, ()> { -        let mut output = VecDeque::new(); -        while let Some(token) = self.tokens.pop_front() { -            match predicate(&token) { -                true => return Ok(Self::new(output)), -                false => output.push_back(token), -            }; -        } -        return Err(()); -    } - -    pub fn take(&mut self) -> Self { -        Self { tokens: std::mem::take(&mut self.tokens) } -    } - -    pub fn len(&self) -> usize { -        self.tokens.len() -    } -} - diff --git a/src/parsers/syntactic.rs b/src/parsers/syntactic.rs deleted file mode 100644 index f3fcec1..0000000 --- a/src/parsers/syntactic.rs +++ /dev/null @@ -1,172 +0,0 @@ -use crate::*; - - -pub struct SyntacticParser { -    tokeniser: Tokeniser, -    tokens: Vec<SyntacticToken>, -    /// The name of the macro being parsed. -    macro_name: Option<String>, -    /// The name of the most recent label. 
-    label_name: String, -} - -impl SyntacticParser { -    pub fn new<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Self { -        let mut tokeniser = Tokeniser::new(source_code, path); -        tokeniser.add_delimiters(&['@','%',';',':','{','}','(','[','#','~']); -        Self { -            tokeniser, -            tokens: Vec::new(), -            macro_name: None, -            label_name: String::new(), -        } -    } - -    pub fn parse(mut self) -> Vec<SyntacticToken> { -        use SyntacticTokenVariant as SynVar; -        use SyntacticParseError as SynErr; -        let t = &mut self.tokeniser; - -        loop { -            t.eat_whitespace(); -            t.mark_start(); -            let Some(c) = t.eat_char() else { break }; -            let variant = match c { -                ':' => SynVar::Separator, -                '{' => SynVar::BlockOpen, -                '}' => SynVar::BlockClose, -                '@' => match &self.macro_name { -                    Some(_) => { -                        t.eat_token(); -                        SynVar::Error(SynErr::LabelInMacroDefinition) -                    } -                    None => { -                        self.label_name = t.eat_token(); -                        SynVar::LabelDefinition(self.label_name.clone()) -                    } -                } -                '&' => match &self.macro_name { -                    Some(macro_name) => { -                        let label_name = format!("{macro_name}:{}", t.eat_token()); -                        SynVar::LabelDefinition(label_name) -                    } -                    None => { -                        let label_name = &self.label_name; -                        let sublabel_name = format!("{label_name}/{}", t.eat_token()); -                        SynVar::LabelDefinition(sublabel_name) -                    } -                } -                '%' => { -                    let macro_name = t.eat_token(); -                    
self.macro_name = Some(macro_name.clone()); -                    SynVar::MacroDefinition(macro_name) -                } -                ';' => { -                    self.macro_name = None; -                    SynVar::MacroDefinitionTerminator -                } -                '[' => { -                    t.mark_child(); -                    match t.eat_to_delimiter(']') { -                        Some(_) => { -                            let child = t.subtokenise(); -                            t.mark_end(); -                            let expr = parse_constant_expression(child, t.get_source()); -                            SynVar::Expression(expr) -                        } -                        None => SynVar::Error(SynErr::UnterminatedExpression), -                    } -                } -                '"' => { -                    t.mark_child(); -                    match t.eat_to_delimiter('"') { -                        Some(string) => { -                            let child = t.subtokenise(); -                            t.mark_end(); -                            let chars = parse_tracked_chars(child); -                            let tracked_string = TrackedString { -                                source: t.get_source(), string, chars, -                            }; -                            SynVar::String(tracked_string) -                        } -                        None => SynVar::Error(SynErr::UnterminatedString), -                    } -                } -                '(' => match t.eat_to_delimiter(')') { -                    Some(string) => { -                        // Check if the comment fills the entire line. 
-                        if t.start.position.column == 0 && t.end_of_line() { -                            if let Some(path) = string.strip_prefix(": ") { -                                t.embedded_path = Some(PathBuf::from(path.trim())); -                                t.embedded_first_line = t.start.position.line + 1; -                            } -                        } -                        continue; -                    }, -                    None => SynVar::Error(SynErr::UnterminatedComment), -                } -                '|' => { -                    let token = t.eat_token(); -                    if let Some(hex_string) = token.strip_prefix("0x") { -                        match usize::from_str_radix(hex_string, 16) { -                            Ok(addr) => SynVar::PinnedAddress(addr), -                            Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), -                        } -                    } else { -                        match usize::from_str_radix(&token, 10) { -                            Ok(addr) => SynVar::PinnedAddress(addr), -                            Err(_) => SynVar::Error(SynErr::InvalidDecimalLiteral(token)), -                        } -                    } -                } -                '#' => { -                    t.mark_child(); -                    t.eat_token(); -                    let pbl = parse_packed_binary_literal(t.subtokenise(), t.get_source()); -                    SynVar::PackedBinaryLiteral(pbl) -                }, -                '~' => match &self.macro_name { -                    Some(macro_name) => { -                        let symbol_name = format!("{macro_name}:{}", t.eat_token()); -                        SynVar::Symbol(symbol_name) -                    } -                    None => { -                        let label_name = &self.label_name; -                        let symbol_name = format!("{label_name}/{}", t.eat_token()); -                        
SynVar::Symbol(symbol_name) -                    } -                } -                c => { -                    let token = format!("{c}{}", t.eat_token()); -                    if let Some(hex_string) = token.strip_prefix("0x") { -                        match usize::from_str_radix(hex_string, 16) { -                            Ok(value) => SynVar::IntegerLiteral(value as isize), -                            Err(_) => SynVar::Error(SynErr::InvalidHexadecimalLiteral(token)), -                        } -                    } else { -                        match usize::from_str_radix(&token, 10) { -                            Ok(value) => SynVar::IntegerLiteral(value as isize), -                            Err(_) => SynVar::Symbol(token), -                        } -                    } -                } -            }; - -            t.mark_end(); -            let source = t.get_source(); -            self.tokens.push(SyntacticToken { source, variant }); -        } - -        return self.tokens; -    } -} - - -fn parse_tracked_chars(mut t: Tokeniser) -> Vec<Tracked<char>> { -    let mut output = Vec::new(); -    while let Some(c) = t.eat_char() { -        output.push(Tracked::from(c, t.get_source())); -        t.mark_start(); -    } -    return output; -} diff --git a/src/report.rs b/src/report.rs deleted file mode 100644 index a88de4f..0000000 --- a/src/report.rs +++ /dev/null @@ -1,235 +0,0 @@ -use crate::*; - - -static mut ERROR_REPORTED: bool = false; - -macro_rules! report_source_error { -    ($context:expr, $message:expr) => { -        report_source_issue(LogLevel::Error, $context, $message); -        unsafe { ERROR_REPORTED = true; } -    }; -} - -macro_rules! 
exit_if_error_reported { -    () => { -        if unsafe { ERROR_REPORTED } { -            std::process::exit(1); -        } -    }; -} - -pub fn report_syntactic_errors(syntactic_tokens: &[SyntacticToken], source_code: &str) { -    use SyntacticTokenVariant as SynVar; -    for token in syntactic_tokens { -        let context = Context { source_code: &source_code, source: &token.source }; -        match &token.variant { -            SynVar::Expression(expr) => for t in &expr.tokens { -                let context = Context { source_code: &source_code, source: &t.source }; -                if let ExpressionTokenVariant::Error(err) = &t.variant { -                    let ExpressionParseError::InvalidHexadecimalLiteral(hex) = err; -                    let message = format!("Invalid hexadecimal literal {hex:?} in constant expression"); -                    report_source_error!(&context, &message); -                } -            } -            SynVar::PackedBinaryLiteral(pbl) => for e in &pbl.errors { -                let context = Context { source_code: &source_code, source: &e.source }; -                match &e.variant { -                    PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => { -                        let message = format!("Duplicate field name {name:?} in packed binary literal"); -                        report_source_error!(&context, &message); -                    } -                    PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => { -                        let message = format!("Invalid character {c:?} in packed binary literal"); -                        report_source_error!(&context, &message); -                    } -                } -            } -            SynVar::Error(err) => match err { -                SyntacticParseError::InvalidHexadecimalLiteral(hex) => { -                    let message = format!("Invalid hexadecimal literal {hex:?}"); -                    report_source_error!(&context, &message); -  
              } -                SyntacticParseError::InvalidDecimalLiteral(dec) => { -                    let message = format!("Invalid decimal literal {dec:?}"); -                    report_source_error!(&context, &message); -                } -                SyntacticParseError::InvalidSymbolIdentifier(name) => { -                    let message = format!("Invalid identifier {name:?}"); -                    report_source_error!(&context, &message); -                } -                SyntacticParseError::UnterminatedComment => { -                    let message = format!("Unterminated comment"); -                    report_source_error!(&context, &message); -                } -                SyntacticParseError::UnterminatedString => { -                    let message = format!("Unterminated string"); -                    report_source_error!(&context, &message); -                } -                SyntacticParseError::UnterminatedExpression => { -                    let message = format!("Unterminated assembler expression"); -                    report_source_error!(&context, &message); -                } -                SyntacticParseError::LabelInMacroDefinition => { -                    let message = format!("Only sublabels can be used in macro definitions"); -                    report_source_error!(&context, &message); -                } -            } -            _ => (), -        } -    } -    exit_if_error_reported!(); -} - - -pub fn report_semantic_errors(program: &SemanticProgram, source_code: &str) { -    for (_, definition) in &program.macro_definitions { -        report_value_errors(&definition.value, source_code); -    } -    for token in &program.body { -        report_semantic_token_errors(token, source_code); -    } -    exit_if_error_reported!(); -} - -fn report_value_errors(definition: &Value, source_code: &str) { -    match definition { -        Value::Integer(integer) => match integer { -            Integer::Expression(expr) => for 
token in &expr.tokens { -                if let ExpressionTokenVariant::Error(error) = &token.variant { -                    let message = match error { -                        ExpressionParseError::InvalidHexadecimalLiteral(hex) => -                            format!("Invalid hexadecimal literal '{hex}' in constant expression"), -                    }; -                    let context = Context { source: &token.source, source_code}; -                    report_source_error!(&context, &message); -                } -            } -            _ => (), -        } -        Value::Block(block) => { -            for token in block { -                report_semantic_token_errors(token, source_code); -            } -        } -        Value::Invocation(invocation) => report_invocation_errors(invocation, source_code), -    } -} - -fn report_semantic_token_errors(token: &SemanticToken, source_code: &str) { -    match &token { -        SemanticToken::Word(pbl) => for error in &pbl.errors { -            let message = match &error.variant { -                PackedBinaryLiteralParseErrorVariant::DuplicateFieldName(name) => -                    format!("Duplicate field name '{name}' in packed binary literal"), -                PackedBinaryLiteralParseErrorVariant::InvalidCharacter(c) => -                    format!("Invalid character '{c}' in packed binary literal"), -            }; -            let context = Context { source: &error.source, source_code }; -            report_source_error!(&context, &message); -        } -        SemanticToken::Invocation(invocation) => { -            report_invocation_errors(invocation, source_code) -        } -        SemanticToken::Error(error) => { -            report_semantic_error(error, source_code) -        } -        SemanticToken::LabelDefinition(_) => (), -        SemanticToken::PinnedAddress(_) => (), -    } -} - -fn report_invocation_errors(invocation: &Invocation, source_code: &str) { -    for error in &invocation.errors { -      
  report_semantic_error(&error, source_code); -    } -    for argument in &invocation.arguments { -        report_value_errors(&argument.value, source_code); -    } -} - -fn report_semantic_error(error: &SemanticParseError, source_code: &str) { -    let message = match &error.variant { -        SemanticParseErrorVariant::UnterminatedMacroDefinition(name) => -            format!("The macro definition '{name}' is missing a terminating ';' character"), -        SemanticParseErrorVariant::UnterminatedBlock => -            format!("Block literal is missing a terminating '}}' character"), -        SemanticParseErrorVariant::InvalidToken => -            format!("Invalid token"), -    }; -    let context = Context { source: &error.source, source_code}; -    report_source_error!(&context, &message); -} - - -pub fn report_assembler_errors(tokens: &[AssembledToken], source_code: &str) { -    for token in tokens { -        match token { -            AssembledToken::Word(word) => { -                for error in &word.errors { -                    report_assembler_error(&error, source_code); -                } -            } -            AssembledToken::Error(error) => { -                report_assembler_error(error, source_code); -            }, -            _ => (), -        } -    } -    exit_if_error_reported!(); -} - -fn report_assembler_error(error: &AssemblerError, source_code: &str) { -    let message = match &error.variant { -        AssemblerErrorVariant::DefinitionNotFound(name) => -            format!("Definition not found for name '{name}'"), -        AssemblerErrorVariant::NotABlock => -            format!("Value of type block was expected here"), -        AssemblerErrorVariant::NotAnInteger => -            format!("Value of type integer was expected here"), -        AssemblerErrorVariant::IntegerInBlock => -            format!("Integer in block"), -        AssemblerErrorVariant::StringInExpression => -            format!("Expressions cannot contain strings"), -    
    AssemblerErrorVariant::IncorrectArgumentCount(expected, received) => -            format!("Expected {expected} arguments, but received {received} instead"), -        AssemblerErrorVariant::IncorrectArgumentType(expected, received) => -            format!("Expected {expected} argument but received {received} instead"), -    }; -    let context = Context { -        source_code: &source_code, -        source: &error.source, -    }; -    report_source_error!(&context, &message); -} - - -pub fn report_bytecode_errors(bytecode: &Bytecode, source_code: &str) { -    for error in &bytecode.errors { -        report_bytecode_error(error, source_code); -    } -    exit_if_error_reported!(); -} - -pub fn report_bytecode_error(error: &BytecodeError, source_code: &str) { -    let message = match &error.variant { -        BytecodeErrorVariant::DefinitionNotFound(name) => -            format!("Could not find definition for label reference '{name}'"), -        BytecodeErrorVariant::DuplicateLabelDefinition(name) => -            format!("Duplicate definition for label '{name}'"), -        BytecodeErrorVariant::PinnedAddressBacktrack(expected, received) => -            format!("Cannot pin back to address {expected} when already at address {received}"), -        BytecodeErrorVariant::ValueTooLarge(expected, received) => -            format!("Expected {expected}-bit value, but received {received}-bit value instead"), -        BytecodeErrorVariant::StackUnderflow => -            format!("Stack underflow when evaluating expression"), -        BytecodeErrorVariant::NoReturnValue => -            format!("No value left on stack when evaluating expression"), -        BytecodeErrorVariant::MultipleReturnValues => -            format!("More than one value left on stack when evaluating expression"), -    }; -    let context = Context { -        source_code: &source_code, -        source: &error.source, -    }; -    report_source_error!(&context, &message); -} diff --git 
a/src/stages/bytecode.rs b/src/stages/bytecode.rs
new file mode 100644
index 0000000..3618b26
--- /dev/null
+++ b/src/stages/bytecode.rs
@@ -0,0 +1,222 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+/// Assemble intermediate tokens into segments of words.
+///
+/// When `width` is given, a word of any other width is reported as an error.
+/// Returns every collected error instead of the segments if any error occurred.
+pub fn parse_bytecode(tokens: Vec<Tracked<IntermediateToken>>, width: Option<u32>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> {
+    BytecodeParser::new(width).parse(tokens)
+}
+
+
+/// Number of bits needed for the magnitude of `value`, or zero for zero.
+fn magnitude_width(value: isize) -> u32 {
+    // `unsigned_abs` is used because negating `isize::MIN` would overflow.
+    match value.unsigned_abs() {
+        0 => 0,
+        magnitude => magnitude.ilog2() + 1,
+    }
+}
+
+/// Mask with the lowest `width` bits set.
+fn low_bits_mask(width: u32) -> usize {
+    // `checked_shl` returns `None` when `width` is at least the bit size of
+    // `usize`; every bit is part of the mask in that case.
+    1_usize.checked_shl(width).map_or(usize::MAX, |bit| bit - 1)
+}
+
+
+/// Converts intermediate tokens into segments of evaluated words.
+pub struct BytecodeParser {
+    /// Width that every word must have, if one was provided.
+    width: Option<u32>,
+    /// Address of every label definition, by label name.
+    addresses: HashMap<String, Tracked<usize>>,
+    /// Address of the next word.
+    address: usize,
+    /// Address of the first word in the current segment.
+    segment_address: usize,
+    /// Source of the pinned address that started the current segment.
+    segment_source: Option<SourceSpan>,
+    /// Completed segments.
+    segments: Vec<Segment>,
+    /// Words in the current segment.
+    words: Vec<Tracked<Word>>,
+    /// Errors collected while parsing.
+    errors: Vec<Tracked<BytecodeError>>,
+}
+
+impl BytecodeParser {
+    /// Create a parser with no labels, words, or errors, at address zero.
+    pub fn new(width: Option<u32>) -> Self {
+        Self {
+            width,
+            addresses: HashMap::new(),
+            address: 0,
+            segment_address: 0,
+            segment_source: None,
+            segments: Vec::new(),
+            words: Vec::new(),
+            errors: Vec::new(),
+        }
+    }
+
+    /// Convert `tokens` into segments, consuming the parser.
+    ///
+    /// Returns every collected error instead of the segments if any error occurred.
+    pub fn parse(mut self, tokens: Vec<Tracked<IntermediateToken>>) -> Result<Vec<Segment>, Vec<Tracked<BytecodeError>>> {
+        // Calculate all label addresses ahead of time.
+        let mut address = 0;
+        for token in &tokens {
+            let source = &token.source;
+            match &token.value {
+                IntermediateToken::LabelDefinition(name) => {
+                    let tracked = Tracked::from(address, source.clone());
+                    if self.addresses.insert(name.clone(), tracked).is_some() {
+                        unreachable!("Uncaught duplicate label definition '{name}'");
+                    }
+                }
+                IntermediateToken::Word(_) => {
+                    address += 1;
+                }
+                IntermediateToken::PinnedAddress(pinned) => {
+                    address = pinned.value;
+                }
+            }
+        }
+        // Evaluate each word and group the words into segments.
+        for token in &tokens {
+            let source = &token.source;
+            match &token.value {
+                IntermediateToken::Word(word) => {
+                    let word = self.evaluate_word(word, source);
+                    // Check that the word width fits the provided width.
+                    if let Some(width) = self.width {
+                        if word.width != width {
+                            let error = BytecodeError::IncorrectWidth(width, word.width);
+                            self.errors.push(Tracked::from(error, source.clone()));
+                        }
+                    }
+                    self.words.push(word);
+                    self.address += 1;
+                }
+                IntermediateToken::PinnedAddress(address) => {
+                    let current = self.address;
+                    let pinned = address.value;
+                    if current > pinned {
+                        let error = BytecodeError::PinnedAddressBacktrack(pinned, current);
+                        self.errors.push(Tracked::from(error, address.source.clone()));
+                    } else {
+                        // A pinned address starts a new segment.
+                        self.finish_segment();
+                        self.segment_source = Some(address.source.clone());
+                        self.address = pinned;
+                        self.segment_address = pinned;
+                    }
+                }
+                IntermediateToken::LabelDefinition(_) => (),
+            }
+        }
+        // Finish final segment.
+        self.finish_segment();
+
+        if self.errors.is_empty() {
+            Ok(self.segments)
+        } else {
+            Err(self.errors)
+        }
+    }
+
+    /// Move the words of the current segment into a completed segment.
+    /// Nothing is changed if the current segment has no words.
+    fn finish_segment(&mut self) {
+        if self.words.is_empty() {
+            return;
+        }
+        let segment = Segment {
+            address: self.segment_address,
+            source: self.segment_source.take(),
+            words: std::mem::take(&mut self.words),
+        };
+        self.segments.push(segment);
+    }
+
+    /// Evaluate `expression` on a new stack. Stack errors are collected, and
+    /// zero is returned if no result can be pulled from the stack.
+    fn evaluate_expression(&mut self, expression: &IntermediateExpression, source: &SourceSpan) -> isize {
+        let mut stack = ExpressionStack::new();
+        for token in &expression.tokens {
+            let source = &token.source;
+            match &token.value {
+                IntermediateExpressionToken::Integer(integer) => {
+                    stack.push(self.evaluate_integer(integer, source));
+                }
+                IntermediateExpressionToken::Operator(operator) => {
+                    if let Err(err) = stack.apply(*operator, source) {
+                        let error = BytecodeError::StackError(err);
+                        self.errors.push(Tracked::from(error, source.clone()))
+                    }
+                }
+            }
+        }
+        match stack.pull_result() {
+            Ok(value) => value,
+            Err(err) => {
+                let error = BytecodeError::StackError(Tracked::from(err, source.clone()));
+                self.errors.push(Tracked::from(error, source.clone()));
+                0
+            }
+        }
+    }
+
+    /// Evaluate an integer. `source` is passed on to the evaluation of a
+    /// nested expression.
+    fn evaluate_integer(&mut self, integer: &IntermediateInteger, source: &SourceSpan) -> isize {
+        match integer {
+            IntermediateInteger::Integer(value) => *value,
+            IntermediateInteger::Expression(expression) => {
+                self.evaluate_expression(expression, source)
+            }
+            IntermediateInteger::LabelReference(name) => {
+                self.evaluate_label_reference(name)
+            }
+        }
+    }
+
+    /// Return the address of the label called `name`. Panics if no address
+    /// was recorded for the label.
+    fn evaluate_label_reference(&mut self, name: &Tracked<String>) -> isize {
+        if let Some(address) = self.addresses.get(&name.to_string()) {
+            address.value as isize
+        } else {
+            unreachable!("Uncaught unresolved label reference '{name}'")
+        }
+    }
+
+    /// Evaluate every field of `word` and pack the field values into the
+    /// value of the word. A value that is too wide for its field is reported
+    /// as an error and left out of the word.
+    fn evaluate_word(&mut self, word: &IntermediateWord, source: &SourceSpan) -> Tracked<Word> {
+        let mut word_value = word.value;
+        for field in &word.fields {
+            let field_source = &field.value.value.source;
+            let field_value = self.evaluate_integer(&field.value.value.value, source);
+            // NOTE(review): only the magnitude of a negative value is measured,
+            // so -3 is accepted by a 2-bit field and packed as 0b01. Confirm
+            // that this is intended.
+            let value_width = magnitude_width(field_value);
+            if field.width < value_width {
+                let error = BytecodeError::ValueTooWide(field.width, value_width);
+                self.errors.push(Tracked::from(error, field_source.clone()));
+            } else {
+                let clamped_value = (field_value as usize) & low_bits_mask(field.width);
+                word_value |= clamped_value << field.shift;
+            }
+        }
+        let word = Word { width: word.width, value: word_value };
+        Tracked::from(word, source.clone())
+    }
+}
diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs
new file mode 100644
index 0000000..b54cb0e
--- /dev/null
+++
b/src/stages/bytecode_tokens.rs @@ -0,0 +1,78 @@ +use crate::*; + + +pub struct Segment { +    pub address: usize, +    /// Source of the address value. +    pub source: Option<SourceSpan>, +    pub words: Vec<Tracked<Word>>, +} + +pub struct Word { +    pub value: usize, +    pub width: u32, +} + +impl std::fmt::Display for Word { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        if self.width == 0 { +            write!(f, "0") +        } else { +            for i in (0..self.width).rev() { +                let is_first_bit = i+1 == self.width; +                if !is_first_bit && (i+1) % 4 == 0 { +                    write!(f, "_")?; +                } +                match (self.value >> i) & 1 { +                    0 => write!(f, "0")?, +                    _ => write!(f, "1")?, +                } +            } +            Ok(()) +        } +    } +} + +pub enum BytecodeError { +    /// expected, received +    IncorrectWidth(u32, u32), +    /// pinned, real +    PinnedAddressBacktrack(usize, usize), +    /// expected, received +    ValueTooWide(u32, u32), +    StackError(Tracked<StackError>), +} + + +pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) { +    for error in errors { +        report_bytecode_error(error, source_code); +    } +} + +fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        BytecodeError::IncorrectWidth(expected, received) => +            &format!("Word is {received} bits wide, but was expected to have a fixed width of {expected} bits"), +        BytecodeError::PinnedAddressBacktrack(pinned, real) => +            &format!("Cannot pin to address {pinned} when address is already {real}"), +        BytecodeError::StackError(stack_error) => { +            report_stack_error(stack_error, source_code); return; }, +     
   BytecodeError::ValueTooWide(expected, received) => +            &format!("Field is {expected} bits wide, but received a value that is {received} bits wide"), +    }; + +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_segment(segment: &Segment) { +    println!("SEGMENT: 0x{:>04x}", segment.address); +    // Find maximum width of all words in the segment. +    let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0); +    for word in &segment.words { +        let string = word.to_string(); +        println!("  {string:>w$}", w=width as usize); +    } +} diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs new file mode 100644 index 0000000..6853f62 --- /dev/null +++ b/src/stages/intermediate.rs @@ -0,0 +1,577 @@ +use crate::*; + +use assembler::{DefinitionType, SourceLocation, SourcePosition, SymbolRole}; + +use indexmap::{IndexSet, IndexMap}; + + +static mut ID: usize = 0; +macro_rules! next_id { () => { unsafe { let id = ID; ID += 1; id }}; } + +pub fn parse_intermediate(semantic: Vec<Tracked<SemanticToken>>) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> { +    IntermediateParser::new(semantic).parse() +} + + +struct IntermediateParser { +    semantic: Vec<Tracked<SemanticToken>>, +    label_names: IndexSet<Tracked<String>>, +    macro_names: IndexSet<Tracked<String>>, +    macro_definitions: IndexMap<String, MacroDefinition>, +    intermediate: Vec<Tracked<IntermediateToken>>, +    errors: Vec<Tracked<IntermediateError>>, +} + +impl IntermediateParser { +    pub fn new(semantic: Vec<Tracked<SemanticToken>>) -> Self { +        let mut label_names = IndexSet::new(); +        let mut macro_names = IndexSet::new(); +        for symbol in SymbolParser::new().parse(&semantic) { +            match symbol.role { +                SymbolRole::Definition(DefinitionType::MustPrecedeReference) => { +                    // Only consider macro definitions, not 
macro argument definitions. +                    if symbol.namespace.is_empty() { +                        if !macro_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { +                            unreachable!("Uncaught duplicate macro definition '{}'", symbol.name); +                        } +                    } +                } +                SymbolRole::Definition(DefinitionType::CanFollowReference) => { +                    if !label_names.insert(Tracked::from(symbol.name.clone(), symbol.source)) { +                        unreachable!("Uncaught duplicate label definition '{}'", symbol.name); +                    } +                } +                SymbolRole::Reference => (), +            } +        } + +        Self { +            semantic, +            label_names, +            macro_names, +            macro_definitions: IndexMap::new(), +            intermediate: Vec::new(), +            errors: Vec::new(), +        } +    } + +    pub fn parse(mut self) -> Result<Vec<Tracked<IntermediateToken>>, Vec<Tracked<IntermediateError>>> { +        for token in self.semantic { +            let source = &token.source; +            match token.value { +                SemanticToken::MacroDefinition(definition) => { +                    // Invoke the body to see if it contains undefined macros. +                    let error_count = self.errors.len(); +                    let mut arguments = IndexMap::new(); +                    // Prepare dummy argument values. 
+                    let null = SourceSpan { +                        string: String::new(), +                        in_merged: SourceLocation { +                            path: None, +                            start: SourcePosition::ZERO, +                            end: SourcePosition::ZERO, +                        }, +                        in_source: None, +                        child: None, +                    }; +                    for argument in &definition.arguments { +                        let value = match argument.variant { +                            ArgumentType::Integer => { +                                let integer = IntermediateInteger::Integer(0); +                                let tracked = Tracked::from(integer, null.clone()); +                                IntermediateValue::Integer(tracked) +                            } +                            ArgumentType::Block => { +                                IntermediateValue::Block(Vec::new()) +                            } +                        }; +                        let tracked = Tracked::from(value, null.clone()); +                        arguments.insert(argument.name.clone(), tracked); +                    } +                    let mut env = Environment { +                        label_names: &self.label_names, +                        macro_names: &self.macro_names, +                        macro_definitions: &self.macro_definitions, +                        arguments, +                        errors: &mut self.errors, +                        id: next_id!(), +                    }; +                    env.parse_macro_definition_body(&definition.body, source); +                    if self.errors.len() != error_count { +                        break; +                    } + +                    let name = definition.name.to_string(); +                    if self.macro_definitions.insert(name.clone(), definition).is_some() { +                        
unreachable!("Uncaught duplicate macro definition '{}'", name); +                    } +                } +                SemanticToken::BlockToken(block_token) => { +                    let mut env = Environment { +                        label_names: &self.label_names, +                        macro_names: &self.macro_names, +                        macro_definitions: &self.macro_definitions, +                        arguments: IndexMap::new(), +                        errors: &mut self.errors, +                        id: next_id!(), +                    }; +                    let mut tokens = env.parse_block_token(&block_token, source); +                    self.intermediate.append(&mut tokens); +                } +            } +        } +        match self.errors.is_empty() { +            true => Ok(self.intermediate), +            false => Err(self.errors), +        } +    } +} + + +struct Environment<'a> { +    label_names: &'a IndexSet<Tracked<String>>, +    macro_names: &'a IndexSet<Tracked<String>>, +    macro_definitions: &'a IndexMap<String, MacroDefinition>, +    arguments: IndexMap<String, Tracked<IntermediateValue>>, +    errors: &'a mut Vec<Tracked<IntermediateError>>, +    id: usize, +} + +impl<'a> Environment<'a> { +    // Attach the invocation ID to every macro label name +    fn tag_name(&self, name: &str) -> String { +        match name.contains(':') { +            true => format!("{name}:{}", self.id), +            false => name.to_string(), +        } +    } + +    fn parse_macro_definition_body(&mut self, body: &MacroDefinitionBody, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { +        match &body { +            MacroDefinitionBody::Integer(integer) => { +                let token = self.parse_integer_token(&integer, &source)?; +                let integer = IntermediateValue::Integer(token); +                Some(Tracked::from(integer, source.clone())) +            } +            
MacroDefinitionBody::Invocation(invocation) => { +                self.parse_invocation(&invocation, &invocation.source) +            } +            MacroDefinitionBody::Block(blocks) => { +                let mut tokens = Vec::new(); +                for block in blocks { +                    tokens.append(&mut self.parse_block_token(block, &block.source)); +                } +                let value = IntermediateValue::Block(tokens); +                Some(Tracked::from(value, source.clone())) +            } +        } +    } + +    fn parse_block_token(&mut self, block: &BlockToken, source: &SourceSpan) -> Vec<Tracked<IntermediateToken>> { +        let mut intermediate = Vec::new(); +        match block { +            BlockToken::LabelDefinition(name) => { +                let token = IntermediateToken::LabelDefinition(self.tag_name(name)); +                intermediate.push(Tracked::from(token, source.clone())); +            } +            BlockToken::PinnedAddress(address) => { +                if let Some(integer) = self.parse_integer_token(address, &address.source) { +                    if let Some(source) = integer_contains_label_reference(&integer) { +                        let error = IntermediateError::LabelReferenceInPinnedAddress; +                        let new_source = address.source.clone().wrap(source); +                        self.errors.push(Tracked::from(error, new_source)); +                    } else { +                        match evaluate_integer(&integer, source) { +                            Ok(value) => { +                                let value = usize::try_from(value).unwrap_or(0); +                                let tracked = Tracked::from(value, address.source.clone()); +                                let token = IntermediateToken::PinnedAddress(tracked); +                                intermediate.push(Tracked::from(token, source.clone())); +                            } +                            Err(error) => 
self.errors.push(error), +                        } +                    } +                } +            } +            BlockToken::ConditionalBlock(cond) => { +                let predicate = self.parse_integer_token(&cond.predicate, &cond.predicate.source); +                let mut body = self.parse_block_token(&cond.body, &cond.body.source); +                if let Some(predicate) = predicate { +                    let mut found_error = false; +                    if let Some(source) = integer_contains_label_reference(&predicate) { +                        let error = IntermediateError::LabelReferenceInConditionPredicate; +                        let new_source = cond.predicate.source.clone().wrap(source); +                        self.errors.push(Tracked::from(error, new_source)); +                        found_error = true; +                    }; +                    if let Some(source) = block_contains_label_definition(&cond.body, &cond.body.source) { +                        let error = IntermediateError::LabelDefinitionInConditionBody; +                        let new_source = cond.body.source.clone().wrap(source); +                        self.errors.push(Tracked::from(error, new_source)); +                        found_error = true; +                    } +                    if !found_error { +                        match evaluate_integer(&predicate, &cond.predicate.source) { +                            Ok(value) => if value != 0 { intermediate.append(&mut body) }, +                            Err(error) => self.errors.push(error), +                        } +                    } +                } +            } +            BlockToken::WordTemplate(word_template) => { +                let mut fields = Vec::new(); +                for bit_field in &word_template.fields { +                    let name = bit_field.name.to_string(); +                    let source = &bit_field.source; +                    let invocation = Invocation { name, 
arguments: Vec::new() }; +                    if let Some(value) = self.parse_integer_invocation(&invocation, source) { +                        let field = IntermediateField { +                            width: bit_field.width, +                            shift: bit_field.shift, +                            value, +                        }; +                        fields.push(Tracked::from(field, bit_field.source.clone())); +                    } +                } +                let word = IntermediateWord { +                    value: word_template.value, +                    width: word_template.width, +                    fields, +                }; +                let token = IntermediateToken::Word(word); +                intermediate.push(Tracked::from(token, source.clone())); +            } +            BlockToken::Block(blocks) => { +                for block in blocks { +                    let mut tokens = self.parse_block_token(block, &block.source); +                    intermediate.append(&mut tokens); +                } +            } +            BlockToken::Invocation(invocation) => { +                if let Some(mut tokens) = self.parse_block_invocation(invocation, source) { +                    intermediate.append(&mut tokens); +                } +            } +        } + +        return intermediate; +    } + +    fn parse_integer_token(&mut self, integer: &IntegerToken, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { +        match integer { +            IntegerToken::IntegerLiteral(value) => { +                let integer = IntermediateInteger::Integer(*value); +                Some(Tracked::from(integer, source.clone())) +            } +            IntegerToken::Expression(expression) => { +                self.parse_expression(expression, source) +            } +            IntegerToken::Invocation(invocation) => { +                self.parse_integer_invocation(invocation, source) +            } +        } +    } + + 
   fn parse_integer_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { +        match self.parse_invocation(invocation, source)?.value { +            IntermediateValue::Integer(integer) => Some(integer), +            IntermediateValue::Block(_) => { +                let error = IntermediateError::ExpectedInteger; +                self.errors.push(Tracked::from(error, source.clone())); +                None +            } +        } +    } + +    fn parse_block_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Vec<Tracked<IntermediateToken>>> { +        match self.parse_invocation(invocation, source)?.value { +            IntermediateValue::Block(tokens) => Some(tokens), +            IntermediateValue::Integer(_) => { +                let error = IntermediateError::ExpectedBlock; +                self.errors.push(Tracked::from(error, source.clone())); +                None +            } +        } +    } + +    fn parse_invocation(&mut self, invocation: &Invocation, source: &SourceSpan) -> Option<Tracked<IntermediateValue>> { +        let received_count = invocation.arguments.len(); +        if let Some(argument) = self.arguments.get(&invocation.name) { +            if received_count != 0 { +                let error = IntermediateError::IncorrectArgumentCount(0, received_count); +                self.errors.push(Tracked::from(error, source.clone())); +                None +            } else { +                Some(argument.clone()) +            } +        } else if let Some(label_name) = self.label_names.get(&invocation.name) { +            if received_count != 0 { +                let error = IntermediateError::IncorrectArgumentCount(0, received_count); +                self.errors.push(Tracked::from(error, source.clone())); +                None +            } else { +                let name = self.tag_name(label_name); +                let tracked = Tracked::from(name, 
label_name.source.clone()); +                let integer = IntermediateInteger::LabelReference(tracked); +                let tracked = Tracked::from(integer, source.clone()); +                let value = IntermediateValue::Integer(tracked); +                Some(Tracked::from(value, source.clone())) +            } +        } else if let Some(definition) = self.macro_definitions.get(&invocation.name) { +            // Check that the correct number of arguments were provided. +            let expected_count = definition.arguments.len(); +            if received_count != expected_count { +                let error = IntermediateError::IncorrectArgumentCount(expected_count, received_count); +                self.errors.push(Tracked::from(error, source.clone())); +                None +            } else { +                // Gather and type-check the provided arguments. +                let mut arguments = Vec::new(); +                for (i, argument) in invocation.arguments.iter().enumerate() { +                    let received_type = match &argument.value { +                        InvocationArgument::String(string) => { +                            let mut values = Vec::new(); +                            for c in &string.chars { +                                let integer = IntermediateInteger::Integer(**c); +                                let tracked = Tracked::from(integer, c.source.clone()); +                                values.push(IntermediateValue::Integer(tracked)); +                            } +                            arguments.push(RepeatedArgument::List(values)); +                            ArgumentType::Integer +                        } +                        InvocationArgument::IntegerToken(integer) => { +                            let tracked = self.parse_integer_token(&integer, &argument.source)?; +                            let value = IntermediateValue::Integer(tracked); +                            
arguments.push(RepeatedArgument::Loop(value)); +                            ArgumentType::Integer +                        } +                        InvocationArgument::BlockToken(block) => { +                            let tokens = self.parse_block_token(&block, &argument.source); +                            let value = IntermediateValue::Block(tokens); +                            arguments.push(RepeatedArgument::Loop(value)); +                            ArgumentType::Block +                        } +                        InvocationArgument::Invocation(invocation) => { +                            let value = self.parse_invocation(&invocation, &argument.source)?; +                            let received_type = match &value.value { +                                IntermediateValue::Integer(_) => ArgumentType::Integer, +                                IntermediateValue::Block(_) => ArgumentType::Block, +                            }; +                            arguments.push(RepeatedArgument::Loop(value.value)); +                            received_type +                        } +                    }; +                    let expected_type = match received_type { +                        ArgumentType::Integer => ArgumentType::Block, +                        ArgumentType::Block => ArgumentType::Integer, +                    }; +                    if definition.arguments[i].variant != received_type { +                        let error = IntermediateError::IncorrectArgumentType(expected_type, received_type); +                        self.errors.push(Tracked::from(error, argument.source.clone())); +                        return None; +                    } +                } +                // Invoke the invocation multiple times. 
+                let repetitions = arguments.iter().map(|a| a.len()).max().unwrap_or(1); +                let mut values = Vec::new(); +                for i in 0..repetitions { +                    // Construct an argument map for this invocation. +                    let mut argument_map = IndexMap::new(); +                    for (a, argument) in arguments.iter().enumerate() { +                        let name = definition.arguments[a].name.clone(); +                        let source = invocation.arguments[a].source.clone(); +                        let value = match argument { +                            RepeatedArgument::Loop(value) => { +                                Tracked::from(value.clone(), source) +                            } +                            RepeatedArgument::List(list) => match list.get(i) { +                                Some(value) => { +                                    Tracked::from(value.clone(), source) +                                } +                                None => { +                                    let error = IntermediateError::ListExhausted; +                                    let source = invocation.arguments[a].source.clone(); +                                    self.errors.push(Tracked::from(error, source)); +                                    return None; +                                } +                            } +                        }; +                        if argument_map.insert(name.clone(), value).is_some() { +                            unreachable!("Uncaught duplicate macro argument name '{name}'"); +                        }; +                    } +                    let mut env = Environment { +                        label_names: &self.label_names, +                        macro_names: &self.macro_names, +                        macro_definitions: &self.macro_definitions, +                        arguments: argument_map, +                        errors: &mut self.errors, + 
                       id: next_id!(), +                    }; +                    values.push(env.parse_macro_definition_body(&definition.body, source)?); +                } +                if values.len() == 1 { +                    values.pop() +                } else { +                    // Flatten all values into a list of block tokens. +                    let mut block = Vec::new(); +                    for value in values { +                        match value.value { +                            IntermediateValue::Integer(_) => { +                                let error = IntermediateError::ExpectedBlock; +                                self.errors.push(Tracked::from(error, value.source)); +                                return None; +                            } +                            IntermediateValue::Block(mut tokens) => { +                                block.append(&mut tokens); +                            } +                        } +                    } +                    Some(Tracked::from(IntermediateValue::Block(block), source.clone())) +                } +            } +        } else if let Some(macro_name) = self.macro_names.get(&invocation.name) { +            let error = IntermediateError::InvocationBeforeDefinition; +            let source = source.clone().wrap(macro_name.source.clone()); +            self.errors.push(Tracked::from(error, source)); +            None +        } else { +            unreachable!("Uncaught unresolved reference '{}'", invocation.name); +        } +    } + +    fn parse_expression(&mut self, expression: &Expression, source: &SourceSpan) -> Option<Tracked<IntermediateInteger>> { +        let mut intermediate = Vec::new(); +        let mut error = false; + +        for token in &expression.tokens { +            let source = &token.source; +            match &token.value { +                ExpressionToken::IntegerToken(integer) => { +                    let Some(integer) = 
self.parse_integer_token(integer, source) else { +                        error = true; continue; +                    }; +                    let token = IntermediateExpressionToken::Integer(integer.value); +                    intermediate.push(Tracked::from(token, integer.source)); +                } +                ExpressionToken::Operator(operator) => { +                    let token = IntermediateExpressionToken::Operator(*operator); +                    intermediate.push(Tracked::from(token, source.clone())); +                } +                ExpressionToken::Invocation(invocation) => { +                    let Some(integer) = self.parse_integer_invocation(invocation, source) else { +                        error = true; continue; +                    }; +                    let token = IntermediateExpressionToken::Integer(integer.value); +                    intermediate.push(Tracked::from(token, integer.source)); +                } +            } +        } + +        if error { return None; } +        let expression = IntermediateExpression { tokens: intermediate }; +        let integer = IntermediateInteger::Expression(expression); +        Some(Tracked::from(integer, source.clone())) +    } +} + + +macro_rules! 
return_some { +    ($option:expr) => { +        if $option.is_some() { return $option; } +    }; +} + +fn integer_contains_label_reference(integer: &IntermediateInteger) -> Option<SourceSpan> { +    match integer { +        IntermediateInteger::Integer(_) => None, +        IntermediateInteger::LabelReference(label) => Some(label.source.clone()), +        IntermediateInteger::Expression(expr) => expression_contains_label_reference(expr), +    } +} + +fn expression_contains_label_reference(expression: &IntermediateExpression) -> Option<SourceSpan> { +    for token in &expression.tokens { +        if let IntermediateExpressionToken::Integer(integer) = &token.value { +            if let Some(child) = integer_contains_label_reference(&integer) { +                return Some(token.source.clone().wrap(child)); +            } +        } +    } +    return None; +} + +fn block_contains_label_definition(block: &BlockToken, source: &SourceSpan) -> Option<SourceSpan> { +    match &block { +        BlockToken::LabelDefinition(_) => { +            return Some(source.clone()); +        } +        BlockToken::Invocation(invocation) => { +            return_some!(invocation_contains_label_definition(invocation)) +        } +        BlockToken::Block(blocks) => { +            for block in blocks { +                return_some!(block_contains_label_definition(block, &block.source)) +            } +        } +        _ => (), +    } +    return None; +} + +fn invocation_contains_label_definition(invocation: &Invocation) -> Option<SourceSpan> { +    for argument in &invocation.arguments { +        match &argument.value { +            InvocationArgument::BlockToken(block) => { +                return_some!(block_contains_label_definition(&block, &argument.source)) +            } +            InvocationArgument::Invocation(invocation) => { +                return_some!(invocation_contains_label_definition(&invocation)) +            } +            _ => (), +        } +    } +    return 
None; +} + +fn evaluate_integer(integer: &IntermediateInteger, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { +    match integer { +        IntermediateInteger::Integer(value) => Ok(*value), +        IntermediateInteger::LabelReference(name) => +            unreachable!("Uncaught label reference '{name}' in condition predicate or pinned address value"), +        IntermediateInteger::Expression(expr) => evaluate_expression(expr, source), +    } +} + +fn evaluate_expression(expression: &IntermediateExpression, source: &SourceSpan) -> Result<isize, Tracked<IntermediateError>> { +    let mut stack = ExpressionStack::new(); +    for token in &expression.tokens { +        let source = &token.source; +        match &token.value { +            IntermediateExpressionToken::Integer(integer) => match integer { +                IntermediateInteger::Integer(value) => { +                    stack.push(*value); +                } +                IntermediateInteger::Expression(expression) => { +                    stack.push(evaluate_expression(&expression, source)?); +                } +                IntermediateInteger::LabelReference(name) => { +                    unreachable!("Uncaught label reference '{name}' in condition predicate"); +                } +            } +            IntermediateExpressionToken::Operator(operator) => { +                if let Err(stack_error) = stack.apply(*operator, source) { +                    let error = IntermediateError::StackError(stack_error); +                    return Err(Tracked::from(error, token.source.clone())); +                } +            } +        } +    } +    match stack.pull_result() { +        Ok(value) => Ok(value), +        Err(err) => { +            let error = Tracked::from(err, source.clone()); +            Err(Tracked::from(IntermediateError::StackError(error), source.clone())) +        } +    } +} diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs new 
file mode 100644 index 0000000..a09581e --- /dev/null +++ b/src/stages/intermediate_tokens.rs @@ -0,0 +1,149 @@ +use crate::*; + + +#[derive(Clone)] +pub enum IntermediateToken { +    Word(IntermediateWord), +    PinnedAddress(Tracked<usize>), +    LabelDefinition(String), +} + +#[derive(Clone)] +pub struct IntermediateWord { +    pub value: usize, +    /// Width of the word in bits. +    pub width: u32, +    pub fields: Vec<Tracked<IntermediateField>>, +} + +#[derive(Clone)] +pub struct IntermediateField { +    pub value: Tracked<IntermediateInteger>, +    /// Width of the field in bits. +    pub width: u32, +    /// Number of bits to the right of the field in the word. +    pub shift: u32, +} + +#[derive(Clone)] +pub enum IntermediateInteger { +    Integer(isize), +    Expression(IntermediateExpression), +    LabelReference(Tracked<String>), +} + +#[derive(Clone)] +pub struct IntermediateExpression { +    pub tokens: Vec<Tracked<IntermediateExpressionToken>>, +} + +#[derive(Clone)] +pub enum IntermediateExpressionToken { +    Integer(IntermediateInteger), +    Operator(Operator), +} + +#[derive(Clone)] +pub enum IntermediateValue { +    Integer(Tracked<IntermediateInteger>), +    Block(Vec<Tracked<IntermediateToken>>), +} + +pub enum RepeatedArgument { +    Loop(IntermediateValue), +    List(Vec<IntermediateValue>), +} + +impl RepeatedArgument { +    pub fn len(&self) -> usize { +        match self { +            Self::Loop(_) => 1, +            Self::List(list) => list.len(), +        } +    } +} + +pub enum IntermediateError { +    ExpectedInteger, +    ExpectedBlock, +    ListExhausted, +    LabelReferenceInConditionPredicate, +    LabelDefinitionInConditionBody, +    LabelReferenceInPinnedAddress, +    StackError(Tracked<StackError>), +    InvocationBeforeDefinition, +    /// expected, received +    IncorrectArgumentCount(usize, usize), +    /// expected, received +    IncorrectArgumentType(ArgumentType, ArgumentType), +} + +pub fn 
report_intermediate_errors(errors: &[Tracked<IntermediateError>], source_code: &str) { +    for error in errors { +        report_intermediate_error(error, source_code); +    } +} + +fn report_intermediate_error(error: &Tracked<IntermediateError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        IntermediateError::ExpectedInteger => +            "An integer value was expected here", +        IntermediateError::ExpectedBlock => +            "A block value was expected here", +        IntermediateError::ListExhausted => +            "This string is shorter than another string passed to the same invocation", +        IntermediateError::LabelReferenceInConditionPredicate => +            "The predicate of a conditional block cannot contain a label reference", +        IntermediateError::LabelDefinitionInConditionBody => +            "The body of a conditional block cannot contain a label definition", +        IntermediateError::LabelReferenceInPinnedAddress => +            "The value of a pinned address cannot contain a label reference", +        IntermediateError::StackError(stack_error) => { +            report_stack_error(stack_error, source_code); return; }, +        IntermediateError::InvocationBeforeDefinition => +            &format!("Macro cannot be invoked before it has been defined"), +        IntermediateError::IncorrectArgumentCount(expected, received) => +            &format!("Expected {expected} arguments, but received {received} instead"), +        IntermediateError::IncorrectArgumentType(expected, received) => +            &format!("Expected {expected} value but received {received} value instead"), +    }; + +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_intermediate_token(i: usize, token: &IntermediateToken) { +    match token { +        IntermediateToken::Word(word) => { +            indent!(i, "Word({:>0w$b})", 
word.value, w = word.width as usize); +            for field in &word.fields { +                print_intermediate_integer(i+1, &field.value.value); +            } +        } +        IntermediateToken::PinnedAddress(address) => +            indent!(i, "PinnedAddress({address})"), +        IntermediateToken::LabelDefinition(name) => +            indent!(i, "LabelDefinition({name})"), +    } +} + +fn print_intermediate_integer(i: usize, integer: &IntermediateInteger) { +    match integer { +        IntermediateInteger::Integer(value) => +            indent!(i, "Integer({value})"), +        IntermediateInteger::LabelReference(name) => +            indent!(i, "LabelReference({name})"), +        IntermediateInteger::Expression(expression) => { +            indent!(i, "Expression"); +            for token in &expression.tokens { +                match &token.value { +                    IntermediateExpressionToken::Integer(integer) => +                        print_intermediate_integer(i+1, integer), +                    IntermediateExpressionToken::Operator(operator) => +                        indent!(i+1, "Operator({operator})"), +                } +            } +        } +    } +} diff --git a/src/stages/mod.rs b/src/stages/mod.rs new file mode 100644 index 0000000..e735f05 --- /dev/null +++ b/src/stages/mod.rs @@ -0,0 +1,31 @@ +mod syntactic; +mod syntactic_tokens; +mod semantic; +mod semantic_tokens; +mod intermediate; +mod intermediate_tokens; +mod bytecode; +mod bytecode_tokens; + +pub use syntactic::*; +pub use syntactic_tokens::*; +pub use semantic::*; +pub use semantic_tokens::*; +pub use intermediate::*; +pub use intermediate_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; + + +#[macro_export] +macro_rules! 
indent { +    (0, $($tokens:tt)*) => {{ +        println!($($tokens)*); +    }}; +    ($indent:expr, $($tokens:tt)*) => {{ +        for _ in 0..$indent { print!("  "); } +        println!($($tokens)*); +    }}; +} + + diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs new file mode 100644 index 0000000..e225608 --- /dev/null +++ b/src/stages/semantic.rs @@ -0,0 +1,478 @@ +use crate::*; + +use std::collections::VecDeque; + + +pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { +    SemanticParser::from(syntactic, Namespace::None).parse() +} + +#[derive(Clone)] +enum Namespace { +    Macro(String), +    Label(String), +    None, +} + + +struct SemanticParser { +    namespace: Namespace, +    syntactic: SyntacticTokenStream, +    semantic: Vec<Tracked<SemanticToken>>, +    errors: Vec<Tracked<SemanticError>>, +} + +impl SemanticParser { +    pub fn from(syntactic: Vec<Tracked<SyntacticToken>>, namespace: Namespace) -> Self { +        Self { +            namespace, +            syntactic: SyntacticTokenStream::from(syntactic), +            semantic: Vec::new(), +            errors: Vec::new(), +        } +    } + +    fn pull_from(&mut self, mut other: SemanticParser) { +        self.errors.append(&mut other.errors); +        if let Namespace::Macro(_) = other.namespace { +            () +        } else { +            self.namespace = other.namespace; +        } +    } + +    fn resolve_label_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { +        match symbol { +            ScopedSymbol::Global(name) => match &self.namespace { +                Namespace::Macro(_) => { +                    let error = SemanticError::LabelInMacroDefinition; +                    self.errors.push(Tracked::from(error, source.to_owned())); +                    None +                } +                Namespace::Label(_) | Namespace::None => { +                    
self.namespace = Namespace::Label(name.clone()); +                    Some(name) +                } +            } +            ScopedSymbol::Local(name) => match &self.namespace { +                Namespace::Macro(macro_ns) => { +                    Some(format!("{macro_ns}:{name}")) +                } +                Namespace::Label(label_ns) => { +                    Some(format!("{label_ns}/{name}")) +                } +                Namespace::None => { +                    let error = SemanticError::SublabelWithoutNamespace; +                    self.errors.push(Tracked::from(error, source.to_owned())); +                    None +                } +            } +        } +    } + +    fn resolve_symbol_name(&mut self, symbol: ScopedSymbol, source: &SourceSpan) -> Option<String> { +        match symbol { +            ScopedSymbol::Global(name) => { +                Some(name) +            } +            ScopedSymbol::Local(name) => match &self.namespace { +                Namespace::Macro(macro_ns) => { +                    Some(format!("{macro_ns}:{name}")) +                } +                Namespace::Label(label_ns) => { +                    Some(format!("{label_ns}/{name}")) +                } +                Namespace::None => { +                    let error = SemanticError::LocalSymbolWithoutNamespace; +                    self.errors.push(Tracked::from(error, source.to_owned())); +                    None +                } +            } +        } +    } + +    /// Parse the remaining syntactic tokens as a full program. 
+    pub fn parse(mut self) -> Result<Vec<Tracked<SemanticToken>>, Vec<Tracked<SemanticError>>> { +        while let Some(token) = self.syntactic.pop() { +            if let SyntacticToken::MacroDefinition(definition) = token.value { +                let namespace = Namespace::Macro(definition.name.to_string()); +                let mut parser = SemanticParser::from(definition.tokens, namespace); +                let mut arguments = Vec::new(); +                while let Some(argument) = parser.pull_argument_definition() { +                    arguments.push(argument); +                } +                let body = parser.parse_macro_definition_body(SemanticLocation::MacroDefinitionBody); +                self.pull_from(parser); +                let definition = MacroDefinition { name: definition.name, arguments, body }; +                let semantic = SemanticToken::MacroDefinition(definition); +                self.semantic.push(Tracked::from(semantic, token.source)); +            } else { +                self.syntactic.unpop(token); +                if let Some(token) = self.pull_block_token(SemanticLocation::Program) { +                    let semantic = SemanticToken::BlockToken(token.value); +                    self.semantic.push(Tracked::from(semantic, token.source)); +                } +            } +        } +        match self.errors.is_empty() { +            true => Ok(self.semantic), +            false => Err(self.errors), +        } +    } + +    /// Parse the remaining syntactic tokens as a macro definition body. 
+    fn parse_macro_definition_body(&mut self, location: SemanticLocation) -> MacroDefinitionBody { +        let mut tokens = Vec::new(); +        while !self.syntactic.is_empty() { +            if let Some(token) = self.pull_macro_definition_body_token() { +                tokens.push(token); +            } +        } +        if tokens.is_empty() { +            MacroDefinitionBody::Block(Vec::new()) +        } else if tokens.len() == 1 { +            tokens.pop().unwrap() +        } else { +            let mut block_tokens = Vec::new(); +            for token in tokens { +                match token { +                    MacroDefinitionBody::Integer(integer) => { +                        let error = SemanticError::ExpectedInteger(location); +                        let tracked = Tracked::from(error, integer.source); +                        self.errors.push(tracked); +                    } +                    MacroDefinitionBody::Block(mut tokens) => { +                        block_tokens.append(&mut tokens); +                    } +                    MacroDefinitionBody::Invocation(invocation) => { +                        // Convert invocation to a block invocation. +                        let token = BlockToken::Invocation(invocation.value); +                        block_tokens.push(Tracked::from(token, invocation.source)); +                    } +                } +            } +            MacroDefinitionBody::Block(block_tokens) +        } +    } + +    /// Attempt to pull a MacroDefinitionBody token from the token stream. +    /// Invalid values are noted and dropped, and a None is returned. +    /// Each BodyToken is wrapped in a separate MacroDefinitionBody. 
+    fn pull_macro_definition_body_token(&mut self) -> Option<MacroDefinitionBody> { +        let token = self.syntactic.pop()?; +        let source = token.source; +        match token.value { +            SyntacticToken::LabelDefinition(symbol) => { +                let name = self.resolve_label_name(symbol, &source)?; +                let token = BlockToken::LabelDefinition(name); +                let tracked = Tracked::from(token, source); +                Some(MacroDefinitionBody::Block(vec![tracked])) +            } +            SyntacticToken::MacroDefinition(_) => { +                let error = SemanticError::MisplacedMacroDefinition; +                self.errors.push(Tracked::from(error, source)); +                None +            } +            SyntacticToken::IntegerLiteral(value) => { +                let token = IntegerToken::IntegerLiteral(value); +                let tracked = Tracked::from(token, source); +                Some(MacroDefinitionBody::Integer(tracked)) +            } +            SyntacticToken::StringLiteral(_) => { +                let error = SemanticError::MisplacedStringLiteral; +                self.errors.push(Tracked::from(error, source)); +                None +            } +            SyntacticToken::WordTemplate(word_template) => { +                let token = BlockToken::WordTemplate(word_template); +                let tracked = Tracked::from(token, source); +                Some(MacroDefinitionBody::Block(vec![tracked])) +            } +            SyntacticToken::BlockLiteral(tokens) => { +                let mut parser = SemanticParser::from(tokens, self.namespace.clone()); +                let tokens = parser.parse_block(); +                self.pull_from(parser); +                let token = BlockToken::Block(tokens); +                let tracked = Tracked::from(token, source); +                Some(MacroDefinitionBody::Block(vec![tracked])) +            } +            SyntacticToken::Expression(tokens) => { +       
         let mut parser = SemanticParser::from(tokens, self.namespace.clone()); +                let expression = parser.parse_expression(); +                self.pull_from(parser); +                let token = IntegerToken::Expression(expression); +                let tracked = Tracked::from(token, source); +                Some(MacroDefinitionBody::Integer(tracked)) +            } +            SyntacticToken::Symbol(symbol) => { +                let name = self.resolve_symbol_name(symbol, &source)?; +                let arguments = self.pull_all_invocation_arguments(); +                // Extend invocation source span to cover all arguments. +                let mut source = source; +                if let Some(last) = arguments.last() { +                    source.in_merged.end = last.source.in_merged.end; +                    if let Some(last_in_source) = &last.source.in_source { +                        if let Some(in_source) = &mut source.in_source { +                            in_source.end = last_in_source.end.clone(); +                        } +                    } +                } +                let invocation = Invocation { name, arguments }; +                let tracked = Tracked::from(invocation, source); +                Some(MacroDefinitionBody::Invocation(tracked)) +            } +            SyntacticToken::Separator => { +                let error = SemanticError::MisplacedSeparator; +                self.errors.push(Tracked::from(error, source)); +                None +            } +            SyntacticToken::Condition => { +                let conditional = self.pull_conditional_block()?; +                let token = BlockToken::ConditionalBlock(Box::new(conditional)); +                let tracked = Tracked::from(token, source); +                Some(MacroDefinitionBody::Block(vec![tracked])) +            } +            SyntacticToken::Pin => { +                let integer = self.pull_integer_token(SemanticLocation::PinAddress)?; +      
          let token = BlockToken::PinnedAddress(integer); +                let tracked = Tracked::from(token, source); +                Some(MacroDefinitionBody::Block(vec![tracked])) +            } +        } +    } + +    /// Attempt to pull an integer token from the token stream. +    /// Invalid values are noted and dropped, and a None is returned. +    fn pull_integer_token(&mut self, location: SemanticLocation) -> Option<Tracked<IntegerToken>> { +        match self.pull_macro_definition_body_token()? { +            MacroDefinitionBody::Integer(integer) => { +                Some(integer) +            } +            MacroDefinitionBody::Invocation(invocation) => { +                // Convert invocation to an integer invocation. +                let token = IntegerToken::Invocation(invocation.value); +                Some(Tracked::from(token, invocation.source)) +            } +            MacroDefinitionBody::Block(mut tokens) => { +                assert_eq!(tokens.len(), 1); +                let token = tokens.pop().unwrap(); +                let error = SemanticError::ExpectedInteger(location); +                self.errors.push(Tracked::from(error, token.source)); +                None +            } +        } +    } + +    /// Attempt to pull a BlockToken from the token stream. +    /// Invalid values are noted and dropped, and a None is returned. +    fn pull_block_token(&mut self, location: SemanticLocation) -> Option<Tracked<BlockToken>> { +        match self.pull_macro_definition_body_token()? { +            MacroDefinitionBody::Block(mut tokens) => { +                assert_eq!(tokens.len(), 1); +                tokens.pop() +            } +            MacroDefinitionBody::Invocation(invocation) => { +                // Convert invocation to a block invocation. 
+                let token = BlockToken::Invocation(invocation.value); +                Some(Tracked::from(token, invocation.source)) +            } +            MacroDefinitionBody::Integer(integer) => { +                let error = SemanticError::ExpectedBlock(location); +                self.errors.push(Tracked::from(error, integer.source)); +                None +            } +        } +    } + +    /// Parse the remaining syntactic tokens as the contents of a block. +    fn parse_block(&mut self) -> Vec<Tracked<BlockToken>> { +        let mut tokens = Vec::new(); +        while !self.syntactic.is_empty() { +            if let Some(token) = self.pull_block_token(SemanticLocation::BlockLiteral) { +                tokens.push(token); +            } +        } +        tokens +    } + +    /// Parse the remaining syntactic tokens as a list of integer tokens. +    fn parse_integer_list(&mut self, location: SemanticLocation) -> Vec<Tracked<IntegerToken>> { +        let mut tokens = Vec::new(); +        while !self.syntactic.is_empty() { +            if let Some(token) = self.pull_integer_token(location) { +                tokens.push(token); +            } +        } +        tokens +    } + +    /// Parse the remaining syntactic tokens as the contents of an expression. 
+    fn parse_expression(&mut self) -> Expression { +        let mut tokens = Vec::new(); +        for token in self.parse_integer_list(SemanticLocation::Expression) { +            let source = token.source; +            match token.value { +                IntegerToken::IntegerLiteral(value) => { +                    let integer = Box::new(IntegerToken::IntegerLiteral(value)); +                    let token = ExpressionToken::IntegerToken(integer); +                    tokens.push(Tracked::from(token, source)); +                } +                IntegerToken::Expression(expression) => { +                    let integer = Box::new(IntegerToken::Expression(expression)); +                    let token = ExpressionToken::IntegerToken(integer); +                    tokens.push(Tracked::from(token, source)); +                } +                IntegerToken::Invocation(invocation) => { +                    // Parse the invocation as an operator instead. +                    if invocation.arguments.is_empty() { +                        if let Some(operator) = Operator::from_str(&invocation.name) { +                            let token = ExpressionToken::Operator(operator); +                            tokens.push(Tracked::from(token, source)); +                            continue; +                        } +                    } +                    // Parse the invocation as an invocation. +                    let integer = Box::new(IntegerToken::Invocation(invocation)); +                    let token = ExpressionToken::IntegerToken(integer); +                    tokens.push(Tracked::from(token, source)); +                } +            } +        } +        Expression { tokens } +    } + +    /// Attempt to pull a conditional block from the token stream. +    /// Invalid values are noted and dropped, and a None is returned. 
+    fn pull_conditional_block(&mut self) -> Option<ConditionalBlock> { +        let predicate = self.pull_integer_token(SemanticLocation::ConditionPredicate)?; +        let body = self.pull_block_token(SemanticLocation::ConditionBody)?; +        Some(ConditionalBlock { predicate, body }) +    } + +    /// Attempt to pull an invocation argument from the token stream. +    /// Invalid values are not dropped, a None indicates that no arguments remain. +    fn pull_invocation_argument(&mut self) -> Option<Tracked<InvocationArgument>> { +        self.syntactic.pop_if(is_separator)?; +        let token = self.syntactic.pop()?; +        let source = token.source; +        match token.value { +            SyntacticToken::StringLiteral(string_literal) => { +                let argument = InvocationArgument::String(string_literal); +                Some(Tracked::from(argument, source)) +            } +            SyntacticToken::IntegerLiteral(value) => { +                let integer = IntegerToken::IntegerLiteral(value); +                let argument = InvocationArgument::IntegerToken(integer); +                Some(Tracked::from(argument, source)) +            } +            SyntacticToken::Expression(tokens) => { +                let mut parser = SemanticParser::from(tokens, self.namespace.clone()); +                let expression = parser.parse_expression(); +                self.pull_from(parser); +                let integer = IntegerToken::Expression(expression); +                let argument = InvocationArgument::IntegerToken(integer); +                Some(Tracked::from(argument, source)) +            } +            SyntacticToken::BlockLiteral(tokens) => { +                let mut parser = SemanticParser::from(tokens, self.namespace.clone()); +                let tokens = parser.parse_block(); +                self.pull_from(parser); +                let block = BlockToken::Block(tokens); +                let argument = InvocationArgument::BlockToken(block); +     
           Some(Tracked::from(argument, source)) +            } +            SyntacticToken::Symbol(symbol) => { +                let name = self.resolve_symbol_name(symbol, &source)?; +                let invocation = Invocation { name, arguments: Vec::new() }; +                let argument = InvocationArgument::Invocation(invocation); +                Some(Tracked::from(argument, source)) +            } +            SyntacticToken::WordTemplate(word_template) => { +                let block = BlockToken::WordTemplate(word_template); +                let argument = InvocationArgument::BlockToken(block); +                Some(Tracked::from(argument, source)) +            } +            _ => { +                let error = SemanticError::InvalidInvocationArgument; +                self.errors.push(Tracked::from(error, source)); +                return None; +            } +        } +    } + +    fn pull_all_invocation_arguments(&mut self) -> Vec<Tracked<InvocationArgument>> { +        let mut arguments = Vec::new(); +        while let Some(argument) = self.pull_invocation_argument() { +            arguments.push(argument); +        } +        return arguments; +    } + +    /// Attempt to pull an argument definition from the token stream. +    /// Invalid values are not dropped, a None indicates that no arguments remain. 
+    fn pull_argument_definition(&mut self) -> Option<Tracked<ArgumentDefinition>> { +        self.syntactic.pop_if(is_separator)?; +        let token = self.syntactic.pop()?; +        let source = token.source; +        match token.value { +            SyntacticToken::Symbol(ScopedSymbol::Global(name)) => { +                let variant = ArgumentType::Integer; +                let definition = ArgumentDefinition { name, variant }; +                return Some(Tracked::from(definition, source)); +            } +            SyntacticToken::BlockLiteral(mut tokens) => { +                if tokens.len() == 1 { +                    let token = tokens.pop().unwrap(); +                    if let SyntacticToken::Symbol(ScopedSymbol::Global(name)) = token.value { +                        let variant = ArgumentType::Block; +                        let definition = ArgumentDefinition { name, variant }; +                        return Some(Tracked::from(definition, source)); +                    } +                } +            } +            _ => (), +        }; +        let error = SemanticError::InvalidArgumentDefinition; +        self.errors.push(Tracked::from(error, source)); +        return None; +    } +} + + + +struct SyntacticTokenStream { +    tokens: VecDeque<Tracked<SyntacticToken>>, +} + +impl SyntacticTokenStream { +    pub fn from<T: Into<VecDeque<Tracked<SyntacticToken>>>>(tokens: T) -> Self { +        Self { tokens: tokens.into() } +    } + +    pub fn pop(&mut self) -> Option<Tracked<SyntacticToken>> { +        self.tokens.pop_front() +    } + +    pub fn pop_if(&mut self, predicate: fn(&Tracked<SyntacticToken>) -> bool) -> Option<Tracked<SyntacticToken>> { +        match predicate(self.tokens.front()?) 
{ +            true => self.tokens.pop_front(), +            false => None, +        } +    } + +    pub fn unpop(&mut self, token: Tracked<SyntacticToken>) { +        self.tokens.push_front(token); +    } + +    pub fn is_empty(&self) -> bool { +        self.tokens.is_empty() +    } +} + + +fn is_separator(token: &Tracked<SyntacticToken>) -> bool { +    match token.value { +        SyntacticToken::Separator => true, +        _ => false, +    } +} diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs new file mode 100644 index 0000000..dfbea1a --- /dev/null +++ b/src/stages/semantic_tokens.rs @@ -0,0 +1,296 @@ +use crate::*; + + +pub enum SemanticToken { +    MacroDefinition(MacroDefinition), +    BlockToken(BlockToken), +} + +pub struct MacroDefinition { +    pub name: Tracked<String>, +    pub arguments: Vec<Tracked<ArgumentDefinition>>, +    pub body: MacroDefinitionBody, +} + +pub struct ArgumentDefinition { +    pub name: String, +    pub variant: ArgumentType, +} + +#[derive(PartialEq)] +pub enum ArgumentType { +    Integer, +    Block, +} + +impl std::fmt::Display for ArgumentType { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        match self { +            ArgumentType::Integer => write!(f, "an integer"), +            ArgumentType::Block => write!(f, "a block"), +        } +    } +} + +pub enum MacroDefinitionBody { +    Integer(Tracked<IntegerToken>), +    Block(Vec<Tracked<BlockToken>>), +    Invocation(Tracked<Invocation>), +} + +pub struct ConditionalBlock { +    pub predicate: Tracked<IntegerToken>, +    pub body: Tracked<BlockToken>, +} + +pub enum IntegerToken { +    IntegerLiteral(isize), +    Expression(Expression), +    Invocation(Invocation), +} + +pub struct Expression { +    pub tokens: Vec<Tracked<ExpressionToken>>, +} + +pub enum ExpressionToken { +    IntegerToken(Box<IntegerToken>), +    Invocation(Invocation), +    Operator(Operator), +} + +pub enum BlockToken { +    
LabelDefinition(String), +    PinnedAddress(Tracked<IntegerToken>), +    ConditionalBlock(Box<ConditionalBlock>), +    WordTemplate(WordTemplate), +    Block(Vec<Tracked<BlockToken>>), +    Invocation(Invocation), +} + +pub struct Invocation { +    pub name: String, +    pub arguments: Vec<Tracked<InvocationArgument>>, +} + +pub enum InvocationArgument { +    String(StringLiteral), +    IntegerToken(IntegerToken), +    BlockToken(BlockToken), +    Invocation(Invocation), +} + +pub enum SemanticError { +    MisplacedStringLiteral, +    MisplacedListLiteral, +    MisplacedSeparator, +    MisplacedMacroDefinition, + +    ExpectedInteger(SemanticLocation), +    ExpectedBlock(SemanticLocation), + +    InvalidArgumentDefinition, +    InvalidInvocationArgument, + +    LabelInMacroDefinition, +    SublabelWithoutNamespace, +    LocalSymbolWithoutNamespace, +} + +#[derive(Clone, Copy)] +pub enum SemanticLocation { +    MacroDefinitionBody, +    Expression, +    ConditionPredicate, +    ConditionBody, +    Program, +    BlockLiteral, +    PinAddress, +} + +impl std::fmt::Display for SemanticLocation { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let string = match self { +            SemanticLocation::Expression => +                "inside this expression", +            SemanticLocation::ConditionPredicate => +                "as the predicate of this conditional block", +            SemanticLocation::ConditionBody => +                "as the body of this conditional block", +            SemanticLocation::Program => +                "at the outermost level of the program", +            SemanticLocation::BlockLiteral => +                "inside this block literal", +            SemanticLocation::MacroDefinitionBody => +                "inside the body of this macro definition", +            SemanticLocation::PinAddress => +                "as the address of this pin", +        }; +        write!(f, "{string}") +    } +} + + +pub fn 
report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) { +    for error in errors { +        report_semantic_error(error, source_code); +    } +} + +fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        SemanticError::MisplacedStringLiteral => +            "A string literal can only be used as an invocation argument", +        SemanticError::MisplacedListLiteral => +            "A list literal can only be used as an invocation argument", +        SemanticError::MisplacedSeparator => +            "A separator can only be used to construct an argument list", +        SemanticError::MisplacedMacroDefinition => +            "A macro definition must be used at the outermost level of the program", + +        SemanticError::ExpectedInteger(location) => +            &format!("An integer value was expected {location}"), +        SemanticError::ExpectedBlock(location) => +            &format!("A block value was expected {location}"), + +        SemanticError::InvalidArgumentDefinition => +            "Argument definitions must be in the form 'name' or '{{name}}'", +        SemanticError::InvalidInvocationArgument => +            "This token cannot be used in an invocation argument", + +        SemanticError::LabelInMacroDefinition => +            &format!("Only sublabels can be defined inside macro definitions"), +        SemanticError::SublabelWithoutNamespace => +            &format!("Sublabel was not defined inside a macro definition or after a label"), +        SemanticError::LocalSymbolWithoutNamespace => +            &format!("Local symbol was not defined inside a macro definition or after a label"), +    }; + +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_semantic_token(i: usize, token: &SemanticToken) { +    match token { +        
SemanticToken::MacroDefinition(definition) => { +            indent!(i, "MacroDefinition({})", definition.name); +            for argument in &definition.arguments { +                print_argument_definition(i+1, argument); +            } +            match &definition.body { +                MacroDefinitionBody::Integer(integer) => { +                    print_integer_token(i+1, integer) +                } +                MacroDefinitionBody::Block(tokens) => { +                    print_block(i+1, tokens); +                } +                MacroDefinitionBody::Invocation(invocation) => { +                    print_invocation(i+1, invocation); +                } +            } +        } +        SemanticToken::BlockToken(block) => print_block_token(0, block), +    } +} + +fn print_argument_definition(i: usize, argument: &ArgumentDefinition) { +    match argument.variant { +        ArgumentType::Integer => { +            indent!(i, "Argument({}, integer)", argument.name) +        } +        ArgumentType::Block => { +            indent!(i, "Argument({}, block)", argument.name) +        } +    } +} + +fn print_block_token(i: usize, block: &BlockToken) { +    match block { +        BlockToken::Invocation(invocation) => { +            print_invocation(i, invocation) +        } +        BlockToken::LabelDefinition(name) => { +            indent!(i, "LabelDefinition({name})") +        } +        BlockToken::Block(block) => { +            print_block(i, block); +        } +        BlockToken::PinnedAddress(integer) => { +            indent!(i, "PinnedAddress"); +            print_integer_token(i+1, integer); +        } +        BlockToken::ConditionalBlock(condition) => { +            indent!(i, "ConditionalBlock"); +            indent!(i+1, "Predicate"); +            print_integer_token(i+2, &condition.predicate); +            indent!(i+1, "Body"); +            print_block_token(i+2, &condition.body); +        } +        BlockToken::WordTemplate(word_template) => { 
+            indent!(i, "WordTemplate({word_template})") +        } +    } +} + +fn print_block(i: usize, tokens: &[Tracked<BlockToken>]) { +    indent!(i, "Block"); +    for token in tokens { +        print_block_token(i+1, token); +    } +} + +fn print_invocation(i: usize, invocation: &Invocation) { +    indent!(i, "Invocation({})", invocation.name); +    for argument in &invocation.arguments { +        print_invocation_argument(i+1, argument); +    } +} + +fn print_invocation_argument(i: usize, argument: &InvocationArgument) { +    match &argument { +        InvocationArgument::String(string_literal) => { +            indent!(i, "String({string_literal})") +        } +        InvocationArgument::IntegerToken(integer) => { +            print_integer_token(i, integer) +        } +        InvocationArgument::BlockToken(block) => { +            print_block_token(i, block) +        } +        InvocationArgument::Invocation(invocation) => { +            print_invocation(i, invocation) +        } +    } +} + +fn print_integer_token(i: usize, integer: &IntegerToken) { +    match integer { +        IntegerToken::IntegerLiteral(value) => { +            indent!(i, "IntegerValue({value})") +        } +        IntegerToken::Expression(expression) => { +            print_expression(i, expression) +        } +        IntegerToken::Invocation(invocation) => { +            print_invocation(i, invocation) +        } +    } +} + +fn print_expression(i: usize, expression: &Expression) { +    indent!(i, "Expression"); +    for token in &expression.tokens { +        match &token.value { +            ExpressionToken::IntegerToken(integer) => { +                print_integer_token(i+1, &integer) +            } +            ExpressionToken::Invocation(invocation) => { +                print_invocation(i+1, &invocation); +            } +            ExpressionToken::Operator(operator) => { +                indent!(i+1, "Operator({operator})") +            } +        } +    } +} diff --git 
a/src/stages/syntactic.rs b/src/stages/syntactic.rs new file mode 100644 index 0000000..2e7f959 --- /dev/null +++ b/src/stages/syntactic.rs @@ -0,0 +1,323 @@ +use crate::*; + +use assembler::Tokeniser; + + +pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +    parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path)) +} + +fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> { +    t.add_delimiters(&['@','&','%',';',':','|','{','}','(',')','[',']','#','~','"','\'']); +    let mut tokens = Vec::new(); +    let mut errors = Vec::new(); + +    macro_rules! push_err { +        ($error:expr) => {{ +            push_err!($error, t.get_source()); +        }}; +        ($error:expr, $source:expr) => {{ +            errors.push(Tracked::from($error, $source)); +            continue; +        }}; +    } + +    loop { +        t.eat_whitespace(); +        t.mark_start(); +        let Some(c) = t.eat_char() else { break }; +        let token = match c { +            '"' => { +                let source = t.get_source(); +                t.mark_child(); +                let is_any_close = |t: &mut Tokeniser| { +                    t.eat_char() == Some('"') +                }; +                if let Some(_) = t.track_until(is_any_close) { +                    let child = t.tokenise_child_span(); +                    SyntacticToken::StringLiteral(parse_string_literal(child)) +                } else { +                    push_err!(SyntacticError::UnterminatedStringLiteral, source); +                } +            } +            '\'' => { +                let source = t.get_source(); +                let is_any_close = |t: &mut Tokeniser| { +                    t.eat_char() == Some('\'') +                }; +                if let Some(string) = t.track_until(is_any_close) { +                    let mut 
chars: Vec<char> = string.chars().collect(); +                    if chars.len() == 1 { +                        let value = parse_char(chars.pop().unwrap()); +                        SyntacticToken::IntegerLiteral(value) +                    } else { +                        t.mark_end(); +                        push_err!(SyntacticError::ExpectedSingleCharacter, t.get_source()); +                    } +                } else { +                    push_err!(SyntacticError::UnterminatedCharacterLiteral, source); +                } +            } + +            '{' => { +                let source = t.get_source(); +                t.mark_child(); +                let mut depth = 1; +                let is_matching_close = |t: &mut Tokeniser| { +                    match t.eat_char() { +                        Some('{') => { depth += 1; false } +                        Some('}') => { depth -= 1; depth == 0 } +                        _ => false, +                    } +                }; +                if let Some(_) = t.track_until(is_matching_close) { +                    let child = t.tokenise_child_span(); +                    match parse_syntactic_from_tokeniser(child) { +                        Ok(tokens) => SyntacticToken::BlockLiteral(tokens), +                        Err(mut parse_errors) => { +                            errors.append(&mut parse_errors); +                            continue; +                        } +                    } +                } else { +                    push_err!(SyntacticError::UnterminatedBlock, source); +                } +            } +            '[' => { +                let source = t.get_source(); +                t.mark_child(); +                let mut depth = 1; +                let is_matching_close = |t: &mut Tokeniser| { +                    match t.eat_char() { +                        Some('[') => { depth += 1; false } +                        Some(']') => { depth -= 1; depth == 0 } +                    
    _ => false, +                    } +                }; +                if let Some(_) = t.track_until(is_matching_close) { +                    let child = t.tokenise_child_span(); +                    match parse_syntactic_from_tokeniser(child) { +                        Ok(tokens) => SyntacticToken::Expression(tokens), +                        Err(mut parse_errors) => { +                            errors.append(&mut parse_errors); +                            continue; +                        } +                    } +                } else { +                    push_err!(SyntacticError::UnterminatedExpression, source); +                } +            } +            '(' => { +                let source = t.get_source(); +                let mut depth = 1; +                let is_matching_close = |t: &mut Tokeniser| { +                    match t.eat_char() { +                        Some('(') => { depth += 1; false } +                        Some(')') => { depth -= 1; depth == 0 } +                        _ => false, +                    } +                }; +                if let Some(string) = t.track_until(is_matching_close) { +                    // Check if the comment fills the entire line. 
+                    if t.start.position.column == 0 && t.end_of_line() { +                        if let Some(path) = string.strip_prefix(": ") { +                            t.embedded_path = Some(PathBuf::from(path.trim())); +                            t.embedded_first_line = t.start.position.line + 1; +                        } +                    } +                    continue; +                } else { +                    push_err!(SyntacticError::UnterminatedComment, source); +                } +            } +            '%' => { +                let name = t.eat_token(); +                let source = t.get_source(); +                t.mark_child(); +                let is_any_close = |t: &mut Tokeniser| t.eat_char() == Some(';'); +                if let Some(_) = t.track_until(is_any_close) { +                    let child = t.tokenise_child_span(); +                    match parse_syntactic_from_tokeniser(child) { +                        Ok(tokens) => { +                            let name = Tracked::from(name, source); +                            let def = SyntacticMacroDefinition { name, tokens }; +                            SyntacticToken::MacroDefinition(def) +                        } +                        Err(mut parse_errors) => { +                            errors.append(&mut parse_errors); +                            continue; +                        } +                    } +                } else { +                    push_err!(SyntacticError::UnterminatedMacroDefinition(name), source); +                } +            } + +            '}' => push_err!(SyntacticError::UnmatchedBlockTerminator), +            ']' => push_err!(SyntacticError::UnmatchedExpressionTerminator), +            ')' => push_err!(SyntacticError::UnmatchedCommentTerminator), +            ';' => push_err!(SyntacticError::UnmatchedMacroTerminator), + +            '@' => SyntacticToken::LabelDefinition(ScopedSymbol::Global(t.eat_token())), +            '&' => 
SyntacticToken::LabelDefinition(ScopedSymbol::Local(t.eat_token())), +            '~' => SyntacticToken::Symbol(ScopedSymbol::Local(t.eat_token())), +            ':' => SyntacticToken::Separator, +            '|' => SyntacticToken::Pin, +            '?' => SyntacticToken::Condition, + +            '#' => { +                t.mark_child(); +                t.eat_token(); +                let child = t.tokenise_child_span(); +                match parse_word_template(child) { +                    Ok(word_template) => SyntacticToken::WordTemplate(word_template), +                    Err(mut parse_errors) => { +                        errors.append(&mut parse_errors); +                        continue; +                    } +                } +            }, + +            c => { +                let token = format!("{c}{}", t.eat_token()); +                if let Some(hex_string) = token.strip_prefix("0x") { +                    match parse_integer_literal(hex_string, 16) { +                        Ok(value) => SyntacticToken::IntegerLiteral(value), +                        Err(_) => push_err!(SyntacticError::InvalidHexadecimalLiteral(token)), +                    } +                } else if let Some(binary_string) = token.strip_prefix("0b") { +                    match parse_integer_literal(binary_string, 2) { +                        Ok(value) => SyntacticToken::IntegerLiteral(value), +                        Err(_) => push_err!(SyntacticError::InvalidBinaryLiteral(token)), +                    } +                } else { +                    match parse_integer_literal(&token, 10) { +                        Ok(value) => SyntacticToken::IntegerLiteral(value), +                        Err(true) => push_err!(SyntacticError::InvalidDecimalLiteral(token)), +                        Err(false) => SyntacticToken::Symbol(ScopedSymbol::Global(token)), +                    } +                } +            } +        }; + +        t.mark_end(); +        
tokens.push(Tracked::from(token, t.get_source())) +    } +    match errors.is_empty() { +        true => Ok(tokens), +        false => Err(errors), +    } +} + + +fn parse_integer_literal(token: &str, radix: u32) -> Result<isize, bool> { +    match usize::from_str_radix(&token.replace('_', ""), radix) { +        Ok(value) => match isize::try_from(value) { +            Ok(value) => Ok(value), +            Err(_) => Err(true), +        } +        Err(_) => Err(false), +    } +} + + +fn parse_string_literal(mut t: Tokeniser) -> StringLiteral { +    let mut string = String::new(); +    let mut chars = Vec::new(); + +    while let Some(c) = t.eat_char() { +        string.push(c); +        chars.push(Tracked::from(parse_char(c), t.get_source())); +        t.mark_start(); +    } +    StringLiteral { string, chars } +} + +fn parse_char(c: char) -> isize { +    c as u32 as isize +} + + +fn parse_word_template(mut t: Tokeniser) -> Result<WordTemplate, Vec<Tracked<SyntacticError>>> { +    let mut value = 0;          // Value of the whole word template. +    let mut value_width = 0;    // Bit width of the whole word template. +    let mut field_width = 0;    // Width of the current bit field. +    let mut field_name = '\0';  // Name of the current bit field. +    let mut fields: Vec<Tracked<BitField>> = Vec::new(); +    let mut errors: Vec<Tracked<SyntacticError>> = Vec::new(); + +    macro_rules! push_field { +        () => { +            if fields.iter().any(|f| f.name == field_name) { +                let error = SyntacticError::DuplicateFieldNameInWord(field_name); +                errors.push(Tracked::from(error, t.get_source())); +            } else { +                let field = BitField { name: field_name, width: field_width, shift: 0}; +                fields.push(Tracked::from(field, t.get_source())); +            } +        }; +    } + +    while let Some(c) = t.eat_char() { +        // Ignore underscores. 
+        if c == '_' { +            t.mark.undo(); +            continue; +        } + +        // Add a bit to the value; +        value <<= 1; +        value_width += 1; +        for field in &mut fields { +            field.shift += 1; +        } + +        // Extend the current field. +        if c == field_name { +            field_width += 1; +            continue; +        } + +        // Commit the current field. +        if field_width > 0 { +            t.mark_end_prev(); +            push_field!(); +            field_width = 0; +            field_name = '\0'; +        } + +        // Parse bit literals. +        if c == '0' { +            continue; +        } +        if c == '1' { +            value |= 1; +            continue; +        } + +        t.mark_start_prev(); +        if c.is_alphabetic() { +            field_name = c; +            field_width = 1; +            continue; +        } else { +            t.mark_end(); +            let error = SyntacticError::InvalidCharacterInWord(c); +            errors.push(Tracked::from(error, t.get_source())); +        } +    } + +    // Commit the final field. 
+    for field in &mut fields { +        field.shift += 1; +    } +    if field_width > 0 { +        t.mark_end(); +        push_field!(); +    } + +    match errors.is_empty() { +        true => Ok(WordTemplate { value, width: value_width, fields }), +        false => Err(errors), +    } +} diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs new file mode 100644 index 0000000..eabf34b --- /dev/null +++ b/src/stages/syntactic_tokens.rs @@ -0,0 +1,160 @@ +use crate::*; + +pub enum SyntacticToken { +    LabelDefinition(ScopedSymbol), +    MacroDefinition(SyntacticMacroDefinition), + +    IntegerLiteral(isize), +    StringLiteral(StringLiteral), +    WordTemplate(WordTemplate), + +    BlockLiteral(Vec<Tracked<SyntacticToken>>), +    Expression(Vec<Tracked<SyntacticToken>>), + +    Symbol(ScopedSymbol), + +    Separator, +    Condition, +    Pin, +} + +pub struct SyntacticMacroDefinition { +    pub name: Tracked<String>, +    pub tokens: Vec<Tracked<SyntacticToken>>, +} + +pub struct StringLiteral { +    pub string: String, +    pub chars: Vec<Tracked<isize>>, +} + +impl std::fmt::Display for StringLiteral { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        self.string.fmt(f) +    } +} + +pub enum ScopedSymbol { +    Local(String), +    Global(String), +} + +impl std::fmt::Display for ScopedSymbol { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        match self { +            ScopedSymbol::Local(name) => write!(f, "~{name}"), +            ScopedSymbol::Global(name) => write!(f, "{name}"), +        } +    } +} + + +pub enum SyntacticError { +    UnterminatedBlock, +    UnterminatedExpression, +    UnterminatedComment, +    UnterminatedCharacterLiteral, +    UnterminatedStringLiteral, +    UnterminatedMacroDefinition(String), + +    UnmatchedBlockTerminator, +    UnmatchedExpressionTerminator, +    UnmatchedCommentTerminator, +    UnmatchedMacroTerminator, + +    
ExpectedSingleCharacter, + +    DuplicateFieldNameInWord(char), +    InvalidCharacterInWord(char), + +    InvalidDecimalLiteral(String), +    InvalidHexadecimalLiteral(String), +    InvalidBinaryLiteral(String), +} + + +pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) { +    for error in errors { +        report_syntactic_error(error, source_code); +    } +} + +fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        SyntacticError::UnterminatedBlock => +            "Block was not terminated, add a '}}' character to terminate", +        SyntacticError::UnterminatedExpression => +            "Expression was not terminated, add a ']' character to terminate", +        SyntacticError::UnterminatedComment => +            "Comment was not terminated, add a ')' character to terminate", +        SyntacticError::UnterminatedCharacterLiteral => +            "Character was not terminated, add a ' character to terminate", +        SyntacticError::UnterminatedStringLiteral => +            "String was not terminated, add a '\"' character to terminate", +        SyntacticError::UnterminatedMacroDefinition(name) => +            &format!("The '{name}' macro definition was not terminated, add a ';' character to terminate"), + +        SyntacticError::UnmatchedBlockTerminator => +            "Attempted to terminate a block, but no block was in progress", +        SyntacticError::UnmatchedExpressionTerminator => +            "Attempted to terminate an expression, but no expression was in progress", +        SyntacticError::UnmatchedCommentTerminator => +            "Attempted to terminate a comment, but no comment was in progress", +        SyntacticError::UnmatchedMacroTerminator => +            "Attempted to terminate a macro definition, but no macro definition was in progress", + +        
SyntacticError::ExpectedSingleCharacter => +            "A character literal must contain exactly one character", + +        SyntacticError::DuplicateFieldNameInWord(name) => +            &format!("The field '{name}' has already been used in this word"), +        SyntacticError::InvalidCharacterInWord(c) => +            &format!("The character '{c}' cannot be used in a word"), + +        SyntacticError::InvalidDecimalLiteral(string) => +            &format!("The string '{string}' is not a valid decimal literal"), +        SyntacticError::InvalidHexadecimalLiteral(string) => +            &format!("The string '{string}' is not a valid hexadecimal literal"), +        SyntacticError::InvalidBinaryLiteral(string) => +            &format!("The string '{string}' is not a valid binary literal"), +    }; + +    report_source_issue(LogLevel::Error, &context, message); +} + + +pub fn print_syntactic_token(i: usize, token: &SyntacticToken) { +    match token { +        SyntacticToken::LabelDefinition(symbol) => indent!(i, "LabelDefinition({symbol})"), +        SyntacticToken::MacroDefinition(definition) => { +            indent!(i, "MacroDefinition({})", definition.name); +            for token in &definition.tokens { +                print_syntactic_token(i+1, token); +            } +        } + +        SyntacticToken::IntegerLiteral(value) => indent!(i, "IntegerLiteral({value})"), +        SyntacticToken::StringLiteral(literal) => indent!(i, "StringLiteral({literal})"), +        SyntacticToken::WordTemplate(template) => indent!(i, "WordTemplate({template})"), + +        SyntacticToken::BlockLiteral(tokens) => { +            indent!(i, "BlockLiteral"); +            for token in tokens { +                print_syntactic_token(i+1, token); +            } +        } +        SyntacticToken::Expression(tokens) => { +            indent!(i, "Expression"); +            for token in tokens { +                print_syntactic_token(i+1, token); +            } +        } + +        
SyntacticToken::Symbol(symbol) => indent!(i, "Symbol({symbol})"), + +        SyntacticToken::Separator => indent!(i, "Separator"), +        SyntacticToken::Condition => indent!(i, "Condition"), +        SyntacticToken::Pin => indent!(i, "Pin"), +    } +} diff --git a/src/tokens/assembler.rs b/src/tokens/assembler.rs deleted file mode 100644 index 048062b..0000000 --- a/src/tokens/assembler.rs +++ /dev/null @@ -1,162 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub enum AssembledToken { -    Word(AssembledWord), -    LabelDefinition(LabelDefinition), -    PinnedAddress(PinnedAddress), -    Error(AssemblerError), -} - -#[derive(Clone)] -pub struct AssembledWord { -    pub source: SourceSpan, -    pub value: usize, -    pub bits: usize, -    pub fields: Vec<AssembledField>, -    pub errors: Vec<AssemblerError>, -} - -impl AssembledWord { -    pub fn count(&self) -> usize { -        // If there is at least one field, and all fields have empty string -        // values, then count will be zero. Else count will be at least one. 
-        let mut count = 0; -        let mut all_strings = !self.fields.is_empty(); -        for field in &self.fields { -            if let IntegerArgument::String(string) = &field.value { -                count = std::cmp::max(count, string.chars.len()); -            } else { -                all_strings = false; -            } -        } -        if !all_strings { -            count = std::cmp::max(count, 1); -        } -        return count; -    } -} - -#[derive(Clone)] -pub struct AssembledField { -    pub source: SourceSpan, -    pub value: IntegerArgument, -    /// Length of field in bits -    pub bits: usize, -    /// Distance to left-shift field in value -    pub shift: usize, -} - -#[derive(Clone)] -pub struct AssembledExpression { -    pub source: SourceSpan, -    pub tokens: Vec<AssembledExpressionToken>, -} - -#[derive(Clone)] -pub enum AssembledExpressionToken { -    Integer(TrackedInteger), -    LabelReference(Tracked<String>), -    Operator(Operator), -    Expression(Box<AssembledExpression>), -} - -#[derive(Clone)] -pub enum Argument { -    Integer(IntegerArgument), -    Block(Vec<AssembledToken>), -} - -#[derive(Clone)] -pub enum IntegerArgument { -    LabelReference(Tracked<String>), -    Integer(TrackedInteger), -    Expression(AssembledExpression), -    String(TrackedString), -} - -#[derive(Clone)] -pub struct AssemblerError { -    pub source: SourceSpan, -    pub variant: AssemblerErrorVariant, -} - -#[derive(Clone, Debug)] -pub enum AssemblerErrorVariant { -    DefinitionNotFound(String), -    NotAnInteger, -    NotABlock, -    IntegerInBlock, -    StringInExpression, -    /// expected, received -    IncorrectArgumentCount(usize, usize), -    /// expected, received, index -    IncorrectArgumentType(ArgumentVariant, ArgumentVariant), -} - -// ------------------------------------------------------------------------ // - -macro_rules! 
indent { -    ($indent:expr => $($tokens:tt)*) => {{ -        for _ in 0..$indent { print!("  "); } -        println!($($tokens)*); -    }}; -} - -pub fn print_assembled_tokens(tokens: &[AssembledToken]) { -    for token in tokens { -        match token { -            AssembledToken::LabelDefinition(definition) => { -                println!("LABEL {}", definition.name) -            } -            AssembledToken::PinnedAddress(address) => { -                println!("PINNED {}", address.address) -            } -            AssembledToken::Word(word) => { -                println!("WORD {:b}", word.value); -                for field in &word.fields { -                    print!("  FIELD ({} << {}) ", field.bits, field.shift); -                    match &field.value { -                        IntegerArgument::LabelReference(name) => { -                            println!("LABEL '{name}'"); -                        } -                        IntegerArgument::Integer(integer) => { -                            println!("INTEGER '{}'", integer.value); -                        } -                        IntegerArgument::String(string) => { -                            println!("STRING {string}"); -                        } -                        IntegerArgument::Expression(expr) => { -                            println!("EXPRESSION"); -                            print_assembled_expression(2, expr); -                        } -                    } -                } -            } -            AssembledToken::Error(error) => { -                println!("ERROR {:?}", error.variant) -            } -        } -    } -} - -fn print_assembled_expression(indent: usize, expr: &AssembledExpression) { -    for token in &expr.tokens { -        match token { -            AssembledExpressionToken::Integer(integer) => { -                indent!(indent => "INTEGER {}", integer.value) -            } -            AssembledExpressionToken::LabelReference(name) => { -                
indent!(indent => "LABEL '{name}'") -            } -            AssembledExpressionToken::Operator(operator) => { -                indent!(indent => "OPERATOR {operator:?}") -            } -            AssembledExpressionToken::Expression(expr) => { -                indent!(indent => "EXPRESSION"); -                print_assembled_expression(indent+1, expr); -            } -        } -    } -} diff --git a/src/tokens/bytecode.rs b/src/tokens/bytecode.rs deleted file mode 100644 index 9ac340e..0000000 --- a/src/tokens/bytecode.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::*; - - -pub struct Bytecode { -    pub words: Vec<Word>, -    pub errors: Vec<BytecodeError>, -} - -#[derive(Clone, Copy)] -pub struct Word { -    pub bits: usize, -    pub value: usize, -} - -impl std::fmt::Display for Word { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        for i in (0..self.bits).rev() { -            let is_first_bit = i+1 == self.bits; -            if !is_first_bit && (i+1) % 4 == 0 { -                write!(f, "_")?; -            } -            match (self.value >> i) & 1 { -                0 => write!(f, "0")?, -                _ => write!(f, "1")?, -            } -        } -        if self.bits == 0 { -            write!(f, "0")?; -        } -        return Ok(()); -    } -} - -pub struct BytecodeError { -    pub source: SourceSpan, -    pub variant: BytecodeErrorVariant, -} - -pub enum BytecodeErrorVariant { -    DefinitionNotFound(String), -    DuplicateLabelDefinition(String), -    /// pin, real -    PinnedAddressBacktrack(usize, usize), -    /// expected, received -    ValueTooLarge(usize, usize), -    StackUnderflow, -    MultipleReturnValues, -    NoReturnValue, -} diff --git a/src/tokens/expression.rs b/src/tokens/expression.rs deleted file mode 100644 index 1d8a336..0000000 --- a/src/tokens/expression.rs +++ /dev/null @@ -1,78 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct Expression { -    pub source: SourceSpan, -  
  pub tokens: Vec<ExpressionToken>, -} - -#[derive(Clone)] -pub struct ExpressionToken { -    pub source: SourceSpan, -    pub variant: ExpressionTokenVariant, -} - -#[derive(Clone)] -pub enum ExpressionTokenVariant { -    Invocation(String), -    Literal(isize), -    Operator(Operator), -    Error(ExpressionParseError), -} - -#[derive(Clone, Copy, Debug)] -pub enum Operator { -    Equal, -    NotEqual, -    LessThan, -    GreaterThan, -    LessThanEqual, -    GreaterThanEqual, -    Add, -    Subtract, -    LeftShift, -    RightShift, -    And, -    Or, -    Xor, -    Not, -} - -#[derive(Clone)] -pub enum ExpressionParseError { -    InvalidHexadecimalLiteral(String), -} - -impl std::fmt::Debug for Expression { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        for (i, token) in self.tokens.iter().enumerate() { -            let string = match &token.variant { -                ExpressionTokenVariant::Invocation(name) => name, -                ExpressionTokenVariant::Literal(value) => &value.to_string(), -                ExpressionTokenVariant::Operator(operator) => match operator { -                    Operator::Equal            => "=", -                    Operator::NotEqual         => "!=", -                    Operator::LessThan         => "<", -                    Operator::GreaterThan      => ">", -                    Operator::LessThanEqual    => "<=", -                    Operator::GreaterThanEqual => ">=", -                    Operator::Add              => "+", -                    Operator::Subtract         => "-", -                    Operator::LeftShift        => "<<", -                    Operator::RightShift       => ">>", -                    Operator::And              => "&", -                    Operator::Or               => "|", -                    Operator::Xor              => "^", -                    Operator::Not              => "~", -                } -                ExpressionTokenVariant::Error(_) => 
"<error>", -            }; -            match i { -                0 => write!(f, "{string}")?, -                _ => write!(f, " {string}")?, -            } -        } -        return Ok(()); -    } -} diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs deleted file mode 100644 index 53ccc6e..0000000 --- a/src/tokens/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -mod expression; -mod packed_binary_literal; -mod tracked_integer; -mod tracked; - -pub use expression::*; -pub use packed_binary_literal::*; -pub use tracked_integer::*; -pub use tracked::*; - -mod syntactic; -mod semantic; -mod assembler; -mod bytecode; - -pub use syntactic::*; -pub use semantic::*; -pub use assembler::*; -pub use bytecode::*; diff --git a/src/tokens/semantic.rs b/src/tokens/semantic.rs deleted file mode 100644 index 225cd6b..0000000 --- a/src/tokens/semantic.rs +++ /dev/null @@ -1,192 +0,0 @@ -use crate::*; - -use indexmap::IndexMap; - - -/// The entire semantic program, ready to generate bytecode. -pub struct SemanticProgram { -    pub macro_definitions: IndexMap<String, MacroDefinition>, -    pub label_definitions: IndexMap<String, LabelDefinition>, -    pub body: Vec<SemanticToken>, -} - -/// A symbol definition. 
-pub struct MacroDefinition { -    pub source: SourceSpan, -    pub arguments: Vec<ArgumentDefinition>, -    pub value: Value, -    pub errors: Vec<SemanticParseError>, -} - -pub struct ArgumentDefinition { -    pub name: String, -    pub source: SourceSpan, -    pub variant: ArgumentVariant, -} - -#[derive(PartialEq, Clone, Copy, Debug)] -pub enum ArgumentVariant { -    Integer, -    Block, -} - -pub struct ArgumentInvocation { -    pub source: SourceSpan, -    pub value: Value, -} - -pub enum Value { -    Integer(Integer), -    Block(Vec<SemanticToken>), -    Invocation(Invocation), -} - -pub enum Integer { -    Literal(TrackedInteger), -    String(TrackedString), -    Expression(Expression), -    LabelReference(Tracked<String>), -} - -pub enum SemanticToken { -    Word(PackedBinaryLiteral), -    Invocation(Invocation), -    LabelDefinition(LabelDefinition), -    PinnedAddress(PinnedAddress), -    Error(SemanticParseError), -} - -pub struct Invocation { -    pub name: String, -    pub source: SourceSpan, -    pub arguments: Vec<ArgumentInvocation>, -    pub errors: Vec<SemanticParseError>, -} - -#[derive(Clone)] -pub struct LabelDefinition { -    pub source: SourceSpan, -    pub name: String, -} - -#[derive(Clone)] -pub struct PinnedAddress { -    pub source: SourceSpan, -    pub address: usize, -} - -pub struct SemanticParseError { -    pub source: SourceSpan, -    pub variant: SemanticParseErrorVariant, -} - -pub enum SemanticParseErrorVariant { -    UnterminatedMacroDefinition(String), -    UnterminatedBlock, -    InvalidToken, -} - - -impl std::fmt::Display for ArgumentVariant { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        match self { -            ArgumentVariant::Integer => write!(f, "integer"), -            ArgumentVariant::Block => write!(f, "block"), -        } -    } -} - -// ------------------------------------------------------------------------ // - -macro_rules! 
indent { -    ($indent:expr => $($tokens:tt)*) => {{ -        for _ in 0..$indent { print!("  "); } -        println!($($tokens)*); -    }}; -} - -impl SemanticProgram { -    pub fn print_definitions(&self) { -        for (name, definition) in &self.macro_definitions { -            let variant = match &definition.value { -                Value::Integer(_) => "INTEGER", -                Value::Block(_) => "BLOCK", -                Value::Invocation(_) => "INVOCATION", -            }; -            println!("DEFINE {variant} '{name}'"); -            for argument in &definition.arguments { -                self.print_argument_definition(argument); -            } -            match &definition.value { -                Value::Integer(integer) => -                    self.print_integer(1, integer), -                Value::Block(block) => -                    self.print_block(1, block), -                Value::Invocation(invocation) => -                    indent!(1 => "INVOCATION '{}'", invocation.name), -            }; -            println!(); -        } - -        println!("LABELS"); -        for (name, _) in &self.label_definitions { -            println!("  @{name}"); -        } -        println!(); - -        self.print_block(0, &self.body); -    } - -    fn print_argument_definition(&self, argument: &ArgumentDefinition) { -        let variant = match argument.variant { -            ArgumentVariant::Integer => "INTEGER", -            ArgumentVariant::Block => "BLOCK", -        }; -        println!("  ARGUMENT {variant} '{}'", argument.name); -    } - -    fn print_integer(&self, indent: usize, integer: &Integer) { -        match &integer { -            Integer::Literal(value) => -                indent!(indent => "LITERAL {value}"), -            Integer::Expression(expr) => -                indent!(indent => "EXPRESSION [{expr:?}]"), -            Integer::String(string) => -                indent!(indent => "STRING '{string}'"), -            
Integer::LabelReference(name) => -                indent!(indent => "LABEL REFERENCE '{name}'"), -        } -    } - -    fn print_block(&self, indent: usize, block: &[SemanticToken]) { -        indent!(indent => "BLOCK"); -        for semantic_token in block { -            match &semantic_token { -                SemanticToken::Word(word) => -                    indent!(indent+1 => "WORD #{word}"), -                SemanticToken::Invocation(invocation) => -                    self.print_invocation(indent+1, invocation), -                SemanticToken::LabelDefinition(definition) => -                    indent!(indent+1 => "LABEL DEFINITION @{}", definition.name), -                SemanticToken::PinnedAddress(addr) => -                    indent!(indent+1 => "PINNED ADDRESS {}", addr.address), -                SemanticToken::Error(_) => -                    indent!(indent+1 => "ERROR"), -            } -        } -    } - -    fn print_invocation(&self, indent: usize, invocation: &Invocation) { -        indent!(indent => "INVOCATION '{}'", invocation.name); -        for argument in &invocation.arguments { -            match &argument.value { -                Value::Integer(integer) => -                    self.print_integer(indent+1, integer), -                Value::Block(block) => -                    self.print_block(indent+1, block), -                Value::Invocation(invocation) => -                    self.print_invocation(indent+1, invocation), -            }; -        } -    } -} diff --git a/src/tokens/syntactic.rs b/src/tokens/syntactic.rs deleted file mode 100644 index 780c950..0000000 --- a/src/tokens/syntactic.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::*; - - -pub struct SyntacticToken { -    pub source: SourceSpan, -    pub variant: SyntacticTokenVariant, -} - -pub enum SyntacticTokenVariant { -    LabelDefinition(String), -    MacroDefinition(String), -    MacroDefinitionTerminator, - -    IntegerLiteral(isize), -    
PackedBinaryLiteral(PackedBinaryLiteral), -    PinnedAddress(usize), - -    Expression(Expression), - -    String(TrackedString), - -    BlockOpen, -    BlockClose, -    Separator, - -    Symbol(String), - -    Error(SyntacticParseError), -} - -#[derive(Clone)] -pub struct TrackedString { -    pub source: SourceSpan, -    pub string: String, -    pub chars: Vec<Tracked<char>>, -} - -impl std::fmt::Display for TrackedString { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        self.string.fmt(f) -    } -} - -#[derive(Debug)] -pub enum SyntacticParseError { -    InvalidHexadecimalLiteral(String), -    InvalidDecimalLiteral(String), -    InvalidSymbolIdentifier(String), -    UnterminatedComment, -    UnterminatedString, -    UnterminatedExpression, -    LabelInMacroDefinition, -} - - -impl std::fmt::Debug for SyntacticToken { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        use SyntacticTokenVariant::*; -        let start = &self.source.in_merged; -        let name = match &self.variant { -            LabelDefinition(name) => format!("LabelDefinition({name})"), -            MacroDefinition(name) => format!("MacroDefinition({name})"), -            MacroDefinitionTerminator => format!("MacroDefinitionTerminator"), - -            IntegerLiteral(value) => format!("IntegerLiteral({value})"), -            PackedBinaryLiteral(pbl) => format!("PackedBinaryLiteral({pbl})"), -            PinnedAddress(value) => format!("PinnedAddress({value})"), - -            Expression(expr) => format!("Expression({expr:?})"), - -            String(string) => format!("String('{string}')"), - -            BlockOpen => format!("BlockOpen"), -            BlockClose => format!("BlockClose"), -            Separator => format!("Separator"), - -            Symbol(name) => format!("Symbol({name})"), - -            Error(error) => format!("Error({error:?})"), -        }; - -        write!(f, "{start} {name}") -    } -} diff 
--git a/src/tokens/tracked.rs b/src/tokens/tracked.rs deleted file mode 100644 index ea37047..0000000 --- a/src/tokens/tracked.rs +++ /dev/null @@ -1,47 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct Tracked<T> { -    pub source: SourceSpan, -    pub value: T, -} - -impl<T> Tracked<T> { -    pub fn from(value: T, source: SourceSpan) -> Self { -        Self { source, value } -    } -} - -impl<T> std::ops::Deref for Tracked<T> { -    type Target = T; -    fn deref(&self) -> &T { -        &self.value -    } -} - -impl<T> std::ops::DerefMut for Tracked<T> { -    fn deref_mut(&mut self) -> &mut T { -        &mut self.value -    } -} - -impl<T: std::fmt::Display> std::fmt::Display for Tracked<T> { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        write!(f, "{}", self.value) -    } -} - -impl<T: std::fmt::Debug> std::fmt::Debug for Tracked<T> { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        write!(f, "{:?}", self.value) -    } -} - -impl<T: PartialEq> PartialEq for Tracked<T> { -    fn eq(&self, other: &Tracked<T>) -> bool { -        self.value.eq(&other.value) -    } -} - -impl<T: Eq> Eq for Tracked<T> {} diff --git a/src/tokens/tracked_integer.rs b/src/tokens/tracked_integer.rs deleted file mode 100644 index fa55f09..0000000 --- a/src/tokens/tracked_integer.rs +++ /dev/null @@ -1,14 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct TrackedInteger { -    pub source: SourceSpan, -    pub value: isize, -} - -impl std::fmt::Display for TrackedInteger { -    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { -        write!(f, "{}", self.value) -    } -} diff --git a/src/types/expression_stack.rs b/src/types/expression_stack.rs new file mode 100644 index 0000000..4d26eb2 --- /dev/null +++ b/src/types/expression_stack.rs @@ -0,0 +1,89 @@ +use crate::*; + + +pub struct ExpressionStack { +    stack: Vec<isize>, +} + +impl ExpressionStack { +    pub fn 
new() -> Self { +        Self { +            stack: Vec::new(), +        } +    } + +    pub fn pull_result(mut self) -> Result<isize, StackError> { +        match self.stack.len() { +            0 => Err(StackError::NoReturnValue), +            1 => Ok(self.stack.pop().unwrap()), +            _ => Err(StackError::MultipleReturnValues), +        } +    } + +    pub fn push(&mut self, value: isize) { +        self.stack.push(value); +    } + +    pub fn apply(&mut self, operator: Operator, source: &SourceSpan) -> Result<(), Tracked<StackError>> { +        macro_rules! push { +            ($val:expr) => { self.stack.push($val) } +        } +        macro_rules! pop { +            ($name:ident) => { +                let $name = match self.stack.pop() { +                    Some(value) => value, +                    None => return Err(Tracked::from(StackError::Underflow, source.clone())), +                }; +            } +        } +        macro_rules! truth { +            ($bool:expr) => { match $bool { true => 1, false => 0 } }; +        } +        match operator { +            Operator::Equal            => { pop!(b); pop!(a); push!(truth!(a==b)) }, +            Operator::NotEqual         => { pop!(b); pop!(a); push!(truth!(a!=b)) }, +            Operator::LessThan         => { pop!(b); pop!(a); push!(truth!(a < b)) }, +            Operator::GreaterThan      => { pop!(b); pop!(a); push!(truth!(a > b)) }, +            Operator::LessThanEqual    => { pop!(b); pop!(a); push!(truth!(a <= b)) }, +            Operator::GreaterThanEqual => { pop!(b); pop!(a); push!(truth!(a >= b)) }, +            Operator::Add              => { pop!(b); pop!(a); push!(a + b) }, +            Operator::Subtract         => { pop!(b); pop!(a); push!(a - b) }, +            Operator::Multiply         => { pop!(b); pop!(a); push!(a * b) }, +            Operator::Divide           => { pop!(b); pop!(a); push!(a / b) }, +            Operator::Modulo           => { pop!(b); pop!(a); push!(a % b) }, 
+            Operator::Exponent         => { pop!(b); pop!(a); push!( +                if let Ok(b) = u32::try_from(b) { a.saturating_pow(b) } else { 0 }  ) }, +            Operator::LeftShift        => { pop!(b); pop!(a); push!( +                if b < 0 { a >> -b } else { a << b }  ) }, +            Operator::RightShift       => { pop!(b); pop!(a); push!( +                if b < 0 { a << -b } else { a >> b }  ) }, +            Operator::BitAnd           => { pop!(b); pop!(a); push!(a & b) }, +            Operator::BitOr            => { pop!(b); pop!(a); push!(a | b) }, +            Operator::BitXor           => { pop!(b); pop!(a); push!(a ^ b) }, +            Operator::BitNot           => {          pop!(a); push!(!a) }, +        } +        return Ok(()); +    } +} + + +pub enum StackError { +    Underflow, +    MultipleReturnValues, +    NoReturnValue, +} + + +pub fn report_stack_error(error: &Tracked<StackError>, source_code: &str) { +    let context = Context { source_code: &source_code, source: &error.source }; +    let message = match &error.value { +        StackError::Underflow => +            "A stack underflow occurred while evaluating this operator", +        StackError::MultipleReturnValues => +            "More than one value was left on the stack after this expression was evaluated", +        StackError::NoReturnValue => +            "No value was left on the stack after this expression was evaluated", +    }; + +    report_source_issue(LogLevel::Error, &context, message); +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..623d525 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,7 @@ +mod expression_stack; +mod operator; +mod word_template; + +pub use expression_stack::*; +pub use operator::*; +pub use word_template::*; diff --git a/src/types/operator.rs b/src/types/operator.rs new file mode 100644 index 0000000..a7e7b9b --- /dev/null +++ b/src/types/operator.rs @@ -0,0 +1,87 @@ +#[derive(Clone, Copy)] +pub enum 
Operator { +    Equal, +    NotEqual, +    LessThan, +    GreaterThan, +    LessThanEqual, +    GreaterThanEqual, +    Add, +    Subtract, +    Multiply, +    Divide, +    Modulo, +    Exponent, +    LeftShift, +    RightShift, +    BitAnd, +    BitOr, +    BitXor, +    BitNot, +} + +impl Operator { +    pub fn from_str(string: &str) -> Option<Self> { +        match string { +            "="     => Some(Operator::Equal), +            "=="    => Some(Operator::Equal), +            "<eq>"  => Some(Operator::Equal), +            "!="    => Some(Operator::NotEqual), +            "<neq>" => Some(Operator::NotEqual), +            "<"     => Some(Operator::LessThan), +            "<lth>" => Some(Operator::LessThan), +            ">"     => Some(Operator::GreaterThan), +            "<gth>" => Some(Operator::GreaterThan), +            "<="    => Some(Operator::LessThanEqual), +            "<leq>" => Some(Operator::LessThanEqual), +            ">="    => Some(Operator::GreaterThanEqual), +            "<geq>" => Some(Operator::GreaterThanEqual), +            "+"     => Some(Operator::Add), +            "<add>" => Some(Operator::Add), +            "-"     => Some(Operator::Subtract), +            "<sub>" => Some(Operator::Subtract), +            "*"     => Some(Operator::Multiply), +            "<mul>" => Some(Operator::Multiply), +            "/"     => Some(Operator::Divide), +            "<div>" => Some(Operator::Divide), +            "<mod>" => Some(Operator::Modulo), +            "**"    => Some(Operator::Exponent), +            "<exp>" => Some(Operator::Exponent), +            "<<"    => Some(Operator::LeftShift), +            "<shl>" => Some(Operator::LeftShift), +            ">>"    => Some(Operator::RightShift), +            "<shr>" => Some(Operator::RightShift), +            "<and>" => Some(Operator::BitAnd), +            "<or>"  => Some(Operator::BitOr), +            "<xor>" => Some(Operator::BitXor), +            "<not>" => Some(Operator::BitNot), +            _    
  => None, +        } +    } +} + +impl std::fmt::Display for Operator { +    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { +        let string = match self { +            Operator::Equal            => "<eq>", +            Operator::NotEqual         => "<neq>", +            Operator::LessThan         => "<lth>", +            Operator::GreaterThan      => "<gth>", +            Operator::LessThanEqual    => "<leq>", +            Operator::GreaterThanEqual => "<geq>", +            Operator::Add              => "<add>", +            Operator::Subtract         => "<sub>", +            Operator::Multiply         => "<mul>", +            Operator::Divide           => "<div>", +            Operator::Modulo           => "<mod>", +            Operator::Exponent         => "<exp>", +            Operator::LeftShift        => "<shl>", +            Operator::RightShift       => "<shr>", +            Operator::BitAnd           => "<and>", +            Operator::BitOr            => "<or>", +            Operator::BitXor           => "<xor>", +            Operator::BitNot           => "<not>", +        }; +        write!(f, "{string}") +    } +} diff --git a/src/tokens/packed_binary_literal.rs b/src/types/word_template.rs index a2720b7..33d5933 100644 --- a/src/tokens/packed_binary_literal.rs +++ b/src/types/word_template.rs @@ -1,35 +1,23 @@  use crate::*; -pub struct PackedBinaryLiteral { -    pub source: SourceSpan, +pub struct WordTemplate {      pub value: usize, -    pub bits: usize, -    pub fields: Vec<BitField>, -    pub errors: Vec<PackedBinaryLiteralParseError>, +    /// Width of the word in bits. 
+    pub width: u32, +    pub fields: Vec<Tracked<BitField>>,  }  pub struct BitField {      pub name: char, -    pub source: SourceSpan, -    /// Length of field in bits -    pub bits: usize, -    /// Distance to left-shift field in value -    pub shift: usize, -} - -pub struct PackedBinaryLiteralParseError { -    pub source: SourceSpan, -    pub variant: PackedBinaryLiteralParseErrorVariant, -} - -pub enum PackedBinaryLiteralParseErrorVariant { -    DuplicateFieldName(char), -    InvalidCharacter(char), +    /// Width of the field in bits. +    pub width: u32, +    /// Number of bits to the right of the field in the word. +    pub shift: u32,  } -impl std::fmt::Display for PackedBinaryLiteral { +impl std::fmt::Display for WordTemplate {      fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {          if self.value == 0 {              write!(f, "0")?; @@ -41,7 +29,8 @@ impl std::fmt::Display for PackedBinaryLiteral {                      write!(f, "_")?;                  }                  for field in &self.fields { -                    if i <= field.bits + field.shift - 1 && i >= field.shift { +                    let i = i as u32; +                    if i <= field.width + field.shift - 1 && i >= field.shift {                          write!(f, "{}", field.name)?;                          continue 'bit;                      } | 
