From d9e0c4895608cdcb04b868222b49b3f117766ed0 Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Tue, 14 Oct 2025 21:33:30 +1300 Subject: Implement new bytecode stage This completes the Torque version 3 rewrite, other than some extensive testing that is yet to be done. --- src/bin/tq.rs | 58 +++++++++++++++++---------------------- src/lib.rs | 4 +-- src/stages/bytecode.rs | 47 +++++++++++++++++++++++++++++++ src/stages/bytecode_tokens.rs | 42 ++++++++++++++++++++++++++++ src/stages/intermediate.rs | 2 +- src/stages/intermediate_tokens.rs | 24 ++++++++++++++-- src/stages/mod.rs | 4 +++ 7 files changed, 143 insertions(+), 38 deletions(-) create mode 100644 src/stages/bytecode.rs create mode 100644 src/stages/bytecode_tokens.rs diff --git a/src/bin/tq.rs b/src/bin/tq.rs index 2afe8ec..4e1065a 100644 --- a/src/bin/tq.rs +++ b/src/bin/tq.rs @@ -31,7 +31,7 @@ fn main() { let no_libs = args.get("no-libs").as_bool(); let no_project_libs = args.get("no-project-libs").as_bool(); let no_env_libs = args.get("no-env-libs").as_bool(); - // let format = Format::from_str(args.get("format").as_str()); + let format = Format::from_str(args.get("format").as_str()); let width = args.get("width").as_u32_opt(); let dry_run = args.get("dry-run").as_bool(); let print_tree = args.get("tree").as_bool(); @@ -138,9 +138,9 @@ Output formats: std::process::exit(1); }); - // if !dry_run && format == Format::Source { - // write_bytes_and_exit(merged_source.as_bytes(), destination.as_ref()); - // } + if !dry_run && format == Format::Source { + write_bytes_and_exit(merged_source.as_bytes(), destination.as_ref()); + } // ----------------------------------------------------------------------- @@ -169,36 +169,28 @@ Output formats: } }; - // TODO - println!("INTERMEDIATE:"); - for token in &intermediate { - print_intermediate_token(1, token); + let segments = match parse_bytecode(intermediate, width) { + Ok(segments) => segments, + Err(errors) => { + report_bytecode_errors(&errors, &merged_source); + std::process::exit(1); + } + }; + + if !dry_run { + let result = match format { + Format::Cmd => format_cmd(&segments), + Format::Debug => format_debug(&segments), + Format::Inhx => format_inhx(&segments), + Format::Inhx32 => format_inhx32(&segments), + Format::Raw => format_raw(&segments, width), + Format::Source => unreachable!("Source output is handled before full assembly"), + }; + match result { + Ok(bytes) => write_bytes_and_exit(&bytes, destination.as_ref()), + Err(error) => report_format_error(&error, format, &merged_source), + } } - println!(); - - // let segments = match parse_bytecode(intermediate, width) { - // Ok(segments) => segments, - // Err(errors) => { - // report_bytecode_errors(&errors, &merged_source); - // std::process::exit(1); - // } - // }; - - - // if !dry_run { - // let result = match format { - // Format::Cmd => format_cmd(&segments), - // Format::Debug => format_debug(&segments), - // Format::Inhx => format_inhx(&segments), - // Format::Inhx32 => format_inhx32(&segments), - // Format::Raw => format_raw(&segments, width), - // Format::Source => unreachable!("Source output is handled before full assembly"), - // }; - // match result { - // Ok(bytes) => write_bytes_and_exit(&bytes, destination.as_ref()), - // Err(error) => report_format_error(&error, format, &merged_source), - // } - // } } diff --git a/src/lib.rs b/src/lib.rs index e10361d..9b9b87a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,11 +2,11 @@ const MAX_ITERATIONS_TO_STABILISE: usize = 4; mod stages; mod types; -// mod formats; +mod formats; pub use stages::*; pub use types::*; -// pub use formats::*; +pub use formats::*; pub use assembler::*; diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs new file mode 100644 index 0000000..2d73767 --- /dev/null +++ b/src/stages/bytecode.rs @@ -0,0 +1,47 @@ +use crate::*; + + +pub fn parse_bytecode(intermediate: Vec>, width: Option) -> Result, Vec>> { + let mut segments = Vec::new(); + let mut errors = Vec::new(); + let mut current_segment = Vec::new(); + let mut segment_source = None; + let mut segment_address = 0; + + + for token in intermediate { + match token.value { + IntermediateToken::Word(word) => { + if let Some(width) = width { + if word.width != width { + let error = BytecodeError::IncorrectWidth(width, word.width); + errors.push(Tracked::from(error, token.source.clone())); + } + } + let source = token.source.clone(); + current_segment.push(Tracked::from(word, source)); + } + IntermediateToken::PinnedAddress(pinned_address) => { + if !current_segment.is_empty() { + let address = segment_address; + let words = std::mem::take(&mut current_segment); + let source = std::mem::take(&mut segment_source); + segments.push(Segment { address, source, words }); + segment_address = pinned_address; + } + } + } + } + // Finish final segment. + if !current_segment.is_empty() { + let address = segment_address; + let words = std::mem::take(&mut current_segment); + let source = std::mem::take(&mut segment_source); + segments.push(Segment { address, source, words }); + } + + match errors.is_empty() { + true => Ok(segments), + false => Err(errors), + } +} diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs new file mode 100644 index 0000000..5020827 --- /dev/null +++ b/src/stages/bytecode_tokens.rs @@ -0,0 +1,42 @@ +use crate::*; + +pub struct Segment { + pub address: usize, + /// Source of the address value. + pub source: Option, + pub words: Vec>, +} + +pub fn print_segment(segment: &Segment) { + println!("SEGMENT: 0x{:>04x}", segment.address); + // Find maximum width of all words in the segment. + let width = segment.words.iter().map(|w| w.to_string().chars().count()).max().unwrap_or(0); + for word in &segment.words { + let string = word.to_string(); + println!(" {string:>w$}", w=width as usize); + } + +} + +pub enum BytecodeError { + // (expected, received) + IncorrectWidth(u32, u32) +} + +pub fn report_bytecode_errors(errors: &[Tracked], source_code: &str) { + for error in errors { + report_bytecode_error(error, source_code); + } +} + +fn report_bytecode_error(error: &Tracked, source_code: &str) { + let context = Context { source_code: &source_code, source: &error.source }; + let message = match &error.value { + BytecodeError::IncorrectWidth(expected, received) => + &format!("Word is {received} bits wide, but should be exactly {expected} bits wide"), + }; + + report_source_issue(LogLevel::Error, &context, message); +} + + diff --git a/src/stages/intermediate.rs b/src/stages/intermediate.rs index 7523baf..07773a0 100644 --- a/src/stages/intermediate.rs +++ b/src/stages/intermediate.rs @@ -266,7 +266,7 @@ impl IntermediateParser { } } } - let word = IntermediateWord { width: word_width, value: word_value }; + let word = Word { width: word_width, value: word_value }; let token = IntermediateToken::Word(word); intermediate.push(Tracked::from(token, source.clone())); self.address += 1; diff --git a/src/stages/intermediate_tokens.rs b/src/stages/intermediate_tokens.rs index 71dbd62..2fb29fa 100644 --- a/src/stages/intermediate_tokens.rs +++ b/src/stages/intermediate_tokens.rs @@ -3,17 +3,37 @@ use crate::*; #[derive(Clone)] pub enum IntermediateToken { - Word(IntermediateWord), + Word(Word), PinnedAddress(usize), } #[derive(Clone)] -pub struct IntermediateWord { +pub struct Word { pub value: usize, /// Width of the word in bits. pub width: u32, } +impl std::fmt::Display for Word { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + if self.width == 0 { + write!(f, "0") + } else { + for i in (0..self.width).rev() { + let is_first_bit = i+1 == self.width; + if !is_first_bit && (i+1) % 4 == 0 { + write!(f, "_")?; + } + match (self.value >> i) & 1 { + 0 => write!(f, "0")?, + _ => write!(f, "1")?, + } + } + Ok(()) + } + } +} + #[derive(Clone)] pub enum IntermediateValue { Integer(IntermediateInteger), diff --git a/src/stages/mod.rs b/src/stages/mod.rs index bf0d5a6..571fd65 100644 --- a/src/stages/mod.rs +++ b/src/stages/mod.rs @@ -5,6 +5,8 @@ mod semantic; mod semantic_tokens; mod intermediate; mod intermediate_tokens; +mod bytecode; +mod bytecode_tokens; pub use compiler::*; pub use syntactic::*; @@ -13,6 +15,8 @@ pub use semantic::*; pub use semantic_tokens::*; pub use intermediate::*; pub use intermediate_tokens::*; +pub use bytecode::*; +pub use bytecode_tokens::*; #[macro_export] -- cgit v1.2.3-70-g09d2