From 1ecee352f5844b0809d7ae66df52e34f42b44c8e Mon Sep 17 00:00:00 2001
From: Ben Bridle <ben@derelict.engineering>
Date: Thu, 6 Mar 2025 20:33:27 +1300
Subject: Rewrite entire assembler

The language is now more general, the code is better structured, error
reporting is more detailed, and many new language features have
been implemented:
- conditional blocks
- first-class strings
- more expression operators
- binary literals
- negative values
- invocations in constant expressions
---
 src/formats/debug.rs  | 18 +++++++++++
 src/formats/inhx.rs   | 28 +++++++++++------
 src/formats/inhx32.rs | 39 +++++++++++++++--------
 src/formats/mod.rs    | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/formats/raw.rs    | 29 +++++++++++++++++
 5 files changed, 178 insertions(+), 23 deletions(-)
 create mode 100644 src/formats/debug.rs
 create mode 100644 src/formats/raw.rs

(limited to 'src/formats')

diff --git a/src/formats/debug.rs b/src/formats/debug.rs
new file mode 100644
index 0000000..23fd34f
--- /dev/null
+++ b/src/formats/debug.rs
@@ -0,0 +1,27 @@
+use crate::*;
+
+
+/// Render `segments` as a plain-text listing for inspection.
+///
+/// Each segment starts with a `SEGMENT: 0x…` line giving its address in
+/// hexadecimal, followed by one indented line per word. Words are
+/// right-aligned to the widest word within the same segment.
+///
+/// No `Err` is ever produced here; the `Result` return type matches the
+/// other `format_*` functions.
+pub fn format_debug(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {
+    let mut output = String::new();
+    for segment in segments {
+        // Stringify each word once; the text is needed both to measure the
+        // column width and to print the word.
+        let strings: Vec<String> = segment.words.iter().map(|w| w.to_string()).collect();
+        let width = strings.iter().map(|s| s.chars().count()).max().unwrap_or(0);
+        let address = &segment.address;
+        output.push_str(&format!("SEGMENT: 0x{address:>04x}\n"));
+        for string in &strings {
+            output.push_str(&format!("  {string:>width$}\n"));
+        }
+    }
+    // Hand over the string's buffer instead of copying it.
+    return Ok(output.into_bytes());
+}
diff --git a/src/formats/inhx.rs b/src/formats/inhx.rs
index e83e870..fc4791b 100644
--- a/src/formats/inhx.rs
+++ b/src/formats/inhx.rs
@@ -1,10 +1,15 @@
 use crate::*;
 
 
-pub fn format_inhx(words: &[Word]) -> String {
+pub fn format_inhx(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {
     let mut records = Vec::new();
-    for (i, chunk) in words.chunks(16).enumerate() {
-        records.push(data_record(chunk, (i * 16) as u16));
+    let mut address;
+    for segment in segments {
+        address = segment.address;
+        for chunk in segment.words.chunks(16) {
+            records.push(data_record(chunk, address)?);
+            address += 16;
+        }
     }
     records.push(terminating_record());
 
@@ -12,21 +17,24 @@ pub fn format_inhx(words: &[Word]) -> String {
     for record in records {
         output.push_str(&record.to_string());
     }
-    return output;
+    return Ok(output.as_bytes().to_vec());
 }
 
-fn data_record(words: &[Word], address: u16) -> InhxRecord {
+fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> { // Layout: word count, 16-bit address, a 0x00 byte, then one byte per word.
+    let Ok(address) = u16::try_from(address) else { // the address is written as a 16-bit value below
+        return Err(FormatError::AddressTooLarge(u16::MAX as usize, address)); // `address` here is still the original usize argument
+    };
     let mut record = InhxRecord::new();
     record.byte((words.len()) as u8);
     record.be_double(address);
     record.byte(0x00);
     for word in words {
-        match word.bits <= 8 {
-            true => record.byte(word.value as u8),
-            false => panic!("Word '{word}' has more than 8 bits."),
-        };
+        if word.value.width > 8 { // each word is written as exactly one byte
+            return Err(FormatError::WordTooWide(8, word.width, word.source.clone()));
+        }
+        record.byte(word.value.value as u8);
     }
-    return record;
+    return Ok(record);
 }
 
 fn terminating_record() -> InhxRecord {
diff --git a/src/formats/inhx32.rs b/src/formats/inhx32.rs
index fd7fd7b..8febeae 100644
--- a/src/formats/inhx32.rs
+++ b/src/formats/inhx32.rs
@@ -1,11 +1,19 @@
 use crate::*;
 
 
-pub fn format_inhx32(words: &[Word]) -> String {
+pub fn format_inhx32(segments: &[Segment]) -> Result<Vec<u8>, FormatError> {
     let mut records = Vec::new();
-    records.push(extended_linear_address(0x0000));
-    for (i, chunk) in words.chunks(8).enumerate() {
-        records.push(data_record(chunk, (i * 8) as u16));
+    let mut address = 0;
+    records.push(extended_linear_address(0));
+    for segment in segments {
+        if (segment.address >> 16) != (address >> 16) {
+            records.push(extended_linear_address(segment.address));
+        }
+        address = segment.address;
+        for chunk in segment.words.chunks(8) {
+            records.push(data_record(chunk, address)?);
+            address += 8;
+        }
     }
     records.push(terminating_record());
 
@@ -13,24 +21,29 @@ pub fn format_inhx32(words: &[Word]) -> String {
     for record in records {
         output.push_str(&record.to_string());
     }
-    return output;
+    return Ok(output.as_bytes().to_vec());
 }
 
-fn data_record(words: &[Word], address: u16) -> InhxRecord {
+fn data_record(words: &[Tracked<Word>], address: usize) -> Result<InhxRecord, FormatError> { // Layout: byte count, 16-bit address, a 0x00 byte, then two bytes per word.
+    let Ok(address) = u32::try_from(address * 2) else { // the incoming address is doubled before it is written
+        return Err(FormatError::AddressTooLarge(u32::MAX as usize / 2, address)); // `address` here is still the original, undoubled argument
+    };
+    let address = address as u16; // keeps the low 16 bits only. NOTE(review): the high bits are dropped here, so they presumably must be carried by an extended linear address record - confirm the caller derives it from the doubled address
     let mut record = InhxRecord::new();
     record.byte((words.len() * 2) as u8);
-    record.be_double(address * 2);
+    record.be_double(address);
     record.byte(0x00);
     for word in words {
-        match word.bits <= 16 {
-            true => record.le_double(word.value as u16),
-            false => panic!("Word '{word}' has more than 16 bits."),
-        };
+        if word.value.width > 16 { // each word is written as exactly two bytes
+            return Err(FormatError::WordTooWide(16, word.width, word.source.clone()));
+        }
+        record.le_double(word.value.value as u16);
     }
-    return record;
+    return Ok(record);
 }
 
-fn extended_linear_address(address: u16) -> InhxRecord {
+fn extended_linear_address(address: usize) -> InhxRecord {
+    let address = (address >> 16) as u16;
     let mut record = InhxRecord::new();
     record.byte(0x02);
     record.be_double(0x0000);
diff --git a/src/formats/mod.rs b/src/formats/mod.rs
index 82f19f1..132001a 100644
--- a/src/formats/mod.rs
+++ b/src/formats/mod.rs
@@ -1,8 +1,78 @@
 mod inhx;
 mod inhx32;
+mod raw;
+mod debug;
 
 pub use inhx::*;
 pub use inhx32::*;
+pub use raw::*;
+pub use debug::*;
+
+use crate::*;
+
+use log::*;
+
+
+#[derive(Clone, Copy, PartialEq)]
+pub enum Format { // One variant per format name accepted by `Format::from_str`.
+    Debug, // "debug"
+    Inhx, // "inhx"
+    Inhx32, // "inhx32"
+    Raw, // "raw"
+    Source, // "source"
+}
+
+impl Format {
+    pub fn from_str(string: &str) -> Self { // Maps a format name to its variant; the match is exact and case-sensitive.
+        match string {
+            "debug" => Self::Debug,
+            "inhx" => Self::Inhx,
+            "inhx32" => Self::Inhx32,
+            "raw" => Self::Raw,
+            "source" => Self::Source,
+            _ => fatal!("Unknown format '{string}', expected 'debug', 'inhx', 'inhx32', 'raw', or 'source'. "), // NOTE(review): this arm yields no Format, so fatal! presumably reports and terminates - confirm
+        }
+    }
+}
+
+impl std::fmt::Display for Format {
+    /// Writes the lowercase name that `Format::from_str` maps to this variant.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        f.write_str(match self {
+            Self::Debug => "debug",
+            Self::Inhx => "inhx",
+            Self::Inhx32 => "inhx32",
+            Self::Raw => "raw",
+            Self::Source => "source",
+        })
+    }
+}
+
+
+pub enum FormatError {
+    /// (maximum allowed address, offending address)
+    AddressTooLarge(usize, usize),
+    /// (maximum allowed width in bits, offending width in bits, source location of the word)
+    WordTooWide(u32, u32, SourceSpan),
+    /// The format needs every word to have the same width, but the widths differ.
+    ExpectedFixedWidth,
+}
+
+pub fn report_format_error(error: &FormatError, format: Format, source_code: &str) { // Reports `error` for the chosen `format`, then ends the process; control never returns to the caller.
+    match error {
+        FormatError::AddressTooLarge(expected, received) =>
+            error!("The {format} format requires that addresses do not exceed {expected}, but the address {received} was reached"),
+        FormatError::WordTooWide(expected, received, source) => { // the only variant that carries a source span, so it is reported against the source code
+            let message = format!("The {format} format requires that words are no wider than {expected} bits, but a {received} bit word was found");
+            let context = Context { source_code, source };
+            report_source_issue(LogLevel::Error, &context, &message);
+        }
+        FormatError::ExpectedFixedWidth =>
+            error!("The {format} format requires all words to be the same width"),
+    }
+    std::process::exit(1); // every variant ends here, with exit status 1
+}
+
 
 
 pub struct InhxRecord {
@@ -43,3 +113,20 @@ impl InhxRecord {
         format!(":{output}{checksum:0>2X}\n")
     }
 }
+
+
+/// Find the one bit width shared by every word in `segments`.
+///
+/// Returns `Some(0)` if there are no words and `None` if any two words differ.
+pub fn calculate_fixed_width(segments: &[Segment]) -> Option<u32> {
+    let mut widths = segments.iter()
+        .flat_map(|segment| segment.words.iter())
+        .map(|word| word.value.width);
+    // With no words nothing can conflict, so the width is reported as zero.
+    let Some(first) = widths.next() else {
+        return Some(0);
+    };
+    // The first word fixes the width; every later word has to match it.
+    let uniform = widths.all(|width| width == first);
+    return uniform.then_some(first);
+}
diff --git a/src/formats/raw.rs b/src/formats/raw.rs
new file mode 100644
index 0000000..ecc6473
--- /dev/null
+++ b/src/formats/raw.rs
@@ -0,0 +1,44 @@
+use crate::*;
+
+
+/// Emit the words of every segment as raw big-endian bytes.
+///
+/// Each word occupies `ceil(width / 8)` bytes, most significant byte first.
+/// `width` is the word width in bits; when `None`, it is taken from
+/// `calculate_fixed_width`, and `FormatError::ExpectedFixedWidth` is returned
+/// if the words do not share a single width.
+///
+/// Gaps before a segment's start address are zero-filled. A segment whose
+/// address lies behind the current output position gets no padding and is
+/// appended directly after the preceding words.
+pub fn format_raw(segments: &[Segment], width: Option<u32>) -> Result<Vec<u8>, FormatError> {
+    let Some(width) = width.or_else(|| calculate_fixed_width(segments)) else {
+        return Err(FormatError::ExpectedFixedWidth);
+    };
+
+    // Word address of the next word to be written to the output.
+    let mut address = 0;
+    let bytes_per_word = ((width + 7) / 8) as usize;
+    let mut bytes = Vec::new();
+
+    for segment in segments {
+        // Pad to the segment start address.
+        let padding = segment.address.saturating_sub(address);
+        bytes.resize(bytes.len() + (padding * bytes_per_word), 0);
+        // The padding words advance the output position too. Without this,
+        // every later gap would be measured from too low an address and
+        // padded too much.
+        address += padding;
+        for word in &segment.words {
+            // Decompose word value into bytes.
+            let value = word.value.value;
+            for i in (0..bytes_per_word).rev() {
+                let byte = (value >> (i*8) & 0xff) as u8;
+                bytes.push(byte);
+            }
+            address += 1;
+        }
+    }
+
+    return Ok(bytes);
+}
-- 
cgit v1.2.3-70-g09d2