summaryrefslogtreecommitdiff
path: root/src/stages
diff options
context:
space:
mode:
authorBen Bridle <bridle.benjamin@gmail.com>2025-05-29 12:18:16 +1200
committerBen Bridle <bridle.benjamin@gmail.com>2025-05-29 12:18:28 +1200
commitff99a5df4c5a3265d215afa4b937fbb95a55b96c (patch)
treeeb299429a57233881c47eb86622498eba841f03e /src/stages
downloadbedrock-asm-ff99a5df4c5a3265d215afa4b937fbb95a55b96c.zip
Initial commit
Diffstat (limited to 'src/stages')
-rw-r--r--src/stages/bytecode.rs158
-rw-r--r--src/stages/bytecode_tokens.rs37
-rw-r--r--src/stages/compiler.rs84
-rw-r--r--src/stages/mod.rs26
-rw-r--r--src/stages/semantic.rs141
-rw-r--r--src/stages/semantic_tokens.rs94
-rw-r--r--src/stages/syntactic.rs220
-rw-r--r--src/stages/syntactic_tokens.rs94
8 files changed, 854 insertions, 0 deletions
diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs
new file mode 100644
index 0000000..6878c42
--- /dev/null
+++ b/src/stages/bytecode.rs
@@ -0,0 +1,158 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+/// Assemble a semantically-parsed program into bytecode.
+///
+/// Every token of `semantic` is converted to bytes, label references are
+/// back-filled once all label addresses are known, and one
+/// [`AssembledSymbol`] is produced per label. The bytecode is returned
+/// exactly as generated: trailing null bytes are not truncated.
+///
+/// Symbols are returned sorted by address and then by name, so the result
+/// does not depend on `HashMap` iteration order.
+///
+/// # Errors
+/// Returns every [`BytecodeError`] recorded during generation if at least
+/// one label or block address exceeded `0xFFFF`.
+pub fn generate_bytecode(semantic: &Program) -> Result<AssembledProgram, Vec<Tracked<BytecodeError>>> {
+    let mut generator = BytecodeGenerator::new(&semantic.definitions);
+    generator.parse(&semantic.tokens, false);
+    generator.fill_slots();
+
+    let mut symbols = Vec::with_capacity(generator.labels.len());
+    for (name, information) in generator.labels {
+        // Every key in `labels` was copied from `semantic.definitions` by
+        // `BytecodeGenerator::new`, so this lookup cannot fail.
+        let source = semantic.definitions.get(&name).unwrap().source.clone();
+        let address = information.address;
+        symbols.push(AssembledSymbol { name, address, source });
+    }
+    // `HashMap` iteration order is unspecified; sort so that the symbol list
+    // is identical from one run to the next.
+    symbols.sort_by(|a, b| a.address.cmp(&b.address).then_with(|| a.name.cmp(&b.name)));
+
+    if generator.errors.is_empty() {
+        Ok(AssembledProgram { bytecode: generator.bytecode, symbols })
+    } else {
+        Err(generator.errors)
+    }
+}
+
+
+/// Working state used while converting semantic tokens into bytecode.
+pub struct BytecodeGenerator<'a> {
+ /// Every definition in the program, keyed by name.
+ definitions: &'a HashMap<String, Tracked<Definition>>,
+ /// Address and pending reference slots of each label, keyed by label name.
+ labels: HashMap<String, LabelInformation>,
+ /// Bytecode offsets of the placeholder written for each block that is still open.
+ stack: Vec<usize>,
+ /// The bytecode generated so far.
+ bytecode: Vec<u8>,
+ /// Errors recorded during generation.
+ errors: Vec<Tracked<BytecodeError>>,
+}
+
+/// What the generator knows about a single label.
+struct LabelInformation {
+ /// Bytecode offset of the label definition; 0 until the definition token is reached.
+ address: usize,
+ /// Bytecode offsets of the two-byte placeholders that must receive `address`.
+ slots: Vec<usize>,
+}
+
+impl<'a> BytecodeGenerator<'a> {
+ /// Create a generator for the given definitions.
+ ///
+ /// Every label definition found in `definitions` gets an entry in `labels`
+ /// with a placeholder address of 0 and no reference slots.
+ pub fn new(definitions: &'a HashMap<String, Tracked<Definition>>) -> Self {
+ let mut labels = HashMap::new();
+ for (name, definition) in definitions {
+ if let DefinitionVariant::LabelDefinition = definition.variant {
+ // Use fake address for now.
+ let information = LabelInformation { address: 0, slots: Vec::new() };
+ labels.insert(name.to_string(), information);
+ }
+ }
+ Self {
+ definitions,
+ labels,
+ stack: Vec::new(),
+ bytecode: Vec::new(),
+ errors: Vec::new(),
+ }
+ }
+
+ /// Append the bytecode for `tokens` to `self.bytecode`.
+ ///
+ /// `in_macro` is true when `tokens` is the body of a macro being expanded;
+ /// label and macro definitions are then treated as unreachable.
+ ///
+ /// Label references are written as two zero bytes and remembered in
+ /// `labels`; call `fill_slots` afterwards to write the real addresses.
+ /// Label or block addresses above 0xFFFF are recorded in `self.errors`.
+ ///
+ /// # Panics
+ /// Panics via `unreachable!` on conditions that earlier stages are expected
+ /// to have rejected: unmatched or unterminated blocks, undefined symbols,
+ /// and definitions inside a macro body.
+ pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) {
+ // Append a single byte to the bytecode.
+ macro_rules! byte {
+ ($byte:expr) => {
+ self.bytecode.push($byte)
+ };
+ }
+ // Append a 16-bit value to the bytecode, high byte first.
+ macro_rules! double {
+ ($double:expr) => {{
+ let [high, low] = u16::to_be_bytes($double);
+ self.bytecode.push(high); self.bytecode.push(low);
+ }};
+ }
+
+ for token in tokens {
+ // Bytecode offset at which this token's output begins.
+ let i = self.bytecode.len();
+ match &token.value {
+ SemanticToken::Literal(value) => match value {
+ Value::Byte(byte) => byte!(*byte),
+ Value::Double(double) => double!(*double),
+ }
+ SemanticToken::Pad(value) => {
+ // Grow the bytecode by `usize::from(value)` zero bytes.
+ self.bytecode.resize(i + usize::from(value), 0);
+ },
+ SemanticToken::String(bytes) => {
+ self.bytecode.extend_from_slice(bytes)
+ },
+ SemanticToken::Comment(_) => (),
+ SemanticToken::BlockOpen(_) => {
+ // Remember where the placeholder lives so the matching
+ // block-close can overwrite it.
+ self.stack.push(i);
+ // Use a fake index for now.
+ double!(0);
+ }
+ SemanticToken::BlockClose(_) => {
+ if i > 0xFFFF {
+ let error = BytecodeError::InvalidBlockAddress(i);
+ self.errors.push(Tracked::from(error, token.source.clone()));
+ }
+ let Some(addr) = self.stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator");
+ };
+ // Overwrite the block-open placeholder with the offset of this
+ // block-close. The cast truncates offsets above 0xFFFF, which
+ // have already been reported above.
+ let [high, low] = (i as u16).to_be_bytes();
+ self.bytecode[addr] = high;
+ self.bytecode[addr+1] = low;
+ }
+ SemanticToken::Symbol(name) => {
+ if let Some(definition) = self.definitions.get(name) {
+ match &definition.variant {
+ DefinitionVariant::MacroDefinition(body) => {
+ // Expand the macro in place.
+ self.parse(body, true);
+ }
+ DefinitionVariant::LabelDefinition => {
+ // Record where the label address must be written later.
+ let information = self.labels.get_mut(name).unwrap();
+ information.slots.push(i);
+ // Use a fake index for now.
+ double!(0);
+ }
+ }
+ } else {
+ unreachable!("Uncaught undefined symbol '{name}'");
+ }
+ }
+ SemanticToken::Instruction(instruction) => {
+ byte!(instruction.value)
+ }
+ SemanticToken::LabelDefinition(name) => if in_macro {
+ unreachable!("Uncaught label definition in macro");
+ } else {
+ if i > 0xFFFF {
+ let error = BytecodeError::InvalidLabelAddress(i);
+ self.errors.push(Tracked::from(error, token.source.clone()));
+ }
+ let information = self.labels.get_mut(name).unwrap();
+ // Replace fake index with real index.
+ information.address = i;
+ }
+ // A macro definition emits no bytes; its body is only expanded
+ // where the macro is invoked.
+ SemanticToken::MacroDefinition{ .. } => if in_macro {
+ unreachable!("Uncaught macro definition in macro");
+ }
+ }
+ }
+
+ if !self.stack.is_empty() {
+ unreachable!("Uncaught unterminated block");
+ }
+ }
+
+ /// Fill each label slot with a real label address.
+ ///
+ /// Addresses are written as two bytes, high byte first. The `as u16` cast
+ /// truncates addresses above 0xFFFF; `parse` records an error for those.
+ pub fn fill_slots(&mut self) {
+ for information in self.labels.values() {
+ let [high, low] = (information.address as u16).to_be_bytes();
+ for addr in &information.slots {
+ self.bytecode[*addr] = high;
+ self.bytecode[*addr + 1] = low;
+ }
+ }
+ }
+}
diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs
new file mode 100644
index 0000000..902fcd7
--- /dev/null
+++ b/src/stages/bytecode_tokens.rs
@@ -0,0 +1,37 @@
+use crate::*;
+
+
+/// The output of bytecode generation.
+pub struct AssembledProgram {
+ /// The generated bytecode.
+ pub bytecode: Vec<u8>,
+ /// One entry per label in the program.
+ pub symbols: Vec<AssembledSymbol>,
+}
+
+/// A label together with its location in the bytecode and in the source code.
+pub struct AssembledSymbol {
+ /// Name of the label.
+ pub name: String,
+ /// Bytecode offset of the label.
+ pub address: usize,
+ /// Source location of the label definition.
+ pub source: SourceSpan,
+}
+
+/// Errors that can be recorded while generating bytecode.
+pub enum BytecodeError {
+ /// A label was defined at the given address, which exceeds 0xFFFF.
+ InvalidLabelAddress(usize),
+ /// A block was closed at the given address, which exceeds 0xFFFF.
+ InvalidBlockAddress(usize),
+}
+
+
+pub fn report_bytecode_errors(errors: &[Tracked<BytecodeError>], source_code: &str) {
+ for error in errors {
+ report_bytecode_error(error, source_code);
+ }
+}
+
+
+fn report_bytecode_error(error: &Tracked<BytecodeError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ BytecodeError::InvalidLabelAddress(address) =>
+ &format!("The label address exceeds 0xFFFF: 0x{address:X}"),
+ BytecodeError::InvalidBlockAddress(address) =>
+ &format!("The block address exceeds 0xFFFF: 0x{address:X}"),
+ };
+ report_source_issue(LogLevel::Error, &context, message);
+}
diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs
new file mode 100644
index 0000000..97bf20c
--- /dev/null
+++ b/src/stages/compiler.rs
@@ -0,0 +1,84 @@
+use crate::*;
+
+use assembler::SymbolRole::*;
+use assembler::DefinitionType::*;
+
+
+/// Create a `Compiler` that uses `parse_symbols` to find the symbols in a
+/// source file and `push_code` to append a source file to the compilation.
+pub fn new_compiler() -> Compiler {
+ Compiler::new(parse_symbols, push_code)
+}
+
+
+/// Extract every symbol definition and reference from a source code string.
+///
+/// Returns `None` if the source code fails syntactic parsing; the syntactic
+/// errors themselves are discarded.
+pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> {
+    parse_syntactic(source_code, path)
+        .ok()
+        .map(|syntactic| SymbolParser::new().parse(&syntactic))
+}
+
+/// Append a source file to the compilation string.
+///
+/// Files containing only whitespace are skipped. Otherwise the file is
+/// separated from any preceding content by a blank line and introduced by a
+/// `(: path )` comment naming the file it came from.
+pub fn push_code(compilation: &mut String, source_file: &SourceFile) {
+    let source_code = &source_file.source_code;
+    // Nothing to add for a blank file.
+    if source_code.trim().is_empty() {
+        return;
+    }
+    // Pad the previous section until it ends with two newline characters.
+    while !compilation.is_empty() && !compilation.ends_with("\n\n") {
+        compilation.push('\n');
+    }
+    // Path comment first, then the source code itself.
+    let path_str = source_file.path.as_os_str().to_string_lossy();
+    compilation.push_str(&format!("(: {path_str} )\n"));
+    compilation.push_str(source_code);
+}
+
+
+/// Extracts symbol definitions and references from a list of syntactic tokens.
+pub struct SymbolParser {
+ /// Symbols recorded so far, in the order they were encountered.
+ pub symbols: Vec<Symbol>,
+}
+
+impl SymbolParser {
+    /// Create a parser that has not recorded any symbols yet.
+    pub fn new() -> Self {
+        Self { symbols: Vec::new() }
+    }
+
+    /// Append one symbol, with an empty namespace, to the recorded list.
+    fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) {
+        self.symbols.push(Symbol {
+            name: name.to_string(),
+            namespace: Vec::new(),
+            source: source.to_owned(),
+            role,
+        });
+    }
+
+    /// Walk the tokens and return every symbol found, in source order.
+    ///
+    /// A macro definition is recorded as a definition that must precede its
+    /// references, followed by a reference for each symbol in its body. A
+    /// label definition is recorded as a definition that may follow its
+    /// references. A bare symbol is recorded as a reference. All other
+    /// tokens are ignored.
+    pub fn parse(mut self, syntactic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> {
+        for token in syntactic {
+            match &token.value {
+                SyntacticToken::MacroDefinition(definition) => {
+                    let macro_name = &definition.name;
+                    self.record_symbol(
+                        macro_name,
+                        &macro_name.source,
+                        Definition(MustPrecedeReference),
+                    );
+                    for inner in &definition.body {
+                        let SyntacticToken::Symbol(reference) = &inner.value else {
+                            continue;
+                        };
+                        self.record_symbol(reference, &inner.source, Reference);
+                    }
+                }
+                SyntacticToken::LabelDefinition(label) => {
+                    self.record_symbol(label, &token.source, Definition(CanFollowReference));
+                }
+                SyntacticToken::Symbol(reference) => {
+                    self.record_symbol(reference, &token.source, Reference);
+                }
+                _ => (),
+            }
+        }
+        self.symbols
+    }
+}
diff --git a/src/stages/mod.rs b/src/stages/mod.rs
new file mode 100644
index 0000000..76bda0d
--- /dev/null
+++ b/src/stages/mod.rs
@@ -0,0 +1,26 @@
+mod compiler;
+mod syntactic;
+mod syntactic_tokens;
+mod semantic;
+mod semantic_tokens;
+mod bytecode;
+mod bytecode_tokens;
+pub use compiler::*;
+pub use syntactic::*;
+pub use syntactic_tokens::*;
+pub use semantic::*;
+pub use semantic_tokens::*;
+pub use bytecode::*;
+pub use bytecode_tokens::*;
+
+
+/// Print a formatted line to standard output, preceded by indentation.
+///
+/// The first argument is the indentation level; the remaining arguments are
+/// passed to `println!` unchanged. The indentation string is printed once per
+/// level before the line itself.
+#[macro_export]
+macro_rules! indent {
+ // A literal `0` level needs no indentation at all.
+ (0, $($tokens:tt)*) => {{
+ println!($($tokens)*);
+ }};
+ // Any other level expression: print the indentation, then the line.
+ ($indent:expr, $($tokens:tt)*) => {{
+ for _ in 0..$indent { print!(" "); }
+ println!($($tokens)*);
+ }};
+}
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs
new file mode 100644
index 0000000..f2774a4
--- /dev/null
+++ b/src/stages/semantic.rs
@@ -0,0 +1,141 @@
+use crate::*;
+
+use std::collections::{HashMap, HashSet};
+
+
+/// Convert syntactic tokens into a semantic `Program`.
+///
+/// The conversion resolves every symbol to a definition, links each
+/// block-open token with its matching block-close token, and records where
+/// each definition is referenced.
+///
+/// # Errors
+/// Returns `SemanticError::InvocationBeforeDefinition` for every symbol that
+/// names a macro whose definition has not yet been converted.
+///
+/// # Panics
+/// Panics via `unreachable!` on conditions that are expected to have been
+/// rejected before this stage: duplicate definitions, undefined symbols,
+/// unbalanced blocks, and definitions inside a macro body.
+pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> {
+ // Record all label definitions and macro names up front.
+ // Labels can therefore be referenced before they are defined, whereas
+ // macros only enter `definitions` once their definition token is reached.
+ let mut definitions = HashMap::new();
+ let mut macro_names = HashSet::new();
+ for token in &syntactic {
+ match &token.value {
+ SyntacticToken::LabelDefinition(name) => {
+ // Use a fake index for now.
+ let definition = Definition::new(0, DefinitionVariant::LabelDefinition);
+ let tracked = Tracked::from(definition, token.source.clone());
+ if let Some(_) = definitions.insert(name.clone(), tracked) {
+ unreachable!("Uncaught duplicate label definition '{name}'");
+ }
+ }
+ SyntacticToken::MacroDefinition(definition) => {
+ let name = &definition.name;
+ if !macro_names.insert(name.clone()) {
+ unreachable!("Uncaught duplicate macro definition '{name}'")
+ }
+ }
+ _ => (),
+ }
+ }
+
+ // Convert syntactic tokens to semantic tokens.
+ let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new();
+ let mut errors = Vec::new();
+ // Indices of the block-open tokens that have not been closed yet.
+ let mut stack = Vec::new();
+
+ for syn_token in syntactic {
+ // Index that the converted token will have in `tokens`.
+ let i = tokens.len();
+ let sem_token = match syn_token.value {
+ SyntacticToken::Literal(value) => SemanticToken::Literal(value),
+ SyntacticToken::Pad(value) => SemanticToken::Pad(value),
+ SyntacticToken::String(bytes) => SemanticToken::String(bytes),
+ SyntacticToken::Comment(string) => SemanticToken::Comment(string),
+ SyntacticToken::BlockOpen => {
+ stack.push(i);
+ // Use a fake index for now.
+ SemanticToken::BlockOpen(0)
+ }
+ SyntacticToken::BlockClose => {
+ let Some(k) = stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator");
+ };
+ // Replace fake index with real index.
+ tokens[k].value = SemanticToken::BlockOpen(i);
+ SemanticToken::BlockClose(k)
+ }
+ SyntacticToken::Symbol(symbol) => {
+ if let Some(definition) = definitions.get_mut(&symbol) {
+ definition.value.references.push(i);
+ } else if let Some(definition) = macro_names.get(&symbol) {
+ // The name belongs to a macro that is defined later in the program.
+ let error = SemanticError::InvocationBeforeDefinition;
+ let source = syn_token.source.wrap(definition.source.clone());
+ errors.push(Tracked::from(error, source));
+ } else {
+ unreachable!("Uncaught undefined symbol '{symbol}'");
+ };
+ SemanticToken::Symbol(symbol)
+ }
+ SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
+ SyntacticToken::LabelDefinition(name) => {
+ let definition = definitions.get_mut(&name).unwrap();
+ // Replace fake index with real index.
+ definition.value.definition = i;
+ SemanticToken::LabelDefinition(name)
+ }
+ SyntacticToken::MacroDefinition(definition) => {
+ let name = definition.name.clone();
+ // The macro body is converted with its own token list and block
+ // stack, so indices inside it are relative to the body.
+ let mut body: Vec<Tracked<SemanticToken>> = Vec::new();
+ let mut body_stack = Vec::new();
+ for syn_token in definition.body {
+ // Index that the converted token will have in `body`.
+ let j = body.len();
+ let sem_token = match syn_token.value {
+ SyntacticToken::Literal(value) => SemanticToken::Literal(value),
+ SyntacticToken::Pad(value) => SemanticToken::Pad(value),
+ SyntacticToken::String(bytes) => SemanticToken::String(bytes),
+ SyntacticToken::Comment(string) => SemanticToken::Comment(string),
+ SyntacticToken::BlockOpen => {
+ body_stack.push(j);
+ // Use a fake index for now.
+ SemanticToken::BlockOpen(0)
+ }
+ SyntacticToken::BlockClose => {
+ let Some(k) = body_stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator in macro '{name}'");
+ };
+ // Replace fake index with real index.
+ body[k].value = SemanticToken::BlockOpen(j);
+ SemanticToken::BlockClose(k)
+ }
+ SyntacticToken::Symbol(symbol) => {
+ if let Some(definition) = definitions.get_mut(&symbol) {
+ // (index of this macro definition, index within its body)
+ definition.value.deep_references.push((i, j));
+ } else if let Some(definition) = macro_names.get(&symbol) {
+ // This macro is only inserted into `definitions` after its
+ // body has been converted, so a reference to its own name
+ // also takes this branch.
+ let error = SemanticError::InvocationBeforeDefinition;
+ let source = syn_token.source.wrap(definition.source.clone());
+ errors.push(Tracked::from(error, source));
+ } else {
+ unreachable!("Uncaught undefined symbol '{symbol}' in macro '{name}'");
+ };
+ SemanticToken::Symbol(symbol)
+ }
+ SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
+ SyntacticToken::LabelDefinition(label) =>
+ unreachable!("Uncaught label definition '{label}' in macro '{name}'"),
+ SyntacticToken::MacroDefinition(definition) =>
+ unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name),
+ };
+ body.push(Tracked::from(sem_token, syn_token.source));
+ }
+
+ // Register the macro so that later tokens can reference it.
+ let variant = DefinitionVariant::MacroDefinition(body);
+ let source = definition.name.source.clone();
+ let tracked = Tracked::from(Definition::new(i, variant), source);
+ if let Some(_) = definitions.insert(name.value.clone(), tracked) {
+ unreachable!("Uncaught duplicate definition '{name}'")
+ };
+ if !body_stack.is_empty() {
+ unreachable!("Uncaught unterminated block in macro '{name}'");
+ }
+ SemanticToken::MacroDefinition(name)
+ }
+ };
+ tokens.push(Tracked::from(sem_token, syn_token.source));
+ }
+
+ if !stack.is_empty() {
+ unreachable!("Uncaught unterminated block");
+ }
+ match errors.is_empty() {
+ true => Ok(Program { definitions, tokens }),
+ false => Err(errors),
+ }
+}
diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs
new file mode 100644
index 0000000..fe49c26
--- /dev/null
+++ b/src/stages/semantic_tokens.rs
@@ -0,0 +1,94 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+/// A semantically-parsed program.
+pub struct Program {
+ /// Every label and macro definition, keyed by name.
+ pub definitions: HashMap<String, Tracked<Definition>>,
+ /// The top-level tokens of the program, in source order.
+ pub tokens: Vec<Tracked<SemanticToken>>,
+}
+
+/// A label or macro definition, together with the places it is referenced.
+pub struct Definition {
+ /// Whether this is a label or a macro (a macro carries its body).
+ pub variant: DefinitionVariant,
+ /// Index of definition token.
+ pub definition: usize,
+ /// Indices of symbols referencing this definition.
+ pub references: Vec<usize>,
+ /// Indices of references inside other definitions.
+ /// Each pair is (index of the enclosing macro definition token, index within its body).
+ pub deep_references: Vec<(usize, usize)>,
+}
+
+impl Definition {
+    /// Create a definition located at token index `i` with no recorded references.
+    pub fn new(i: usize, variant: DefinitionVariant) -> Self {
+        let references = Vec::new();
+        let deep_references = Vec::new();
+        Self { variant, definition: i, references, deep_references }
+    }
+}
+
+/// The kind of thing a `Definition` defines.
+pub enum DefinitionVariant {
+ /// A label.
+ LabelDefinition,
+ /// A macro, carrying the semantic tokens of its body.
+ MacroDefinition(Vec<Tracked<SemanticToken>>),
+}
+
+/// A token produced by semantic parsing.
+pub enum SemanticToken {
+ /// A literal byte or double value.
+ Literal(Value),
+ /// Padding of the given size.
+ Pad(Value),
+ /// The raw bytes of a string.
+ String(Vec<u8>),
+ /// The text of a comment.
+ Comment(String),
+ BlockOpen(usize), // index to matching block-close
+ BlockClose(usize), // index to matching block-open
+ /// A reference to a label or macro, by name.
+ Symbol(String),
+ /// A single instruction.
+ Instruction(Instruction),
+ /// The definition of the named label.
+ LabelDefinition(String),
+ /// The definition of the named macro; the body is stored in the program's definitions.
+ MacroDefinition(Tracked<String>),
+}
+
+/// Errors that can be reported by semantic parsing.
+pub enum SemanticError {
+ /// A macro was invoked before its definition was reached.
+ InvocationBeforeDefinition,
+}
+
+
+pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) {
+ for error in errors {
+ report_semantic_error(error, source_code);
+ }
+}
+
+
+fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SemanticError::InvocationBeforeDefinition =>
+ "Macro cannot be invoked before it has been defined",
+ };
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Print a semantic token on its own line at indentation level `i`.
+///
+/// A macro definition is followed by the tokens of its body, looked up in
+/// `definitions` and printed one level deeper. Nothing extra is printed if
+/// the name is missing from `definitions` or does not refer to a macro.
+pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &HashMap<String, Tracked<Definition>>) {
+    match token {
+        SemanticToken::Literal(value) => indent!(i, "Literal({value})"),
+        SemanticToken::Pad(value) => indent!(i, "Pad({value})"),
+        SemanticToken::String(bytes) => {
+            indent!(i, "String({})", String::from_utf8_lossy(bytes))
+        }
+        SemanticToken::Comment(_) => indent!(i, "Comment"),
+        SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"),
+        SemanticToken::BlockClose(pointer) => indent!(i, "BlockClose(*{pointer})"),
+        SemanticToken::Symbol(name) => indent!(i, "Symbol({name})"),
+        SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"),
+        SemanticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"),
+        SemanticToken::MacroDefinition(name) => {
+            indent!(i, "MacroDefinition({name})");
+            let Some(definition) = definitions.get(name.as_str()) else {
+                return;
+            };
+            let DefinitionVariant::MacroDefinition(body) = &definition.variant else {
+                return;
+            };
+            for token in body {
+                print_semantic_token(i+1, token, definitions);
+            }
+        }
+    }
+}
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..6453ae0
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,220 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+/// Parse source code into syntactic tokens.
+///
+/// `path` is handed to the tokeniser together with the source code.
+///
+/// # Errors
+/// Returns every syntactic error found in the source code.
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let tokeniser = Tokeniser::new(source_code, path);
+    // Parsing starts outside of any label, so the parent label name is empty.
+    parse_syntactic_from_tokeniser(tokeniser, "")
+}
+
+/// Parse every token produced by the tokeniser `t`.
+///
+/// `label_name` is the name of the most recent `@` label; it is used as the
+/// prefix for sublabel definitions (`&name`) and sublabel references
+/// (`~name`), and is updated whenever a new `@` label is parsed.
+///
+/// # Errors
+/// Returns every error that was collected. Parsing continues past an error so
+/// that as many errors as possible are reported together.
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ t.add_delimiters(&['(',')','[',']','{','}',';']);
+ t.add_terminators(&[':']);
+ let mut tokens = Vec::new();
+ let mut errors = Vec::new();
+ let mut label_name = label_name.to_string();
+
+ // Record an error and skip to the next token.
+ macro_rules! err {
+ ($error:expr) => {{
+ err!($error, t.get_source());
+ }};
+ ($error:expr, $source:expr) => {{
+ errors.push(Tracked::from($error, $source));
+ continue;
+ }};
+ }
+
+ // Record an error if the name is longer than 63 characters. Parsing of the
+ // current token carries on regardless.
+ macro_rules! check_name {
+ ($name:expr) => {{
+ check_name!($name, t.get_source());
+ }};
+ ($name:expr, $source:expr) => {
+ if $name.chars().count() > 63 {
+ let error = SyntacticError::InvalidIdentifier($name.clone());
+ errors.push(Tracked::from(error, $source.clone()));
+ }
+ };
+ }
+
+ // Eat characters until the end character is found.
+ macro_rules! is_any_end {
+ ($end:expr) => {
+ |t: &mut Tokeniser| {
+ t.eat_char() == Some($end)
+ }
+ };
+ }
+
+ // Eat characters until the end character is found without a preceding back-slash.
+ // NOTE(review): the predicate is true both when a back-slash followed by the
+ // end character is eaten and when the bare end character is eaten. How
+ // `track_until` treats a true result is not visible here -- confirm that
+ // escaped end characters do not terminate the string early.
+ macro_rules! is_plain_end {
+ ($end:expr) => {
+ |t: &mut Tokeniser| {
+ t.eat_if(concat!('\\', $end)).is_some() || t.eat_char() == Some($end)
+ }
+ };
+ }
+
+ loop {
+ // Eat leading whitespace.
+ while let Some(c) = t.peek_char() {
+ match [' ', '\n', '\r', '\t'].contains(&c) {
+ true => t.eat_char(),
+ false => break,
+ };
+ }
+ t.mark_start();
+ let Some(c) = t.eat_char() else { break };
+ // The first character of a token decides what kind of token it is.
+ let token = match c {
+ // Double-quoted string: a null byte is appended to the contents.
+ '"' => {
+ let source = t.get_source();
+ match t.track_until(is_plain_end!('"')) {
+ Some(string) => {
+ let mut bytes = string.into_bytes();
+ bytes.push(0x00);
+ SyntacticToken::String(bytes)
+ }
+ None => err!(SyntacticError::UnterminatedNullString, source),
+ }
+ }
+ // Single-quoted string: the contents are used as they are.
+ '\'' => {
+ let source = t.get_source();
+ match t.track_until(is_plain_end!('\'')) {
+ Some(string) => SyntacticToken::String(string.into_bytes()),
+ None => err!(SyntacticError::UnterminatedRawString, source),
+ }
+ }
+ // Comment, running up to the next ')' character.
+ '(' => {
+ let source = t.get_source();
+ if let Some(string) = t.track_until(is_any_end!(')')) {
+ // Check if the comment fills the entire line.
+ if t.start.position.column == 0 && t.end_of_line() {
+ // A whole-line comment starting with ": " names the file that
+ // the following source code came from. It updates the
+ // tokeniser's embedded path and produces no token.
+ if let Some(path) = string.strip_prefix(": ") {
+ t.embedded_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start.position.line + 1;
+ continue;
+ }
+ }
+ SyntacticToken::Comment(string)
+ } else {
+ err!(SyntacticError::UnterminatedComment, source)
+ }
+ }
+ ')' => err!(SyntacticError::UnmatchedCommentTerminator),
+ // Macro definition: a name, then a body running up to the next ';'.
+ '%' => {
+ let name = t.eat_token();
+ let source = t.get_source();
+ check_name!(name, source);
+ t.mark_child();
+ if let Some(_) = t.track_until(is_any_end!(';')) {
+ // Parse the body with a child tokeniser over the tracked span.
+ let child = t.tokenise_child_span();
+ match parse_body_from_tokeniser(child, &label_name) {
+ Ok(body) => {
+ let name = Tracked::from(name, source);
+ let definition = SyntacticMacroDefinition { name, body };
+ SyntacticToken::MacroDefinition(definition)
+ }
+ Err(mut err) => {
+ errors.append(&mut err);
+ continue;
+ }
+ }
+ } else {
+ err!(SyntacticError::UnterminatedMacroDefinition, source);
+ }
+ }
+ ';' => err!(SyntacticError::UnmatchedMacroTerminator),
+ '{' => SyntacticToken::BlockOpen,
+ '}' => SyntacticToken::BlockClose,
+ // Square brackets produce no token.
+ '['|']' => continue,
+ // Label definition: becomes the parent of subsequent sublabels.
+ '@' => {
+ label_name = t.eat_token();
+ check_name!(label_name);
+ SyntacticToken::LabelDefinition(label_name.clone())
+ }
+ // Sublabel definition, named "<parent label>/<name>".
+ '&' => {
+ let name = format!("{label_name}/{}", t.eat_token());
+ check_name!(name);
+ SyntacticToken::LabelDefinition(name)
+ }
+ // Sublabel reference, named "<parent label>/<name>".
+ '~' => {
+ let name = format!("{label_name}/{}", t.eat_token());
+ check_name!(name);
+ SyntacticToken::Symbol(name)
+ }
+ // Padding.
+ '#' => {
+ let token = t.eat_token();
+ match token.parse::<Value>() {
+ Ok(value) => SyntacticToken::Pad(value),
+ Err(_) => err!(SyntacticError::InvalidPadValue),
+ }
+ },
+ // A lone ':' is a symbol in its own right.
+ ':' => {
+ SyntacticToken::Symbol(String::from(':'))
+ }
+ // Anything else is tried as a literal value, then as an instruction,
+ // and is otherwise treated as a symbol.
+ c => {
+ let token = format!("{c}{}", t.eat_token());
+ if let Ok(value) = token.parse::<Value>() {
+ SyntacticToken::Literal(value)
+ } else if let Ok(instruction) = token.parse::<Instruction>() {
+ SyntacticToken::Instruction(instruction)
+ } else {
+ check_name!(token);
+ SyntacticToken::Symbol(token)
+ }
+ }
+ };
+
+ t.mark_end();
+ let source = t.get_source();
+ tokens.push(Tracked::from(token, source));
+ }
+
+ // Check that every block open matches a block close.
+ let mut stack = Vec::new();
+ for token in &tokens {
+ match &token.value {
+ SyntacticToken::BlockOpen => stack.push(token.source.clone()),
+ SyntacticToken::BlockClose => if let None = stack.pop() {
+ let error = SyntacticError::UnmatchedBlockTerminator;
+ errors.push(Tracked::from(error, token.source.clone()));
+ }
+ _ => (),
+ }
+ }
+ // Whatever is left on the stack was opened but never closed.
+ for source in stack {
+ let error = SyntacticError::UnterminatedBlock;
+ errors.push(Tracked::from(error, source));
+ }
+
+ match errors.is_empty() {
+ true => Ok(tokens),
+ false => Err(errors),
+ }
+}
+
+
+/// Parse the body of a macro definition.
+///
+/// The body is parsed like any other token stream, after which every label
+/// definition and macro definition found in it is reported as an error.
+///
+/// # Errors
+/// Returns the errors from parsing the tokens if there were any; otherwise
+/// returns one error per definition found inside the body.
+fn parse_body_from_tokeniser(t: Tokeniser, label_name: &str) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let parsed = parse_syntactic_from_tokeniser(t, label_name)?;
+    let mut body = Vec::new();
+    let mut problems = Vec::new();
+
+    for token in parsed {
+        // Definitions are not allowed in a macro body; everything else is kept.
+        let error = match token.value {
+            SyntacticToken::LabelDefinition(_) => {
+                SyntacticError::LabelDefinitionInMacroDefinition
+            }
+            SyntacticToken::MacroDefinition(_) => {
+                SyntacticError::MacroDefinitionInMacroDefinition
+            }
+            _ => {
+                body.push(token);
+                continue;
+            }
+        };
+        problems.push(Tracked::from(error, token.source));
+    }
+
+    if problems.is_empty() {
+        Ok(body)
+    } else {
+        Err(problems)
+    }
+}
diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs
new file mode 100644
index 0000000..2a95967
--- /dev/null
+++ b/src/stages/syntactic_tokens.rs
@@ -0,0 +1,94 @@
+use crate::*;
+
+
+/// A token produced by syntactic parsing.
+pub enum SyntacticToken {
+ /// A literal value.
+ Literal(Value),
+ /// Padding of the given size.
+ Pad(Value),
+ /// The bytes of a string, including the trailing null byte for double-quoted strings.
+ String(Vec<u8>),
+ /// The text of a comment.
+ Comment(String),
+ /// The start of a block (`{`).
+ BlockOpen,
+ /// The end of a block (`}`).
+ BlockClose,
+ /// A reference to a label or macro, by name.
+ Symbol(String),
+ /// A single instruction.
+ Instruction(Instruction),
+ /// The definition of a label; sublabels are named "<parent label>/<name>".
+ LabelDefinition(String),
+ /// The definition of a macro, including its body.
+ MacroDefinition(SyntacticMacroDefinition),
+}
+
+/// The name and body of a macro definition.
+pub struct SyntacticMacroDefinition {
+ /// Name of the macro, tracked to its location in the source code.
+ pub name: Tracked<String>,
+ /// Tokens making up the macro body.
+ pub body: Vec<Tracked<SyntacticToken>>,
+}
+
+/// Errors that can be reported by syntactic parsing.
+pub enum SyntacticError {
+ /// A `{` has no matching `}`.
+ UnterminatedBlock,
+ /// A `(` has no matching `)`.
+ UnterminatedComment,
+ /// A single-quoted string has no closing quote.
+ UnterminatedRawString,
+ /// A double-quoted string has no closing quote.
+ UnterminatedNullString,
+ /// A macro definition has no terminating `;`.
+ UnterminatedMacroDefinition,
+ /// A `}` was found with no block open.
+ UnmatchedBlockTerminator,
+ /// A `)` was found outside of a comment.
+ UnmatchedCommentTerminator,
+ /// A `;` was found outside of a macro definition.
+ UnmatchedMacroTerminator,
+ /// The text following `#` could not be parsed as a value.
+ InvalidPadValue,
+ /// The identifier is longer than 63 characters.
+ InvalidIdentifier(String),
+ /// A macro definition was found inside a macro body.
+ MacroDefinitionInMacroDefinition,
+ /// A label definition was found inside a macro body.
+ LabelDefinitionInMacroDefinition,
+}
+
+
+pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) {
+ for error in errors {
+ report_syntactic_error(error, source_code);
+ }
+}
+
+fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SyntacticError::UnterminatedBlock =>
+ "Block was not terminated, add a '}' character to terminate",
+ SyntacticError::UnterminatedComment =>
+ "Comment was not terminated, add a ')' character to terminate",
+ SyntacticError::UnterminatedRawString =>
+ "String was not terminated, add a ' character to terminate",
+ SyntacticError::UnterminatedNullString =>
+ "String was not terminated, add a '\"' character to terminate",
+ SyntacticError::UnterminatedMacroDefinition =>
+ "Macro definition was not terminated, add a ';' character to terminate",
+ SyntacticError::UnmatchedBlockTerminator =>
+ "Attempted to terminate a block, but no block was in progress",
+ SyntacticError::UnmatchedCommentTerminator =>
+ "Attempted to terminate a comment, but no comment was in progress",
+ SyntacticError::UnmatchedMacroTerminator =>
+ "Attempted to terminate a macro definition, but no macro definition was in progress",
+ SyntacticError::InvalidPadValue =>
+ "The pad value must be two or four hexadecimal digits",
+ SyntacticError::InvalidIdentifier(name) =>
+ &format!("An identifier cannot exceed 63 characters in length: {name}"),
+ SyntacticError::MacroDefinitionInMacroDefinition =>
+ "A macro cannot be defined inside another macro",
+ SyntacticError::LabelDefinitionInMacroDefinition =>
+ "A label cannot be defined inside a macro",
+ };
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Print a syntactic token on its own line at indentation level `i`.
+///
+/// A macro definition is followed by the tokens of its body, printed one
+/// level deeper.
+pub fn print_syntactic_token(i: usize, token: &SyntacticToken) {
+    match token {
+        SyntacticToken::Literal(value) => indent!(i, "Literal({value})"),
+        SyntacticToken::Pad(value) => indent!(i, "Pad({value})"),
+        SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)),
+        SyntacticToken::Comment(_) => indent!(i, "Comment"),
+        SyntacticToken::BlockOpen => indent!(i, "BlockOpen"),
+        // Previously printed "BlockOpen", which made the two block tokens
+        // indistinguishable in the output.
+        SyntacticToken::BlockClose => indent!(i, "BlockClose"),
+        SyntacticToken::Symbol(name) => indent!(i, "Symbol({name})"),
+        SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"),
+        SyntacticToken::LabelDefinition(name) => indent!(i, "LabelDefinition({name})"),
+        SyntacticToken::MacroDefinition(definition) => {
+            indent!(i, "MacroDefinition({})", definition.name);
+            for token in &definition.body {
+                print_syntactic_token(i+1, token);
+            }
+        }
+    }
+}