summaryrefslogtreecommitdiff
path: root/src/stages
diff options
context:
space:
mode:
Diffstat (limited to 'src/stages')
-rw-r--r--src/stages/bytecode.rs137
-rw-r--r--src/stages/bytecode_tokens.rs13
-rw-r--r--src/stages/compiler.rs80
-rw-r--r--src/stages/mod.rs27
-rw-r--r--src/stages/semantic.rs141
-rw-r--r--src/stages/semantic_tokens.rs101
-rw-r--r--src/stages/syntactic.rs185
-rw-r--r--src/stages/syntactic_tokens.rs107
8 files changed, 791 insertions, 0 deletions
diff --git a/src/stages/bytecode.rs b/src/stages/bytecode.rs
new file mode 100644
index 0000000..db6ff6d
--- /dev/null
+++ b/src/stages/bytecode.rs
@@ -0,0 +1,137 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+/// Assemble a semantic program into bytecode and a table of label symbols.
+///
+/// Doesn't truncate trailing null bytes.
+///
+/// The returned symbols are sorted by address, then by name, so that the
+/// output does not depend on `HashMap` iteration order.
+pub fn generate_bytecode(semantic: &Program) -> AssembledProgram {
+    let mut generator = BytecodeGenerator::new(&semantic.definitions);
+    generator.parse(&semantic.tokens, false);
+    generator.fill_slots();
+    let BytecodeGenerator { labels, bytecode, .. } = generator;
+    let mut symbols: Vec<AssembledSymbol> = labels
+        .into_iter()
+        .map(|(name, information)| {
+            // Labels are expected to be a subset of the program definitions.
+            let source = semantic.definitions.get(&name).unwrap().source.clone();
+            AssembledSymbol { name, address: information.address, source }
+        })
+        .collect();
+    symbols.sort_by(|a, b| a.address.cmp(&b.address).then_with(|| a.name.cmp(&b.name)));
+    AssembledProgram { bytecode, symbols }
+}
+
+
+/// State used while turning semantic tokens into bytecode.
+pub struct BytecodeGenerator<'a> {
+ /// All definitions in the program, used to resolve invocations.
+ definitions: &'a HashMap<String, Tracked<Definition>>,
+ /// Address and pending slots for each label, keyed by label name.
+ labels: HashMap<String, LabelInformation>,
+ /// Bytecode offsets of the placeholders written for currently open blocks.
+ stack: Vec<usize>,
+ /// The bytecode emitted so far.
+ bytecode: Vec<u8>,
+}
+
+/// The address of a label and the places in the bytecode that refer to it.
+struct LabelInformation {
+ /// Bytecode offset recorded when the label definition was parsed.
+ address: usize,
+ /// Bytecode offsets of two-byte placeholders to overwrite with `address`.
+ slots: Vec<usize>,
+}
+
+impl<'a> BytecodeGenerator<'a> {
+ /// Create a generator with an entry in `labels` for every label definition.
+ ///
+ /// Each label starts at address 0 with no slots; `parse` looks these
+ /// entries up by name to record the real address and the referring slots,
+ /// so they must exist before parsing begins.
+ pub fn new(definitions: &'a HashMap<String, Tracked<Definition>>) -> Self {
+     let mut labels = HashMap::new();
+     for (name, definition) in definitions {
+         if let DefinitionKind::LabelDefinition = definition.kind {
+             labels.insert(name.clone(), LabelInformation { address: 0, slots: Vec::new() });
+         }
+     }
+     Self {
+         definitions,
+         // Store the populated map; an empty map would make `parse` panic
+         // on the first label definition or label invocation.
+         labels,
+         stack: Vec::new(),
+         bytecode: Vec::new(),
+     }
+ }
+
+ /// Append the bytecode for `tokens` to the program.
+ ///
+ /// `in_macro` is true when `tokens` is the body of a macro being expanded;
+ /// label and macro definitions are not permitted there.
+ ///
+ /// Label invocations and block openers each emit a two-byte placeholder.
+ /// Block placeholders are patched here when the block closes; label
+ /// placeholders are patched afterwards by `fill_slots`.
+ ///
+ /// # Panics
+ /// Panics on definitions inside a macro, undefined symbols, and unmatched
+ /// block terminators; the panic messages mark these as conditions that
+ /// should have been caught before this stage.
+ pub fn parse(&mut self, tokens: &[Tracked<SemanticToken>], in_macro: bool) {
+     macro_rules! byte {
+         ($byte:expr) => {
+             self.bytecode.push($byte)
+         };
+     }
+     macro_rules! double {
+         ($double:expr) => {{
+             let [high, low] = u16::to_be_bytes($double);
+             self.bytecode.push(high);
+             self.bytecode.push(low);
+         }};
+     }
+
+     for token in tokens {
+         // Address of the next byte to be emitted.
+         let i = self.bytecode.len();
+         match &token.value {
+             SemanticToken::Comment(_) => (),
+
+             SemanticToken::LabelDefinition(name) => if in_macro {
+                 unreachable!("Uncaught label definition in macro");
+             } else {
+                 let information = self.labels.get_mut(name).unwrap();
+                 information.address = i;
+             }
+             // Macro bodies are only emitted where the macro is invoked.
+             SemanticToken::MacroDefinition{ .. } => if in_macro {
+                 unreachable!("Uncaught macro definition in macro");
+             }
+
+             SemanticToken::RawValue(value) => match value {
+                 Value::Byte(byte) => byte!(*byte),
+                 Value::Double(double) => double!(*double),
+             }
+             SemanticToken::Instruction(instruction) => {
+                 byte!(instruction.value)
+             }
+             SemanticToken::Invocation(name) => {
+                 if let Some(definition) = self.definitions.get(name) {
+                     match &definition.kind {
+                         DefinitionKind::MacroDefinition(body) => {
+                             self.parse(body, true);
+                         }
+                         DefinitionKind::LabelDefinition => {
+                             // Reserve two bytes for the label address.
+                             let information = self.labels.get_mut(name).unwrap();
+                             information.slots.push(i);
+                             double!(0);
+                         }
+                     }
+                 } else {
+                     unreachable!("Uncaught undefined symbol '{name}'");
+                 }
+             }
+
+             SemanticToken::Padding(value) => {
+                 self.bytecode.resize(i + usize::from(value), 0);
+             },
+             SemanticToken::String(bytes) => {
+                 self.bytecode.extend_from_slice(bytes)
+             },
+
+             SemanticToken::BlockOpen(_) => {
+                 // Reserve two bytes, patched when the block is closed.
+                 self.stack.push(i);
+                 double!(0);
+             }
+             SemanticToken::BlockClose(_) => {
+                 let Some(addr) = self.stack.pop() else {
+                     unreachable!("Uncaught unmatched block terminator");
+                 };
+                 // Point the opener's placeholder at the address where the
+                 // block ends (`i`), not at the placeholder itself (`addr`).
+                 let [high, low] = (i as u16).to_be_bytes();
+                 self.bytecode[addr] = high;
+                 self.bytecode[addr+1] = low;
+             }
+         }
+     }
+ }
+
+ /// Overwrite every recorded label slot with the big-endian 16-bit address
+ /// of the label it refers to.
+ pub fn fill_slots(&mut self) {
+     for LabelInformation { address, slots } in self.labels.values() {
+         let be_bytes = (*address as u16).to_be_bytes();
+         for &slot in slots {
+             self.bytecode[slot] = be_bytes[0];
+             self.bytecode[slot + 1] = be_bytes[1];
+         }
+     }
+ }
+}
diff --git a/src/stages/bytecode_tokens.rs b/src/stages/bytecode_tokens.rs
new file mode 100644
index 0000000..aef27f9
--- /dev/null
+++ b/src/stages/bytecode_tokens.rs
@@ -0,0 +1,13 @@
+use crate::*;
+
+
+/// The output of bytecode generation.
+pub struct AssembledProgram {
+ /// The generated bytecode.
+ pub bytecode: Vec<u8>,
+ /// One entry for each label tracked by the bytecode generator.
+ pub symbols: Vec<AssembledSymbol>,
+}
+
+/// A label together with its address in the bytecode.
+pub struct AssembledSymbol {
+ /// Name of the label.
+ pub name: String,
+ /// Bytecode offset recorded for the label.
+ pub address: usize,
+ /// Source span of the label definition.
+ pub source: SourceSpan,
+}
diff --git a/src/stages/compiler.rs b/src/stages/compiler.rs
new file mode 100644
index 0000000..7dad5e4
--- /dev/null
+++ b/src/stages/compiler.rs
@@ -0,0 +1,80 @@
+use crate::*;
+use assembler::SymbolRole::*;
+use assembler::DefinitionType::*;
+
+
+/// Create a `Compiler` that uses `parse_symbols` and `push_code` from this module.
+pub fn new_compiler() -> Compiler {
+ Compiler::new(parse_symbols, push_code)
+}
+
+
+/// Extract every symbol from a source code string.
+///
+/// Returns `None` if the source code fails syntactic parsing; the syntactic
+/// errors themselves are discarded.
+pub fn parse_symbols(source_code: &str, path: Option<&Path>) -> Option<Vec<Symbol>> {
+    let tokens = parse_syntactic(source_code, path).ok()?;
+    Some(SymbolParser::new().parse(&tokens))
+}
+
+/// Append a source file to a source compilation string.
+///
+/// Files containing only whitespace are skipped. Otherwise a non-empty
+/// compilation is padded so that it ends with two newlines, then a path
+/// comment and the file's source code are appended.
+pub fn push_code(compilation: &mut String, source_file: &SourceFile) {
+    let source_code = &source_file.source_code;
+    // Blank files contribute nothing.
+    if source_code.chars().all(char::is_whitespace) {
+        return;
+    }
+    // Separate this section from the previous one with a blank line.
+    while !compilation.is_empty() && !compilation.ends_with("\n\n") {
+        compilation.push('\n');
+    }
+    // Emit the path comment, followed by the source code itself.
+    let path_str = source_file.path.as_os_str().to_string_lossy();
+    compilation.push_str(&format!("(: {path_str} )\n"));
+    compilation.push_str(&source_code);
+}
+
+
+/// Extracts symbol definitions and references from a list of syntactic tokens.
+pub struct SymbolParser {
+ /// The symbols recorded so far, in the order they were encountered.
+ pub symbols: Vec<Symbol>,
+}
+
+impl SymbolParser {
+    /// Create a parser with no recorded symbols.
+    pub fn new() -> Self {
+        Self { symbols: Vec::new() }
+    }
+
+    /// Record a symbol with the given name, source span and role, using an
+    /// empty namespace.
+    fn record_symbol(&mut self, name: &str, source: &SourceSpan, role: SymbolRole) {
+        self.symbols.push(Symbol {
+            name: name.to_string(),
+            namespace: Vec::new(),
+            source: source.to_owned(),
+            role,
+        });
+    }
+
+    /// Walk the tokens and return every symbol found.
+    ///
+    /// Macro definitions are recorded as definitions that must precede their
+    /// references, and each invocation inside a macro body is recorded as a
+    /// reference. Label definitions are recorded as definitions that may
+    /// follow their references. All other tokens are ignored.
+    pub fn parse(mut self, semantic: &[Tracked<SyntacticToken>]) -> Vec<Symbol> {
+        for token in semantic {
+            match &token.value {
+                SyntacticToken::MacroDefinition(definition) => {
+                    let role = Definition(MustPrecedeReference);
+                    self.record_symbol(&definition.name, &definition.name.source, role);
+                    for body_token in &definition.body {
+                        if let SyntacticToken::Invocation(name) = &body_token.value {
+                            self.record_symbol(name, &body_token.source, Reference);
+                        }
+                    }
+                }
+                SyntacticToken::LabelDefinition(name) => {
+                    let role = Definition(CanFollowReference);
+                    self.record_symbol(name, &token.source, role);
+                }
+                _ => {}
+            }
+        }
+        self.symbols
+    }
+}
diff --git a/src/stages/mod.rs b/src/stages/mod.rs
new file mode 100644
index 0000000..65d14d7
--- /dev/null
+++ b/src/stages/mod.rs
@@ -0,0 +1,27 @@
+mod compiler;
+mod syntactic;
+mod syntactic_tokens;
+mod semantic;
+mod semantic_tokens;
+mod bytecode;
+mod bytecode_tokens;
+
+pub use compiler::*;
+pub use syntactic::*;
+pub use syntactic_tokens::*;
+pub use semantic::*;
+pub use semantic_tokens::*;
+pub use bytecode::*;
+pub use bytecode_tokens::*;
+
+
+/// Print a formatted line to standard output, preceded by an indent.
+///
+/// The first argument is the indentation level; the remaining arguments are
+/// passed to `println!` unchanged.
+#[macro_export]
+macro_rules! indent {
+ // A literal level of zero prints the line with no indent.
+ (0, $($tokens:tt)*) => {{
+ println!($($tokens)*);
+ }};
+ // Otherwise print one indent string per level before the line.
+ ($indent:expr, $($tokens:tt)*) => {{
+ for _ in 0..$indent { print!(" "); }
+ println!($($tokens)*);
+ }};
+}
diff --git a/src/stages/semantic.rs b/src/stages/semantic.rs
new file mode 100644
index 0000000..da804ec
--- /dev/null
+++ b/src/stages/semantic.rs
@@ -0,0 +1,141 @@
+use crate::*;
+
+use std::collections::{HashMap, HashSet};
+
+
+/// Convert syntactic tokens into a semantic program.
+///
+/// Collects every label and macro definition, records the token indices at
+/// which each definition is referenced, and links each block opener with its
+/// terminator by token index.
+///
+/// # Errors
+/// Returns the collected errors if a macro body invokes a macro that has not
+/// been defined yet at that point in the token list.
+///
+/// # Panics
+/// Panics on duplicate definitions, undefined symbols, definitions nested in
+/// a macro, and unbalanced blocks; the panic messages mark these as
+/// conditions that should have been caught before this stage.
+pub fn parse_semantic(syntactic: Vec<Tracked<SyntacticToken>>) -> Result<Program, Vec<Tracked<SemanticError>>> {
+ // Record all label definitions and macro names up front.
+ let mut definitions = HashMap::new();
+ let mut macro_names = HashSet::new();
+ for token in &syntactic {
+ match &token.value {
+ SyntacticToken::LabelDefinition(name) => {
+ let name = name.clone();
+ // The token index is a placeholder, set in the second pass.
+ let definition = Definition::new(0, DefinitionKind::LabelDefinition);
+ let tracked = Tracked::from(definition, token.source.clone());
+ if let Some(_) = definitions.insert(name.clone(), tracked) {
+ unreachable!("Uncaught duplicate label definition '{name}'");
+ }
+ }
+ SyntacticToken::MacroDefinition(definition) => {
+ let name = definition.name.clone();
+ if !macro_names.insert(name.clone()) {
+ unreachable!("Uncaught duplicate macro definition '{name}'")
+ }
+ }
+ _ => (),
+ }
+ }
+
+ // Convert syntactic tokens to semantic tokens.
+ let mut tokens: Vec<Tracked<SemanticToken>> = Vec::new();
+ let mut errors = Vec::new();
+ // Token indices of the block openers that have not been closed yet.
+ let mut stack = Vec::new();
+
+ for syn_token in syntactic {
+ // Index that this token will have in `tokens`.
+ let i = tokens.len();
+ let sem_token = match syn_token.value {
+ SyntacticToken::Comment(string) => SemanticToken::Comment(string),
+
+ SyntacticToken::LabelDefinition(name) => {
+ let definition = definitions.get_mut(&name).unwrap();
+ definition.value.definition = i;
+ SemanticToken::LabelDefinition(name)
+ }
+ SyntacticToken::MacroDefinition(definition) => {
+ let source = definition.name.source.clone();
+ let name = definition.name.clone();
+
+ let mut body: Vec<Tracked<SemanticToken>> = Vec::new();
+ let mut body_stack = Vec::new();
+ for syn_token in definition.body {
+ // Index that this token will have in the macro body.
+ let j = body.len();
+ let sem_token = match syn_token.value {
+ SyntacticToken::Comment(string) =>
+ SemanticToken::Comment(string),
+
+ SyntacticToken::LabelDefinition(label) =>
+ unreachable!("Uncaught label definition '{label}' in macro '{name}'"),
+ SyntacticToken::MacroDefinition(definition) =>
+ unreachable!("Uncaught macro definition '{}' in macro '{name}'", definition.name),
+
+ SyntacticToken::RawValue(value) => SemanticToken::RawValue(value),
+ SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
+ SyntacticToken::Invocation(symbol) => {
+ if let Some(definition) = definitions.get_mut(&symbol) {
+ definition.value.deep_references.push((i, j));
+ } else if macro_names.contains(&symbol) {
+ // The macro exists but has not been defined yet.
+ let error = SemanticError::InvocationBeforeDefinition;
+ errors.push(Tracked::from(error, syn_token.source.clone()));
+ } else {
+ unreachable!("Uncaught undefined symbol '{symbol}'");
+ };
+ SemanticToken::Invocation(symbol)
+ }
+
+ SyntacticToken::Padding(value) => SemanticToken::Padding(value),
+ SyntacticToken::String(bytes) => SemanticToken::String(bytes),
+
+ SyntacticToken::BlockOpen => {
+ body_stack.push(j);
+ SemanticToken::BlockOpen(0)
+ }
+ SyntacticToken::BlockClose => {
+ let Some(k) = body_stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator in macro {name}");
+ };
+ // Link the opener to this terminator and vice versa.
+ body[k].value = SemanticToken::BlockOpen(j);
+ SemanticToken::BlockClose(k)
+ }
+ };
+ body.push(Tracked::from(sem_token, syn_token.source));
+ }
+
+ let kind = DefinitionKind::MacroDefinition(body);
+ let tracked = Tracked::from(Definition::new(i, kind), source);
+ if let Some(_) = definitions.insert(name.value.clone(), tracked) {
+ unreachable!("Uncaught duplicate definition '{name}'")
+ };
+
+ if !body_stack.is_empty() {
+ unreachable!("Uncaught unterminated block in macro {name}");
+ }
+ SemanticToken::MacroDefinition(name)
+ }
+
+ SyntacticToken::RawValue(value) => SemanticToken::RawValue(value),
+ SyntacticToken::Instruction(instruction) => SemanticToken::Instruction(instruction),
+ SyntacticToken::Invocation(symbol) => {
+ // NOTE(review): unlike the macro-body case above, invoking a macro
+ // that is defined later panics here instead of reporting
+ // InvocationBeforeDefinition; confirm this is caught upstream.
+ let Some(definition) = definitions.get_mut(&symbol) else {
+ unreachable!("Uncaught undefined symbol '{symbol}'");
+ };
+ definition.value.references.push(i);
+ SemanticToken::Invocation(symbol)
+ }
+
+ SyntacticToken::Padding(value) => SemanticToken::Padding(value),
+ SyntacticToken::String(bytes) => SemanticToken::String(bytes),
+
+ SyntacticToken::BlockOpen => {
+ stack.push(i);
+ SemanticToken::BlockOpen(0)
+ }
+ SyntacticToken::BlockClose => {
+ let Some(k) = stack.pop() else {
+ unreachable!("Uncaught unmatched block terminator");
+ };
+ // Link the opener to this terminator and vice versa.
+ tokens[k].value = SemanticToken::BlockOpen(i);
+ SemanticToken::BlockClose(k)
+ }
+ };
+ tokens.push(Tracked::from(sem_token, syn_token.source));
+ }
+
+ if !stack.is_empty() {
+ unreachable!("Uncaught unterminated block");
+ }
+ match errors.is_empty() {
+ true => Ok(Program { definitions, tokens }),
+ false => Err(errors),
+ }
+}
diff --git a/src/stages/semantic_tokens.rs b/src/stages/semantic_tokens.rs
new file mode 100644
index 0000000..ca5b27a
--- /dev/null
+++ b/src/stages/semantic_tokens.rs
@@ -0,0 +1,101 @@
+use crate::*;
+
+use std::collections::HashMap;
+
+
+/// A semantically parsed program.
+pub struct Program {
+ /// Every label and macro definition, keyed by name.
+ pub definitions: HashMap<String, Tracked<Definition>>,
+ /// The top-level semantic tokens, in source order.
+ pub tokens: Vec<Tracked<SemanticToken>>,
+}
+
+/// A label or macro definition and the places where it is referenced.
+pub struct Definition {
+ /// Whether this is a label or a macro (with its body).
+ pub kind: DefinitionKind,
+ /// Index of the defining token in the program's token list.
+ pub definition: usize,
+ /// Indices of the top-level tokens that invoke this definition.
+ pub references: Vec<usize>,
+ /// Invocations from inside macro bodies, as pairs of the index of the
+ /// macro definition token and the index of the invocation within its body.
+ pub deep_references: Vec<(usize, usize)>,
+}
+
+impl Definition {
+    /// Create a definition of the given kind, defined at token index `i`,
+    /// with no references recorded yet.
+    pub fn new(i: usize, kind: DefinitionKind) -> Self {
+        let references = Vec::new();
+        let deep_references = Vec::new();
+        Self { kind, definition: i, references, deep_references }
+    }
+}
+
+/// The kind of a definition.
+pub enum DefinitionKind {
+ /// A macro, carrying the semantic tokens of its body.
+ MacroDefinition(Vec<Tracked<SemanticToken>>),
+ /// A label.
+ LabelDefinition,
+}
+
+/// A token produced by semantic parsing.
+pub enum SemanticToken {
+ /// The text of a comment.
+ Comment(String),
+
+ /// Definition of the named label.
+ LabelDefinition(String),
+ /// Definition of the named macro; the body is stored in the matching `Definition`.
+ MacroDefinition(Tracked<String>),
+
+ /// A literal value.
+ RawValue(Value),
+ /// An instruction.
+ Instruction(Instruction),
+ /// Invocation of the named label or macro.
+ Invocation(String),
+
+ /// Padding of the given size.
+ Padding(Value),
+ /// A string of raw bytes.
+ String(Vec<u8>),
+
+ /// Start of a block, holding the token index of the matching `BlockClose`.
+ BlockOpen(usize),
+ /// End of a block, holding the token index of the matching `BlockOpen`.
+ BlockClose(usize),
+}
+
+/// An error found during semantic parsing.
+pub enum SemanticError {
+ /// A macro body invoked a macro that had not been defined yet.
+ InvocationBeforeDefinition,
+}
+
+
+pub fn report_semantic_errors(errors: &[Tracked<SemanticError>], source_code: &str) {
+ for error in errors {
+ report_semantic_error(error, source_code);
+ }
+}
+
+fn report_semantic_error(error: &Tracked<SemanticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SemanticError::InvocationBeforeDefinition =>
+ "Invocation before definition",
+ };
+
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Print a semantic token to standard output at indentation level `i`.
+///
+/// A macro definition is followed by the tokens of its body, looked up in
+/// `definitions` and printed one level deeper.
+pub fn print_semantic_token(i: usize, token: &SemanticToken, definitions: &HashMap<String, Tracked<Definition>>) {
+    match token {
+        SemanticToken::Comment(_) =>
+            indent!(i, "Comment"),
+
+        SemanticToken::LabelDefinition(name) =>
+            indent!(i, "LabelDefinition({name})"),
+        SemanticToken::MacroDefinition(name) => {
+            indent!(i, "MacroDefinition({name})");
+            if let Some(definition) = definitions.get(name.as_str()) {
+                if let DefinitionKind::MacroDefinition(body) = &definition.kind {
+                    for token in body {
+                        print_semantic_token(i+1, token, definitions);
+                    }
+                }
+            }
+        }
+
+        SemanticToken::RawValue(value) => indent!(i, "RawValue({value})"),
+        SemanticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"),
+        SemanticToken::Invocation(name) => indent!(i, "Invocation({name})"),
+
+        SemanticToken::Padding(value) => indent!(i, "Padding({value})"),
+        SemanticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)),
+
+        SemanticToken::BlockOpen(pointer) => indent!(i, "BlockOpen(*{pointer})"),
+        // Label terminators as BlockClose so they can be told apart from openers.
+        SemanticToken::BlockClose(pointer) => indent!(i, "BlockClose(*{pointer})"),
+    }
+}
diff --git a/src/stages/syntactic.rs b/src/stages/syntactic.rs
new file mode 100644
index 0000000..c680700
--- /dev/null
+++ b/src/stages/syntactic.rs
@@ -0,0 +1,185 @@
+use crate::*;
+
+use std::path::PathBuf;
+
+
+/// Parse source code into syntactic tokens.
+///
+/// Builds a `Tokeniser` from `source_code` and the optional `path`, then
+/// returns either every token or every error found.
+pub fn parse_syntactic<P: Into<PathBuf>>(source_code: &str, path: Option<P>) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ parse_syntactic_from_tokeniser(Tokeniser::new(source_code, path))
+}
+
+/// Parse syntactic tokens from a tokeniser.
+///
+/// Errors are collected rather than stopping at the first one. After
+/// tokenising, block openers and terminators are checked for balance.
+/// Returns `Ok` with the tokens only if no errors were collected.
+fn parse_syntactic_from_tokeniser(mut t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+ t.add_delimiters(&['@','&','%',';','{','}','(',')','[',']','#','~','"','\'']);
+ t.add_terminators(&[':']);
+ let mut tokens = Vec::new();
+ let mut errors = Vec::new();
+ // Name of the most recent '@' label, used as the prefix for '&' and '~' names.
+ let mut label_name = String::new();
+
+ // Record an error and move on to the next token.
+ macro_rules! err {
+ ($error:expr) => {{
+ err!($error, t.get_source());
+ }};
+ ($error:expr, $source:expr) => {{
+ errors.push(Tracked::from($error, $source));
+ continue;
+ }};
+ }
+
+ // Build a predicate that eats one character and tests it against `$close`.
+ macro_rules! is_any {
+ ($close:expr) => {
+ |t: &mut Tokeniser| { t.eat_char() == Some($close) }
+ };
+ }
+
+ loop {
+ t.eat_whitespace();
+ t.mark_start();
+ let Some(c) = t.eat_char() else { break };
+ let token = match c {
+ // String with a null byte appended.
+ '"' => {
+ let source = t.get_source();
+ match t.track_until(is_any!('"')) {
+ Some(string) => {
+ let mut bytes = string.into_bytes();
+ bytes.push(0x00);
+ SyntacticToken::String(bytes)
+ }
+ None => err!(SyntacticError::UnterminatedNullString, source),
+ }
+ }
+ // String with no terminator appended.
+ '\'' => {
+ let source = t.get_source();
+ match t.track_until(is_any!('\'')) {
+ Some(string) => SyntacticToken::String(string.into_bytes()),
+ None => err!(SyntacticError::UnterminatedRawString, source),
+ }
+ }
+ // Comment.
+ '(' => {
+ let source = t.get_source();
+ if let Some(string) = t.track_until(is_any!(')')) {
+ // Check if the comment fills the entire line.
+ if t.start.position.column == 0 && t.end_of_line() {
+ // A full-line comment starting with ": " sets the embedded
+ // path on the tokeniser instead of producing a token.
+ if let Some(path) = string.strip_prefix(": ") {
+ t.embedded_path = Some(PathBuf::from(path.trim()));
+ t.embedded_first_line = t.start.position.line + 1;
+ continue;
+ }
+ }
+ SyntacticToken::Comment(string)
+ } else {
+ err!(SyntacticError::UnterminatedComment, source)
+ }
+ }
+ // Macro definition: a name, then a body running up to ';'.
+ '%' => {
+ let name = t.eat_token();
+ let source = t.get_source();
+ t.mark_child();
+ if let Some(_) = t.track_until(is_any!(';')) {
+ let child = t.tokenise_child_span();
+ match parse_body_from_tokeniser(child) {
+ Ok(body) => {
+ let name = Tracked::from(name, source);
+ let definition = SyntacticMacroDefinition { name, body };
+ SyntacticToken::MacroDefinition(definition)
+ }
+ Err(mut err) => {
+ errors.append(&mut err);
+ continue;
+ }
+ }
+ } else {
+ err!(SyntacticError::UnterminatedMacroDefinition, source);
+ }
+ }
+ '{' => SyntacticToken::BlockOpen,
+ '}' => SyntacticToken::BlockClose,
+ // Square brackets produce no token.
+ '[' => continue,
+ ']' => continue,
+
+ ')' => err!(SyntacticError::UnmatchedCommentTerminator),
+ ';' => err!(SyntacticError::UnmatchedMacroTerminator),
+
+ // Label definition; also becomes the prefix for later '&' and '~' names.
+ '@' => {
+ label_name = t.eat_token();
+ SyntacticToken::LabelDefinition(label_name.clone())
+ }
+ // Label definition prefixed with the most recent '@' label name.
+ '&' => {
+ let name = t.eat_token();
+ SyntacticToken::LabelDefinition(format!("{label_name}/{name}"))
+ }
+ // Invocation prefixed with the most recent '@' label name.
+ '~' => {
+ let name = t.eat_token();
+ SyntacticToken::Invocation(format!("{label_name}/{name}"))
+ }
+ // Padding.
+ '#' => {
+ let token = t.eat_token();
+ match token.parse::<Value>() {
+ Ok(value) => SyntacticToken::Padding(value),
+ Err(_) => err!(SyntacticError::InvalidPaddingValue),
+ }
+ },
+ // Anything else is tried as a value, then an instruction, and
+ // otherwise treated as an invocation.
+ c => {
+ let token = format!("{c}{}", t.eat_token());
+ match token.parse::<Value>() {
+ Ok(value) => SyntacticToken::RawValue(value),
+ Err(_) => match token.parse::<Instruction>() {
+ Ok(instruction) => SyntacticToken::Instruction(instruction),
+ Err(_) => SyntacticToken::Invocation(token),
+ }
+ }
+ }
+ };
+
+ t.mark_end();
+ let source = t.get_source();
+ tokens.push(Tracked::from(token, source));
+ }
+
+ // Check that every block open matches a block close.
+ let mut stack = Vec::new();
+ for token in &tokens {
+ match &token.value {
+ SyntacticToken::BlockOpen => stack.push(token.source.clone()),
+ SyntacticToken::BlockClose => if let None = stack.pop() {
+ let error = SyntacticError::UnmatchedBlockTerminator;
+ errors.push(Tracked::from(error, token.source.clone()));
+ }
+ _ => (),
+ }
+ }
+ // Any openers still on the stack were never terminated.
+ for source in stack {
+ let error = SyntacticError::UnterminatedBlock;
+ errors.push(Tracked::from(error, source));
+ }
+
+ match errors.is_empty() {
+ true => Ok(tokens),
+ false => Err(errors),
+ }
+}
+
+
+/// Parse the body of a macro definition from a tokeniser.
+///
+/// Syntactic errors from the underlying parse are returned immediately.
+/// Otherwise every label or macro definition found in the body is reported
+/// as an error, and the remaining tokens are returned if there were none.
+fn parse_body_from_tokeniser(t: Tokeniser) -> Result<Vec<Tracked<SyntacticToken>>, Vec<Tracked<SyntacticError>>> {
+    let mut body = Vec::new();
+    let mut problems = Vec::new();
+
+    for token in parse_syntactic_from_tokeniser(t)? {
+        // Definitions are not allowed in a body; everything else is kept.
+        let error = match token.value {
+            SyntacticToken::LabelDefinition(_) =>
+                SyntacticError::LabelDefinitionInMacroDefinition,
+            SyntacticToken::MacroDefinition(_) =>
+                SyntacticError::MacroDefinitionInMacroDefinition,
+            _ => {
+                body.push(token);
+                continue;
+            }
+        };
+        problems.push(Tracked::from(error, token.source));
+    }
+
+    if problems.is_empty() {
+        Ok(body)
+    } else {
+        Err(problems)
+    }
+}
diff --git a/src/stages/syntactic_tokens.rs b/src/stages/syntactic_tokens.rs
new file mode 100644
index 0000000..57e78e7
--- /dev/null
+++ b/src/stages/syntactic_tokens.rs
@@ -0,0 +1,107 @@
+use crate::*;
+
+/// A token produced by syntactic parsing.
+pub enum SyntacticToken {
+ /// The text of a comment.
+ Comment(String),
+
+ /// Definition of the named label.
+ LabelDefinition(String),
+ /// Definition of a macro, with its name and body.
+ MacroDefinition(SyntacticMacroDefinition),
+
+ /// A literal value.
+ RawValue(Value),
+ /// An instruction.
+ Instruction(Instruction),
+ /// Invocation of the named symbol.
+ Invocation(String),
+
+ /// Padding of the given size.
+ Padding(Value),
+ /// The bytes of a string literal.
+ String(Vec<u8>),
+
+ /// Start of a block.
+ BlockOpen,
+ /// End of a block.
+ BlockClose,
+}
+
+/// The name and body of a macro definition.
+pub struct SyntacticMacroDefinition {
+ /// Name of the macro, with the source span it was read from.
+ pub name: Tracked<String>,
+ /// Tokens making up the macro body.
+ pub body: Vec<Tracked<SyntacticToken>>,
+}
+
+/// An error found during syntactic parsing.
+pub enum SyntacticError {
+ // An opening delimiter was never closed.
+ UnterminatedBlock,
+ UnterminatedComment,
+ UnterminatedRawString,
+ UnterminatedNullString,
+ UnterminatedMacroDefinition,
+
+ // A closing delimiter appeared with nothing open.
+ UnmatchedBlockTerminator,
+ UnmatchedCommentTerminator,
+ UnmatchedMacroTerminator,
+
+ // The token after '#' could not be parsed as a value.
+ InvalidPaddingValue,
+
+ // A definition appeared inside a macro body.
+ MacroDefinitionInMacroDefinition,
+ LabelDefinitionInMacroDefinition,
+}
+
+
+pub fn report_syntactic_errors(errors: &[Tracked<SyntacticError>], source_code: &str) {
+ for error in errors {
+ report_syntactic_error(error, source_code);
+ }
+}
+
+fn report_syntactic_error(error: &Tracked<SyntacticError>, source_code: &str) {
+ let context = Context { source_code: &source_code, source: &error.source };
+ let message = match &error.value {
+ SyntacticError::UnterminatedBlock =>
+ "Block was not terminated, add a '}}' character to terminate",
+ SyntacticError::UnterminatedComment =>
+ "Comment was not terminated, add a ')' character to terminate",
+ SyntacticError::UnterminatedRawString =>
+ "Raw string was not terminated, add a ' character to terminate",
+ SyntacticError::UnterminatedNullString =>
+ "Null-terminated string was not terminated, add a '\"' character to terminate",
+ SyntacticError::UnterminatedMacroDefinition =>
+ "Macro definition was not terminated, add a ';' character to terminate",
+
+ SyntacticError::UnmatchedBlockTerminator =>
+ "Attempted to terminate a block, but no block was in progress",
+ SyntacticError::UnmatchedCommentTerminator =>
+ "Attempted to terminate a comment, but no comment was in progress",
+ SyntacticError::UnmatchedMacroTerminator =>
+ "Attempted to terminate a macro definition, but no macro definition was in progress",
+
+ SyntacticError::InvalidPaddingValue =>
+ "The padding value must be either two or four hexadecimal digits",
+
+ SyntacticError::MacroDefinitionInMacroDefinition =>
+ "A macro cannot be defined inside another macro",
+ SyntacticError::LabelDefinitionInMacroDefinition =>
+ "A label cannot be defined inside a macro",
+ };
+
+ report_source_issue(LogLevel::Error, &context, message);
+}
+
+
+/// Print a syntactic token to standard output at indentation level `i`.
+///
+/// A macro definition is followed by the tokens of its body, printed one
+/// level deeper.
+pub fn print_syntactic_token(i: usize, token: &SyntacticToken) {
+    match token {
+        SyntacticToken::Comment(_) =>
+            indent!(i, "Comment"),
+
+        SyntacticToken::LabelDefinition(name) =>
+            indent!(i, "LabelDefinition({name})"),
+        SyntacticToken::MacroDefinition(definition) => {
+            indent!(i, "MacroDefinition({})", definition.name);
+            for token in &definition.body {
+                print_syntactic_token(i+1, token);
+            }
+        }
+
+        SyntacticToken::RawValue(value) => indent!(i, "RawValue({value})"),
+        SyntacticToken::Instruction(instruction) => indent!(i, "Instruction({instruction})"),
+        SyntacticToken::Invocation(name) => indent!(i, "Invocation({name})"),
+
+        SyntacticToken::Padding(value) => indent!(i, "Padding({value})"),
+        SyntacticToken::String(bytes) => indent!(i, "String({})", String::from_utf8_lossy(bytes)),
+
+        SyntacticToken::BlockOpen => indent!(i, "BlockOpen"),
+        // Label terminators as BlockClose so they can be told apart from openers.
+        SyntacticToken::BlockClose => indent!(i, "BlockClose"),
+    }
+}