From 8b6bb67e39b59f68dc005550dd42f031b6f415e8 Mon Sep 17 00:00:00 2001
From: Ben Bridle <ben@derelict.engineering>
Date: Mon, 6 Jan 2025 17:19:06 +1300
Subject: Initial version

---
 src/generate_html.rs | 244 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs          | 197 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 441 insertions(+)
 create mode 100644 src/generate_html.rs
 create mode 100644 src/main.rs

(limited to 'src')

diff --git a/src/generate_html.rs b/src/generate_html.rs
new file mode 100644
index 0000000..3f22d3b
--- /dev/null
+++ b/src/generate_html.rs
@@ -0,0 +1,244 @@
+use crate::*;
+
+use markdown::*;
+
+
+/// Build the complete HTML page for one markdown document.
+///
+/// The `<title>` is "`page.name` &mdash; `website.name`"; both names are
+/// interpolated as-is, without HTML escaping. The `<head>` extras come
+/// from `get_html_head` and the `<main>` contents from `document_to_html`.
+///
+/// NOTE(review): the template emits `</html>` but no opening `<html>` tag.
+pub fn generate_html(document: &MarkdownDocument, page: &SourceFile, website: &Website) -> String {
+    let page_name = &page.name;
+    let site_name = &website.name;
+    let head = get_html_head(document);
+    let body = document_to_html(document, page, website);
+    format!("\
+<!DOCTYPE html>
+<head>
+<title>{page_name} &mdash; {site_name}</title>
+<meta charset='UTF-8'>
+<meta name='viewport' content='width=device-width, initial-scale=1'>
+{head}
+</head>
+<body>
+<main>
+{body}
+</main>
+</body>
+</html> \
+")
+}
+
+
+
+/// Return the markup inserted into `<head>` after the fixed meta tags.
+///
+/// If the first block of `document` is a fragment with the language
+/// `embed-html-head`, its content is returned verbatim. Otherwise the
+/// default stylesheet links and math-rendering script tag are returned.
+///
+/// NOTE(review): the default uses relative `static/...` URLs with no
+/// `../` prefix, unlike the internal links built in `line_to_html`;
+/// confirm this is intended for pages inside subdirectories.
+pub fn get_html_head(document: &MarkdownDocument) -> String {
+    match document.blocks.first() {
+        Some(Block::Fragment { language, content }) if language == "embed-html-head" => {
+            content.to_string()
+        }
+        _ => String::from("\
+<link rel='stylesheet' type='text/css' media='screen' href='static/screen.css'>
+<link rel='stylesheet' type='text/css' media='print' href='static/print.css'>
+<script src='static/render_math.js' defer></script> \
+        "),
+    }
+}
+
+
+
+/// Render every block of `document` as HTML and return the result.
+///
+/// Inline text is rendered through `line_to_html`, which uses `page` and
+/// `website` to resolve internal links. Fragments whose language is
+/// `hidden`, `todo` or `embed-html-head` produce no output here.
+///
+/// An embedded file with a missing or unrecognised extension is reported
+/// through `error!`, which terminates the process.
+///
+/// NOTE(review): embedded `path` and `label` values are placed inside
+/// single-quoted attributes without escaping, and unlabelled fragments go
+/// through `sanitize_text`, which also rewrites quotes and dashes.
+pub fn document_to_html(document: &MarkdownDocument, page: &SourceFile, website: &Website) -> String {
+    let mut html = String::new();
+
+    // Render the inline tokens of a line for the current page and website.
+    macro_rules! line_to_html {
+        ($l:expr) => {{ line_to_html(&$l, page, website) }}; }
+    // Append one formatted line (the text plus '\n') to `html`.
+    macro_rules! html {
+        ($($arg:tt)*) => {{ html.push_str(&format!($($arg)*)); html.push('\n'); }}; }
+    // Emit `<t>rendered line</t>` as a single output line.
+    macro_rules! tag {
+        ($t:expr,$l:expr) => { html!("<{}>{}</{}>", $t, line_to_html!($l), $t) }; }
+    // Emit `<t>`, evaluate `$f` to emit the contents, then emit `</t>`.
+    macro_rules! wrap {
+        ($t:expr,$f:expr) => {{ html!("<{}>", $t); $f; html!("</{}>", $t); }};
+    }
+
+    for block in &document.blocks {
+        match block {
+            Block::Heading { level, line } => match level {
+                Level::Heading1 => tag!("h1", line),
+                Level::Heading2 => tag!("h2", line),
+                Level::Heading3 => tag!("h3", line),
+            }
+            Block::Paragraph(line) => tag!("p", line),
+            Block::List(lines) => wrap!("ul", for line in lines {
+                // Replace the first colon that lies outside any tag or
+                // element with a <br> tag. `depth` rises by one on '<' and
+                // drops by two on the '/' of a closing tag, so it is zero
+                // only in top-level text.
+                let mut depth = 0;
+                let mut prev = '\0';
+                let mut output = String::new();
+                for c in line_to_html!(line).chars() {
+                    output.push(c);
+                    if c == '<' {
+                        depth += 1;
+                    } else if c == '/' && prev == '<' {
+                        depth -= 2;
+                    } else if c == ':' && depth == 0 {
+                        // Raising the depth keeps later colons untouched.
+                        output.pop(); output.push_str("<br>"); depth += 99;
+                    }
+                    prev = c;
+                }
+                match output.contains("<br>") {
+                    true => html!("<li class='extended'>{output}</li>"),
+                    false => html!("<li>{output}</li>"),
+                }
+            }),
+            Block::Note(lines) => wrap!("aside", for line in lines { tag!("p", line) }),
+            Block::Embedded { label, path } => match path.rsplit_once('.') {
+                Some((_, extension)) => match extension.to_lowercase().as_str() {
+                    "jpg"|"jpeg"|"png"|"webp"|"gif"|"tiff" => html!(
+                        "<figure><a href='{}'><img src='{}' alt='{}' title='{}'></a></figure>",
+                        path, path, label, label
+                    ),
+                    // <audio> is not a void element: without the end tag the
+                    // rest of the page is parsed as its fallback content.
+                    "mp3"|"wav"|"m4a" => html!("<audio src='{path}' controls></audio>"),
+                    ext => error!("Unrecognised extension for embedded file '{path}' with extension '{ext}'"),
+                }
+                _ => error!("Cannot embed file '{path}' with no file extension"),
+            }
+            Block::Fragment { language, content } => {
+                match language.as_str() {
+                    "embed-html" => html!("{}", content),
+                    "embed-css" => wrap!("style", html!("{}", content)),
+                    "embed-javascript"|"embed-js" => wrap!("script", html!("{}", content)),
+                    // `embed-html-head` is consumed by `get_html_head` instead.
+                    "hidden"|"todo"|"embed-html-head" => (),
+                    _ => {
+                        html!("<pre class='{}'>", language);
+                        html!("{}", sanitize_text(content));
+                        html!("</pre>");
+                    },
+                }
+            }
+            Block::Break => html!("<hr>"),
+            Block::Table(table) => wrap!("table", {
+                wrap!("thead",
+                    wrap!("tr", for column in &table.columns {
+                        tag!("th", column.name);
+                    })
+                );
+                wrap!("tbody", for section in &table.sections {
+                    for row in section {
+                        // `zip` stops at the shorter of the columns and the row.
+                        wrap!("tr", for (column, cell) in std::iter::zip(&table.columns, row) {
+                            let text_raw = line_to_html!(cell);
+                            // Cells that are exactly "Yes" or "No" are shown as marks.
+                            let text = match text_raw.as_str() {
+                                "Yes" => "✓",
+                                "No"  => "✗",
+                                other => other,
+                            };
+                            // A "--" placeholder is centred regardless of the column.
+                            let align = match text {
+                                "--" => "c",
+                                _ => match column.alignment {
+                                    Alignment::Left => "l",
+                                    Alignment::Center => "c",
+                                    Alignment::Right => "r",
+                                },
+                            };
+                            // Negative or empty values also get the `dim` class.
+                            let class = match ["No", "--", "0"].contains(&text_raw.as_str()) {
+                                true  => format!("{align} dim"),
+                                false => format!("{align}"),
+                            };
+                            html!("<td class='{}'>{}</td>", class, text);
+                        })
+                    }
+                });
+            })
+        }
+    }
+    html
+}
+
+
+
+/// Render the inline tokens of one markdown line as an HTML string.
+///
+/// Text tokens are passed through `sanitize_text` and wrapped in the
+/// matching inline element. Link tokens become `<a>` elements with the
+/// class `heading`, `page` or `external`.
+///
+/// An internal link to a page that `website.has_page` does not recognise
+/// is reported through `error!`, which terminates the process.
+fn line_to_html(line: &Line, page: &SourceFile, website: &Website) -> String {
+    let mut html = String::new();
+    for line_element in &line.tokens {
+        match line_element {
+            Token::Normal(text) => html.push_str(&sanitize_text(text)),
+            Token::Bold(text) => {
+                let text = &sanitize_text(text); html.push_str(&format!("<b>{text}</b>")) }
+            Token::Italic(text) => {
+                let text = &sanitize_text(text); html.push_str(&format!("<i>{text}</i>")) }
+            Token::Monospace(text) => {
+                let text = &sanitize_text(text); html.push_str(&format!("<code>{text}</code>")) }
+            Token::Math(text) => {
+                let text = &sanitize_text(text); html.push_str(&format!("<span class='math'>{text}</span>")) }
+            Token::InternalLink(path) => {
+                // Split "page#section" into its optional page and section
+                // parts. A link starting with '#' targets the current page.
+                let (label, target, section) = match path.split_once('#') {
+                    Some(("", section)) => (section, None, Some(section)),
+                    Some((target, section)) => (section, Some(target), Some(section)),
+                    None => (path.as_str(), Some(path.as_str()), None),
+                };
+                // Only the text after the first '/' of the label is shown.
+                let full_label = sanitize_text(label);
+                let label = match full_label.split_once('/') {
+                    Some((_parent, label)) => label.trim(),
+                    None => &full_label,
+                };
+                let mut href = String::new();
+                let class = match target {
+                    Some(target) => {
+                        // Check the page itself, without extension or
+                        // fragment, so "page#section" links are validated
+                        // instead of panicking.
+                        let target_url = make_url_safe(target);
+                        if !website.has_page(&target_url) {
+                            error!("Page {:?} contains invalid link to {:?}", page.name, target_url);
+                        }
+                        // Return to the site root before descending into a link.
+                        let levels = page.full_url.chars().filter(|c| *c == '/').count();
+                        for _ in 0..levels { href.push_str("../") }
+                        href.push_str(&target_url);
+                        href.push_str(".html");
+                        "page"
+                    }
+                    // Same-page links stay relative to the current page, so
+                    // they get no "../" prefix.
+                    None => "heading",
+                };
+                if let Some(section) = section {
+                    href.push('#');
+                    href.push_str(&make_url_safe(section));
+                }
+                html.push_str(&format!("<a href='{href}' class='{class}'>{label}</a>"))
+            }
+            Token::ExternalLink { label, path } => {
+                // A path without any '/' is treated as a link within the site.
+                // NOTE(review): these links are neither checked with
+                // `has_page` nor prefixed with "../"; confirm that is intended.
+                let is_internal = path.find("/").is_none();
+                let (new_label, class, path) = match (is_internal, path.split_once("#")) {
+                    (true, Some(("", frag)))   => (sanitize_text(frag),   "heading",  format!("#{}", make_url_safe(frag)) ),
+                    (true, Some((page, frag))) => (sanitize_text(frag),   "page", format!("{}.html#{}", make_url_safe(page), make_url_safe(frag)) ),
+                    (true, None)               => (sanitize_text(path), "page", if path.contains(".") { path.clone() } else { format!("{}.html", make_url_safe(path)) } ),
+                    (false, _)                 => (sanitize_text(path), "external", path.clone() ) };
+                // An explicit label wins over the one derived from the path.
+                let label = match label.is_empty() { true => new_label, false => sanitize_text(label) };
+                html.push_str(&format!("<a href='{path}' class='{class}'>{label}</a>"));
+            }
+        }
+    }
+    html
+}
+
+
+
+/// Escape text for inclusion in HTML and apply typographic substitutions.
+///
+/// - `&` becomes `&amp;`, unless it is directly followed by `#`, so that
+///   numeric character references are left intact.
+/// - `<` and `>` become `&lt;` and `&gt;`.
+/// - `"` and `'` become curly quotes: an opening quote when preceded by
+///   whitespace or at the start of the text, a closing quote otherwise.
+/// - `-` becomes an em dash when it has whitespace, or a text boundary,
+///   on both sides.
+fn sanitize_text(text: &str) -> String {
+    let mut output = String::new();
+    // The start of the text counts as whitespace.
+    let mut prev = ' ';
+    let mut rest = text.chars().peekable();
+    while let Some(c) = rest.next() {
+        let next = rest.peek().copied();
+        match c {
+            // The HTML syntax for unicode characters is &#0000
+            '&' if next == Some('#') => output.push('&'),
+            '&' => output.push_str("&amp;"),
+            '<' => output.push_str("&lt;"),
+            '>' => output.push_str("&gt;"),
+            '"' if prev.is_whitespace() => output.push('“'),
+            '"' => output.push('”'),
+            '\'' if prev.is_whitespace() => output.push('‘'),
+            '\'' => output.push('’'),
+            // The end of the text counts as whitespace too.
+            '-' if prev.is_whitespace() && next.unwrap_or(' ').is_whitespace() => {
+                output.push('—')
+            }
+            other => output.push(other),
+        }
+        prev = c;
+    }
+    output
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..d8c9274
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,197 @@
+#![feature(path_add_extension)]
+
+mod generate_html;
+pub use generate_html::*;
+
+use markdown::*;
+use vagabond::*;
+
+
+// ANSI escape sequences used to style the messages printed to stderr.
+const NORMAL: &str = "\x1b[0m";
+const BOLD:   &str = "\x1b[1m";
+const WHITE:  &str = "\x1b[37m";
+const RED:    &str = "\x1b[31m";
+const BLUE:   &str = "\x1b[34m";
+
+// Global switch for `verbose!` output. `main` sets it when `args.verbose`
+// is true; every access is wrapped in `unsafe` because it is a `static mut`.
+static mut VERBOSE: bool = false;
+// Print an "[INFO]" line to stderr, but only when `VERBOSE` is set.
+// Takes the same arguments as `eprint!`.
+#[macro_export] macro_rules! verbose {
+    ($($tokens:tt)*) => { if unsafe { VERBOSE } {
+        eprint!("{BOLD}{BLUE}[INFO]{NORMAL}: "); eprint!($($tokens)*);
+        eprintln!("{NORMAL}");
+    } };
+}
+// Print an "[ERROR]" line to stderr, then exit the process with status 1.
+// Takes the same arguments as `eprint!`; control never returns to the caller.
+#[macro_export] macro_rules! error {
+    ($($tokens:tt)*) => {{
+        eprint!("{BOLD}{RED}[ERROR]{WHITE}: "); eprint!($($tokens)*);
+        eprintln!("{NORMAL}"); std::process::exit(1);
+    }};
+}
+
+/// Entry point: collect the files under the source directory, then write
+/// the site into `<destination>/<url-safe site name>`.
+///
+/// Files with the extension `md` become pages: each is copied verbatim
+/// and, when `--html` is given, also rendered to HTML. Every other file is
+/// copied as a static file. Read and copy failures in this function are
+/// reported through `error!`, which exits with status 1.
+///
+/// NOTE(review): `args.gmi` and `website.error` are not read here.
+fn main() {
+    let args = Arguments::from_env_or_exit();
+    if args.version {
+        let version = env!("CARGO_PKG_VERSION");
+        eprintln!("Markdown website generator, version {version}");
+        std::process::exit(0);
+    }
+    if args.verbose {
+        // SAFETY: written here before any `verbose!` call in this function;
+        // no threads are spawned in the code shown.
+        unsafe { VERBOSE = true; }
+    }
+
+    let mut website = Website {
+        source_files: Vec::new(),
+        static_files: Vec::new(),
+        name: match Entry::from_path(&args.source) {
+            Ok(entry) => entry.name,
+            Err(err) => error!("Couldn't open {:?}: {:?}", args.source, err),
+        },
+        error: false,
+    };
+
+    // Collect all website files.
+    match traverse_directory(&args.source) {
+        Ok(entries) => for entry in entries {
+            // Generate name, stripping any leading digit sequence.
+            let (mut name, extension) = entry.split_name();
+            if let Some((prefix, suffix)) = name.split_once(' ') {
+                if prefix.chars().all(|c| "0123456789-".contains(c)) {
+                    name = suffix.to_string();
+                }
+            }
+            // Generate full URL with stripped name, no extension.
+            let source_path = entry.original_path;
+            let relative_path = source_path.strip_prefix(&args.source).unwrap_or_else(
+                // Probably unreachable.
+                |_| error!("Path doesn't start with {:?}: {:?}", args.source, source_path));
+            let mut full_url = String::new();
+            let mut components: Vec<_> = relative_path.components().collect();
+            components.pop();  // Remove file segment, use the stripped name instead.
+            for c in components {
+                full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy()));
+                full_url.push('/')
+            };
+            full_url.push_str(&make_url_safe(&name));
+
+
+            if extension == "md" {
+                let mut file_url = make_url_safe(&name);
+                // A "+index" page stands in for its directory.
+                if file_url == "+index" {
+                    let components: Vec<_> = relative_path.components().collect();
+                    if components.len() == 1 {
+                        // Top-level index: this is the home page of the site.
+                        name = String::from("Home");
+                        file_url = String::from("index");
+                        full_url = String::from("index");
+                    } else {
+                        // Nested index: take the name and URL of the parent
+                        // directory, placed alongside that directory.
+                        let parent = components[components.len()-2];
+                        let parent_string = parent.as_os_str().to_string_lossy().to_string();
+                        name = parent_string;
+                        file_url = make_url_safe(&name);
+                        full_url.clear();
+                        for c in &components[..components.len()-2] {
+                            full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy()));
+                            full_url.push('/')
+                        };
+                        full_url.push_str(&file_url);
+                    }
+                }
+                website.source_files.push(SourceFile { name, file_url, full_url, source_path });
+            } else {
+                full_url.push('.'); full_url.push_str(&extension);
+                website.static_files.push(StaticFile { full_url, source_path });
+            }
+        }
+        Err(err) => error!("Could not read from source directory: {:?}", err),
+    }
+
+    let mut destination = args.destination.clone();
+    destination.push(make_url_safe(&website.name));
+
+    for source_file in &website.source_files {
+        // Report unreadable files like every other failure, not by panicking.
+        let markdown = match std::fs::read_to_string(&source_file.source_path) {
+            Ok(markdown) => markdown,
+            Err(err) => error!("Could not read {:?}: {:?}", source_file.source_path, err),
+        };
+        let document = MarkdownDocument::from_str(&markdown);
+        let mut destination = destination.clone();
+        destination.push(&source_file.full_url);
+        // Convert document to different formats.
+        if args.html {
+            let html = generate_html(&document, source_file, &website);
+            write_file(&html, &destination, "html");
+        }
+        // Copy original markdown file.
+        write_file(&markdown, &destination, "md");
+    }
+
+    for static_file in &website.static_files {
+        let mut destination = destination.clone();
+        destination.push(&static_file.full_url);
+        verbose!("Copying static file to {destination:?}");
+        if let Err(err) = make_parent_directory(&destination) {
+            error!("Could not create parent directory of {destination:?}: {err:?}");
+        }
+        if let Err(err) = copy(&static_file.source_path, &destination) {
+            error!("Could not copy {:?} to {destination:?}: {err:?}", static_file.source_path);
+        }
+    }
+}
+
+
+
+/// Write `text` to the file at `destination` with `ext` added as an
+/// extra extension, creating the parent directory first.
+///
+/// A failure to create the directory or to write the file is reported
+/// through `error!`, which exits with status 1.
+pub fn write_file(text: &str, destination: &PathBuf, ext: &str) {
+    let mut destination = destination.clone();
+    destination.add_extension(ext);
+    verbose!("Generating {destination:?}");
+    if let Err(err) = make_parent_directory(&destination) {
+        error!("Could not create parent directory of {destination:?}: {err:?}");
+    }
+    // The path is handed over by value, as in the original call, so a
+    // copy is passed to keep the path available for the error message.
+    if let Err(err) = write_to_file(destination.clone(), text) {
+        error!("Could not write {destination:?}: {err:?}");
+    }
+}
+
+/// Reduce `text` to the characters used in generated URLs.
+///
+/// ASCII letters are lowercased, each space becomes `-`, alphanumeric
+/// characters and any of `-_~.+/` are kept, and everything else is dropped.
+pub fn make_url_safe(text: &str) -> String {
+    let mut url = String::new();
+    for c in text.chars() {
+        match c.to_ascii_lowercase() {
+            ' ' => url.push('-'),
+            kept if kept.is_alphanumeric() || "-_~.+/".contains(kept) => url.push(kept),
+            _ => {}
+        }
+    }
+    url
+}
+
+
+/// Everything collected about the site before output is generated.
+pub struct Website {
+    /// Site name; `main` takes it from the entry for the source directory.
+    pub name: String,
+    /// Markdown pages found under the source directory.
+    pub source_files: Vec<SourceFile>,
+    /// All other files found under the source directory.
+    pub static_files: Vec<StaticFile>,
+    /// Initialised to `false` in `main`; not read in the code shown here.
+    pub error: bool,
+}
+
+impl Website {
+    /// Report whether any collected source file has `path` as its
+    /// `full_url` (a URL path without extension).
+    pub fn has_page(&self, path: &str) -> bool {
+        self.source_files
+            .iter()
+            .any(|source_file| source_file.full_url == path)
+    }
+}
+
+/// A markdown page of the site, as collected by `main`.
+pub struct SourceFile {
+    /// Page name: the file name with any leading digit prefix removed.
+    /// For a `+index` page it is `Home` at the top level and the name of
+    /// the parent directory otherwise.
+    pub name: String,
+    pub file_url: String,  // URL file segment, no extension
+    pub full_url: String,  // URL full path, no extension
+    /// Path of the original markdown file.
+    pub source_path: PathBuf,
+}
+
+/// A non-markdown file that `main` copies to the output unchanged.
+pub struct StaticFile {
+    pub full_url: String,  // URL full path, with extension
+    /// Path of the original file.
+    pub source_path: PathBuf,
+}
+
+// Command-line interface definition. `main` obtains the parsed values
+// through `Arguments::from_env_or_exit()`.
+// NOTE(review): `--gmi` is declared here but not read in the code shown.
+xflags::xflags! {
+    /// Generate a website from a structured directory of markdown files.
+    cmd arguments {
+        /// Source directory with markdown files
+        required source: PathBuf
+        /// Path to output directory
+        required destination: PathBuf
+        /// Generate HTML output
+        optional --html
+        /// Generate Gemtext output
+        optional --gmi
+        /// Print information as each file is parsed
+        optional -v, --verbose
+        /// Print the program version and exit
+        optional --version
+    }
+}
-- 
cgit v1.2.3-70-g09d2