From ee28abce78ffa3b53ff039ff8640a3b37dc5348b Mon Sep 17 00:00:00 2001
From: Ben Bridle <ben@derelict.engineering>
Date: Thu, 9 Jan 2025 22:15:55 +1300
Subject: Rewrite link handling and add navigation features to generated HTML

---
 Cargo.lock           |   4 +-
 Cargo.toml           |   2 +-
 src/collect_files.rs | 174 +++++++++++++++++++++++++++++++--------------------
 src/generate_html.rs | 105 ++++++++++++++++++++++++-------
 src/main.rs          |  22 +++++++
 5 files changed, 212 insertions(+), 95 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0f29dd4..f527498 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,8 +4,8 @@ version = 4
 
 [[package]]
 name = "markdown"
-version = "2.1.0"
-source = "git+git://benbridle.com/markdown?tag=v2.1.0#36daed5dc398697905ac579f636fbcbc56d30efb"
+version = "2.1.1"
+source = "git+git://benbridle.com/markdown?tag=v2.1.1#259eeb0094b70d80cb8300707fe89f5adf554b1d"
 
 [[package]]
 name = "toaster"
diff --git a/Cargo.toml b/Cargo.toml
index b56c9e7..4c7bf13 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"
 
 [dependencies]
 vagabond = { git = "git://benbridle.com/vagabond", tag = "v1.0.2" }
-markdown = { git = "git://benbridle.com/markdown", tag = "v2.1.0" }
+markdown = { git = "git://benbridle.com/markdown", tag = "v2.1.1" }
 xflags = "0.4.0-pre.1"
 
 [profile.release]
diff --git a/src/collect_files.rs b/src/collect_files.rs
index 88d065f..d9cfb37 100644
--- a/src/collect_files.rs
+++ b/src/collect_files.rs
@@ -11,13 +11,20 @@ pub struct Website {
 }
 
 pub struct Page {
-    pub name: String,                 // Display name
-    pub parent_url: String,           // URL base for relative links
-    pub file_url: String,             // Safe file name, no extension
-    pub full_url: String,             // Safe full URL, no extension
+    pub name: String,                 // Display name of this page
+    pub name_url: String,             // URL-safe name for this page, no extension
+    pub full_url: String,             // Full URL-safe path for this page, no extension
+    pub parents: Vec<String>,         // Parent directory names as on disk, not URL-safe
+    pub parent_url: String,           // Base URL prepended to relative links in this page
     pub source_path: PathBuf,         // Absolute path to source file
     pub document: MarkdownDocument,   // File content parsed as markdown
-    pub headings: Vec<String>,        // Safe name of each document heading
+    pub headings: Vec<Heading>,       // Headings of the page, in document order
+}
+
+pub struct Heading {
+    pub name: String,                 // Heading text (the heading line as a string)
+    pub url: String,                  // URL-safe form of the name, used as link fragment
+    pub level: Level,                 // Heading level taken from the markdown block
 }
 
 pub struct StaticItem {
@@ -28,14 +35,12 @@ pub struct StaticItem {
 
 
 impl Page {
-    pub fn back_string(&self) -> String {
-        let mut back = String::new();
-        for c in self.full_url.chars() {
-            if c == '/' {
-                back.push_str("../");
-            }
+    pub fn root(&self) -> String {  // Relative prefix leading from this page to the site root
+        let mut root = String::new();
+        for _ in &self.parents {
+            root.push_str("../");  // One step up per parent directory
         }
-        return back;
+        return root;
     }
 }
 
@@ -59,9 +64,7 @@ impl Website {
     fn collect_entry(&mut self, path: &Path, prefix: &Path) {
         let entry = Entry::from_path(path).unwrap();
         // Ignore dotted entries.
-        if entry.name.starts_with('.') {
-            return;
-        }
+        if entry.name.starts_with('.') { return }
         // Get name and extension.
         let (mut name, extension) = entry.split_name();
         if let Some((prefix, suffix)) = name.split_once(' ') {
@@ -69,18 +72,14 @@ impl Website {
                 name = suffix.to_string();
             }
         }
-        let file_url = make_url_safe(&name);
+        let name_url = make_url_safe(&name);
         // Generate parent URL, used only for files.
         let source_path = entry.original_path.clone();
         let relative_path = source_path.strip_prefix(prefix).unwrap_or_else(
-            |_| error!("Path doesn't start with {:?}: {:?}", prefix, source_path));
-        let mut parent_url = String::new();
-        let mut components: Vec<_> = relative_path.components().collect();
-        components.pop();  // Remove file segment.
-        for c in &components {
-            let segment = &make_url_safe(&c.as_os_str().to_string_lossy());
-            parent_url.push_str(segment); parent_url.push('/')
-        };
+            |_| error!("Path doesn't start with {prefix:?}: {source_path:?}"));
+        let mut parents: Vec<_> = relative_path.components()
+            .map(|c| c.as_os_str().to_string_lossy().to_string()).collect();
+        parents.pop();  // Remove file segment.
 
         // Process each entry.
         if entry.is_directory() {
@@ -98,34 +97,74 @@ impl Website {
                     let markdown = std::fs::read_to_string(&source_path).unwrap();
                     let document = MarkdownDocument::from_str(&markdown);
                     let headings = document.blocks.iter()
-                        .filter_map(|block| if let Block::Heading { line, .. } = block {
-                            Some(make_url_safe(&line.to_string()))
+                        .filter_map(|block| if let Block::Heading { line, level } = block {
+                            let name = line.to_string();
+                            let url = make_url_safe(&name);
+                            let level = level.to_owned();
+                            Some(Heading { name, url, level })
                         } else {
                             None
                         }).collect();
-                    // Change name and path if this is an index file.
-                    let mut name = name;
-                    let mut file_url = file_url;
-                    let mut full_url = format!("{parent_url}{file_url}");
-                    if file_url == "+index" {
-                        if components.is_empty() {
+                    if name_url == "+index" {
+                        if parents.is_empty() {
                             // This is the index file for the whole site.
-                            name = String::from("Home");
-                            file_url = String::from("index");
-                            full_url = String::from("index");
+                            self.pages.push(Page {
+                                name: String::from("Home"),
+                                name_url: String::from("index"),
+                                full_url: String::from("index"),
+                                parents,
+                                parent_url: String::from(""),
+                                source_path,
+                                document,
+                                headings,
+                            });
                         } else {
                             // This is an index file for a directory.
-                            name = components[components.len()-1]
-                                .as_os_str().to_string_lossy().to_string();
-                            file_url = make_url_safe(&name);
-                            full_url = parent_url.strip_suffix('/').unwrap_or(&parent_url).to_string();
+                            let name = parents[parents.len()-1].clone();
+                            let name_url = make_url_safe(&name);
+                            let mut full_url = String::new();
+                            for parent in &parents {
+                                full_url.push_str(&make_url_safe(parent));
+                                full_url.push('/');
+                            }
+                            let parent_url = full_url.clone();
+                            full_url.pop();
+                            parents.pop();
+                            self.pages.push(Page {
+                                name,
+                                name_url,
+                                full_url,
+                                parents,
+                                parent_url,
+                                source_path,
+                                document,
+                                headings,
+                            });
                         }
+                    } else {
+                        let mut full_url = String::new();
+                        for parent in &parents {
+                                full_url.push_str(&make_url_safe(parent));
+                                full_url.push('/');
+                        }
+                        full_url.push_str(&name_url);
+                        let mut parent_url = full_url.clone();
+                        parent_url.push('/');
+                        self.pages.push(Page {
+                            name, name_url, full_url,
+                            parents, parent_url,
+                            source_path,
+                            document, headings,
+                        });
                     }
-                    self.pages.push(
-                        Page { name, parent_url, file_url, full_url, source_path, document, headings });
                 },
                 _ => {
-                    let full_url = format!("{parent_url}{file_url}.{extension}");
+                    let mut parent_url = String::new();
+                    for parent in &parents {
+                            parent_url.push_str(&make_url_safe(parent));
+                            parent_url.push('/');
+                    }
+                    let full_url = format!("{parent_url}{name_url}.{extension}");
                     self.static_files.push(StaticItem { full_url, source_path });
                 },
             }
@@ -144,42 +183,41 @@ impl Website {
             }
             None => (path, None),
         };
-        let path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path);
-
-        // Attach parent of current page to given path.
-        let directory = match from.parent_url.rsplit_once('/') {
-            Some((parent, _)) => parent,
-            None => &from.parent_url,
-        };
-        let full_path = match path.starts_with("/") {
-            true => path.to_string(),
-            false => format!("{directory}/{path}"),
-        };
+        let mut path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path).to_string();
+        // Attach parent if not an absolute path.
+        if !path.starts_with('/') {
+            path = format!("{}{path}", from.parent_url);
+        }
 
-        // Remove relative portions of path.
-        let segments: Vec<&str> = full_path.split("/")
-            .filter(|seg| !seg.is_empty() && *seg != ".")
+        // Iteratively collapse ".." segments.
+        let mut segments: Vec<&str> = path.split('/')
+            .filter(|s| !s.is_empty() && *s != ".")
             .collect();
-        let mut reduced_segments: Vec<&str> = segments.windows(2)
-            .filter(|w| w[1] != "..")
-            .map(|w| w[1])
-            .collect();
-        // The first segment is always skipped by the previous step.
-        if !segments.is_empty() && segments.get(1) != Some(&"..") {
-            if segments[0] != ".." {
-                reduced_segments.insert(0, segments[0]);
+        // Every index is scanned so a trailing ".." collapses too; an empty list
+        // makes the range empty instead of underflowing `len() - 1`.
+        'outer: loop {
+            for i in 0..segments.len() {
+                if segments[i] == ".." {
+                    // A leading ".." has nothing to cancel and is dropped alone;
+                    // any other ".." is removed along with the segment before it.
+                    segments.remove(i);
+                    if i > 0 { segments.remove(i-1); }
+                    continue 'outer;
+                }
             }
+            break;
         }
-        let path = reduced_segments.join("/");
-
+        // Find page with this path in website.
+        let path = segments.join("/");
         for page in &self.pages {
             if page.full_url == path {
                 if let Some(heading) = heading {
-                    if !page.headings.contains(&make_url_safe(heading)) {
+                    if !page.headings.iter().any(|h| h.url == make_url_safe(heading)) {
                         warn!("Page {:?} contains link to nonexistent heading {heading:?} on page {path:?}", from.name);
                     }
                 }
-                return Some(format!("{path}.{ext}"));
+                let root = from.root();
+                return Some(format!("{root}{path}.{ext}"));
             }
         }
         return None;
diff --git a/src/generate_html.rs b/src/generate_html.rs
index 84c3bdb..dd08885 100644
--- a/src/generate_html.rs
+++ b/src/generate_html.rs
@@ -4,44 +4,94 @@ use markdown::*;
 
 
 pub fn generate_html(document: &MarkdownDocument, page: &Page, website: &Website) -> String {
+    let root = page.root();
+    let page_name = &page.name;
+    let site_name = &website.name;
+    let mut parent_url = String::new();
+    for segment in &page.parents {
+        parent_url.push_str(&make_url_safe(segment)); parent_url.push('/');
+    }
+    parent_url.pop();
+    let parent_name = match page.parents.get(page.parents.len()-1) {
+        Some(parent) => parent.to_string(),
+        None => String::new(),
+    };
+
+    let head = get_html_head(document, page); let head = head.trim();
+    let mut home = format!("<a id='home' href='{root}index.html'>{site_name}</a>");
+    let mut parent = format!("<a id='parent' href='../{parent_url}.html'>{parent_name}</a>");
+    let mut title = format!("<h1 id='title'>{page_name}</h1>");
+    let mut toc = get_table_of_contents(page);
+    let main = document_to_html(document, page, website); let main = main.trim();
+
+    if page.parents.is_empty() {
+        parent.clear();
+        if page.name_url == "index" {
+            home.clear();
+            title.clear();
+            toc.clear();
+        }
+    }
+
     format!("\
 <!DOCTYPE html>
 <head>
-<title>{} &mdash; {}</title>
+<title>{page_name} &mdash; {site_name}</title>
 <meta charset='UTF-8'>
 <meta name='viewport' content='width=device-width, initial-scale=1'>
-{}
+{head}
 </head>
 <body>
+<header>
+<nav id='up'>
+{home}
+{parent}
+</nav>
+{title}
+{toc}
+</header>
 <main>
-{}
+{main}
 </main>
 </body>
-</html> \
-",
-        page.name, website.name,
-        get_html_head(document, page).trim(),
-        document_to_html(document, page, website).trim()
-    )
+</html>")
 }
 
 
-
 pub fn get_html_head(document: &MarkdownDocument, page: &Page) -> String {
     if let Some(Block::Fragment { language, content }) = document.blocks.first() {
         if language == "embed-html-head" {
             return content.to_string();
         }
     }
-    let back = page.back_string();
+    let root = page.root();  // No embedded head fragment: default assets are linked under {root}static/
     format!("\
-<link rel='stylesheet' type='text/css' media='screen' href='{back}static/screen.css'>
-<link rel='stylesheet' type='text/css' media='print' href='{back}static/print.css'>
-<script src='{back}static/render_math.js' defer></script> \
+<link rel='stylesheet' type='text/css' media='screen' href='{root}static/screen.css'>
+<link rel='stylesheet' type='text/css' media='print' href='{root}static/print.css'>
+<script src='{root}static/render_math.js' defer></script> \
     ")
 }
 
 
+/// Build the collapsible `<nav id='toc'>` list linking to each heading of
+/// `page`. Pages with fewer than three headings get an empty string.
+pub fn get_table_of_contents(page: &Page) -> String {
+    if page.headings.len() < 3 { return String::new() }
+    let mut html = String::from("<nav id='toc'><details><summary></summary><ul>\n");
+    html.extend(page.headings.iter().map(|entry| {
+        let (name, url) = (&entry.name, &entry.url);
+        // The CSS class records the heading level.
+        let class = match entry.level {
+            Level::Heading1 => "l1",
+            Level::Heading2 => "l2",
+            Level::Heading3 => "l3",
+        };
+        format!("<li><a href='#{url}' class='{class}'>{name}</a></li>\n")
+    }));
+    html.push_str("</ul></details></nav>\n");
+    html
+}
+
 
 pub fn document_to_html(document: &MarkdownDocument, page: &Page, website: &Website) -> String {
     let mut html = String::new();
@@ -54,9 +104,10 @@ pub fn document_to_html(document: &MarkdownDocument, page: &Page, website: &Webs
         ($t:expr,$l:expr,$c:expr) => { html!("<{} {}>{}</{}>", $t, $c, line_to_html!($l), $t) };
         ($t:expr,$l:expr)         => { html!("<{}>{}</{}>",    $t,     line_to_html!($l), $t) }; }
     macro_rules! wrap {
-        ($t:expr,$f:expr) => {{ html!("<{}>", $t); $f; html!("</{}>", $t); }};
-    }
+        ($t:expr,$c:expr,$f:expr) => {{ html!("<{} {}>", $t, $c); $f; html!("</{}>", $t); }};
+        ($t:expr,$f:expr)         => {{ html!("<{}>", $t);        $f; html!("</{}>", $t); }}; }
 
+    let root = page.root();
     for block in &document.blocks {
         match block {
             Block::Heading { level, line } => match level {
@@ -96,11 +147,17 @@ pub fn document_to_html(document: &MarkdownDocument, page: &Page, website: &Webs
             }),
             Block::Note(lines) => wrap!("aside", for line in lines { tag!("p", line) }),
             Block::Embedded { label, path } => match path.rsplit_once('.') {
-                Some((_, extension)) => match extension.to_lowercase().as_str() {
-                    "jpg"|"jpeg"|"png"|"webp"|"gif"|"tiff" => html!(
-                        "<figure><a href='{path}'><img src='{path}' alt='{label}' title='{label}'></a></figure>"),
-                    "mp3"|"wav"|"m4a" => html!("<audio src='{path}' controls>"),
-                    ext @ _ => warn!("Unrecognised extension for embedded file {path:?} with extension {ext:?} in page {:?}", page.name),
+                Some((_, extension)) => {
+                    let path = match path.strip_prefix('/') {
+                        Some(stripped) => format!("{root}{stripped}"),
+                        None => path.to_string(),
+                    };
+                    match extension.to_lowercase().as_str() {
+                        "jpg"|"jpeg"|"png"|"webp"|"gif"|"tiff" => html!(
+                            "<figure><a href='{path}'><img src='{path}' alt='{label}' title='{label}'></a></figure>"),
+                        "mp3"|"wav"|"m4a" => html!("<audio src='{path}' controls>"),
+                        ext @ _ => warn!("Unrecognised extension for embedded file {path:?} with extension {ext:?} in page {:?}", page.name),
+                    }
                 }
                 _ => warn!("Cannot embed file {path:?} with no file extension in page {:?}", page.name),
             }
@@ -119,7 +176,7 @@ pub fn document_to_html(document: &MarkdownDocument, page: &Page, website: &Webs
                 }
             }
             Block::Break => html!("<hr>"),
-            Block::Table(table) => wrap!("table", {
+            Block::Table(table) => wrap!("div", "class='table'", wrap!("table", {
                 wrap!("thead",
                     wrap!("tr", for column in &table.columns {
                         tag!("th", column.name);
@@ -150,7 +207,7 @@ pub fn document_to_html(document: &MarkdownDocument, page: &Page, website: &Webs
                         })
                     })
                 };
-            })
+            }))
         }
     }
     return html;
@@ -194,7 +251,7 @@ fn line_to_html(line: &Line, page: &Page, website: &Website) -> String {
                 // Check that the heading exists.
                 if class == "heading" {
                     let heading = path.strip_prefix('#').unwrap().to_string();
-                    if !page.headings.contains(&heading) {
+                    if !page.headings.iter().any(|h| h.url == heading) {
                         warn!("Page {:?} contains link to nonexistent internal heading {heading:?}", page.name);
                     }
                 }
diff --git a/src/main.rs b/src/main.rs
index 2950ee9..a41f801 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,8 @@ pub use generate_html::*;
 use markdown::*;
 use vagabond::*;
 
+use std::collections::HashSet;
+
 
 const NORMAL: &str = "\x1b[0m";
 const BOLD:   &str = "\x1b[1m";
@@ -55,6 +57,25 @@ fn main() {
 
     let website = Website::from_path(&source_directory);
 
+    // Check for duplicate output paths for pages.
+    let mut destinations: HashSet<&str> = HashSet::new();
+    let mut duplicates: HashSet<&str> = HashSet::new();
+    for page in &website.pages {
+        if !destinations.insert(&page.full_url) {
+            duplicates.insert(&page.full_url);
+        };
+    }
+    if !duplicates.is_empty() {
+        for destination in duplicates {
+            warn!("Multiple pages have the output path {destination:?}");
+            for page in &website.pages {
+                if page.full_url == destination {
+                    eprintln!(":: {:?}", page.source_path);
+                }
+            }
+        }
+    }
+
     let mut destination = destination_directory.clone();
     destination.push(make_url_safe(&website.name));
 
@@ -64,6 +85,7 @@ fn main() {
             error!("Failed to delete existing destination directory {destination:?}"));
     }
 
+
     for page in &website.pages {
         let mut destination = destination.clone();
         destination.push(&page.full_url);
-- 
cgit v1.2.3-70-g09d2