Detect duplicate headings within a page

This is a problem because duplicate headings produce identical IDs, so any link to the later heading will resolve to the earlier heading instead.
author: Ben Bridle <ben@derelict.engineering> 2025-01-14 16:26:21 +1300
committer: Ben Bridle <ben@derelict.engineering> 2025-01-14 16:26:21 +1300
commit: 6bec7be420aa070b903fa8c77b1c58c0259b1073 (patch)
tree: fa2bf7295f1ef4d49669fa70bb27f581b2c434e7 /src/collect_files.rs
parent: e868f063b04b04f3ee40d13799f6bc45a28d16d8 (diff)
download: toaster-6bec7be420aa070b903fa8c77b1c58c0259b1073.zip
1 files changed, 8 insertions, 0 deletions
diff --git a/src/collect_files.rs b/src/collect_files.rs
index d9cfb37..49f5cc0 100644
--- a/src/collect_files.rs
+++ b/src/collect_files.rs
@@ -96,15 +96,23 @@ impl Website {
                 "md" => {
                     let markdown = std::fs::read_to_string(&source_path).unwrap();
                     let document = MarkdownDocument::from_str(&markdown);
+                    let mut heading_set = HashSet::new();
+                    let mut duplicates = HashSet::new();
                     let headings = document.blocks.iter()
                         .filter_map(|block| if let Block::Heading { line, level } = block {
                             let name = line.to_string();
                             let url = make_url_safe(&name);
                             let level = level.to_owned();
+                            if !heading_set.insert(url.clone()) {
+                                duplicates.insert(url.clone());
+                            }
                             Some(Heading { name, url, level })
                         } else {
                             None
                         }).collect();
+                    for url in duplicates {
+                        warn!("Page {name:?} contains multiple headings with ID \"#{url}\"");
+                    }
                     if name_url == "+index" {
                         if parents.is_empty() {
                             // This is the index file for the whole site.
author	Ben Bridle <ben@derelict.engineering>	2025-01-14 16:26:21 +1300
committer	Ben Bridle <ben@derelict.engineering>	2025-01-14 16:26:21 +1300
commit	6bec7be420aa070b903fa8c77b1c58c0259b1073 (patch)
tree	fa2bf7295f1ef4d49669fa70bb27f581b2c434e7 /src/collect_files.rs
parent	e868f063b04b04f3ee40d13799f6bc45a28d16d8 (diff)
download	toaster-6bec7be420aa070b903fa8c77b1c58c0259b1073.zip