diff options
| author | Ben Bridle <ben@derelict.engineering> | 2026-02-22 10:15:06 +1300 |
|---|---|---|
| committer | Ben Bridle <ben@derelict.engineering> | 2026-02-22 10:15:33 +1300 |
| commit | 700c0ddd79fc6ca01d52250b69b02c1a13d4ddef (patch) | |
| tree | 9c3c31e8d9cde40dbcc689c0abd876e57a10f028 /src/collect_files.rs | |
| parent | 8c2ac6d92f6a4579591f748eebcbca2b9913d92d (diff) | |
| download | toaster-700c0ddd79fc6ca01d52250b69b02c1a13d4ddef.zip | |
Big rewrite
A quick list of everything that's changed:
- links to a duplicate heading beneath the same level 1 heading now work
- rss feed generation using a .feed file
- customisation of the html template using the html.template key
- option to use symlinks instead of copying static files
- fixed incorrect resolution of internal links
- simplified different name forms with the Name type
- allow linking to a redirect
Diffstat (limited to 'src/collect_files.rs')
| -rw-r--r-- | src/collect_files.rs | 457 |
1 file changed, 256 insertions, 201 deletions
diff --git a/src/collect_files.rs b/src/collect_files.rs index e3d3a11..3616c8a 100644 --- a/src/collect_files.rs +++ b/src/collect_files.rs @@ -1,60 +1,75 @@ use crate::*; -use highlight::*; -use vagabond::*; - -use std::collections::HashMap; use std::fmt::Debug; pub struct Website { - pub name: String, - pub config: HashMap<String, String>, - pub highlighters: Highlighters, + pub name: Name, + pub config: Config, pub pages: Vec<Page>, pub redirects: Vec<Redirect>, - pub static_files: Vec<StaticItem>, // Redirects, !-prefixed-dir contents - pub static_dirs: Vec<StaticItem>, // Only !-prefixed static dirs + pub static_files: Vec<StaticFile>, // !-prefixed-dir contents + pub feeds: Vec<Feed>, // RSS feeds } pub struct Page { - pub name: String, // Display name of this page - pub name_url: String, // Safe URL name, no extension - pub full_url: String, // Safe full URL, no extension - pub parents: Vec<String>, // Parent directory components, unsafe - pub parent_url: String, // Base URL for links in this page + pub name: Name, + pub url: String, // No extension + pub parents: Vec<Name>, // Parent names + pub parent_url: String, // With trailing slash pub source_path: PathBuf, // Absolute path to source file pub document: MarkdownDocument, // File content parsed as markdown pub headings: Vec<Heading>, // Ordered list of all headings in page - pub last_modified: Option<SystemTime>, // last-modified time of source file + pub last_modified: Option<SystemTime>, // Last-modified time of source file } pub struct Heading { - pub name: String, - pub url: String, + pub name: Name, + pub prefix: Option<Name>, // Disambiguation pub level: Level, - pub block_id: usize, + pub block_id: usize, // Pointer to heading element in document } -pub struct StaticItem { - pub full_url: String, // Safe full URL, with extension +impl Heading { + pub fn slug(&self) -> String { + match &self.prefix { + Some(prefix) => format!("{}-{}", prefix.slug(), self.name.slug()), + None => self.name.slug(), 
+ } + } +} + +pub struct StaticFile { + pub url: String, // With extension pub source_path: PathBuf, // Absolute path to source file - pub last_modified: Option<SystemTime>, // last-modified time of source file + pub last_modified: Option<SystemTime>, // Last-modified time of source file +} + +pub struct Feed { + pub name: Name, // Taken from file name + pub url: String, // With extension + pub parents: Vec<Name>, // Parent names + pub parent_url: String, // Base URL for feed pages + pub source_path: PathBuf, // Absolute path to source file + pub last_modified: Option<SystemTime>, // Last-modified time of source file } pub struct Redirect { - pub name: String, // Display name of this redirect - pub full_url: String, // Safe full URL, no extension - pub parents: Vec<String>, // Parent directory components, unsafe + pub name: Name, + pub url: String, // No extension + pub parents: Vec<Name>, // Parent names pub parent_url: String, // Base URL for relative redirects - pub redirect: String, // Page to redirect to, as an internal link - pub last_modified: Option<SystemTime>, // last-modified time of source file + pub target: String, // Page to redirect to, internal link + pub source_path: PathBuf, // Absolute path to source file, for logging + pub last_modified: Option<SystemTime>, // Last-modified time of source file } + +/// Calculate correct relative path from this entity to a specified page. 
pub trait LinkFrom: Debug { - fn name(&self) -> &str; + fn name(&self) -> &Name; fn parent_url(&self) -> &str; - fn parents(&self) -> &[String]; + fn parents(&self) -> &[Name]; fn root(&self) -> String { let mut root = String::new(); for _ in self.parents() { @@ -68,16 +83,28 @@ pub trait LinkFrom: Debug { None => format!("/{}", self.name()), } } -} - -pub struct Highlighters { - pub languages: HashMap<String, usize>, - pub highlighters: Vec<Highlighter>, -} - -pub struct ImagePaths { - pub thumb: String, - pub large: String, + /// Convert an internal link to a canonical page URL and optional heading, + /// both as slugs. + /// + /// `path` and returned URL have no extension. + fn canonicalise(&self, path: &str) -> (String, Option<String>) { + // Remove heading fragment from path. + let (path, heading) = match path.rsplit_once('#') { + Some((path, heading)) => match heading.is_empty() { + true => (path, None), + false => (path, Some(to_slug(heading))), + } + None => (path, None), + }; + let mut path = path.to_string(); + // Attach parent URL if not an absolute path. + if !path.starts_with('/') { + path = format!("{}{path}", self.parent_url()); + } + // Convert path to a canonical URL. 
+ path = to_slug(&collapse_path(&path)); + return (path, heading); + } } impl Page { @@ -102,16 +129,33 @@ impl Debug for Redirect { } } +impl Debug for Feed { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "\"{}\"", self.qualified_name()) + } +} + impl LinkFrom for Page { - fn name(&self) -> &str { &self.name } + fn name(&self) -> &Name { &self.name } fn parent_url(&self) -> &str { &self.parent_url } - fn parents(&self) -> &[String] { &self.parents } + fn parents(&self) -> &[Name] { &self.parents } } impl LinkFrom for Redirect { - fn name(&self) -> &str { &self.name } + fn name(&self) -> &Name { &self.name } + fn parent_url(&self) -> &str { &self.parent_url } + fn parents(&self) -> &[Name] { &self.parents } +} + +impl LinkFrom for Feed { + fn name(&self) -> &Name { &self.name } fn parent_url(&self) -> &str { &self.parent_url } - fn parents(&self) -> &[String] { &self.parents } + fn parents(&self) -> &[Name] { &self.parents } +} + +pub struct ImagePaths { + pub thumb: String, + pub large: String, } @@ -121,41 +165,73 @@ impl Website { pages: Vec::new(), redirects: Vec::new(), static_files: Vec::new(), - static_dirs: Vec::new(), + feeds: Vec::new(), name: match Entry::from_path(path) { - Ok(entry) => entry.name, + Ok(entry) => entry.name.into(), Err(err) => fatal!("Couldn't open {:?}: {:?}", &path, err), }, - config: HashMap::new(), - highlighters: Highlighters { - languages: HashMap::new(), - highlighters: Vec::new(), - }, + config: Config::new(), }; + // Recursively collect entire website. new.collect_entry(path, path); - new.parse_highlighters(); + new.parse_hoisted_folders(); return new; } + /// Read the hoisted_folders config key, make root redirects for each + /// child of each listed directory. 
+ fn parse_hoisted_folders(&mut self) { + for line in self.config.get("hoisted_folders").lines() { + if line.is_empty() { continue } + // Turn line into a path + let path = PathBuf::from(line); + let prefix: Vec<Name> = path.components() + .filter(|c| if let std::path::Component::Normal(_) = c {true} else {false}) + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .map(|s| strip_numeric_prefix(&s).into()) + .collect(); + for page in &self.pages { + if page.parents == prefix { + let name = page.name.clone(); + let url = name.slug(); + let parents = Vec::new(); + let parent_url = String::new(); + let target = page.url.clone(); + let source_path = "<hoisted child>".into(); + let last_modified = self.config.last_modified; + self.redirects.push(Redirect { + name, + url, + parents, + parent_url, + target, + source_path, + last_modified, + }); + } + } + } + } + + /// Recursively collect an entry and all children. + /// `prefix` is the base directory path for the entire website. fn collect_entry(&mut self, path: &Path, prefix: &Path) { let entry = Entry::from_path(path).unwrap(); // Ignore dotted entries. if entry.name.starts_with('.') { return } - // Get name and extension. - let (mut name, extension) = entry.split_name(); - name = strip_numeric_prefix(&name); - let name_url = make_url_safe(&name); - // Get last-modified time. + // Get name, extension, last-modified. + let (name_raw, extension) = entry.split_name(); + let name: Name = strip_numeric_prefix(&name_raw).into(); let last_modified = entry.last_modified; // Generate parent URL, used only for files. 
let source_path = entry.original_path.clone(); let relative_path = source_path.strip_prefix(prefix).unwrap_or_else( |_| fatal!("Path doesn't start with {prefix:?}: {source_path:?}")); - let mut parents: Vec<_> = relative_path.components() + let mut parents: Vec<Name> = relative_path.components() .map(|c| c.as_os_str().to_string_lossy().to_string()) - .map(|s| strip_numeric_prefix(&s)) + .map(|s| strip_numeric_prefix(&s).into()) .collect(); - parents.pop(); // Remove file segment. + parents.pop(); // Remove final (non-parent) segment. // Process each entry. if entry.is_directory() { @@ -166,11 +242,9 @@ impl Website { let relative_path = source_path.strip_prefix(&entry.original_path).unwrap_or_else( |_| fatal!("Path doesn't start with {prefix:?}: {source_path:?}")) .as_os_str().to_string_lossy().to_string(); - let full_url = format!("{stripped}/{relative_path}"); - self.static_files.push(StaticItem { full_url, source_path, last_modified }) + let url = format!("{stripped}/{relative_path}"); + self.static_files.push(StaticFile { url, source_path, last_modified }) } - let full_url = make_url_safe(stripped); - self.static_dirs.push(StaticItem { full_url, source_path, last_modified }); } else { for child in list_directory(entry.original_path).unwrap() { self.collect_entry(&child.original_path, prefix); @@ -178,78 +252,100 @@ impl Website { } } else if parents.is_empty() && entry.name.to_lowercase() == "toaster.conf" { info!("Reading configuration file at {path:?}"); - // Parse the config file. 
- let config = std::fs::read_to_string(&source_path).unwrap(); - let mut key = None; - let mut value = String::new(); - for line in config.lines() { - if line.starts_with(" ") || line.trim().is_empty() { - value.push_str(line.trim()); - value.push('\n'); - } else { - if let Some(key) = key { - self.config.insert(key, std::mem::take(&mut value)); - } - key = Some(line.trim().to_lowercase().to_string()); - } - } - if let Some(key) = key { - self.config.insert(key, std::mem::take(&mut value)); - } + let content = std::fs::read_to_string(&source_path).unwrap(); + self.config.parse_file(&content, last_modified); } else { - let full_name = match parents.last() { + // Used for error messages, to distinguish between pages of the same name. + let qualified_name = match parents.last() { Some(parent) => format!("{parent}/{name}"), None => name.to_string(), }; match extension.as_str() { + "feed" => { + let mut url = String::new(); + for parent in &parents { + url.push_str(&parent.slug()); + url.push('/'); + } + let parent_url = url.clone(); + url.push_str(&name.plain()); + self.feeds.push(Feed { + name, + url, + parents, + parent_url, + source_path, + last_modified, + }); + } + "redirect" => { + let mut url = String::new(); + for parent in &parents { + url.push_str(&parent.slug()); + url.push('/'); + } + let parent_url = url.clone(); + url.push_str(&name.slug()); + let target = std::fs::read_to_string(&source_path) + .unwrap().trim().to_string(); + self.redirects.push(Redirect { + name, + url, + parents, + parent_url, + target, + source_path, + last_modified, + }); + } "md" => { let markdown = std::fs::read_to_string(&source_path).unwrap(); let document = MarkdownDocument::from_str(&markdown); // Collect headings, check for duplicates. 
- let mut heading_set = HashSet::new(); - let mut duplicates = HashSet::new(); - let mut headings: Vec<_> = document.blocks.iter().enumerate() + let mut names_set = HashSet::new(); // all heading names + let mut duplicates = HashSet::new(); // only duplicates + let mut headings: Vec<Heading> = document.blocks.iter().enumerate() .filter_map(|(block_id, block)| if let Block::Heading { line, level } = block { - let name = line.to_string(); - let url = make_url_safe(strip_appendix(&name)); + let name: Name = line.to_string().into(); let level = level.to_owned(); - if !heading_set.insert(url.clone()) { - duplicates.insert(url.clone()); + let heading = Heading { name, prefix: None, level, block_id }; + if !names_set.insert(heading.slug()) { + duplicates.insert(heading.slug()); } - Some(Heading { name, url, level, block_id }) + Some(heading) } else { None }).collect(); // Namespace any duplicate headings to the parent h1 heading. - let mut parent_url = String::new(); + let mut prefix = None; for heading in &mut headings { if let Level::Heading1 = heading.level { - parent_url = heading.url.clone(); - } - if duplicates.contains(&heading.url) { - heading.url = format!("{parent_url}-{}", heading.url); + prefix = Some(heading.name.clone()); + } else { + if duplicates.contains(&heading.slug()) { + heading.prefix = prefix.clone(); + } } } - // Check for duplicates again, and warn if any. - heading_set.clear(); + // Check for duplicates once more, and warn if any. 
+ names_set.clear(); duplicates.clear(); for heading in &headings { - if !heading_set.insert(heading.url.clone()) { - duplicates.insert(heading.url.clone()); + if !names_set.insert(heading.slug()) { + duplicates.insert(heading.slug()); } } - for url in duplicates { - warn!("Page {full_name:?} contains multiple headings with ID \"#{url}\""); + for slug in duplicates { + warn!("Page {qualified_name:?} contains multiple headings with ID \"#{slug}\""); } - if name_url == "+index" { + if name.slug() == "+index" { if parents.is_empty() { // This is the index file for the whole site. self.pages.push(Page { - name: String::from("Home"), - name_url: String::from("index"), - full_url: String::from("index"), + name: "Home".into(), + url: String::from("index"), parents, parent_url: String::from(""), source_path, @@ -259,20 +355,18 @@ impl Website { }); } else { // This is an index file for a directory. - let name = parents[parents.len()-1].clone(); - let name_url = make_url_safe(&name); - let mut full_url = String::new(); + let name = parents.last().unwrap().clone(); + let mut url = String::new(); for parent in &parents { - full_url.push_str(&make_url_safe(parent)); - full_url.push('/'); + url.push_str(&parent.slug()); + url.push('/'); } - let parent_url = full_url.clone(); - full_url.pop(); - parents.pop(); + let parent_url = url.clone(); + url.pop(); // Remove the trailing slash + parents.pop(); // Remove this directory self.pages.push(Page { name, - name_url, - full_url, + url, parents, parent_url, source_path, @@ -282,111 +376,72 @@ impl Website { }); } } else { - let mut full_url = String::new(); + // This is a regular page. + let mut url = String::new(); for parent in &parents { - full_url.push_str(&make_url_safe(parent)); - full_url.push('/'); + url.push_str(&parent.slug()); + url.push('/'); } - full_url.push_str(&name_url); - let mut parent_url = full_url.clone(); + // Children descend from this page, so the parent + // url must contain this page. 
+ url.push_str(&name.slug()); + let mut parent_url = url.clone(); parent_url.push('/'); self.pages.push(Page { - name, name_url, full_url, - parents, parent_url, + name, + url, + parents, + parent_url, source_path, - document, headings, + document, + headings, last_modified, }); } }, - "redirect" => { - let mut full_url = String::new(); - for parent in &parents { - full_url.push_str(&make_url_safe(parent)); - full_url.push('/'); - } - let parent_url = full_url.clone(); - full_url.push_str(&name_url); - let redirect = std::fs::read_to_string(&source_path) - .unwrap().trim().to_string(); - self.redirects.push(Redirect { - name, full_url, parents, parent_url, - redirect, last_modified, - }); - } _ => { + // This is a static file. let mut parent_url = String::new(); for parent in &parents { - parent_url.push_str(&make_url_safe(parent)); - parent_url.push('/'); + parent_url.push_str(&parent.slug()); + parent_url.push('/'); } - let full_url = format!("{parent_url}{name_url}.{extension}"); - self.static_files.push(StaticItem { full_url, source_path, last_modified }); + let name_slug = name.slug(); + let url = format!("{parent_url}{name_slug}.{extension}"); + self.static_files.push(StaticFile { url, source_path, last_modified }); }, } } } - pub fn parse_highlighters(&mut self) { - let mut languages = Vec::new(); - let mut source = String::new(); - for line in self.get_config("highlighters").lines() { - if let Some(line) = line.trim().strip_prefix('[') { - if let Some(line) = line.strip_suffix(']') { - // Bank the current source. 
- if !languages.is_empty() { - let i = self.highlighters.highlighters.len(); - for language in languages { - self.highlighters.languages.insert(language, i); - } - let highlighter = Highlighter::from_str(&source); - self.highlighters.highlighters.push(highlighter); - } - languages = line.split('/').map(|s| s.trim().to_string()).collect(); - source.clear(); - continue; - } - } - source.push_str(line); - source.push('\n'); - } - // Bank the current source. - if !languages.is_empty() { - let i = self.highlighters.highlighters.len(); - for language in languages { - self.highlighters.languages.insert(language, i); - } - let highlighter = Highlighter::from_str(&source); - self.highlighters.highlighters.push(highlighter); - } - } - - // Ext is extension without a dot. - // Checks if a relative link to an internal page name can be reached from - // the current page, and returns a resolved absolute link to the page with extension. + /// Check if the internal link `path` is valid, pointing to a real internal + /// page with extension `ext` and heading, relative to the current page (`from`). + /// Returns a resolved absolute link to the page, with extension. pub fn has_page(&self, from: &impl LinkFrom, path: &str, ext: &str) -> Option<String> { - // Remove heading fragment and file extension. - let (path, heading) = match path.rsplit_once('#') { - Some((path, heading)) => match heading.is_empty() { - true => (path, None), - false => (path, Some(heading)), - } - None => (path, None), + let original_path = path; + let (mut path, mut heading) = from.canonicalise(path); + if let Some(stripped) = path.strip_suffix(&format!(".{ext}")) { + path = stripped.to_string(); }; - let mut path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path).to_string(); - // Attach parent if not an absolute path. - if !path.starts_with('/') { - path = format!("{}{path}", from.parent_url()); - } - path = make_url_safe(&collapse_path(&path)); - // Find page with this path in website. 
+ // Find page with this path in website, resolving any redirect first. + for redirect in &self.redirects { + if redirect.url == path { + let (target_path, target_heading) = redirect.canonicalise(&redirect.target); + path = target_path; + if target_heading.is_some() && heading.is_some() { + warn!("Page {from:?} contains link {original_path:?} to a redirect that also links to a heading"); + } + if heading.is_none() { + heading = target_heading; + } + } + } for page in &self.pages { - if page.full_url == path { + if page.url == path { let root = from.root(); if let Some(heading) = heading { - let heading = make_url_safe(strip_appendix(heading)); - if !page.headings.iter().any(|h| h.url == heading) { + if !page.headings.iter().any(|h| h.slug() == heading) { warn!("Page {from:?} contains link to nonexistent heading {heading:?} on page {path:?}"); } return Some(format!("{root}{path}.{ext}#{heading}")); @@ -398,14 +453,17 @@ impl Website { return None; } + /// Check if the external link `path` points to a valid static file. + /// Returns a resolved absolute link to the file. pub fn has_static(&self, from: &impl LinkFrom, path: &str) -> Option<String> { // Attach parent if not an absolute path. + // We don't want to canonicalise/sluggify the path. let path = match !path.starts_with('/') { true => collapse_path(&format!("{}{path}", from.parent_url())), false => collapse_path(path), }; for file in &self.static_files { - if file.full_url == path { + if file.url == path { let root = from.root(); return Some(format!("{root}{path}")); } @@ -413,9 +471,10 @@ impl Website { return None; } + /// Check if a particular image exists. 
pub fn has_image(&self, file_name: &str, root: &str) -> Option<ImagePaths> { let check = |path: String| - match self.static_files.iter().any(|s| s.full_url == path) { + match self.static_files.iter().any(|s| s.url == path) { true => Some(format!("{root}{path}")), false => None, }; @@ -430,10 +489,6 @@ impl Website { large: large_path.or(fallback_path.clone())?, }) } - - pub fn get_config(&self, key: &str) -> String { - self.config.get(key).map(String::to_owned).unwrap_or_else(String::new) - } } |
