summary refs log tree commit diff
path: root/src/collect_files.rs
diff options
context:
space:
mode:
author    Ben Bridle <ben@derelict.engineering>    2026-02-22 10:15:06 +1300
committer Ben Bridle <ben@derelict.engineering>    2026-02-22 10:15:33 +1300
commit    700c0ddd79fc6ca01d52250b69b02c1a13d4ddef (patch)
tree      9c3c31e8d9cde40dbcc689c0abd876e57a10f028 /src/collect_files.rs
parent    8c2ac6d92f6a4579591f748eebcbca2b9913d92d (diff)
download  toaster-700c0ddd79fc6ca01d52250b69b02c1a13d4ddef.zip
Big rewrite
A quick list of everything that's changed:
- links to a duplicate heading beneath the same level 1 heading now work
- rss feed generation using a .feed file
- customisation of the html template using the html.template key
- option to use symlinks instead of copying static files
- fixed incorrect resolution of internal links
- simplified different name forms with the Name type
- allow linking to a redirect
Diffstat (limited to 'src/collect_files.rs')
-rw-r--r--  src/collect_files.rs  457
1 file changed, 256 insertions(+), 201 deletions(-)
diff --git a/src/collect_files.rs b/src/collect_files.rs
index e3d3a11..3616c8a 100644
--- a/src/collect_files.rs
+++ b/src/collect_files.rs
@@ -1,60 +1,75 @@
use crate::*;
-use highlight::*;
-use vagabond::*;
-
-use std::collections::HashMap;
use std::fmt::Debug;
pub struct Website {
- pub name: String,
- pub config: HashMap<String, String>,
- pub highlighters: Highlighters,
+ pub name: Name,
+ pub config: Config,
pub pages: Vec<Page>,
pub redirects: Vec<Redirect>,
- pub static_files: Vec<StaticItem>, // Redirects, !-prefixed-dir contents
- pub static_dirs: Vec<StaticItem>, // Only !-prefixed static dirs
+ pub static_files: Vec<StaticFile>, // !-prefixed-dir contents
+ pub feeds: Vec<Feed>, // RSS feeds
}
pub struct Page {
- pub name: String, // Display name of this page
- pub name_url: String, // Safe URL name, no extension
- pub full_url: String, // Safe full URL, no extension
- pub parents: Vec<String>, // Parent directory components, unsafe
- pub parent_url: String, // Base URL for links in this page
+ pub name: Name,
+ pub url: String, // No extension
+ pub parents: Vec<Name>, // Parent names
+ pub parent_url: String, // With trailing slash
pub source_path: PathBuf, // Absolute path to source file
pub document: MarkdownDocument, // File content parsed as markdown
pub headings: Vec<Heading>, // Ordered list of all headings in page
- pub last_modified: Option<SystemTime>, // last-modified time of source file
+ pub last_modified: Option<SystemTime>, // Last-modified time of source file
}
pub struct Heading {
- pub name: String,
- pub url: String,
+ pub name: Name,
+ pub prefix: Option<Name>, // Disambiguation
pub level: Level,
- pub block_id: usize,
+ pub block_id: usize, // Pointer to heading element in document
}
-pub struct StaticItem {
- pub full_url: String, // Safe full URL, with extension
+impl Heading {
+ pub fn slug(&self) -> String {
+ match &self.prefix {
+ Some(prefix) => format!("{}-{}", prefix.slug(), self.name.slug()),
+ None => self.name.slug(),
+ }
+ }
+}
+
+pub struct StaticFile {
+ pub url: String, // With extension
pub source_path: PathBuf, // Absolute path to source file
- pub last_modified: Option<SystemTime>, // last-modified time of source file
+ pub last_modified: Option<SystemTime>, // Last-modified time of source file
+}
+
+pub struct Feed {
+ pub name: Name, // Taken from file name
+ pub url: String, // With extension
+ pub parents: Vec<Name>, // Parent names
+ pub parent_url: String, // Base URL for feed pages
+ pub source_path: PathBuf, // Absolute path to source file
+ pub last_modified: Option<SystemTime>, // Last-modified time of source file
}
pub struct Redirect {
- pub name: String, // Display name of this redirect
- pub full_url: String, // Safe full URL, no extension
- pub parents: Vec<String>, // Parent directory components, unsafe
+ pub name: Name,
+ pub url: String, // No extension
+ pub parents: Vec<Name>, // Parent names
pub parent_url: String, // Base URL for relative redirects
- pub redirect: String, // Page to redirect to, as an internal link
- pub last_modified: Option<SystemTime>, // last-modified time of source file
+ pub target: String, // Page to redirect to, internal link
+ pub source_path: PathBuf, // Absolute path to source file, for logging
+ pub last_modified: Option<SystemTime>, // Last-modified time of source file
}
+
+/// Calculate correct relative path from this entity to a specified page.
pub trait LinkFrom: Debug {
- fn name(&self) -> &str;
+ fn name(&self) -> &Name;
fn parent_url(&self) -> &str;
- fn parents(&self) -> &[String];
+ fn parents(&self) -> &[Name];
fn root(&self) -> String {
let mut root = String::new();
for _ in self.parents() {
@@ -68,16 +83,28 @@ pub trait LinkFrom: Debug {
None => format!("/{}", self.name()),
}
}
-}
-
-pub struct Highlighters {
- pub languages: HashMap<String, usize>,
- pub highlighters: Vec<Highlighter>,
-}
-
-pub struct ImagePaths {
- pub thumb: String,
- pub large: String,
+ /// Convert an internal link to a canonical page URL and optional heading,
+ /// both as slugs.
+ ///
+ /// `path` and returned URL have no extension.
+ fn canonicalise(&self, path: &str) -> (String, Option<String>) {
+ // Remove heading fragment from path.
+ let (path, heading) = match path.rsplit_once('#') {
+ Some((path, heading)) => match heading.is_empty() {
+ true => (path, None),
+ false => (path, Some(to_slug(heading))),
+ }
+ None => (path, None),
+ };
+ let mut path = path.to_string();
+ // Attach parent URL if not an absolute path.
+ if !path.starts_with('/') {
+ path = format!("{}{path}", self.parent_url());
+ }
+ // Convert path to a canonical URL.
+ path = to_slug(&collapse_path(&path));
+ return (path, heading);
+ }
}
impl Page {
@@ -102,16 +129,33 @@ impl Debug for Redirect {
}
}
+impl Debug for Feed {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+ write!(f, "\"{}\"", self.qualified_name())
+ }
+}
+
impl LinkFrom for Page {
- fn name(&self) -> &str { &self.name }
+ fn name(&self) -> &Name { &self.name }
fn parent_url(&self) -> &str { &self.parent_url }
- fn parents(&self) -> &[String] { &self.parents }
+ fn parents(&self) -> &[Name] { &self.parents }
}
impl LinkFrom for Redirect {
- fn name(&self) -> &str { &self.name }
+ fn name(&self) -> &Name { &self.name }
+ fn parent_url(&self) -> &str { &self.parent_url }
+ fn parents(&self) -> &[Name] { &self.parents }
+}
+
+impl LinkFrom for Feed {
+ fn name(&self) -> &Name { &self.name }
fn parent_url(&self) -> &str { &self.parent_url }
- fn parents(&self) -> &[String] { &self.parents }
+ fn parents(&self) -> &[Name] { &self.parents }
+}
+
+pub struct ImagePaths {
+ pub thumb: String,
+ pub large: String,
}
@@ -121,41 +165,73 @@ impl Website {
pages: Vec::new(),
redirects: Vec::new(),
static_files: Vec::new(),
- static_dirs: Vec::new(),
+ feeds: Vec::new(),
name: match Entry::from_path(path) {
- Ok(entry) => entry.name,
+ Ok(entry) => entry.name.into(),
Err(err) => fatal!("Couldn't open {:?}: {:?}", &path, err),
},
- config: HashMap::new(),
- highlighters: Highlighters {
- languages: HashMap::new(),
- highlighters: Vec::new(),
- },
+ config: Config::new(),
};
+ // Recursively collect entire website.
new.collect_entry(path, path);
- new.parse_highlighters();
+ new.parse_hoisted_folders();
return new;
}
+ /// Read the hoisted_folders config key, make root redirects for each
+ /// child of each listed directory.
+ fn parse_hoisted_folders(&mut self) {
+ for line in self.config.get("hoisted_folders").lines() {
+ if line.is_empty() { continue }
+ // Turn line into a path
+ let path = PathBuf::from(line);
+ let prefix: Vec<Name> = path.components()
+ .filter(|c| if let std::path::Component::Normal(_) = c {true} else {false})
+ .map(|c| c.as_os_str().to_string_lossy().to_string())
+ .map(|s| strip_numeric_prefix(&s).into())
+ .collect();
+ for page in &self.pages {
+ if page.parents == prefix {
+ let name = page.name.clone();
+ let url = name.slug();
+ let parents = Vec::new();
+ let parent_url = String::new();
+ let target = page.url.clone();
+ let source_path = "<hoisted child>".into();
+ let last_modified = self.config.last_modified;
+ self.redirects.push(Redirect {
+ name,
+ url,
+ parents,
+ parent_url,
+ target,
+ source_path,
+ last_modified,
+ });
+ }
+ }
+ }
+ }
+
+ /// Recursively collect an entry and all children.
+ /// `prefix` is the base directory path for the entire website.
fn collect_entry(&mut self, path: &Path, prefix: &Path) {
let entry = Entry::from_path(path).unwrap();
// Ignore dotted entries.
if entry.name.starts_with('.') { return }
- // Get name and extension.
- let (mut name, extension) = entry.split_name();
- name = strip_numeric_prefix(&name);
- let name_url = make_url_safe(&name);
- // Get last-modified time.
+ // Get name, extension, last-modified.
+ let (name_raw, extension) = entry.split_name();
+ let name: Name = strip_numeric_prefix(&name_raw).into();
let last_modified = entry.last_modified;
// Generate parent URL, used only for files.
let source_path = entry.original_path.clone();
let relative_path = source_path.strip_prefix(prefix).unwrap_or_else(
|_| fatal!("Path doesn't start with {prefix:?}: {source_path:?}"));
- let mut parents: Vec<_> = relative_path.components()
+ let mut parents: Vec<Name> = relative_path.components()
.map(|c| c.as_os_str().to_string_lossy().to_string())
- .map(|s| strip_numeric_prefix(&s))
+ .map(|s| strip_numeric_prefix(&s).into())
.collect();
- parents.pop(); // Remove file segment.
+ parents.pop(); // Remove final (non-parent) segment.
// Process each entry.
if entry.is_directory() {
@@ -166,11 +242,9 @@ impl Website {
let relative_path = source_path.strip_prefix(&entry.original_path).unwrap_or_else(
|_| fatal!("Path doesn't start with {prefix:?}: {source_path:?}"))
.as_os_str().to_string_lossy().to_string();
- let full_url = format!("{stripped}/{relative_path}");
- self.static_files.push(StaticItem { full_url, source_path, last_modified })
+ let url = format!("{stripped}/{relative_path}");
+ self.static_files.push(StaticFile { url, source_path, last_modified })
}
- let full_url = make_url_safe(stripped);
- self.static_dirs.push(StaticItem { full_url, source_path, last_modified });
} else {
for child in list_directory(entry.original_path).unwrap() {
self.collect_entry(&child.original_path, prefix);
@@ -178,78 +252,100 @@ impl Website {
}
} else if parents.is_empty() && entry.name.to_lowercase() == "toaster.conf" {
info!("Reading configuration file at {path:?}");
- // Parse the config file.
- let config = std::fs::read_to_string(&source_path).unwrap();
- let mut key = None;
- let mut value = String::new();
- for line in config.lines() {
- if line.starts_with(" ") || line.trim().is_empty() {
- value.push_str(line.trim());
- value.push('\n');
- } else {
- if let Some(key) = key {
- self.config.insert(key, std::mem::take(&mut value));
- }
- key = Some(line.trim().to_lowercase().to_string());
- }
- }
- if let Some(key) = key {
- self.config.insert(key, std::mem::take(&mut value));
- }
+ let content = std::fs::read_to_string(&source_path).unwrap();
+ self.config.parse_file(&content, last_modified);
} else {
- let full_name = match parents.last() {
+ // Used for error messages, to distinguish between pages of the same name.
+ let qualified_name = match parents.last() {
Some(parent) => format!("{parent}/{name}"),
None => name.to_string(),
};
match extension.as_str() {
+ "feed" => {
+ let mut url = String::new();
+ for parent in &parents {
+ url.push_str(&parent.slug());
+ url.push('/');
+ }
+ let parent_url = url.clone();
+ url.push_str(&name.plain());
+ self.feeds.push(Feed {
+ name,
+ url,
+ parents,
+ parent_url,
+ source_path,
+ last_modified,
+ });
+ }
+ "redirect" => {
+ let mut url = String::new();
+ for parent in &parents {
+ url.push_str(&parent.slug());
+ url.push('/');
+ }
+ let parent_url = url.clone();
+ url.push_str(&name.slug());
+ let target = std::fs::read_to_string(&source_path)
+ .unwrap().trim().to_string();
+ self.redirects.push(Redirect {
+ name,
+ url,
+ parents,
+ parent_url,
+ target,
+ source_path,
+ last_modified,
+ });
+ }
"md" => {
let markdown = std::fs::read_to_string(&source_path).unwrap();
let document = MarkdownDocument::from_str(&markdown);
// Collect headings, check for duplicates.
- let mut heading_set = HashSet::new();
- let mut duplicates = HashSet::new();
- let mut headings: Vec<_> = document.blocks.iter().enumerate()
+ let mut names_set = HashSet::new(); // all heading names
+ let mut duplicates = HashSet::new(); // only duplicates
+ let mut headings: Vec<Heading> = document.blocks.iter().enumerate()
.filter_map(|(block_id, block)| if let Block::Heading { line, level } = block {
- let name = line.to_string();
- let url = make_url_safe(strip_appendix(&name));
+ let name: Name = line.to_string().into();
let level = level.to_owned();
- if !heading_set.insert(url.clone()) {
- duplicates.insert(url.clone());
+ let heading = Heading { name, prefix: None, level, block_id };
+ if !names_set.insert(heading.slug()) {
+ duplicates.insert(heading.slug());
}
- Some(Heading { name, url, level, block_id })
+ Some(heading)
} else {
None
}).collect();
// Namespace any duplicate headings to the parent h1 heading.
- let mut parent_url = String::new();
+ let mut prefix = None;
for heading in &mut headings {
if let Level::Heading1 = heading.level {
- parent_url = heading.url.clone();
- }
- if duplicates.contains(&heading.url) {
- heading.url = format!("{parent_url}-{}", heading.url);
+ prefix = Some(heading.name.clone());
+ } else {
+ if duplicates.contains(&heading.slug()) {
+ heading.prefix = prefix.clone();
+ }
}
}
- // Check for duplicates again, and warn if any.
- heading_set.clear();
+ // Check for duplicates once more, and warn if any.
+ names_set.clear();
duplicates.clear();
for heading in &headings {
- if !heading_set.insert(heading.url.clone()) {
- duplicates.insert(heading.url.clone());
+ if !names_set.insert(heading.slug()) {
+ duplicates.insert(heading.slug());
}
}
- for url in duplicates {
- warn!("Page {full_name:?} contains multiple headings with ID \"#{url}\"");
+ for slug in duplicates {
+ warn!("Page {qualified_name:?} contains multiple headings with ID \"#{slug}\"");
}
- if name_url == "+index" {
+ if name.slug() == "+index" {
if parents.is_empty() {
// This is the index file for the whole site.
self.pages.push(Page {
- name: String::from("Home"),
- name_url: String::from("index"),
- full_url: String::from("index"),
+ name: "Home".into(),
+ url: String::from("index"),
parents,
parent_url: String::from(""),
source_path,
@@ -259,20 +355,18 @@ impl Website {
});
} else {
// This is an index file for a directory.
- let name = parents[parents.len()-1].clone();
- let name_url = make_url_safe(&name);
- let mut full_url = String::new();
+ let name = parents.last().unwrap().clone();
+ let mut url = String::new();
for parent in &parents {
- full_url.push_str(&make_url_safe(parent));
- full_url.push('/');
+ url.push_str(&parent.slug());
+ url.push('/');
}
- let parent_url = full_url.clone();
- full_url.pop();
- parents.pop();
+ let parent_url = url.clone();
+ url.pop(); // Remove the trailing slash
+ parents.pop(); // Remove this directory
self.pages.push(Page {
name,
- name_url,
- full_url,
+ url,
parents,
parent_url,
source_path,
@@ -282,111 +376,72 @@ impl Website {
});
}
} else {
- let mut full_url = String::new();
+ // This is a regular page.
+ let mut url = String::new();
for parent in &parents {
- full_url.push_str(&make_url_safe(parent));
- full_url.push('/');
+ url.push_str(&parent.slug());
+ url.push('/');
}
- full_url.push_str(&name_url);
- let mut parent_url = full_url.clone();
+ // Children descend from this page, so the parent
+ // url must contain this page.
+ url.push_str(&name.slug());
+ let mut parent_url = url.clone();
parent_url.push('/');
self.pages.push(Page {
- name, name_url, full_url,
- parents, parent_url,
+ name,
+ url,
+ parents,
+ parent_url,
source_path,
- document, headings,
+ document,
+ headings,
last_modified,
});
}
},
- "redirect" => {
- let mut full_url = String::new();
- for parent in &parents {
- full_url.push_str(&make_url_safe(parent));
- full_url.push('/');
- }
- let parent_url = full_url.clone();
- full_url.push_str(&name_url);
- let redirect = std::fs::read_to_string(&source_path)
- .unwrap().trim().to_string();
- self.redirects.push(Redirect {
- name, full_url, parents, parent_url,
- redirect, last_modified,
- });
- }
_ => {
+ // This is a static file.
let mut parent_url = String::new();
for parent in &parents {
- parent_url.push_str(&make_url_safe(parent));
- parent_url.push('/');
+ parent_url.push_str(&parent.slug());
+ parent_url.push('/');
}
- let full_url = format!("{parent_url}{name_url}.{extension}");
- self.static_files.push(StaticItem { full_url, source_path, last_modified });
+ let name_slug = name.slug();
+ let url = format!("{parent_url}{name_slug}.{extension}");
+ self.static_files.push(StaticFile { url, source_path, last_modified });
},
}
}
}
- pub fn parse_highlighters(&mut self) {
- let mut languages = Vec::new();
- let mut source = String::new();
- for line in self.get_config("highlighters").lines() {
- if let Some(line) = line.trim().strip_prefix('[') {
- if let Some(line) = line.strip_suffix(']') {
- // Bank the current source.
- if !languages.is_empty() {
- let i = self.highlighters.highlighters.len();
- for language in languages {
- self.highlighters.languages.insert(language, i);
- }
- let highlighter = Highlighter::from_str(&source);
- self.highlighters.highlighters.push(highlighter);
- }
- languages = line.split('/').map(|s| s.trim().to_string()).collect();
- source.clear();
- continue;
- }
- }
- source.push_str(line);
- source.push('\n');
- }
- // Bank the current source.
- if !languages.is_empty() {
- let i = self.highlighters.highlighters.len();
- for language in languages {
- self.highlighters.languages.insert(language, i);
- }
- let highlighter = Highlighter::from_str(&source);
- self.highlighters.highlighters.push(highlighter);
- }
- }
-
- // Ext is extension without a dot.
- // Checks if a relative link to an internal page name can be reached from
- // the current page, and returns a resolved absolute link to the page with extension.
+ /// Check if the internal link `path` is valid, pointing to a real internal
+ /// page with extension `ext` and heading, relative to the current page (`from`).
+ /// Returns a resolved absolute link to the page, with extension.
pub fn has_page(&self, from: &impl LinkFrom, path: &str, ext: &str) -> Option<String> {
- // Remove heading fragment and file extension.
- let (path, heading) = match path.rsplit_once('#') {
- Some((path, heading)) => match heading.is_empty() {
- true => (path, None),
- false => (path, Some(heading)),
- }
- None => (path, None),
+ let original_path = path;
+ let (mut path, mut heading) = from.canonicalise(path);
+ if let Some(stripped) = path.strip_suffix(&format!(".{ext}")) {
+ path = stripped.to_string();
};
- let mut path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path).to_string();
- // Attach parent if not an absolute path.
- if !path.starts_with('/') {
- path = format!("{}{path}", from.parent_url());
- }
- path = make_url_safe(&collapse_path(&path));
- // Find page with this path in website.
+ // Find page with this path in website, resolving any redirect first.
+ for redirect in &self.redirects {
+ if redirect.url == path {
+ let (target_path, target_heading) = redirect.canonicalise(&redirect.target);
+ path = target_path;
+ if target_heading.is_some() && heading.is_some() {
+ warn!("Page {from:?} contains link {original_path:?} to a redirect that also links to a heading");
+ }
+ if heading.is_none() {
+ heading = target_heading;
+ }
+ }
+ }
for page in &self.pages {
- if page.full_url == path {
+ if page.url == path {
let root = from.root();
if let Some(heading) = heading {
- let heading = make_url_safe(strip_appendix(heading));
- if !page.headings.iter().any(|h| h.url == heading) {
+ if !page.headings.iter().any(|h| h.slug() == heading) {
warn!("Page {from:?} contains link to nonexistent heading {heading:?} on page {path:?}");
}
return Some(format!("{root}{path}.{ext}#{heading}"));
@@ -398,14 +453,17 @@ impl Website {
return None;
}
+ /// Check if the external link `path` points to a valid static file.
+ /// Returns a resolved absolute link to the file.
pub fn has_static(&self, from: &impl LinkFrom, path: &str) -> Option<String> {
// Attach parent if not an absolute path.
+ // We don't want to canonicalise/sluggify the path.
let path = match !path.starts_with('/') {
true => collapse_path(&format!("{}{path}", from.parent_url())),
false => collapse_path(path),
};
for file in &self.static_files {
- if file.full_url == path {
+ if file.url == path {
let root = from.root();
return Some(format!("{root}{path}"));
}
@@ -413,9 +471,10 @@ impl Website {
return None;
}
+ /// Check if a particular image exists.
pub fn has_image(&self, file_name: &str, root: &str) -> Option<ImagePaths> {
let check = |path: String|
- match self.static_files.iter().any(|s| s.full_url == path) {
+ match self.static_files.iter().any(|s| s.url == path) {
true => Some(format!("{root}{path}")),
false => None,
};
@@ -430,10 +489,6 @@ impl Website {
large: large_path.or(fallback_path.clone())?,
})
}
-
- pub fn get_config(&self, key: &str) -> String {
- self.config.get(key).map(String::to_owned).unwrap_or_else(String::new)
- }
}