diff options
| author | Ben Bridle <ben@derelict.engineering> | 2026-02-22 10:15:06 +1300 |
|---|---|---|
| committer | Ben Bridle <ben@derelict.engineering> | 2026-02-22 10:15:33 +1300 |
| commit | 700c0ddd79fc6ca01d52250b69b02c1a13d4ddef (patch) | |
| tree | 9c3c31e8d9cde40dbcc689c0abd876e57a10f028 /src | |
| parent | 8c2ac6d92f6a4579591f748eebcbca2b9913d92d (diff) | |
| download | toaster-700c0ddd79fc6ca01d52250b69b02c1a13d4ddef.zip | |
Big rewrite
A quick list of everything that's changed:
- links to a duplicate heading beneath the same level 1 heading now work
- RSS feed generation using a .feed file
- customisation of the HTML template using the html.template key
- option to use symlinks instead of copying static files
- fixed incorrect resolution of internal links
- simplified different name forms with the Name type
- allow linking to a redirect
Diffstat (limited to 'src')
| -rw-r--r-- | src/collect_files.rs | 457 | ||||
| -rw-r--r-- | src/config.rs | 92 | ||||
| -rw-r--r-- | src/generate_html.rs | 167 | ||||
| -rw-r--r-- | src/generate_rss.rs | 72 | ||||
| -rw-r--r-- | src/main.rs | 79 | ||||
| -rw-r--r-- | src/string_utils.rs | 67 |
6 files changed, 646 insertions, 288 deletions
diff --git a/src/collect_files.rs b/src/collect_files.rs index e3d3a11..3616c8a 100644 --- a/src/collect_files.rs +++ b/src/collect_files.rs @@ -1,60 +1,75 @@ use crate::*; -use highlight::*; -use vagabond::*; - -use std::collections::HashMap; use std::fmt::Debug; pub struct Website { - pub name: String, - pub config: HashMap<String, String>, - pub highlighters: Highlighters, + pub name: Name, + pub config: Config, pub pages: Vec<Page>, pub redirects: Vec<Redirect>, - pub static_files: Vec<StaticItem>, // Redirects, !-prefixed-dir contents - pub static_dirs: Vec<StaticItem>, // Only !-prefixed static dirs + pub static_files: Vec<StaticFile>, // !-prefixed-dir contents + pub feeds: Vec<Feed>, // RSS feeds } pub struct Page { - pub name: String, // Display name of this page - pub name_url: String, // Safe URL name, no extension - pub full_url: String, // Safe full URL, no extension - pub parents: Vec<String>, // Parent directory components, unsafe - pub parent_url: String, // Base URL for links in this page + pub name: Name, + pub url: String, // No extension + pub parents: Vec<Name>, // Parent names + pub parent_url: String, // With trailing slash pub source_path: PathBuf, // Absolute path to source file pub document: MarkdownDocument, // File content parsed as markdown pub headings: Vec<Heading>, // Ordered list of all headings in page - pub last_modified: Option<SystemTime>, // last-modified time of source file + pub last_modified: Option<SystemTime>, // Last-modified time of source file } pub struct Heading { - pub name: String, - pub url: String, + pub name: Name, + pub prefix: Option<Name>, // Disambiguation pub level: Level, - pub block_id: usize, + pub block_id: usize, // Pointer to heading element in document } -pub struct StaticItem { - pub full_url: String, // Safe full URL, with extension +impl Heading { + pub fn slug(&self) -> String { + match &self.prefix { + Some(prefix) => format!("{}-{}", prefix.slug(), self.name.slug()), + None => self.name.slug(), 
+ } + } +} + +pub struct StaticFile { + pub url: String, // With extension pub source_path: PathBuf, // Absolute path to source file - pub last_modified: Option<SystemTime>, // last-modified time of source file + pub last_modified: Option<SystemTime>, // Last-modified time of source file +} + +pub struct Feed { + pub name: Name, // Taken from file name + pub url: String, // With extension + pub parents: Vec<Name>, // Parent names + pub parent_url: String, // Base URL for feed pages + pub source_path: PathBuf, // Absolute path to source file + pub last_modified: Option<SystemTime>, // Last-modified time of source file } pub struct Redirect { - pub name: String, // Display name of this redirect - pub full_url: String, // Safe full URL, no extension - pub parents: Vec<String>, // Parent directory components, unsafe + pub name: Name, + pub url: String, // No extension + pub parents: Vec<Name>, // Parent names pub parent_url: String, // Base URL for relative redirects - pub redirect: String, // Page to redirect to, as an internal link - pub last_modified: Option<SystemTime>, // last-modified time of source file + pub target: String, // Page to redirect to, internal link + pub source_path: PathBuf, // Absolute path to source file, for logging + pub last_modified: Option<SystemTime>, // Last-modified time of source file } + +/// Calculate correct relative path from this entity to a specified page. 
pub trait LinkFrom: Debug { - fn name(&self) -> &str; + fn name(&self) -> &Name; fn parent_url(&self) -> &str; - fn parents(&self) -> &[String]; + fn parents(&self) -> &[Name]; fn root(&self) -> String { let mut root = String::new(); for _ in self.parents() { @@ -68,16 +83,28 @@ pub trait LinkFrom: Debug { None => format!("/{}", self.name()), } } -} - -pub struct Highlighters { - pub languages: HashMap<String, usize>, - pub highlighters: Vec<Highlighter>, -} - -pub struct ImagePaths { - pub thumb: String, - pub large: String, + /// Convert an internal link to a canonical page URL and optional heading, + /// both as slugs. + /// + /// `path` and returned URL have no extension. + fn canonicalise(&self, path: &str) -> (String, Option<String>) { + // Remove heading fragment from path. + let (path, heading) = match path.rsplit_once('#') { + Some((path, heading)) => match heading.is_empty() { + true => (path, None), + false => (path, Some(to_slug(heading))), + } + None => (path, None), + }; + let mut path = path.to_string(); + // Attach parent URL if not an absolute path. + if !path.starts_with('/') { + path = format!("{}{path}", self.parent_url()); + } + // Convert path to a canonical URL. 
+ path = to_slug(&collapse_path(&path)); + return (path, heading); + } } impl Page { @@ -102,16 +129,33 @@ impl Debug for Redirect { } } +impl Debug for Feed { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "\"{}\"", self.qualified_name()) + } +} + impl LinkFrom for Page { - fn name(&self) -> &str { &self.name } + fn name(&self) -> &Name { &self.name } fn parent_url(&self) -> &str { &self.parent_url } - fn parents(&self) -> &[String] { &self.parents } + fn parents(&self) -> &[Name] { &self.parents } } impl LinkFrom for Redirect { - fn name(&self) -> &str { &self.name } + fn name(&self) -> &Name { &self.name } + fn parent_url(&self) -> &str { &self.parent_url } + fn parents(&self) -> &[Name] { &self.parents } +} + +impl LinkFrom for Feed { + fn name(&self) -> &Name { &self.name } fn parent_url(&self) -> &str { &self.parent_url } - fn parents(&self) -> &[String] { &self.parents } + fn parents(&self) -> &[Name] { &self.parents } +} + +pub struct ImagePaths { + pub thumb: String, + pub large: String, } @@ -121,41 +165,73 @@ impl Website { pages: Vec::new(), redirects: Vec::new(), static_files: Vec::new(), - static_dirs: Vec::new(), + feeds: Vec::new(), name: match Entry::from_path(path) { - Ok(entry) => entry.name, + Ok(entry) => entry.name.into(), Err(err) => fatal!("Couldn't open {:?}: {:?}", &path, err), }, - config: HashMap::new(), - highlighters: Highlighters { - languages: HashMap::new(), - highlighters: Vec::new(), - }, + config: Config::new(), }; + // Recursively collect entire website. new.collect_entry(path, path); - new.parse_highlighters(); + new.parse_hoisted_folders(); return new; } + /// Read the hoisted_folders config key, make root redirects for each + /// child of each listed directory. 
+ fn parse_hoisted_folders(&mut self) { + for line in self.config.get("hoisted_folders").lines() { + if line.is_empty() { continue } + // Turn line into a path + let path = PathBuf::from(line); + let prefix: Vec<Name> = path.components() + .filter(|c| if let std::path::Component::Normal(_) = c {true} else {false}) + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .map(|s| strip_numeric_prefix(&s).into()) + .collect(); + for page in &self.pages { + if page.parents == prefix { + let name = page.name.clone(); + let url = name.slug(); + let parents = Vec::new(); + let parent_url = String::new(); + let target = page.url.clone(); + let source_path = "<hoisted child>".into(); + let last_modified = self.config.last_modified; + self.redirects.push(Redirect { + name, + url, + parents, + parent_url, + target, + source_path, + last_modified, + }); + } + } + } + } + + /// Recursively collect an entry and all children. + /// `prefix` is the base directory path for the entire website. fn collect_entry(&mut self, path: &Path, prefix: &Path) { let entry = Entry::from_path(path).unwrap(); // Ignore dotted entries. if entry.name.starts_with('.') { return } - // Get name and extension. - let (mut name, extension) = entry.split_name(); - name = strip_numeric_prefix(&name); - let name_url = make_url_safe(&name); - // Get last-modified time. + // Get name, extension, last-modified. + let (name_raw, extension) = entry.split_name(); + let name: Name = strip_numeric_prefix(&name_raw).into(); let last_modified = entry.last_modified; // Generate parent URL, used only for files. 
let source_path = entry.original_path.clone(); let relative_path = source_path.strip_prefix(prefix).unwrap_or_else( |_| fatal!("Path doesn't start with {prefix:?}: {source_path:?}")); - let mut parents: Vec<_> = relative_path.components() + let mut parents: Vec<Name> = relative_path.components() .map(|c| c.as_os_str().to_string_lossy().to_string()) - .map(|s| strip_numeric_prefix(&s)) + .map(|s| strip_numeric_prefix(&s).into()) .collect(); - parents.pop(); // Remove file segment. + parents.pop(); // Remove final (non-parent) segment. // Process each entry. if entry.is_directory() { @@ -166,11 +242,9 @@ impl Website { let relative_path = source_path.strip_prefix(&entry.original_path).unwrap_or_else( |_| fatal!("Path doesn't start with {prefix:?}: {source_path:?}")) .as_os_str().to_string_lossy().to_string(); - let full_url = format!("{stripped}/{relative_path}"); - self.static_files.push(StaticItem { full_url, source_path, last_modified }) + let url = format!("{stripped}/{relative_path}"); + self.static_files.push(StaticFile { url, source_path, last_modified }) } - let full_url = make_url_safe(stripped); - self.static_dirs.push(StaticItem { full_url, source_path, last_modified }); } else { for child in list_directory(entry.original_path).unwrap() { self.collect_entry(&child.original_path, prefix); @@ -178,78 +252,100 @@ impl Website { } } else if parents.is_empty() && entry.name.to_lowercase() == "toaster.conf" { info!("Reading configuration file at {path:?}"); - // Parse the config file. 
- let config = std::fs::read_to_string(&source_path).unwrap(); - let mut key = None; - let mut value = String::new(); - for line in config.lines() { - if line.starts_with(" ") || line.trim().is_empty() { - value.push_str(line.trim()); - value.push('\n'); - } else { - if let Some(key) = key { - self.config.insert(key, std::mem::take(&mut value)); - } - key = Some(line.trim().to_lowercase().to_string()); - } - } - if let Some(key) = key { - self.config.insert(key, std::mem::take(&mut value)); - } + let content = std::fs::read_to_string(&source_path).unwrap(); + self.config.parse_file(&content, last_modified); } else { - let full_name = match parents.last() { + // Used for error messages, to distinguish between pages of the same name. + let qualified_name = match parents.last() { Some(parent) => format!("{parent}/{name}"), None => name.to_string(), }; match extension.as_str() { + "feed" => { + let mut url = String::new(); + for parent in &parents { + url.push_str(&parent.slug()); + url.push('/'); + } + let parent_url = url.clone(); + url.push_str(&name.plain()); + self.feeds.push(Feed { + name, + url, + parents, + parent_url, + source_path, + last_modified, + }); + } + "redirect" => { + let mut url = String::new(); + for parent in &parents { + url.push_str(&parent.slug()); + url.push('/'); + } + let parent_url = url.clone(); + url.push_str(&name.slug()); + let target = std::fs::read_to_string(&source_path) + .unwrap().trim().to_string(); + self.redirects.push(Redirect { + name, + url, + parents, + parent_url, + target, + source_path, + last_modified, + }); + } "md" => { let markdown = std::fs::read_to_string(&source_path).unwrap(); let document = MarkdownDocument::from_str(&markdown); // Collect headings, check for duplicates. 
- let mut heading_set = HashSet::new(); - let mut duplicates = HashSet::new(); - let mut headings: Vec<_> = document.blocks.iter().enumerate() + let mut names_set = HashSet::new(); // all heading names + let mut duplicates = HashSet::new(); // only duplicates + let mut headings: Vec<Heading> = document.blocks.iter().enumerate() .filter_map(|(block_id, block)| if let Block::Heading { line, level } = block { - let name = line.to_string(); - let url = make_url_safe(strip_appendix(&name)); + let name: Name = line.to_string().into(); let level = level.to_owned(); - if !heading_set.insert(url.clone()) { - duplicates.insert(url.clone()); + let heading = Heading { name, prefix: None, level, block_id }; + if !names_set.insert(heading.slug()) { + duplicates.insert(heading.slug()); } - Some(Heading { name, url, level, block_id }) + Some(heading) } else { None }).collect(); // Namespace any duplicate headings to the parent h1 heading. - let mut parent_url = String::new(); + let mut prefix = None; for heading in &mut headings { if let Level::Heading1 = heading.level { - parent_url = heading.url.clone(); - } - if duplicates.contains(&heading.url) { - heading.url = format!("{parent_url}-{}", heading.url); + prefix = Some(heading.name.clone()); + } else { + if duplicates.contains(&heading.slug()) { + heading.prefix = prefix.clone(); + } } } - // Check for duplicates again, and warn if any. - heading_set.clear(); + // Check for duplicates once more, and warn if any. 
+ names_set.clear(); duplicates.clear(); for heading in &headings { - if !heading_set.insert(heading.url.clone()) { - duplicates.insert(heading.url.clone()); + if !names_set.insert(heading.slug()) { + duplicates.insert(heading.slug()); } } - for url in duplicates { - warn!("Page {full_name:?} contains multiple headings with ID \"#{url}\""); + for slug in duplicates { + warn!("Page {qualified_name:?} contains multiple headings with ID \"#{slug}\""); } - if name_url == "+index" { + if name.slug() == "+index" { if parents.is_empty() { // This is the index file for the whole site. self.pages.push(Page { - name: String::from("Home"), - name_url: String::from("index"), - full_url: String::from("index"), + name: "Home".into(), + url: String::from("index"), parents, parent_url: String::from(""), source_path, @@ -259,20 +355,18 @@ impl Website { }); } else { // This is an index file for a directory. - let name = parents[parents.len()-1].clone(); - let name_url = make_url_safe(&name); - let mut full_url = String::new(); + let name = parents.last().unwrap().clone(); + let mut url = String::new(); for parent in &parents { - full_url.push_str(&make_url_safe(parent)); - full_url.push('/'); + url.push_str(&parent.slug()); + url.push('/'); } - let parent_url = full_url.clone(); - full_url.pop(); - parents.pop(); + let parent_url = url.clone(); + url.pop(); // Remove the trailing slash + parents.pop(); // Remove this directory self.pages.push(Page { name, - name_url, - full_url, + url, parents, parent_url, source_path, @@ -282,111 +376,72 @@ impl Website { }); } } else { - let mut full_url = String::new(); + // This is a regular page. + let mut url = String::new(); for parent in &parents { - full_url.push_str(&make_url_safe(parent)); - full_url.push('/'); + url.push_str(&parent.slug()); + url.push('/'); } - full_url.push_str(&name_url); - let mut parent_url = full_url.clone(); + // Children descend from this page, so the parent + // url must contain this page. 
+ url.push_str(&name.slug()); + let mut parent_url = url.clone(); parent_url.push('/'); self.pages.push(Page { - name, name_url, full_url, - parents, parent_url, + name, + url, + parents, + parent_url, source_path, - document, headings, + document, + headings, last_modified, }); } }, - "redirect" => { - let mut full_url = String::new(); - for parent in &parents { - full_url.push_str(&make_url_safe(parent)); - full_url.push('/'); - } - let parent_url = full_url.clone(); - full_url.push_str(&name_url); - let redirect = std::fs::read_to_string(&source_path) - .unwrap().trim().to_string(); - self.redirects.push(Redirect { - name, full_url, parents, parent_url, - redirect, last_modified, - }); - } _ => { + // This is a static file. let mut parent_url = String::new(); for parent in &parents { - parent_url.push_str(&make_url_safe(parent)); - parent_url.push('/'); + parent_url.push_str(&parent.slug()); + parent_url.push('/'); } - let full_url = format!("{parent_url}{name_url}.{extension}"); - self.static_files.push(StaticItem { full_url, source_path, last_modified }); + let name_slug = name.slug(); + let url = format!("{parent_url}{name_slug}.{extension}"); + self.static_files.push(StaticFile { url, source_path, last_modified }); }, } } } - pub fn parse_highlighters(&mut self) { - let mut languages = Vec::new(); - let mut source = String::new(); - for line in self.get_config("highlighters").lines() { - if let Some(line) = line.trim().strip_prefix('[') { - if let Some(line) = line.strip_suffix(']') { - // Bank the current source. 
- if !languages.is_empty() { - let i = self.highlighters.highlighters.len(); - for language in languages { - self.highlighters.languages.insert(language, i); - } - let highlighter = Highlighter::from_str(&source); - self.highlighters.highlighters.push(highlighter); - } - languages = line.split('/').map(|s| s.trim().to_string()).collect(); - source.clear(); - continue; - } - } - source.push_str(line); - source.push('\n'); - } - // Bank the current source. - if !languages.is_empty() { - let i = self.highlighters.highlighters.len(); - for language in languages { - self.highlighters.languages.insert(language, i); - } - let highlighter = Highlighter::from_str(&source); - self.highlighters.highlighters.push(highlighter); - } - } - - // Ext is extension without a dot. - // Checks if a relative link to an internal page name can be reached from - // the current page, and returns a resolved absolute link to the page with extension. + /// Check if the internal link `path` is valid, pointing to a real internal + /// page with extension `ext` and heading, relative to the current page (`from`). + /// Returns a resolved absolute link to the page, with extension. pub fn has_page(&self, from: &impl LinkFrom, path: &str, ext: &str) -> Option<String> { - // Remove heading fragment and file extension. - let (path, heading) = match path.rsplit_once('#') { - Some((path, heading)) => match heading.is_empty() { - true => (path, None), - false => (path, Some(heading)), - } - None => (path, None), + let original_path = path; + let (mut path, mut heading) = from.canonicalise(path); + if let Some(stripped) = path.strip_suffix(&format!(".{ext}")) { + path = stripped.to_string(); }; - let mut path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path).to_string(); - // Attach parent if not an absolute path. - if !path.starts_with('/') { - path = format!("{}{path}", from.parent_url()); - } - path = make_url_safe(&collapse_path(&path)); - // Find page with this path in website. 
+ // Find page with this path in website, resolving any redirect first. + for redirect in &self.redirects { + if redirect.url == path { + let (target_path, target_heading) = redirect.canonicalise(&redirect.target); + path = target_path; + if target_heading.is_some() && heading.is_some() { + warn!("Page {from:?} contains link {original_path:?} to a redirect that also links to a heading"); + } + if heading.is_none() { + heading = target_heading; + } + } + } for page in &self.pages { - if page.full_url == path { + if page.url == path { let root = from.root(); if let Some(heading) = heading { - let heading = make_url_safe(strip_appendix(heading)); - if !page.headings.iter().any(|h| h.url == heading) { + if !page.headings.iter().any(|h| h.slug() == heading) { warn!("Page {from:?} contains link to nonexistent heading {heading:?} on page {path:?}"); } return Some(format!("{root}{path}.{ext}#{heading}")); @@ -398,14 +453,17 @@ impl Website { return None; } + /// Check if the external link `path` points to a valid static file. + /// Returns a resolved absolute link to the file. pub fn has_static(&self, from: &impl LinkFrom, path: &str) -> Option<String> { // Attach parent if not an absolute path. + // We don't want to canonicalise/sluggify the path. let path = match !path.starts_with('/') { true => collapse_path(&format!("{}{path}", from.parent_url())), false => collapse_path(path), }; for file in &self.static_files { - if file.full_url == path { + if file.url == path { let root = from.root(); return Some(format!("{root}{path}")); } @@ -413,9 +471,10 @@ impl Website { return None; } + /// Check if a particular image exists. 
pub fn has_image(&self, file_name: &str, root: &str) -> Option<ImagePaths> { let check = |path: String| - match self.static_files.iter().any(|s| s.full_url == path) { + match self.static_files.iter().any(|s| s.url == path) { true => Some(format!("{root}{path}")), false => None, }; @@ -430,10 +489,6 @@ impl Website { large: large_path.or(fallback_path.clone())?, }) } - - pub fn get_config(&self, key: &str) -> String { - self.config.get(key).map(String::to_owned).unwrap_or_else(String::new) - } } diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..1933a70 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,92 @@ +use crate::*; + +use highlight::*; + +use std::collections::HashMap; + + +pub struct Config { + pub config: HashMap<String, String>, + pub last_modified: Option<SystemTime>, + pub languages: HashMap<String, usize>, + pub highlighters: Vec<Highlighter>, + pub root_redirects: Vec<String>, +} + +impl Config { + pub fn new() -> Self { + Self { + config: HashMap::new(), + last_modified: None, + languages: HashMap::new(), + highlighters: Vec::new(), + root_redirects: Vec::new(), + } + } + + pub fn get(&self, key: &str) -> String { + match self.config.get(key) { + Some(value) => value.to_owned(), + None => String::new(), + } + } + + pub fn parse_file(&mut self, content: &str, last_modified: Option<SystemTime>) { + if self.last_modified.is_none() || self.last_modified > last_modified { + self.last_modified = last_modified; + } + + let mut key = None; + let mut value = String::new(); + macro_rules! 
bank_value { + () => { + value = value.trim().to_string(); + if let Some(key) = key { + if key == "highlighters" { + self.parse_highlighters(&value); + } + self.config.insert(key, std::mem::take(&mut value)); + } + }; + } + for line in content.lines() { + if line.starts_with(" ") || line.trim().is_empty() { + value.push_str(line.trim()); + value.push('\n'); + } else { + bank_value!(); + key = Some(line.trim().to_lowercase().to_string()); + } + } + bank_value!(); + } + + fn parse_highlighters(&mut self, value: &str) { + let mut languages = Vec::new(); + let mut source = String::new(); + macro_rules! bank_sources { + () => { + if !languages.is_empty() { + let i = self.highlighters.len(); + for language in std::mem::take(&mut languages) { + self.languages.insert(language, i); + } + let highlighter = Highlighter::from_str(&std::mem::take(&mut source)); + self.highlighters.push(highlighter); + } + }; + } + for line in value.lines() { + if let Some(line) = line.trim().strip_prefix('[') { + if let Some(line) = line.strip_suffix(']') { + bank_sources!(); + languages = line.split('/').map(|s| s.trim().to_string()).collect(); + continue; + } + } + source.push_str(line); + source.push('\n'); + } + bank_sources!(); + } +} diff --git a/src/generate_html.rs b/src/generate_html.rs index 5526a06..0e6ce75 100644 --- a/src/generate_html.rs +++ b/src/generate_html.rs @@ -4,82 +4,89 @@ use markdown::*; use recipe::*; +const DEFAULT_TEMPLATE: &str = "\ +<!DOCTYPE html> + <head> + <title>{page_name} — {site_name}</title> + <meta charset='UTF-8'> + <meta name='viewport' content='width=device-width, initial-scale=1'> + {head} + </head> + <body> + <header> + <nav id='up'> + {home_link} + {parent_link} + </nav> + <h1 id='title'>{page_name_html}</h1> + </header> + <main> + {main} + </main> + </body> +</html>"; + + pub fn generate_html(page: &Page, website: &Website) -> String { let root = page.root(); // Get page name as a plain string and as an HTML fragment. 
- let mut page_name_plain = page.name.clone(); - let mut page_name_html = sanitize_text(&page_name_plain, true); + let mut page_name = page.name.plain(); + let mut page_name_html = sanitize_text(&page_name, true); // Find any override-title fragments. for block in &page.document.blocks { if let Block::Fragment { language, content } = block { if language == "override-title" { let line = Line::from_str(content); - page_name_plain = line.to_string(); - page_name_html = line_to_html(&line, page, website); + page_name = line.to_string(); + page_name_html = line_to_html(&line, page, website, &None); } } } - page_name_plain = sanitize_text(&page_name_plain, true); + page_name = sanitize_text(&page_name, true); // Get the URL of the parent page. - let site_name = sanitize_text(&website.name, true); + let site_name = sanitize_text(&website.name.plain(), true); let mut parent_url = String::new(); for segment in &page.parents { - parent_url.push_str(&make_url_safe(segment)); parent_url.push('/'); + parent_url.push_str(&segment.slug()); parent_url.push('/'); } parent_url.pop(); let head = get_html_head(page, website); let head = head.trim(); let home_link = format!("<a id='home' href='{root}index.html'>{site_name}</a>"); let parent_link = match page.parents.last() { - Some(name) => format!("<a id='parent' href='../{}.html'>{name}</a>", make_url_safe(name)), + Some(name) => format!("<a id='parent' href='../{}.html'>{name}</a>", name.slug()), None => String::new(), }; // Format tables of contents and the main page. 
let toc = get_table_of_contents(page); - let toc_main = if page.headings.len() >= 3 { + let toc_compact = if page.headings.len() >= 3 { format!("<details><summary></summary>\n{toc}</details>\n") } else { String::new() }; - let toc_side = format!("<div>{toc}</div>\n"); let main = document_to_html(page, website); let main = main.trim(); - format!("\ -<!DOCTYPE html> -<head> -<title>{page_name_plain} — {site_name}</title> -<meta charset='UTF-8'> -<meta name='viewport' content='width=device-width, initial-scale=1'> -{head} -</head> -<body> -<nav id='outline' class='hidden'> -<h1></h1> -{toc_side} -</nav> -<div id='page'> -<header> -<nav id='up'> -{home_link} -{parent_link} -</nav> -<h1 id='title'>{page_name_html}</h1> -<nav id='toc'> -{toc_main} -</nav> -</header> -<main> -{main} -</main> -</div> -</body> -</html>") + let mut template = website.config.get("html.template"); + if template.trim().is_empty() { + template = DEFAULT_TEMPLATE.to_string(); + } + + template + .replace("{site_name}", &site_name ) + .replace("{page_name}", &page_name ) + .replace("{page_name_html}", &page_name_html) + .replace("{home_link}", &home_link ) + .replace("{parent_link}", &parent_link ) + .replace("{head}", &head ) + .replace("{toc_compact}", &toc_compact ) + .replace("{toc}", &toc ) + .replace("{main}", &main ) } pub fn generate_html_redirect(path: &str, website: &Website) -> String { - let head = website.get_config("html.redirect.head"); let head = head.trim(); + let head = website.config.get("html.redirect.head"); let head = head.trim(); let path = sanitize_text(path, false); format!("\ <!DOCTYPE html> @@ -107,7 +114,7 @@ pub fn get_html_head(page: &Page, website: &Website) -> String { } } if include_default_head { - html_head.insert_str(0, &website.get_config("html.head")); + html_head.insert_str(0, &website.config.get("html.head")); } let root = page.root(); html_head @@ -118,12 +125,12 @@ pub fn get_html_head(page: &Page, website: &Website) -> String { pub fn 
get_table_of_contents(page: &Page) -> String { let mut toc = String::from("<ul>\n"); - let site_name = sanitize_text(&page.name, true); + let site_name = sanitize_text(&page.name.plain(), true); toc.push_str(&format!("<li class='l1'><a href='#title'>{site_name}</a></li>\n")); for heading in &page.headings { let name = &heading.name; - let url = &heading.url; + let url = &heading.slug(); let class = match heading.level { Level::Heading1 => "l1", Level::Heading2 => "l2", @@ -140,9 +147,10 @@ pub fn document_to_html(page: &Page, website: &Website) -> String { let from = &page; let root = page.root(); let mut html = String::new(); + let mut prefix = None; macro_rules! line_to_html { - ($l:expr) => {{ line_to_html(&$l, page, website) }}; } + ($l:expr) => {{ line_to_html(&$l, page, website, &prefix) }}; } macro_rules! html { ($($arg:tt)*) => {{ html.push_str(&format!($($arg)*)); html.push('\n'); }}; } macro_rules! tag { @@ -159,14 +167,15 @@ pub fn document_to_html(page: &Page, website: &Website) -> String { if let Level::Heading1 = level { html!("</article>"); html!("<article>"); + prefix = Some(to_slug(&line.to_string())); // html!("<nav class='return'><a href='#'></a></nav>"); }; // Find namespaced heading ID from headings list. 
let url = match page.headings.iter().find(|h| h.block_id == i) { - Some(heading) => heading.url.clone(), + Some(heading) => heading.slug(), None => unreachable!("Couldn't find heading in headings list"), }; - // let url = make_url_safe(strip_appendix(&line.to_string())); + // let url = to_slug(strip_appendix(&line.to_string())); let heading_tag = match level { Level::Heading1 => "h1", Level::Heading2 => "h2", @@ -288,7 +297,7 @@ pub fn document_to_html(page: &Page, website: &Website) -> String { if let Some((name, file_name)) = line.split_once("::") { let name = name.trim(); let file_name = file_name.trim(); - let ParsedLink { path, class, label } = parse_internal_link(name, page, website); + let ParsedLink { path, class, label } = parse_internal_link(name, page, website, &prefix); if let Some(image_paths) = website.has_image(file_name, &root) { let thumb = sanitize_text(&image_paths.thumb, false); html!("<a href='{path}' class='{class}'><img src='{thumb}'/><p>{label}</p></a>") @@ -301,9 +310,9 @@ pub fn document_to_html(page: &Page, website: &Website) -> String { } }), _ => wrap!("pre", format!("class='{language}'"), { - if let Some(i) = website.highlighters.languages.get(language) { + if let Some(i) = website.config.languages.get(language) { let mut source = String::new(); - let highlighter = &website.highlighters.highlighters[*i]; + let highlighter = &website.config.highlighters[*i]; for span in highlighter.highlight(content) { if span.tag.is_empty() { source.push_str(&sanitize_text(&span.text, false)); @@ -366,7 +375,7 @@ pub fn document_to_html(page: &Page, website: &Website) -> String { -fn line_to_html(line: &Line, page: &Page, website: &Website) -> String { +fn line_to_html(line: &Line, page: &Page, website: &Website, prefix: &Option<String>) -> String { let mut html = String::new(); for line_element in &line.tokens { match line_element { @@ -381,7 +390,7 @@ fn line_to_html(line: &Line, page: &Page, website: &Website) -> String { Token::Math(text) => { let 
text = &sanitize_text(text, false); html.push_str(&format!("<span class='math'>{text}</span>")) } Token::InternalLink{ label: link_label, path } => { - let ParsedLink { path, class, mut label } = parse_internal_link(path, page, website); + let ParsedLink { path, class, mut label } = parse_internal_link(path, page, website, prefix); if !link_label.is_empty() { label = link_label.to_string(); } @@ -404,30 +413,56 @@ struct ParsedLink { pub class: &'static str, } -fn parse_internal_link(name: &str, page: &Page, website: &Website) -> ParsedLink { +fn parse_internal_link(name: &str, page: &Page, website: &Website, prefix: &Option<String>) -> ParsedLink { let from = &page; let (class, label, path) = match name.split_once('#') { - Some(("", heading)) => ("heading", heading, format!("#{}", strip_appendix(heading))), - Some((page, heading)) => ("page", heading, format!("{page}.html#{}", strip_appendix(heading))), + Some(("", heading)) => ("heading", heading, format!("#{heading}")), + Some((page, heading)) => ("page", heading, format!("{page}.html#{heading}")), _ => ("page", name, format!("{name}.html")), }; - let mut path = make_url_safe(&path); + let mut path = to_slug(&path); let label = match label.rsplit_once('/') { Some((_, label)) => sanitize_text(label.trim(), true), None => sanitize_text(label.trim(), true), }; - // Check that the linked internal page exists. + // Check that the linked internal page with heading exists. if class == "page" { match website.has_page(page, &path, "html") { Some(resolved) => path = resolved, None => warn!("Page {from:?} contains link to nonexistent page {path:?}"), } } - // Check that the heading exists. + // Check that the heading exists on this page. 
if class == "heading" { - let heading = path.strip_prefix('#').unwrap(); - if !page.headings.iter().any(|h| h.url == heading) { - warn!("Page {from:?} contains link to nonexistent internal heading {heading:?}"); + let plain_heading = path.strip_prefix('#').unwrap().to_string(); + let prefixed_heading = match prefix { + Some(prefix) => format!("{prefix}-{plain_heading}"), + None => plain_heading.to_string(), + }; + let mut matched = false; + for heading in &page.headings { + if heading.name.slug() == plain_heading { + if heading.prefix.is_some() { + // The matched heading has a prefix, so is one of many. + // The prefix must match, we must disambiguate the path. + if heading.slug() == prefixed_heading { + matched = true; + path = format!("#{prefixed_heading}"); + break; + } + } else { + // The matched heading has no prefix, so is unique on the page. + matched = true; + break + } + } + } + if !matched { + let prefix_note = match prefix { + Some(prefix) => format!(" (under {prefix:?})"), + None => format!(""), + }; + warn!("Page {from:?} contains link to nonexistent internal heading {plain_heading:?}{prefix_note}"); } } let path = url_encode(&path); @@ -466,13 +501,3 @@ fn parse_external_link(label: &str, path: &str, page: &Page, website: &Website) let label = sanitize_text(&label, true); ParsedLink { path, class: "external", label } } - -/// Remove a 'Appendix #: ' prefix from a string. 
-pub fn strip_appendix(text: &str) -> &str { - if let Some((prefix, name)) = text.split_once(": ") { - if prefix.starts_with("Appendix") { - return name; - } - } - return text; -} diff --git a/src/generate_rss.rs b/src/generate_rss.rs new file mode 100644 index 0000000..48a6917 --- /dev/null +++ b/src/generate_rss.rs @@ -0,0 +1,72 @@ +use crate::*; + +use std::collections::VecDeque; + +use chrono::{DateTime, Utc, Local}; + + +pub fn generate_rss(feed: &Feed, website: &Website) -> String { + let path = &feed.source_path; + let content = std::fs::read_to_string(path).unwrap(); + let mut lines: VecDeque<&str> = content.lines().collect(); + let base_url = website.config.get("rss.base_url"); + if base_url.is_empty() { + warn!("No value was given for 'rss.base_url' key in toaster.conf"); + } + let (parent_url, _) = feed.url.split_once('/').unwrap(); + + let channel_title = lines.pop_front().unwrap_or("No title"); + let last_build_date = match feed.last_modified { + Some(system_time) => system_time.into(), + None => Utc::now(), + }.to_rfc2822(); + let mut all_entries = String::new(); + + for line in &lines { + if line.is_empty() { continue } + if let Some((timestamp, name)) = line.split_once("::") { + let mut timestamp = timestamp.to_string(); + let entry_title = name; + if !timestamp.contains('T') { + timestamp.push_str("T00:00:00"); + } + if !timestamp.contains('Z') || timestamp.contains('+') { + let offset = Local::now().offset().to_string(); + timestamp.push_str(&offset); + } + let Ok(entry_time) = DateTime::parse_from_rfc3339(×tamp) else { + warn!("Invalid timestamp in RSS file {path:?}: {timestamp:?}"); + continue; + }; + let entry_link = format!("{base_url}/{parent_url}/{}.html", to_slug(name)); + + // Check that child page exists. 
+ if let None = website.has_page(feed, name, "html") { + warn!("Feed {feed:?} contains link to nonexistent page {name:?}"); + } + + + let entry_string = format!(" + <item> + <title>{entry_title}</title> + <link>{entry_link}</link> + <pubDate>{entry_time}</pubDate> + </item> +"); + all_entries.push_str(&entry_string); + } else { + warn!("Invalid line in RSS file {path:?}: {line:?}"); + } + } + + format!( +r#"<?xml version="1.0" encoding="UTF-8" ?> +<rss version="2.0"> + <channel> + <title>{channel_title}</title> + <link>{base_url}</link> + <lastBuildDate>{last_build_date}</lastBuildDate> +{all_entries} + </channel> +</rss>"#) +} diff --git a/src/main.rs b/src/main.rs index 32b0ab9..dad8377 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,15 @@ #![feature(path_add_extension)] mod collect_files; +mod config; mod generate_html; +mod generate_rss; mod string_utils; + pub use collect_files::*; +pub use config::*; pub use generate_html::*; +pub use generate_rss::*; pub use string_utils::*; use markdown::*; @@ -43,11 +48,17 @@ fn main() { args.positional("destination"); args.named("delete"); args.named("html"); + args.named("use-symlinks"); args.raise_errors(); let source = args.get("source").as_path(); let destination = args.get("destination").as_path(); let delete_existing = args.get("delete").as_bool(); let export_html = args.get("html").as_bool(); + let use_symlinks = args.get("use-symlinks").as_bool(); + #[cfg(not(target_family = "unix"))] + if use_symlinks { + fatal!("Symbolic links are only supported on Linux"); + } // Parse entire website directory. let source = match source.canonicalize() { @@ -59,26 +70,46 @@ fn main() { // ------------------------------------------------------------ // Check for duplicate output paths for pages. 
- let mut destinations: HashSet<&str> = HashSet::new(); + let mut urls: HashSet<&str> = HashSet::new(); let mut duplicates: HashSet<&str> = HashSet::new(); for page in &website.pages { - if !destinations.insert(&page.full_url) { - duplicates.insert(&page.full_url); + if !urls.insert(&page.url) { + duplicates.insert(&page.url); + }; + } + for static_file in &website.static_files { + if !urls.insert(&static_file.url) { + duplicates.insert(&static_file.url); + }; + } + for redirect in &website.redirects { + if !urls.insert(&redirect.url) { + duplicates.insert(&redirect.url); }; } if !duplicates.is_empty() { for destination in duplicates { - warn!("Multiple pages have the output path {destination:?}"); + warn!("Multiple files, pages, or redirects have the output path {destination:?}"); for page in &website.pages { - if page.full_url == destination { + if page.url == destination { eprintln!(":: {:?}", page.source_path); } } + for static_file in &website.static_files { + if static_file.url == destination { + eprintln!(":: {:?}", static_file.source_path); + } + } + for redirect in &website.redirects { + if redirect.url == destination { + eprintln!(":: {:?}", redirect.source_path); + } + } } } let mut destination = destination; - destination.push(make_url_safe(&website.name)); + destination.push(&website.name.slug()); if delete_existing && Entry::from_path(&destination).is_ok() { info!("Deleting existing destination directory {destination:?}"); @@ -89,7 +120,7 @@ fn main() { for page in &website.pages { let mut destination = destination.clone(); - destination.push(&page.full_url); + destination.push(&page.url); // Convert document to different formats. 
if export_html { let html = generate_html(page, &website); @@ -105,20 +136,32 @@ fn main() { for static_file in &website.static_files { let mut destination = destination.clone(); - destination.push(&static_file.full_url); - info!("Copying static file to {destination:?}"); - make_parent_directory(&destination).unwrap(); - copy(&static_file.source_path, &destination).unwrap_or_else(|_| - error!("Failed to copy static file {:?} to {:?}", - static_file.source_path, destination)); + destination.push(&static_file.url); + if use_symlinks { + #[cfg(target_family = "unix")] + { + info!("Linking static file to {destination:?}"); + make_parent_directory(&destination).unwrap(); + let _ = remove(&destination); + std::os::unix::fs::symlink(&static_file.source_path, &destination).unwrap_or_else(|_| + error!("Failed to link static file {:?} to {:?}", + static_file.source_path, destination)); + } + } else { + info!("Copying static file to {destination:?}"); + copy(&static_file.source_path, &destination).unwrap_or_else(|_| + error!("Failed to copy static file {:?} to {:?}", + static_file.source_path, destination)); + } + } // NOTE: Static dir contents are copied as part of all static files. 
for redirect in &website.redirects { let mut destination = destination.clone(); - destination.push(&redirect.full_url); - let path = &redirect.redirect; + destination.push(&redirect.url); + let path = &redirect.target; if export_html { if !path.contains("://") { if let Some(path) = website.has_page(redirect, &path, "html") { @@ -131,6 +174,12 @@ fn main() { } } } + + for feed in &website.feeds { + let mut destination = destination.clone(); + destination.push(&feed.url); + write_file(&generate_rss(feed, &website), &destination, "rss", feed.last_modified); + } } fn print_help() { diff --git a/src/string_utils.rs b/src/string_utils.rs index b23c349..2061110 100644 --- a/src/string_utils.rs +++ b/src/string_utils.rs @@ -1,6 +1,71 @@ +use crate::*; + + +#[derive(Clone)] +pub struct Name { + raw: String, +} + +impl Name { + /// Preserve markdown syntax, return raw string. + pub fn raw(&self) -> String { + self.raw.clone() + } + /// Parse markdown syntax, return styled line. + pub fn styled(&self) -> Line { + Line::from_str(&self.raw) + } + /// Strip out markdown syntax, return plain text. + pub fn plain(&self) -> String { + self.styled().to_string() + } + /// Strip out markdown syntax, return url-safe text. 
+ pub fn slug(&self) -> String { + to_slug(&self.plain()) + } +} + +impl std::fmt::Display for Name { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + self.raw.fmt(f) + } +} + +impl std::fmt::Debug for Name { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + self.raw.fmt(f) + } +} + +impl PartialEq for Name { + fn eq(&self, other: &string_utils::Name) -> bool { + self.slug() == other.slug() + } +} + +impl Eq for Name {} +impl std::hash::Hash for Name { + fn hash<H>(&self, hasher: &mut H) where H: std::hash::Hasher { + self.slug().hash(hasher) + } +} + +impl From<String> for Name { + fn from(raw: String) -> Self { + Self { raw } + } +} + +impl From<&str> for Name { + fn from(raw: &str) -> Self { + Self { raw: raw.to_string() } + } +} + + // Turn a string into a tidy URL slug. -pub fn make_url_safe(text: &str) -> String { +pub fn to_slug(text: &str) -> String { text.to_ascii_lowercase().chars().filter_map(|c| if c.is_alphanumeric() || "-_~.+/#".contains(c) { Some(c) } else if c == ' ' { Some('-') } |
