diff options
author | Ben Bridle <bridle.benjamin@gmail.com> | 2025-01-08 12:26:03 +1300 |
---|---|---|
committer | Ben Bridle <bridle.benjamin@gmail.com> | 2025-01-08 12:26:31 +1300 |
commit | b190726a783a4aae98fad6b8fcfb266cabc4eb79 (patch) | |
tree | d8c9fb60e9792c562f60f489a659a4004593b672 /src/collect_files.rs | |
parent | af2507508f7877eace40b119c2a6fab1aefc6bc2 (diff) | |
download | toaster-b190726a783a4aae98fad6b8fcfb266cabc4eb79.zip |
More robust file traversal logic
File traversal can now cope with symbolic links. The original path to
each file is preserved, where previously it was unintentionally
canonicalized, so the source folder prefix can now be correctly
stripped from the path of each source file.
This commit also adds the !folder syntax from the original toaster.
Directories with a !-prefix are copied without modification to the site
root.
A --delete option has been added to the program. When this option is
set, the destination directory is first deleted if it already exists.
Diffstat (limited to 'src/collect_files.rs')
-rw-r--r-- | src/collect_files.rs | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/src/collect_files.rs b/src/collect_files.rs new file mode 100644 index 0000000..88d065f --- /dev/null +++ b/src/collect_files.rs @@ -0,0 +1,188 @@ +use crate::*; + +use vagabond::*; + + +pub struct Website { + pub name: String, + pub pages: Vec<Page>, + pub static_files: Vec<StaticItem>, + pub static_dirs: Vec<StaticItem>, +} + +pub struct Page { + pub name: String, // Display name + pub parent_url: String, // URL base for relative links + pub file_url: String, // Safe file name, no extension + pub full_url: String, // Safe full URL, no extension + pub source_path: PathBuf, // Absolute path to source file + pub document: MarkdownDocument, // File content parsed as markdown + pub headings: Vec<String>, // Safe name of each document heading +} + +pub struct StaticItem { + pub full_url: String, // Safe full URL, with extension + pub source_path: PathBuf, // Absolute path to source file +} + + + +impl Page { + pub fn back_string(&self) -> String { + let mut back = String::new(); + for c in self.full_url.chars() { + if c == '/' { + back.push_str("../"); + } + } + return back; + } +} + + + +impl Website { + pub fn from_path(path: &Path) -> Self { + let mut new = Self { + pages: Vec::new(), + static_files: Vec::new(), + static_dirs: Vec::new(), + name: match Entry::from_path(path) { + Ok(entry) => entry.name, + Err(err) => error!("Couldn't open {:?}: {:?}", &path, err), + }, + }; + new.collect_entry(path, path); + return new; + } + + fn collect_entry(&mut self, path: &Path, prefix: &Path) { + let entry = Entry::from_path(path).unwrap(); + // Ignore dotted entries. + if entry.name.starts_with('.') { + return; + } + // Get name and extension. + let (mut name, extension) = entry.split_name(); + if let Some((prefix, suffix)) = name.split_once(' ') { + if prefix.chars().all(|c| "0123456789-".contains(c)) { + name = suffix.to_string(); + } + } + let file_url = make_url_safe(&name); + // Generate parent URL, used only for files. 
+ let source_path = entry.original_path.clone(); + let relative_path = source_path.strip_prefix(prefix).unwrap_or_else( + |_| error!("Path doesn't start with {:?}: {:?}", prefix, source_path)); + let mut parent_url = String::new(); + let mut components: Vec<_> = relative_path.components().collect(); + components.pop(); // Remove file segment. + for c in &components { + let segment = &make_url_safe(&c.as_os_str().to_string_lossy()); + parent_url.push_str(segment); parent_url.push('/') + }; + + // Process each entry. + if entry.is_directory() { + if let Some(stripped) = entry.name.strip_prefix("!") { + let full_url = make_url_safe(stripped); + self.static_dirs.push(StaticItem { full_url, source_path }); + } else { + for child in list_directory(entry.original_path).unwrap() { + self.collect_entry(&child.original_path, prefix); + } + } + } else { + match extension.as_str() { + "md" => { + let markdown = std::fs::read_to_string(&source_path).unwrap(); + let document = MarkdownDocument::from_str(&markdown); + let headings = document.blocks.iter() + .filter_map(|block| if let Block::Heading { line, .. } = block { + Some(make_url_safe(&line.to_string())) + } else { + None + }).collect(); + // Change name and path if this is an index file. + let mut name = name; + let mut file_url = file_url; + let mut full_url = format!("{parent_url}{file_url}"); + if file_url == "+index" { + if components.is_empty() { + // This is the index file for the whole site. + name = String::from("Home"); + file_url = String::from("index"); + full_url = String::from("index"); + } else { + // This is an index file for a directory. 
+ name = components[components.len()-1] + .as_os_str().to_string_lossy().to_string(); + file_url = make_url_safe(&name); + full_url = parent_url.strip_suffix('/').unwrap_or(&parent_url).to_string(); + } + } + self.pages.push( + Page { name, parent_url, file_url, full_url, source_path, document, headings }); + }, + _ => { + let full_url = format!("{parent_url}{file_url}.{extension}"); + self.static_files.push(StaticItem { full_url, source_path }); + }, + } + } + } + + // Ext is extension without a dot. + // Checks if a relative link to an internal page name can be reached from + // the current page, and returns a resolved absolute link to the page with extension. + pub fn has_page(&self, from: &Page, path: &str, ext: &str) -> Option<String> { + // Remove heading fragment and file extension. + let (path, heading) = match path.rsplit_once('#') { + Some((path, heading)) => match heading.is_empty() { + true => (path, None), + false => (path, Some(heading)), + } + None => (path, None), + }; + let path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path); + + // Attach parent of current page to given path. + let directory = match from.parent_url.rsplit_once('/') { + Some((parent, _)) => parent, + None => &from.parent_url, + }; + let full_path = match path.starts_with("/") { + true => path.to_string(), + false => format!("{directory}/{path}"), + }; + + // Remove relative portions of path. + let segments: Vec<&str> = full_path.split("/") + .filter(|seg| !seg.is_empty() && *seg != ".") + .collect(); + let mut reduced_segments: Vec<&str> = segments.windows(2) + .filter(|w| w[1] != "..") + .map(|w| w[1]) + .collect(); + // The first segment is always skipped by the previous step. + if !segments.is_empty() && segments.get(1) != Some(&"..") { + if segments[0] != ".." 
{ + reduced_segments.insert(0, segments[0]); + } + } + let path = reduced_segments.join("/"); + + for page in &self.pages { + if page.full_url == path { + if let Some(heading) = heading { + if !page.headings.contains(&make_url_safe(heading)) { + warn!("Page {:?} contains link to nonexistent heading {heading:?} on page {path:?}", from.name); + } + } + return Some(format!("{path}.{ext}")); + } + } + return None; + } +} + |