From b190726a783a4aae98fad6b8fcfb266cabc4eb79 Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Wed, 8 Jan 2025 12:26:03 +1300 Subject: More robust file traversal logic File traversal can now cope with symbolic links. The original path to each file is preserved, where previously it was unintentionally canonicalized, so the source folder prefix can now be correctly stripped from the path of each source file. This commit also adds the !folder syntax from the original toaster. Directories with a !-prefix are copied without modification to the site root. A --delete option has been added to the program. When this option is set, the destination directory is first deleted if it already exists. --- src/main.rs | 207 ++++++++++-------------------------------------------------- 1 file changed, 34 insertions(+), 173 deletions(-) (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs index 0a62ec2..2950ee9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,7 @@ #![feature(path_add_extension)] +mod collect_files; +pub use collect_files::*; mod generate_html; pub use generate_html::*; @@ -50,101 +52,32 @@ fn main() { let source_directory = args.source.unwrap().canonicalize().unwrap(); let destination_directory = args.destination.unwrap(); - let mut website = Website { - source_files: Vec::new(), - static_files: Vec::new(), - name: match Entry::from_path(&source_directory) { - Ok(entry) => entry.name, - Err(err) => error!("Couldn't open {:?}: {:?}", &source_directory, err), - }, - error: false, - }; - - // Collect all website files. - match traverse_directory(&source_directory) { - Ok(entries) => for entry in entries { - // Ignore dot files. - if entry.name.starts_with(".") { - continue; - } - // Generate name, stripping any leading digit sequence. 
- let (mut name, extension) = entry.split_name(); - if let Some((prefix, suffix)) = name.split_once(' ') { - if prefix.chars().all(|c| "0123456789-".contains(c)) { - name = suffix.to_string(); - } - } - // Generate full URL with stripped name, no extension. - let source_path = entry.original_path; - let relative_path = source_path.strip_prefix(&source_directory).unwrap_or_else( - // Probably unreachable. - |_| error!("Path doesn't start with {:?}: {:?}", source_directory, source_path)); - let mut full_url = String::new(); - let mut components: Vec<_> = relative_path.components().collect(); - components.pop(); // Remove file segment, use the stripped name instead. - for c in components { - full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy())); - full_url.push('/') - }; - let parent_url = full_url.clone(); - full_url.push_str(&make_url_safe(&name)); - - - if extension == "md" { - // Rename and relocate index files. - let mut file_url = make_url_safe(&name); - if file_url == "+index" { - let components: Vec<_> = relative_path.components().collect(); - if components.len() == 1 { - name = String::from("Home"); - file_url = String::from("index"); - full_url = String::from("index"); - } else { - let parent = components[components.len()-2]; - let parent_string = parent.as_os_str().to_string_lossy().to_string(); - name = parent_string; - file_url = make_url_safe(&name); - full_url.clear(); - for c in &components[..components.len()-2] { - full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy())); - full_url.push('/') - }; - full_url.push_str(&file_url); - } - } - // Load and parse the markdown. - let markdown = std::fs::read_to_string(&source_path).unwrap(); - let document = MarkdownDocument::from_str(&markdown); - let headings = document.blocks.iter().filter_map(|block| match block { - Block::Heading { line, .. 
} => Some(make_url_safe(&line.to_string())), - _ => None, - }).collect(); - website.source_files.push( - SourceFile { name, parent_url, file_url, full_url, source_path, document, headings }); - } else { - full_url.push('.'); full_url.push_str(&extension); - website.static_files.push(StaticFile { full_url, source_path }); - } - } - Err(err) => error!("Could not read from source directory: {:?}", err), - } + let website = Website::from_path(&source_directory); let mut destination = destination_directory.clone(); destination.push(make_url_safe(&website.name)); - for source_file in &website.source_files { + if args.delete && Entry::from_path(&destination).is_ok() { + verbose!("Deleting existing destination directory {destination:?}"); + remove(&destination).unwrap_or_else(|_| + error!("Failed to delete existing destination directory {destination:?}")); + } + + for page in &website.pages { let mut destination = destination.clone(); - destination.push(&source_file.full_url); + destination.push(&page.full_url); // Convert document to different formats. if args.html { - let html = generate_html(&source_file.document, source_file, &website); + let html = generate_html(&page.document, page, &website); write_file(&html, &destination, "html"); } // Copy original markdown file. 
destination.add_extension("md"); verbose!("Copying original markdown file to {destination:?}"); - copy(&source_file.source_path, &destination).unwrap(); + copy(&page.source_path, &destination).unwrap_or_else(|_| + error!("Failed to copy original markdown file {:?} to {:?}", + page.source_path, destination)); } for static_file in &website.static_files { @@ -152,7 +85,19 @@ fn main() { destination.push(&static_file.full_url); verbose!("Copying static file to {destination:?}"); make_parent_directory(&destination).unwrap(); - copy(&static_file.source_path, &destination).unwrap(); + copy(&static_file.source_path, &destination).unwrap_or_else(|_| + error!("Failed to copy static file {:?} to {:?}", + static_file.source_path, destination)); + } + + for static_dir in &website.static_dirs { + let mut destination = destination.clone(); + destination.push(&static_dir.full_url); + verbose!("Copying static directory to {destination:?}"); + make_parent_directory(&destination).unwrap(); + copy(&static_dir.source_path, &destination).unwrap_or_else(|_| + error!("Failed to copy static directory {:?} to {:?}", + static_dir.source_path, destination)); } } @@ -162,8 +107,10 @@ pub fn write_file(text: &str, destination: &PathBuf, ext: &str) { let mut destination = destination.clone(); destination.add_extension(ext); verbose!("Generating {destination:?}"); - make_parent_directory(&destination).unwrap(); - write_to_file(destination, text).unwrap(); + make_parent_directory(&destination).unwrap_or_else(|_| + error!("Failed to create parent directories for {destination:?}")); + write_to_file(&destination, text).unwrap_or_else(|_| + error!("Failed to write generated {ext} file to {destination:?}")); } pub fn make_url_safe(text: &str) -> String { @@ -175,94 +122,6 @@ pub fn make_url_safe(text: &str) -> String { } -pub struct Website { - pub name: String, - pub source_files: Vec, - pub static_files: Vec, - pub error: bool, -} - -impl Website { - // Ext is extension without a dot. 
- // Checks if a relative link to an internal page name can be reached from - // the current page, and returns a resolved absolute link to the page with extension. - pub fn has_page(&self, from: &SourceFile, path: &str, ext: &str) -> Option { - // Remove heading fragment and file extension. - let (path, heading) = match path.rsplit_once('#') { - Some((path, heading)) => match heading.is_empty() { - true => (path, None), - false => (path, Some(heading)), - } - None => (path, None), - }; - let path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path); - - // Attach parent of current page to given path. - let directory = match from.parent_url.rsplit_once('/') { - Some((parent, _)) => parent, - None => &from.parent_url, - }; - let full_path = match path.starts_with("/") { - true => path.to_string(), - false => format!("{directory}/{path}"), - }; - - // Remove relative portions of path. - let segments: Vec<&str> = full_path.split("/") - .filter(|seg| !seg.is_empty() && *seg != ".") - .collect(); - let mut reduced_segments: Vec<&str> = segments.windows(2) - .filter(|w| w[1] != "..") - .map(|w| w[1]) - .collect(); - // The first segment is always skipped by the previous step. - if !segments.is_empty() && segments.get(1) != Some(&"..") { - if segments[0] != ".." 
{ - reduced_segments.insert(0, segments[0]); - } - } - let path = reduced_segments.join("/"); - - for source_file in &self.source_files { - if source_file.full_url == path { - if let Some(heading) = heading { - if !source_file.headings.contains(&make_url_safe(heading)) { - warn!("Page {:?} contains link to nonexistent heading {heading:?} on page {path:?}", from.name); - } - } - return Some(format!("{path}.{ext}")); - } - } - return None; - } -} - -pub struct SourceFile { - pub name: String, - pub parent_url: String, // URL base of child pages - pub file_url: String, // URL file segment, no extension - pub full_url: String, // URL full path, no extension - pub source_path: PathBuf, - pub document: MarkdownDocument, - pub headings: Vec, -} - -impl SourceFile { - pub fn back_string(&self) -> String { - let mut back = String::new(); - for c in self.full_url.chars() { - if c == '/' { - back.push_str("../"); - } - } - return back; - } -} - -pub struct StaticFile { - pub full_url: String, // URL full path, with extension - pub source_path: PathBuf, -} xflags::xflags! { /// Generate a website from a structured directory of markdown files. @@ -271,6 +130,8 @@ xflags::xflags! { optional source: PathBuf /// Path to output directory optional destination: PathBuf + /// Delete the destination directory first if it exists + optional --delete /// Generate HTML output optional --html /// Generate Gemtext output -- cgit v1.2.3-70-g09d2