summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBen Bridle <bridle.benjamin@gmail.com>2025-01-08 12:26:03 +1300
committerBen Bridle <bridle.benjamin@gmail.com>2025-01-08 12:26:31 +1300
commitb190726a783a4aae98fad6b8fcfb266cabc4eb79 (patch)
treed8c9fb60e9792c562f60f489a659a4004593b672 /src
parentaf2507508f7877eace40b119c2a6fab1aefc6bc2 (diff)
downloadtoaster-b190726a783a4aae98fad6b8fcfb266cabc4eb79.zip
More robust file traversal logic
File traversal can now cope with symbolic links. The original path to each file is now preserved, where previously paths were unintentionally canonicalized, so the source folder prefix can be correctly stripped from the path of each source file. This commit also adds the !folder syntax from the original toaster: directories with a !-prefix are copied without modification to the site root. A --delete option has also been added to the program; when this option is set, the destination directory is deleted first if it already exists.
Diffstat (limited to 'src')
-rw-r--r--src/collect_files.rs188
-rw-r--r--src/generate_html.rs8
-rw-r--r--src/main.rs207
3 files changed, 226 insertions, 177 deletions
diff --git a/src/collect_files.rs b/src/collect_files.rs
new file mode 100644
index 0000000..88d065f
--- /dev/null
+++ b/src/collect_files.rs
@@ -0,0 +1,188 @@
+use crate::*;
+
+use vagabond::*;
+
+
+pub struct Website {
+ pub name: String, // Name of the entry at the path given to from_path
+ pub pages: Vec<Page>, // One page per collected markdown (.md) file
+ pub static_files: Vec<StaticItem>, // Every collected file that isn't markdown
+ pub static_dirs: Vec<StaticItem>, // Directories with a '!' name prefix, recorded without descending into them
+}
+
+pub struct Page {
+ pub name: String, // Display name; "Home" for the site index, the directory name for a directory index
+ pub parent_url: String, // URL-safe parent directory path with a trailing '/', empty at the site root; base for relative links
+ pub file_url: String, // URL-safe file name, no extension
+ pub full_url: String, // URL-safe full URL, no extension; parent_url followed by file_url, except for index pages
+ pub source_path: PathBuf, // Original path to the source markdown file, as reported by its directory entry
+ pub document: MarkdownDocument, // File content parsed as markdown
+ pub headings: Vec<String>, // URL-safe name of each document heading
+}
+
+pub struct StaticItem {
+ pub full_url: String, // URL-safe destination URL; includes the extension for static files
+ pub source_path: PathBuf, // Original path to the source file, or to the source directory for a static directory
+}
+
+
+
+impl Page {
+ /// Builds a string containing one "../" for every '/' found in this
+ /// page's `full_url`.
+ pub fn back_string(&self) -> String {
+     let depth = self.full_url.matches('/').count();
+     "../".repeat(depth)
+ }
+}
+
+
+
+impl Website {
+ /// Builds a website by collecting every entry reachable from `path`.
+ ///
+ /// The website takes its name from the entry at `path`. If that entry
+ /// can't be opened, the failure is handed to the `error!` macro.
+ pub fn from_path(path: &Path) -> Self {
+     let name = Entry::from_path(path)
+         .unwrap_or_else(|err| error!("Couldn't open {:?}: {:?}", &path, err))
+         .name;
+     let mut website = Self {
+         name,
+         pages: Vec::new(),
+         static_files: Vec::new(),
+         static_dirs: Vec::new(),
+     };
+     // The source root is also the prefix stripped from every collected path.
+     website.collect_entry(path, path);
+     website
+ }
+
+ /// Recursively collects the entry at `path` into this website.
+ ///
+ /// `prefix` is the source root; it is stripped from each entry's path to
+ /// derive the entry's URL. Entries whose name starts with '.' are skipped,
+ /// directories whose name starts with '!' are recorded as static
+ /// directories without being descended into, markdown files are parsed
+ /// into pages, and every other file is recorded as a static file.
+ ///
+ /// An entry that can't be opened, listed or read is handed to the `error!`
+ /// macro with the offending path, in the same way as `from_path` does.
+ fn collect_entry(&mut self, path: &Path, prefix: &Path) {
+     let entry = Entry::from_path(path).unwrap_or_else(
+         |err| error!("Couldn't open {:?}: {:?}", path, err));
+     // Ignore dotted entries.
+     if entry.name.starts_with('.') {
+         return;
+     }
+     // Get name and extension, dropping a leading run of digits and dashes
+     // that is separated from the rest of the name by a space.
+     let (mut name, extension) = entry.split_name();
+     if let Some((ordinal, remainder)) = name.split_once(' ') {
+         if ordinal.chars().all(|c| "0123456789-".contains(c)) {
+             name = remainder.to_string();
+         }
+     }
+     let file_url = make_url_safe(&name);
+     // Generate parent URL, used only for files.
+     let source_path = entry.original_path.clone();
+     let relative_path = source_path.strip_prefix(prefix).unwrap_or_else(
+         |_| error!("Path doesn't start with {:?}: {:?}", prefix, source_path));
+     let mut parent_url = String::new();
+     let mut components: Vec<_> = relative_path.components().collect();
+     components.pop(); // Remove file segment.
+     for c in &components {
+         let segment = make_url_safe(&c.as_os_str().to_string_lossy());
+         parent_url.push_str(&segment);
+         parent_url.push('/');
+     }
+
+     // Process each entry.
+     if entry.is_directory() {
+         if let Some(stripped) = entry.name.strip_prefix("!") {
+             // A static directory keeps only its own name as its URL, so it
+             // ends up directly below the site root.
+             let full_url = make_url_safe(stripped);
+             self.static_dirs.push(StaticItem { full_url, source_path });
+         } else {
+             let children = list_directory(entry.original_path).unwrap_or_else(
+                 |err| error!("Couldn't list directory {:?}: {:?}", source_path, err));
+             for child in children {
+                 self.collect_entry(&child.original_path, prefix);
+             }
+         }
+     } else {
+         match extension.as_str() {
+             "md" => {
+                 let markdown = std::fs::read_to_string(&source_path).unwrap_or_else(
+                     |err| error!("Couldn't read {:?}: {:?}", source_path, err));
+                 let document = MarkdownDocument::from_str(&markdown);
+                 let headings = document.blocks.iter()
+                     .filter_map(|block| if let Block::Heading { line, .. } = block {
+                         Some(make_url_safe(&line.to_string()))
+                     } else {
+                         None
+                     }).collect();
+                 // Change name and path if this is an index file.
+                 let mut name = name;
+                 let mut file_url = file_url;
+                 let mut full_url = format!("{parent_url}{file_url}");
+                 if file_url == "+index" {
+                     if components.is_empty() {
+                         // This is the index file for the whole site.
+                         name = String::from("Home");
+                         file_url = String::from("index");
+                         full_url = String::from("index");
+                     } else {
+                         // This is an index file for a directory, so the page
+                         // takes over the name and URL of that directory.
+                         name = components[components.len()-1]
+                             .as_os_str().to_string_lossy().to_string();
+                         file_url = make_url_safe(&name);
+                         full_url = parent_url.strip_suffix('/').unwrap_or(&parent_url).to_string();
+                     }
+                 }
+                 self.pages.push(
+                     Page { name, parent_url, file_url, full_url, source_path, document, headings });
+             },
+             _ => {
+                 let full_url = format!("{parent_url}{file_url}.{extension}");
+                 self.static_files.push(StaticItem { full_url, source_path });
+             },
+         }
+     }
+ }
+
+ /// Resolves a link written on page `from` to a page of this website.
+ ///
+ /// `path` is the link target. It may end in a `#heading` fragment and in
+ /// `.{ext}`, both of which are removed before the lookup. A path starting
+ /// with '/' is resolved from the site root, any other path from the
+ /// directory of `from`. `ext` is the extension without a dot.
+ ///
+ /// Returns the resolved URL of the matching page with `.{ext}` appended
+ /// (the heading fragment is not included), or `None` if no page has that
+ /// URL. A warning is emitted if the link names a heading that the target
+ /// page doesn't contain.
+ pub fn has_page(&self, from: &Page, path: &str, ext: &str) -> Option<String> {
+     // Remove heading fragment and file extension.
+     let (path, heading) = match path.rsplit_once('#') {
+         Some((path, heading)) => match heading.is_empty() {
+             true => (path, None),
+             false => (path, Some(heading)),
+         }
+         None => (path, None),
+     };
+     let path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path);
+
+     // Attach parent of current page to given path.
+     let directory = match from.parent_url.rsplit_once('/') {
+         Some((parent, _)) => parent,
+         None => &from.parent_url,
+     };
+     let full_path = match path.starts_with("/") {
+         true => path.to_string(),
+         false => format!("{directory}/{path}"),
+     };
+
+     // Remove relative portions of path. Each ".." discards the nearest
+     // preceding segment that is still present, wherever it occurs in the
+     // path. A ".." with nothing left to discard is dropped, so a link can
+     // never climb above the site root.
+     let mut reduced_segments: Vec<&str> = Vec::new();
+     for segment in full_path.split('/') {
+         match segment {
+             "" | "." => (),
+             ".." => { reduced_segments.pop(); }
+             _ => reduced_segments.push(segment),
+         }
+     }
+     let path = reduced_segments.join("/");
+
+     // The first page with a matching URL wins.
+     let page = self.pages.iter().find(|page| page.full_url == path)?;
+     if let Some(heading) = heading {
+         if !page.headings.contains(&make_url_safe(heading)) {
+             warn!("Page {:?} contains link to nonexistent heading {heading:?} on page {path:?}", from.name);
+         }
+     }
+     Some(format!("{path}.{ext}"))
+ }
+}
+
diff --git a/src/generate_html.rs b/src/generate_html.rs
index f14d5f9..05456d0 100644
--- a/src/generate_html.rs
+++ b/src/generate_html.rs
@@ -3,7 +3,7 @@ use crate::*;
use markdown::*;
-pub fn generate_html(document: &MarkdownDocument, page: &SourceFile, website: &Website) -> String {
+pub fn generate_html(document: &MarkdownDocument, page: &Page, website: &Website) -> String {
format!("\
<!DOCTYPE html>
<head>
@@ -27,7 +27,7 @@ pub fn generate_html(document: &MarkdownDocument, page: &SourceFile, website: &W
-pub fn get_html_head(document: &MarkdownDocument, page: &SourceFile) -> String {
+pub fn get_html_head(document: &MarkdownDocument, page: &Page) -> String {
if let Some(Block::Fragment { language, content }) = document.blocks.first() {
if language == "embed-html-head" {
return content.to_string();
@@ -43,7 +43,7 @@ pub fn get_html_head(document: &MarkdownDocument, page: &SourceFile) -> String {
-pub fn document_to_html(document: &MarkdownDocument, page: &SourceFile, website: &Website) -> String {
+pub fn document_to_html(document: &MarkdownDocument, page: &Page, website: &Website) -> String {
let mut html = String::new();
macro_rules! line_to_html {
@@ -158,7 +158,7 @@ pub fn document_to_html(document: &MarkdownDocument, page: &SourceFile, website:
-fn line_to_html(line: &Line, page: &SourceFile, website: &Website) -> String {
+fn line_to_html(line: &Line, page: &Page, website: &Website) -> String {
let mut html = String::new();
for line_element in &line.tokens {
match line_element {
diff --git a/src/main.rs b/src/main.rs
index 0a62ec2..2950ee9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,7 @@
#![feature(path_add_extension)]
+mod collect_files;
+pub use collect_files::*;
mod generate_html;
pub use generate_html::*;
@@ -50,101 +52,32 @@ fn main() {
let source_directory = args.source.unwrap().canonicalize().unwrap();
let destination_directory = args.destination.unwrap();
- let mut website = Website {
- source_files: Vec::new(),
- static_files: Vec::new(),
- name: match Entry::from_path(&source_directory) {
- Ok(entry) => entry.name,
- Err(err) => error!("Couldn't open {:?}: {:?}", &source_directory, err),
- },
- error: false,
- };
-
- // Collect all website files.
- match traverse_directory(&source_directory) {
- Ok(entries) => for entry in entries {
- // Ignore dot files.
- if entry.name.starts_with(".") {
- continue;
- }
- // Generate name, stripping any leading digit sequence.
- let (mut name, extension) = entry.split_name();
- if let Some((prefix, suffix)) = name.split_once(' ') {
- if prefix.chars().all(|c| "0123456789-".contains(c)) {
- name = suffix.to_string();
- }
- }
- // Generate full URL with stripped name, no extension.
- let source_path = entry.original_path;
- let relative_path = source_path.strip_prefix(&source_directory).unwrap_or_else(
- // Probably unreachable.
- |_| error!("Path doesn't start with {:?}: {:?}", source_directory, source_path));
- let mut full_url = String::new();
- let mut components: Vec<_> = relative_path.components().collect();
- components.pop(); // Remove file segment, use the stripped name instead.
- for c in components {
- full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy()));
- full_url.push('/')
- };
- let parent_url = full_url.clone();
- full_url.push_str(&make_url_safe(&name));
-
-
- if extension == "md" {
- // Rename and relocate index files.
- let mut file_url = make_url_safe(&name);
- if file_url == "+index" {
- let components: Vec<_> = relative_path.components().collect();
- if components.len() == 1 {
- name = String::from("Home");
- file_url = String::from("index");
- full_url = String::from("index");
- } else {
- let parent = components[components.len()-2];
- let parent_string = parent.as_os_str().to_string_lossy().to_string();
- name = parent_string;
- file_url = make_url_safe(&name);
- full_url.clear();
- for c in &components[..components.len()-2] {
- full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy()));
- full_url.push('/')
- };
- full_url.push_str(&file_url);
- }
- }
- // Load and parse the markdown.
- let markdown = std::fs::read_to_string(&source_path).unwrap();
- let document = MarkdownDocument::from_str(&markdown);
- let headings = document.blocks.iter().filter_map(|block| match block {
- Block::Heading { line, .. } => Some(make_url_safe(&line.to_string())),
- _ => None,
- }).collect();
- website.source_files.push(
- SourceFile { name, parent_url, file_url, full_url, source_path, document, headings });
- } else {
- full_url.push('.'); full_url.push_str(&extension);
- website.static_files.push(StaticFile { full_url, source_path });
- }
- }
- Err(err) => error!("Could not read from source directory: {:?}", err),
- }
+ let website = Website::from_path(&source_directory);
let mut destination = destination_directory.clone();
destination.push(make_url_safe(&website.name));
- for source_file in &website.source_files {
+ if args.delete && Entry::from_path(&destination).is_ok() {
+ verbose!("Deleting existing destination directory {destination:?}");
+ remove(&destination).unwrap_or_else(|_|
+ error!("Failed to delete existing destination directory {destination:?}"));
+ }
+
+ for page in &website.pages {
let mut destination = destination.clone();
- destination.push(&source_file.full_url);
+ destination.push(&page.full_url);
// Convert document to different formats.
if args.html {
- let html = generate_html(&source_file.document, source_file, &website);
+ let html = generate_html(&page.document, page, &website);
write_file(&html, &destination, "html");
}
// Copy original markdown file.
destination.add_extension("md");
verbose!("Copying original markdown file to {destination:?}");
- copy(&source_file.source_path, &destination).unwrap();
+ copy(&page.source_path, &destination).unwrap_or_else(|_|
+ error!("Failed to copy original markdown file {:?} to {:?}",
+ page.source_path, destination));
}
for static_file in &website.static_files {
@@ -152,7 +85,19 @@ fn main() {
destination.push(&static_file.full_url);
verbose!("Copying static file to {destination:?}");
make_parent_directory(&destination).unwrap();
- copy(&static_file.source_path, &destination).unwrap();
+ copy(&static_file.source_path, &destination).unwrap_or_else(|_|
+ error!("Failed to copy static file {:?} to {:?}",
+ static_file.source_path, destination));
+ }
+
+ for static_dir in &website.static_dirs {
+ let mut destination = destination.clone();
+ destination.push(&static_dir.full_url);
+ verbose!("Copying static directory to {destination:?}");
+ make_parent_directory(&destination).unwrap();
+ copy(&static_dir.source_path, &destination).unwrap_or_else(|_|
+ error!("Failed to copy static directory {:?} to {:?}",
+ static_dir.source_path, destination));
}
}
@@ -162,8 +107,10 @@ pub fn write_file(text: &str, destination: &PathBuf, ext: &str) {
let mut destination = destination.clone();
destination.add_extension(ext);
verbose!("Generating {destination:?}");
- make_parent_directory(&destination).unwrap();
- write_to_file(destination, text).unwrap();
+ make_parent_directory(&destination).unwrap_or_else(|_|
+ error!("Failed to create parent directories for {destination:?}"));
+ write_to_file(&destination, text).unwrap_or_else(|_|
+ error!("Failed to write generated {ext} file to {destination:?}"));
}
pub fn make_url_safe(text: &str) -> String {
@@ -175,94 +122,6 @@ pub fn make_url_safe(text: &str) -> String {
}
-pub struct Website {
- pub name: String,
- pub source_files: Vec<SourceFile>,
- pub static_files: Vec<StaticFile>,
- pub error: bool,
-}
-
-impl Website {
- // Ext is extension without a dot.
- // Checks if a relative link to an internal page name can be reached from
- // the current page, and returns a resolved absolute link to the page with extension.
- pub fn has_page(&self, from: &SourceFile, path: &str, ext: &str) -> Option<String> {
- // Remove heading fragment and file extension.
- let (path, heading) = match path.rsplit_once('#') {
- Some((path, heading)) => match heading.is_empty() {
- true => (path, None),
- false => (path, Some(heading)),
- }
- None => (path, None),
- };
- let path = path.strip_suffix(&format!(".{ext}")).unwrap_or(path);
-
- // Attach parent of current page to given path.
- let directory = match from.parent_url.rsplit_once('/') {
- Some((parent, _)) => parent,
- None => &from.parent_url,
- };
- let full_path = match path.starts_with("/") {
- true => path.to_string(),
- false => format!("{directory}/{path}"),
- };
-
- // Remove relative portions of path.
- let segments: Vec<&str> = full_path.split("/")
- .filter(|seg| !seg.is_empty() && *seg != ".")
- .collect();
- let mut reduced_segments: Vec<&str> = segments.windows(2)
- .filter(|w| w[1] != "..")
- .map(|w| w[1])
- .collect();
- // The first segment is always skipped by the previous step.
- if !segments.is_empty() && segments.get(1) != Some(&"..") {
- if segments[0] != ".." {
- reduced_segments.insert(0, segments[0]);
- }
- }
- let path = reduced_segments.join("/");
-
- for source_file in &self.source_files {
- if source_file.full_url == path {
- if let Some(heading) = heading {
- if !source_file.headings.contains(&make_url_safe(heading)) {
- warn!("Page {:?} contains link to nonexistent heading {heading:?} on page {path:?}", from.name);
- }
- }
- return Some(format!("{path}.{ext}"));
- }
- }
- return None;
- }
-}
-
-pub struct SourceFile {
- pub name: String,
- pub parent_url: String, // URL base of child pages
- pub file_url: String, // URL file segment, no extension
- pub full_url: String, // URL full path, no extension
- pub source_path: PathBuf,
- pub document: MarkdownDocument,
- pub headings: Vec<String>,
-}
-
-impl SourceFile {
- pub fn back_string(&self) -> String {
- let mut back = String::new();
- for c in self.full_url.chars() {
- if c == '/' {
- back.push_str("../");
- }
- }
- return back;
- }
-}
-
-pub struct StaticFile {
- pub full_url: String, // URL full path, with extension
- pub source_path: PathBuf,
-}
xflags::xflags! {
/// Generate a website from a structured directory of markdown files.
@@ -271,6 +130,8 @@ xflags::xflags! {
optional source: PathBuf
/// Path to output directory
optional destination: PathBuf
+ /// Delete the destination directory first if it exists
+ optional --delete
/// Generate HTML output
optional --html
/// Generate Gemtext output