diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/generate_html.rs | 244 | ||||
-rw-r--r-- | src/main.rs | 197 |
2 files changed, 441 insertions, 0 deletions
diff --git a/src/generate_html.rs b/src/generate_html.rs new file mode 100644 index 0000000..3f22d3b --- /dev/null +++ b/src/generate_html.rs @@ -0,0 +1,244 @@ +use crate::*; + +use markdown::*; + + +pub fn generate_html(document: &MarkdownDocument, page: &SourceFile, website: &Website) -> String { + format!("\ +<!DOCTYPE html> +<head> +<title>{} — {}</title> +<meta charset='UTF-8'> +<meta name='viewport' content='width=device-width, initial-scale=1'> +{} +</head> +<body> +<main> +{} +</main> +</body> +</html> \ +", + page.name, website.name, + get_html_head(document), + document_to_html(document, page, website) + ) +} + + + +pub fn get_html_head(document: &MarkdownDocument) -> String { + if let Some(Block::Fragment { language, content }) = document.blocks.first() { + if language == "embed-html-head" { + return content.to_string(); + } + } + String::from("\ +<link rel='stylesheet' type='text/css' media='screen' href='static/screen.css'> +<link rel='stylesheet' type='text/css' media='print' href='static/print.css'> +<script src='static/render_math.js' defer></script> \ + ") +} + + + +pub fn document_to_html(document: &MarkdownDocument, page: &SourceFile, website: &Website) -> String { + let mut html = String::new(); + + macro_rules! line_to_html { + ($l:expr) => {{ line_to_html(&$l, page, website) }}; } + macro_rules! html { + ($($arg:tt)*) => {{ html.push_str(&format!($($arg)*)); html.push('\n'); }}; } + macro_rules! tag { + ($t:expr,$l:expr) => { html!("<{}>{}</{}>", $t, line_to_html!($l), $t) }; } + macro_rules! wrap { + ($t:expr,$f:expr) => {{ html!("<{}>", $t); $f; html!("</{}>", $t); }}; + } + + for block in &document.blocks { + match block { + Block::Heading { level, line } => match level { + Level::Heading1 => tag!("h1", line), + Level::Heading2 => tag!("h2", line), + Level::Heading3 => tag!("h3", line), + } + Block::Paragraph(line) => tag!("p", line), + Block::List(lines) => wrap!("ul", for line in lines { + // Insert a <br> tag directly after the first untagged colon. + let mut depth = 0; + let mut prev = '\0'; + let mut output = String::new(); + for c in line_to_html!(line).chars() { + output.push(c); + if c == '<' { + depth += 1; + } else if c == '/' && prev == '<' { + depth -= 2; + } else if c == ':' && depth == 0 { + output.pop(); output.push_str("<br>"); depth += 99; + } + prev = c; + } + match output.contains("<br>") { + true => html!("<li class='extended'>{output}</li>"), + false => html!("<li>{output}</li>"), + } + }), + Block::Note(lines) => wrap!("aside", for line in lines { tag!("p", line) }), + Block::Embedded { label, path } => match path.rsplit_once('.') { + Some((_, extension)) => match extension.to_lowercase().as_str() { + "jpg"|"jpeg"|"png"|"webp"|"gif"|"tiff" => html!( + "<figure><a href='{}'><img src='{}' alt='{}' title='{}'></a></figure>", + path, path, label, label + ), + "mp3"|"wav"|"m4a" => html!("<audio src='{path}' controls>"), + ext @ _ => error!("Unrecognised extension for embedded file '{path}' with extension '{ext}'"), + } + _ => error!("Cannot embed file '{path}' with no file extension"), + } + Block::Fragment { language, content } => { + match language.as_str() { + "embed-html" => html!("{}", content), + "embed-css" => wrap!("style", html!("{}", content)), + "embed-javascript"|"embed-js" => wrap!("script", html!("{}", content)), + "hidden"|"todo"|"embed-html-head" => (), + _ => { + html!("<pre class='{}'>", language); + html!("{}", sanitize_text(content)); + html!("</pre>"); + }, + } + } + Block::Break => html!("<hr>"), + Block::Table(table) => wrap!("table", { + wrap!("thead", + wrap!("tr", for column in &table.columns { + tag!("th", column.name); + }) + ); + wrap!("tbody", for section in &table.sections { + for row in section { + wrap!("tr", for (column, cell) in std::iter::zip(&table.columns, row) { + let text_raw = line_to_html!(cell); + let text = match text_raw.as_str() { + "Yes" => "✓", + "No" => "✗", + other => other, + }; + let align = match text { + "--" => "c", + _ => match column.alignment { + Alignment::Left => "l", + Alignment::Center => "c", + Alignment::Right => "r", + }, + }; + let class = match ["No", "--", "0"].contains(&text_raw.as_str()) { + true => format!("{align} dim"), + false => format!("{align}"), + }; + html!("<td class='{}'>{}</td>", class, text); + }) + } + }); + }) + } + } + return html; +} + + + +fn line_to_html(line: &Line, page: &SourceFile, website: &Website) -> String { + let mut html = String::new(); + for line_element in &line.tokens { + match line_element { + Token::Normal(text) => { + let text = &sanitize_text(text); html.push_str(text) } + Token::Bold(text) => { + let text = &sanitize_text(text); html.push_str(&format!("<b>{text}</b>")) } + Token::Italic(text) => { + let text = &sanitize_text(text); html.push_str(&format!("<i>{text}</i>")) } + Token::Monospace(text) => { + let text = &sanitize_text(text); html.push_str(&format!("<code>{text}</code>")) } + Token::Math(text) => { + let text = &sanitize_text(text); html.push_str(&format!("<span class='math'>{text}</span>")) } + Token::InternalLink(path) => { + let (label, class, path) = match path.split_once('#') { + Some(("", section)) => (section, "heading", format!("#{}", make_url_safe(path))), + Some((page, section)) => (section, "page", format!("{}.html#{}", make_url_safe(page), make_url_safe(section))), + _ => (path.as_str(), "page", format!("{}.html", make_url_safe(path))), + }; + let full_label = sanitize_text(label); + let label = match full_label.split_once('/') { + Some((_parent, label)) => label.trim(), + None => &full_label, + }; + // Check that the linked internal page exists. + if class == "page" { + let path_no_ext = path.strip_suffix(".html").unwrap(); + if !website.has_page(&path_no_ext) { + error!("Page {:?} contains invalid link to {:?}", page.name, path_no_ext); + } + } + // Return to the site root before descending into a link. + let mut back = String::new(); + let levels = page.full_url.chars().filter(|c| *c == '/').count(); + for _ in 0..levels { back.push_str("../") } + html.push_str(&format!("<a href='{back}{path}' class='{class}'>{label}</a>")) + } + Token::ExternalLink { label, path } => { + let is_internal = path.find("/").is_none(); + let (new_label, class, path) = match (is_internal, path.split_once("#")) { + (true, Some(("", frag))) => (sanitize_text(frag), "heading", format!("#{}", make_url_safe(frag)) ), + (true, Some((page, frag))) => (sanitize_text(frag), "page", format!("{}.html#{}", make_url_safe(page), make_url_safe(frag)) ), + (true, None) => (sanitize_text(path), "page", if path.contains(".") { path.clone() } else { format!("{}.html", make_url_safe(path)) } ), + (false, _) => (sanitize_text(path), "external", path.clone() ) }; + let label = match label.is_empty() { true => new_label, false => sanitize_text(label) }; + html.push_str(&format!("<a href='{path}' class='{class}'>{label}</a>")); + } + } + } + return html; +} + + + +/// Replace each HTML-reserved character with an HTML-escaped character. +fn sanitize_text(text: &str) -> String { + let mut output = String::new(); + let chars: Vec<char> = text.chars().collect(); + for (i, c) in chars.iter().enumerate() { + let prev = match i > 0 { + true => chars[i - 1], + false => ' ', + }; + let next = match i + 1 < chars.len() { + true => chars[i + 1], + false => ' ', + }; + match c { + '&' => { + // The HTML syntax for unicode characters is � + if let Some('#') = chars.get(i+1) { output.push(*c) } + else { output.push_str("&") } + }, + '<' => output.push_str("<"), + '>' => output.push_str(">"), + '"' => match prev.is_whitespace() { + true => output.push('“'), + false => output.push('”'), + }, + '\'' => match prev.is_whitespace() { + true => output.push('‘'), + false => output.push('’'), + }, + '-' => match prev.is_whitespace() && next.is_whitespace() { + true => output.push('—'), + false => output.push('-'), + } + _ => output.push(*c), + } + } + return output; +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..d8c9274 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,197 @@ +#![feature(path_add_extension)] + +mod generate_html; +pub use generate_html::*; + +use markdown::*; +use vagabond::*; + + +const NORMAL: &str = "\x1b[0m"; +const BOLD: &str = "\x1b[1m"; +const WHITE: &str = "\x1b[37m"; +const RED: &str = "\x1b[31m"; +const BLUE: &str = "\x1b[34m"; + +static mut VERBOSE: bool = false; +#[macro_export] macro_rules! verbose { + ($($tokens:tt)*) => { if unsafe { VERBOSE } { + eprint!("{BOLD}{BLUE}[INFO]{NORMAL}: "); eprint!($($tokens)*); + eprintln!("{NORMAL}"); + } }; +} +#[macro_export] macro_rules! error { + ($($tokens:tt)*) => {{ + eprint!("{BOLD}{RED}[ERROR]{WHITE}: "); eprint!($($tokens)*); + eprintln!("{NORMAL}"); std::process::exit(1); + }}; +} + +fn main() { + let args = Arguments::from_env_or_exit(); + if args.version { + let version = env!("CARGO_PKG_VERSION"); + eprintln!("Markdown website generator, version {version}"); + std::process::exit(0); + } + if args.verbose { + unsafe { VERBOSE = true; } + } + + let mut website = Website { + source_files: Vec::new(), + static_files: Vec::new(), + name: match Entry::from_path(&args.source) { + Ok(entry) => entry.name, + Err(err) => error!("Couldn't open {:?}: {:?}", args.source, err), + }, + error: false, + }; + + // Collect all website files. + match traverse_directory(&args.source) { + Ok(entries) => for entry in entries { + // Generate name, stripping any leading digit sequence. + let (mut name, extension) = entry.split_name(); + if let Some((prefix, suffix)) = name.split_once(' ') { + if prefix.chars().all(|c| "0123456789-".contains(c)) { + name = suffix.to_string(); + } + } + // Generate full URL with stripped name, no extension. + let source_path = entry.original_path; + let relative_path = source_path.strip_prefix(&args.source).unwrap_or_else( + // Probably unreachable. + |_| error!("Path doesn't start with {:?}: {:?}", args.source, source_path)); + let mut full_url = String::new(); + let mut components: Vec<_> = relative_path.components().collect(); + components.pop(); // Remove file segment, use the stripped name instead. + for c in components { + full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy())); + full_url.push('/') + }; + full_url.push_str(&make_url_safe(&name)); + + + if extension == "md" { + let mut file_url = make_url_safe(&name); + if file_url == "+index" { + let components: Vec<_> = relative_path.components().collect(); + if components.len() == 1 { + name = String::from("Home"); + file_url = String::from("index"); + full_url = String::from("index"); + } else { + let parent = components[components.len()-2]; + let parent_string = parent.as_os_str().to_string_lossy().to_string(); + name = parent_string; + file_url = make_url_safe(&name); + full_url.clear(); + for c in &components[..components.len()-2] { + full_url.push_str(&make_url_safe(&c.as_os_str().to_string_lossy())); + full_url.push('/') + }; + full_url.push_str(&file_url); + } + } + website.source_files.push(SourceFile { name, file_url, full_url, source_path }); + } else { + full_url.push('.'); full_url.push_str(&extension); + website.static_files.push(StaticFile { full_url, source_path }); + } + } + Err(err) => error!("Could not read from source directory: {:?}", err), + } + + let mut destination = args.destination.clone(); + destination.push(make_url_safe(&website.name)); + + for source_file in &website.source_files { + let markdown = std::fs::read_to_string(&source_file.source_path).unwrap(); + let document = MarkdownDocument::from_str(&markdown); + let mut destination = destination.clone(); + destination.push(&source_file.full_url); + // Convert document to different formats. + if args.html { + let html = generate_html(&document, source_file, &website); + write_file(&html, &destination, "html"); + } + // Copy original markdown file. + write_file(&markdown, &destination, "md"); + } + + for static_file in &website.static_files { + let mut destination = destination.clone(); + destination.push(&static_file.full_url); + verbose!("Copying static file to {destination:?}"); + make_parent_directory(&destination).unwrap(); + copy(&static_file.source_path, &destination).unwrap(); + } +} + + + +pub fn write_file(text: &str, destination: &PathBuf, ext: &str) { + let mut destination = destination.clone(); + destination.add_extension(ext); + verbose!("Generating {destination:?}"); + make_parent_directory(&destination).unwrap(); + write_to_file(destination, text).unwrap(); +} + +pub fn make_url_safe(text: &str) -> String { + text.to_ascii_lowercase().chars().filter_map(|c| + if c.is_alphanumeric() || "-_~.+/".contains(c) { Some(c) } + else if c == ' ' { Some('-') } + else { None } ) + .collect() +} + + +pub struct Website { + pub name: String, + pub source_files: Vec<SourceFile>, + pub static_files: Vec<StaticFile>, + pub error: bool, +} + +impl Website { + pub fn has_page(&self, path: &str) -> bool { + for source_file in &self.source_files { + if source_file.full_url == path { + return true; + } + } + return false; + } +} + +pub struct SourceFile { + pub name: String, + pub file_url: String, // URL file segment, no extension + pub full_url: String, // URL full path, no extension + pub source_path: PathBuf, +} + +pub struct StaticFile { + pub full_url: String, // URL full path, with extension + pub source_path: PathBuf, +} + +xflags::xflags! { + /// Generate a website from a structured directory of markdown files. + cmd arguments { + /// Source directory with markdown files + required source: PathBuf + /// Path to output directory + required destination: PathBuf + /// Generate HTML output + optional --html + /// Generate Gemtext output + optional --gmi + /// Print information as each file is parsed + optional -v, --verbose + /// Print the program version and exit + optional --version + } +} |