From 8c2ac6d92f6a4579591f748eebcbca2b9913d92d Mon Sep 17 00:00:00 2001 From: Ben Bridle Date: Fri, 6 Feb 2026 14:55:05 +1300 Subject: Move string utilities to separate module This makes things tidier. --- src/generate_html.rs | 52 ------------------------------------- src/main.rs | 24 ++---------------- src/string_utils.rs | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 74 deletions(-) create mode 100644 src/string_utils.rs diff --git a/src/generate_html.rs b/src/generate_html.rs index b635767..5526a06 100644 --- a/src/generate_html.rs +++ b/src/generate_html.rs @@ -467,58 +467,6 @@ fn parse_external_link(label: &str, path: &str, page: &Page, website: &Website) ParsedLink { path, class: "external", label } } - -/// Replace each HTML-reserved character with an HTML-escaped character. -fn sanitize_text(text: &str, fancy: bool) -> String { - let mut output = String::new(); - let chars: Vec = text.chars().collect(); - for (i, c) in chars.iter().enumerate() { - let prev = match i > 0 { - true => chars[i - 1], - false => ' ', - }; - let next = match i + 1 < chars.len() { - true => chars[i + 1], - false => ' ', - }; - let is_whitespace = |c: char| c.is_whitespace() || "()[].,".contains(c); - - match c { - '&' => { - // The HTML syntax for unicode characters is � - if let Some('#') = chars.get(i+1) { output.push(*c) } - else { output.push_str("&") } - }, - '<' => output.push_str("<"), - '>' => output.push_str(">"), - '"' => match fancy { - true => match is_whitespace(prev) { - true => output.push('“'), - false => output.push('”'), - } - false => output.push_str("""), - }, - '\'' => match fancy { - true => match is_whitespace(prev) { - true => output.push('‘'), - false => output.push('’'), - } - false => output.push_str("'"), - }, - '-' if fancy => match prev.is_whitespace() && next.is_whitespace() { - true => match i > 0 { - true => output.push('—'), // em-dash, for mid-sentence - false => output.push('–'), // en-dash, for start of line - } - false => output.push('-'), // regular dash, for mid-word - } - _ => output.push(*c), - } - } - return output; -} - - /// Remove a 'Appendix #: ' prefix from a string. pub fn strip_appendix(text: &str) -> &str { if let Some((prefix, name)) = text.split_once(": ") { diff --git a/src/main.rs b/src/main.rs index 33fe16d..32b0ab9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,8 +2,10 @@ mod collect_files; mod generate_html; +mod string_utils; pub use collect_files::*; pub use generate_html::*; +pub use string_utils::*; use markdown::*; use vagabond::*; @@ -165,25 +167,3 @@ pub fn write_file(text: &str, destination: &PathBuf, ext: &str, last_modified: O } } } - -// Turn a string into a tidy URL slug. -pub fn make_url_safe(text: &str) -> String { - text.to_ascii_lowercase().chars().filter_map(|c| - if c.is_alphanumeric() || "-_~.+/#".contains(c) { Some(c) } - else if c == ' ' { Some('-') } - else { None } ) - .collect() -} - -// Prevent link hrefs from breaking out of quotations. -pub fn url_encode(text: &str) -> String { - let mut output = String::new(); - for c in text.chars() { - match c { - '"' => output.push_str("%22"), - '\'' => output.push_str("%27"), - _ => output.push(c), - } - } - return output; -} diff --git a/src/string_utils.rs b/src/string_utils.rs new file mode 100644 index 0000000..b23c349 --- /dev/null +++ b/src/string_utils.rs @@ -0,0 +1,72 @@ + +// Turn a string into a tidy URL slug. +pub fn make_url_safe(text: &str) -> String { + text.to_ascii_lowercase().chars().filter_map(|c| + if c.is_alphanumeric() || "-_~.+/#".contains(c) { Some(c) } + else if c == ' ' { Some('-') } + else { None } ) + .collect() +} + +// Prevent link hrefs from breaking out of quotations. +pub fn url_encode(text: &str) -> String { + let mut output = String::new(); + for c in text.chars() { + match c { + '"' => output.push_str("%22"), + '\'' => output.push_str("%27"), + _ => output.push(c), + } + } + return output; +} + +/// Replace each HTML-reserved character with an HTML-escaped character. +pub fn sanitize_text(text: &str, fancy: bool) -> String { + let mut output = String::new(); + let chars: Vec = text.chars().collect(); + for (i, c) in chars.iter().enumerate() { + let prev = match i > 0 { + true => chars[i - 1], + false => ' ', + }; + let next = match i + 1 < chars.len() { + true => chars[i + 1], + false => ' ', + }; + let is_whitespace = |c: char| c.is_whitespace() || "()[].,".contains(c); + + match c { + '&' => { + // The HTML syntax for unicode characters is � + if let Some('#') = chars.get(i+1) { output.push(*c) } + else { output.push_str("&") } + }, + '<' => output.push_str("<"), + '>' => output.push_str(">"), + '"' => match fancy { + true => match is_whitespace(prev) { + true => output.push('“'), + false => output.push('”'), + } + false => output.push_str("""), + }, + '\'' => match fancy { + true => match is_whitespace(prev) { + true => output.push('‘'), + false => output.push('’'), + } + false => output.push_str("'"), + }, + '-' if fancy => match prev.is_whitespace() && next.is_whitespace() { + true => match i > 0 { + true => output.push('—'), // em-dash, for mid-sentence + false => output.push('–'), // en-dash, for start of line + } + false => output.push('-'), // regular dash, for mid-word + } + _ => output.push(*c), + } + } + return output; +} -- cgit v1.2.3-70-g09d2