summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ben Bridle <bridle.benjamin@gmail.com> 2026-02-06 14:55:05 +1300
committer: Ben Bridle <bridle.benjamin@gmail.com> 2026-02-06 14:55:05 +1300
commit: 8c2ac6d92f6a4579591f748eebcbca2b9913d92d (patch)
tree: b93f4fed5676f292e5070d11e2817655516fb355
parent: dc985df5fe8c748e05181a8f5062eba3f9a2b64a (diff)
download: toaster-8c2ac6d92f6a4579591f748eebcbca2b9913d92d.zip
Move string utilities to separate module
This makes things tidier.
-rw-r--r--  src/generate_html.rs  52
-rw-r--r--  src/main.rs           24
-rw-r--r--  src/string_utils.rs   72
3 files changed, 74 insertions, 74 deletions
diff --git a/src/generate_html.rs b/src/generate_html.rs
index b635767..5526a06 100644
--- a/src/generate_html.rs
+++ b/src/generate_html.rs
@@ -467,58 +467,6 @@ fn parse_external_link(label: &str, path: &str, page: &Page, website: &Website)
ParsedLink { path, class: "external", label }
}
-
-/// Replace each HTML-reserved character with an HTML-escaped character.
-fn sanitize_text(text: &str, fancy: bool) -> String {
- let mut output = String::new();
- let chars: Vec<char> = text.chars().collect();
- for (i, c) in chars.iter().enumerate() {
- let prev = match i > 0 {
- true => chars[i - 1],
- false => ' ',
- };
- let next = match i + 1 < chars.len() {
- true => chars[i + 1],
- false => ' ',
- };
- let is_whitespace = |c: char| c.is_whitespace() || "()[].,".contains(c);
-
- match c {
- '&' => {
- // The HTML syntax for unicode characters is &#0000
- if let Some('#') = chars.get(i+1) { output.push(*c) }
- else { output.push_str("&amp;") }
- },
- '<' => output.push_str("&lt;"),
- '>' => output.push_str("&gt;"),
- '"' => match fancy {
- true => match is_whitespace(prev) {
- true => output.push('“'),
- false => output.push('”'),
- }
- false => output.push_str("&#34;"),
- },
- '\'' => match fancy {
- true => match is_whitespace(prev) {
- true => output.push('‘'),
- false => output.push('’'),
- }
- false => output.push_str("&#39;"),
- },
- '-' if fancy => match prev.is_whitespace() && next.is_whitespace() {
- true => match i > 0 {
- true => output.push('—'), // em-dash, for mid-sentence
- false => output.push('–'), // en-dash, for start of line
- }
- false => output.push('-'), // regular dash, for mid-word
- }
- _ => output.push(*c),
- }
- }
- return output;
-}
-
-
/// Remove a 'Appendix #: ' prefix from a string.
pub fn strip_appendix(text: &str) -> &str {
if let Some((prefix, name)) = text.split_once(": ") {
diff --git a/src/main.rs b/src/main.rs
index 33fe16d..32b0ab9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,8 +2,10 @@
mod collect_files;
mod generate_html;
+mod string_utils;
pub use collect_files::*;
pub use generate_html::*;
+pub use string_utils::*;
use markdown::*;
use vagabond::*;
@@ -165,25 +167,3 @@ pub fn write_file(text: &str, destination: &PathBuf, ext: &str, last_modified: O
}
}
}
-
-// Turn a string into a tidy URL slug.
-pub fn make_url_safe(text: &str) -> String {
- text.to_ascii_lowercase().chars().filter_map(|c|
- if c.is_alphanumeric() || "-_~.+/#".contains(c) { Some(c) }
- else if c == ' ' { Some('-') }
- else { None } )
- .collect()
-}
-
-// Prevent link hrefs from breaking out of quotations.
-pub fn url_encode(text: &str) -> String {
- let mut output = String::new();
- for c in text.chars() {
- match c {
- '"' => output.push_str("%22"),
- '\'' => output.push_str("%27"),
- _ => output.push(c),
- }
- }
- return output;
-}
diff --git a/src/string_utils.rs b/src/string_utils.rs
new file mode 100644
index 0000000..b23c349
--- /dev/null
+++ b/src/string_utils.rs
@@ -0,0 +1,72 @@
+
+// Turn a string into a tidy URL slug.
+pub fn make_url_safe(text: &str) -> String {
+ text.to_ascii_lowercase().chars().filter_map(|c|
+ if c.is_alphanumeric() || "-_~.+/#".contains(c) { Some(c) }
+ else if c == ' ' { Some('-') }
+ else { None } )
+ .collect()
+}
+
+// Prevent link hrefs from breaking out of quotations.
+pub fn url_encode(text: &str) -> String {
+ let mut output = String::new();
+ for c in text.chars() {
+ match c {
+ '"' => output.push_str("%22"),
+ '\'' => output.push_str("%27"),
+ _ => output.push(c),
+ }
+ }
+ return output;
+}
+
+/// Replace each HTML-reserved character with an HTML-escaped character.
+pub fn sanitize_text(text: &str, fancy: bool) -> String {
+ let mut output = String::new();
+ let chars: Vec<char> = text.chars().collect();
+ for (i, c) in chars.iter().enumerate() {
+ let prev = match i > 0 {
+ true => chars[i - 1],
+ false => ' ',
+ };
+ let next = match i + 1 < chars.len() {
+ true => chars[i + 1],
+ false => ' ',
+ };
+ let is_whitespace = |c: char| c.is_whitespace() || "()[].,".contains(c);
+
+ match c {
+ '&' => {
+ // The HTML syntax for unicode characters is &#0000
+ if let Some('#') = chars.get(i+1) { output.push(*c) }
+ else { output.push_str("&amp;") }
+ },
+ '<' => output.push_str("&lt;"),
+ '>' => output.push_str("&gt;"),
+ '"' => match fancy {
+ true => match is_whitespace(prev) {
+ true => output.push('“'),
+ false => output.push('”'),
+ }
+ false => output.push_str("&#34;"),
+ },
+ '\'' => match fancy {
+ true => match is_whitespace(prev) {
+ true => output.push('‘'),
+ false => output.push('’'),
+ }
+ false => output.push_str("&#39;"),
+ },
+ '-' if fancy => match prev.is_whitespace() && next.is_whitespace() {
+ true => match i > 0 {
+ true => output.push('—'), // em-dash, for mid-sentence
+ false => output.push('–'), // en-dash, for start of line
+ }
+ false => output.push('-'), // regular dash, for mid-word
+ }
+ _ => output.push(*c),
+ }
+ }
+ return output;
+}