1 files changed, 72 insertions, 0 deletions
diff --git a/src/string_utils.rs b/src/string_utils.rs
new file mode 100644
index 0000000..b23c349
--- /dev/null
+++ b/src/string_utils.rs
@@ -0,0 +1,72 @@
+
+/// Build a URL slug from `text`: ASCII letters are lowercased, each space
+/// becomes `-`, and every other character that is neither alphanumeric (in
+/// the Unicode sense) nor one of `-_~.+/#` is dropped.
+pub fn make_url_safe(text: &str) -> String {
+    let allowed = |c: char| c.is_alphanumeric() || "-_~.+/#".contains(c);
+    let lowered = text.chars().map(|c| c.to_ascii_lowercase());
+    lowered.filter_map(|c| if c == ' ' { Some('-') } else { allowed(c).then_some(c) }).collect()
+}
+
+/// Percent-encode `"` (as `%22`) and `'` (as `%27`) so `text` cannot break out
+/// of a quoted `href`; every other character is copied through unchanged.
+pub fn url_encode(text: &str) -> String {
+    text.chars().fold(String::with_capacity(text.len()), |mut encoded, c| {
+        match c {
+            '"' => encoded.push_str("%22"),
+            '\'' => encoded.push_str("%27"),
+            _ => encoded.push(c),
+        }
+        encoded
+    })
+}
+
+/// Escape HTML-reserved characters in `text`, optionally applying typography.
+///
+/// `&`, `<` and `>` become `&amp;`, `&lt;` and `&gt;`, except that an `&`
+/// immediately followed by `#` is kept so numeric references such as `&#8212;`
+/// survive. With `fancy == false`, `"` and `'` become `&#34;` and `&#39;`.
+///
+/// With `fancy == true`, quotes are replaced by curly quotes and a `-` that
+/// has whitespace (or a text boundary) on both sides becomes an em-dash, or an
+/// en-dash when it is the first character of `text`.
+///
+/// A quote opens when it follows whitespace, the start of `text`, `(` or `[`.
+/// After `)`, `]`, `.` or `,` it opens only if more text follows directly, so
+/// `"a","b"` still works while the closing quote in `"Done."` is not rendered
+/// as an opening one. In every other position the quote closes.
+pub fn sanitize_text(text: &str, fancy: bool) -> String {
+    let chars: Vec<char> = text.chars().collect();
+    let mut output = String::with_capacity(text.len());
+    // Whitespace or punctuation that may sit directly next to a quote.
+    let is_boundary = |c: char| c.is_whitespace() || "()[].,".contains(c);
+
+    for (i, &c) in chars.iter().enumerate() {
+        // Out-of-range neighbours behave like a space.
+        let prev = if i > 0 { chars[i - 1] } else { ' ' };
+        let next = chars.get(i + 1).copied().unwrap_or(' ');
+        let opens_quote = prev.is_whitespace()
+            || "([".contains(prev)
+            || (is_boundary(prev) && !is_boundary(next));
+
+        match c {
+            // Leave `&#...` alone: it starts a numeric character reference.
+            '&' if next == '#' => output.push('&'),
+            '&' => output.push_str("&amp;"),
+            '<' => output.push_str("&lt;"),
+            '>' => output.push_str("&gt;"),
+            '"' if !fancy => output.push_str("&#34;"),
+            '\'' if !fancy => output.push_str("&#39;"),
+            // The two arms below are only reached when `fancy` is set.
+            '"' => output.push(if opens_quote { '“' } else { '”' }),
+            '\'' => output.push(if opens_quote { '‘' } else { '’' }),
+            '-' if fancy && prev.is_whitespace() && next.is_whitespace() => {
+                // En-dash for a dash that starts the text, em-dash otherwise.
+                output.push(if i > 0 { '—' } else { '–' })
+            }
+            // Everything else, including a mid-word `-`, passes through.
+            _ => output.push(c),
+        }
+    }
+    output
+}