summaryrefslogtreecommitdiff
path: root/src/string_utils.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/string_utils.rs')
-rw-r--r--src/string_utils.rs72
1 files changed, 72 insertions, 0 deletions
diff --git a/src/string_utils.rs b/src/string_utils.rs
new file mode 100644
index 0000000..b23c349
--- /dev/null
+++ b/src/string_utils.rs
@@ -0,0 +1,72 @@
+
/// Turn a string into a tidy URL slug.
///
/// ASCII letters are lowercased, each space becomes a hyphen, and every
/// character that is neither alphanumeric nor one of `-_~.+/#` is dropped.
/// Non-ASCII alphanumeric characters are kept exactly as written.
pub fn make_url_safe(text: &str) -> String {
    // Punctuation that is allowed to pass through into the slug untouched.
    const KEPT_PUNCTUATION: &str = "-_~.+/#";

    let mut slug = String::new();
    for ch in text.chars().map(|ch| ch.to_ascii_lowercase()) {
        match ch {
            ' ' => slug.push('-'),
            _ if ch.is_alphanumeric() || KEPT_PUNCTUATION.contains(ch) => slug.push(ch),
            // Anything else (other whitespace, symbols, ...) is discarded.
            _ => {}
        }
    }
    slug
}
+
/// Prevent link hrefs from breaking out of quotations.
///
/// Only the two quote characters are rewritten: `"` becomes `%22` and `'`
/// becomes `%27`. Every other character is copied through unchanged, so this
/// is not a general-purpose percent-encoder.
pub fn url_encode(text: &str) -> String {
    // The output is at least as long as the input, so reserve that up front.
    let mut output = String::with_capacity(text.len());
    for c in text.chars() {
        match c {
            '"' => output.push_str("%22"),
            '\'' => output.push_str("%27"),
            _ => output.push(c),
        }
    }
    output
}
+
/// Replace each HTML-reserved character with an HTML-escaped character.
///
/// * `&` becomes `&amp;`, unless it is immediately followed by `#`, in which
///   case it is assumed to start a numeric character reference (`&#0000`) and
///   is passed through unchanged.
/// * `<` and `>` become `&lt;` and `&gt;`.
/// * `"` and `'` become `&#34;` and `&#39;` when `fancy` is `false`.
///
/// When `fancy` is `true`, typographic substitutions are applied instead:
///
/// * A quote that follows whitespace, an opening bracket (`(` or `[`), or the
///   start of the text becomes an opening curly quote; any other quote
///   (including one that follows `.`, `,`, `)` or `]`) becomes a closing curly
///   quote.
/// * A `-` with whitespace (or the text boundary) on both sides becomes an
///   en-dash at the very start of the text and an em-dash anywhere else.
///   All other dashes are left alone.
pub fn sanitize_text(text: &str, fancy: bool) -> String {
    /// A quote is an opening quote only when it follows whitespace or an
    /// opening bracket. Closing punctuation must not count here, otherwise
    /// the closing quote in `"Stop."` would be rendered as an opening one.
    fn opens_quote(prev: char) -> bool {
        prev.is_whitespace() || matches!(prev, '(' | '[')
    }

    let mut output = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    // The text is treated as if it were surrounded by spaces, so `prev` starts
    // as a space and `next` falls back to a space at the end of the text.
    let mut prev = ' ';
    let mut at_start = true;

    while let Some(c) = chars.next() {
        let next = chars.peek().copied().unwrap_or(' ');

        match c {
            // The HTML syntax for unicode characters is &#0000
            '&' if next == '#' => output.push('&'),
            '&' => output.push_str("&amp;"),
            '<' => output.push_str("&lt;"),
            '>' => output.push_str("&gt;"),
            '"' if fancy => output.push(if opens_quote(prev) { '“' } else { '”' }),
            '"' => output.push_str("&#34;"),
            '\'' if fancy => output.push(if opens_quote(prev) { '‘' } else { '’' }),
            '\'' => output.push_str("&#39;"),
            '-' if fancy && prev.is_whitespace() && next.is_whitespace() => {
                // En-dash for a dash at the start of the text, em-dash for a
                // free-standing dash mid-sentence.
                output.push(if at_start { '–' } else { '—' })
            }
            // Everything else, including mid-word dashes, is copied as-is.
            _ => output.push(c),
        }

        prev = c;
        at_start = false;
    }
    output
}