]> Untitled Git - lemmy.git/blob - crates/utils/src/utils/validation.rs
43f3cb35fa5294d98660bb99db42c0be45fbadfa
[lemmy.git] / crates / utils / src / utils / validation.rs
1 use itertools::Itertools;
2 use once_cell::sync::Lazy;
3 use regex::Regex;
4 use url::Url;
5
6 static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
7   Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
8 static VALID_POST_TITLE_REGEX: Lazy<Regex> =
9   Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
10 static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
11   Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
12 });
13 // taken from https://en.wikipedia.org/wiki/UTM_parameters
14 static CLEAN_URL_PARAMS_REGEX: Lazy<Regex> = Lazy::new(|| {
15   Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
16     .expect("compile regex")
17 });
18
/// Returns `true` when `name` contains at least one line feed (`'\n'`) character.
fn has_newline(name: &str) -> bool {
  name.chars().any(|ch| ch == '\n')
}
22
23 pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
24   name.chars().count() <= actor_name_max_length
25     && VALID_ACTOR_NAME_REGEX.is_match(name)
26     && !has_newline(name)
27 }
28
29 // Can't do a regex here, reverse lookarounds not supported
30 pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
31   !name.starts_with('@')
32     && !name.starts_with('\u{200b}')
33     && name.chars().count() >= 3
34     && name.chars().count() <= actor_name_max_length
35     && !has_newline(name)
36 }
37
38 pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
39   VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id)
40 }
41
42 pub fn is_valid_post_title(title: &str) -> bool {
43   VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title)
44 }
45
46 pub fn clean_url_params(url: &Url) -> Url {
47   let mut url_out = url.clone();
48   if url.query().is_some() {
49     let new_query = url
50       .query_pairs()
51       .filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
52       .map(|q| format!("{}={}", q.0, q.1))
53       .join("&");
54     url_out.set_query(Some(&new_query));
55   }
56   url_out
57 }
58
/// Unit tests for the validation helpers in this module.
#[cfg(test)]
mod tests {
  use crate::utils::validation::{
    clean_url_params,
    is_valid_actor_name,
    is_valid_display_name,
    is_valid_matrix_id,
    is_valid_post_title,
  };
  use url::Url;

  /// Tracking parameters are removed, other parameters survive, and a URL without a
  /// query comes back unchanged.
  #[test]
  fn test_clean_url_params() {
    let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
    let cleaned = clean_url_params(&url);
    let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
    assert_eq!(expected.to_string(), cleaned.to_string());

    let url = Url::parse("https://example.com/path/123").unwrap();
    let cleaned = clean_url_params(&url);
    assert_eq!(url.to_string(), cleaned.to_string());
  }

  /// Post titles need three consecutive non-whitespace characters and no newline.
  #[test]
  fn regex_checks() {
    assert!(!is_valid_post_title("hi"));
    assert!(is_valid_post_title("him"));
    assert!(!is_valid_post_title("n\n\n\n\nanother"));
    assert!(!is_valid_post_title("hello there!\n this is a test."));
    assert!(is_valid_post_title("hello there! this is a test."));
  }

  /// Actor names accept ASCII letters, digits and underscores, three characters minimum.
  #[test]
  fn test_valid_actor_name() {
    let actor_name_max_length = 20;
    assert!(is_valid_actor_name("Hello_98", actor_name_max_length));
    assert!(is_valid_actor_name("ten", actor_name_max_length));
    assert!(!is_valid_actor_name("Hello-98", actor_name_max_length));
    assert!(!is_valid_actor_name("a", actor_name_max_length));
    assert!(!is_valid_actor_name("", actor_name_max_length));
  }

  /// Display names may contain `@`, but must not start with it.
  #[test]
  fn test_valid_display_name() {
    let actor_name_max_length = 20;
    assert!(is_valid_display_name("hello @there", actor_name_max_length));
    assert!(!is_valid_display_name(
      "@hello there",
      actor_name_max_length
    ));

    // Make sure a zero-width space followed by an @ doesn't work
    assert!(!is_valid_display_name(
      &format!("{}@my name is", '\u{200b}'),
      actor_name_max_length
    ));
  }

  /// Titles with non-ASCII characters are fine; whitespace-only titles are not.
  #[test]
  fn test_valid_post_title() {
    assert!(is_valid_post_title("Post Title"));
    assert!(is_valid_post_title("   POST TITLE 😃😃😃😃😃"));
    assert!(!is_valid_post_title("\n \n \n \n                   ")); // tabs/spaces/newlines
  }

  /// Matrix ids must start with `@` and must not carry surrounding text.
  #[test]
  fn test_valid_matrix_id() {
    assert!(is_valid_matrix_id("@dess:matrix.org"));
    assert!(!is_valid_matrix_id("dess:matrix.org"));
    assert!(!is_valid_matrix_id(" @dess:matrix.org"));
    assert!(!is_valid_matrix_id("@dess:matrix.org t"));
  }
}