crates/utils/src/utils.rs

   1 use crate::{IpAddr, LemmyError};
   2 use actix_web::dev::ConnectionInfo;
   3 use chrono::{DateTime, FixedOffset, NaiveDateTime};
   4 use itertools::Itertools;
   5 use once_cell::sync::Lazy;
   6 use rand::{distributions::Alphanumeric, thread_rng, Rng};
   7 use regex::Regex;
   8 use url::Url;
   9
/// Matches `@name@domain` mentions anywhere in a text; the `name` and `domain` capture groups
/// are read by `scrape_text_for_mentions`.
static MENTIONS_REGEX: Lazy<Regex> = Lazy::new(|| {
  Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex")
});
/// Whole-string pattern for actor names: three or more ASCII letters, digits or underscores.
static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
  Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
/// Requires a run of at least three consecutive non-whitespace characters somewhere in a title.
static VALID_POST_TITLE_REGEX: Lazy<Regex> =
  Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
/// Whole-string pattern for Matrix ids of the shape `@localpart:server.tld`.
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
  Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
});
  20 // taken from https://en.wikipedia.org/wiki/UTM_parameters
  21 static CLEAN_URL_PARAMS_REGEX: Lazy<Regex> = Lazy::new(|| {
  22   Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
  23     .expect("compile regex")
  24 });
  25
  26 pub fn naive_from_unix(time: i64) -> NaiveDateTime {
  27   NaiveDateTime::from_timestamp(time, 0)
  28 }
  29
  30 pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
  31   DateTime::<FixedOffset>::from_utc(datetime, FixedOffset::east(0))
  32 }
  33
  34 pub fn remove_slurs(test: &str, slur_regex: &Option<Regex>) -> String {
  35   if let Some(slur_regex) = slur_regex {
  36     slur_regex.replace_all(test, "*removed*").to_string()
  37   } else {
  38     test.to_string()
  39   }
  40 }
  41
  42 pub(crate) fn slur_check<'a>(
  43   test: &'a str,
  44   slur_regex: &'a Option<Regex>,
  45 ) -> Result<(), Vec<&'a str>> {
  46   if let Some(slur_regex) = slur_regex {
  47     let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
  48
  49     // Unique
  50     matches.sort_unstable();
  51     matches.dedup();
  52
  53     if matches.is_empty() {
  54       Ok(())
  55     } else {
  56       Err(matches)
  57     }
  58   } else {
  59     Ok(())
  60   }
  61 }
  62
  63 pub fn check_slurs(text: &str, slur_regex: &Option<Regex>) -> Result<(), LemmyError> {
  64   if let Err(slurs) = slur_check(text, slur_regex) {
  65     Err(LemmyError::from_error_message(
  66       anyhow::anyhow!("{}", slurs_vec_to_str(slurs)),
  67       "slurs",
  68     ))
  69   } else {
  70     Ok(())
  71   }
  72 }
  73
  74 pub fn check_slurs_opt(
  75   text: &Option<String>,
  76   slur_regex: &Option<Regex>,
  77 ) -> Result<(), LemmyError> {
  78   match text {
  79     Some(t) => check_slurs(t, slur_regex),
  80     None => Ok(()),
  81   }
  82 }
  83
  84 pub(crate) fn slurs_vec_to_str(slurs: Vec<&str>) -> String {
  85   let start = "No slurs - ";
  86   let combined = &slurs.join(", ");
  87   [start, combined].concat()
  88 }
  89
  90 pub fn generate_random_string() -> String {
  91   thread_rng()
  92     .sample_iter(&Alphanumeric)
  93     .map(char::from)
  94     .take(30)
  95     .collect()
  96 }
  97
  98 pub fn markdown_to_html(text: &str) -> String {
  99   comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
 100 }
 101
 102 // TODO nothing is done with community / group webfingers yet, so just ignore those for now
 103 #[derive(Clone, PartialEq, Eq, Hash)]
 104 pub struct MentionData {
 105   pub name: String,
 106   pub domain: String,
 107 }
 108
 109 impl MentionData {
 110   pub fn is_local(&self, hostname: &str) -> bool {
 111     hostname.eq(&self.domain)
 112   }
 113   pub fn full_name(&self) -> String {
 114     format!("@{}@{}", &self.name, &self.domain)
 115   }
 116 }
 117
 118 pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
 119   let mut out: Vec<MentionData> = Vec::new();
 120   for caps in MENTIONS_REGEX.captures_iter(text) {
 121     out.push(MentionData {
 122       name: caps["name"].to_string(),
 123       domain: caps["domain"].to_string(),
 124     });
 125   }
 126   out.into_iter().unique().collect()
 127 }
 128
 129 fn has_newline(name: &str) -> bool {
 130   name.contains('\n')
 131 }
 132
 133 pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
 134   name.chars().count() <= actor_name_max_length
 135     && VALID_ACTOR_NAME_REGEX.is_match(name)
 136     && !has_newline(name)
 137 }
 138
 139 // Can't do a regex here, reverse lookarounds not supported
 140 pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
 141   !name.starts_with('@')
 142     && !name.starts_with('\u{200b}')
 143     && name.chars().count() >= 3
 144     && name.chars().count() <= actor_name_max_length
 145     && !has_newline(name)
 146 }
 147
 148 pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
 149   VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id)
 150 }
 151
 152 pub fn is_valid_post_title(title: &str) -> bool {
 153   VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title)
 154 }
 155
 156 pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
 157   IpAddr(
 158     conn_info
 159       .realip_remote_addr()
 160       .unwrap_or("127.0.0.1:12345")
 161       .split(':')
 162       .next()
 163       .unwrap_or("127.0.0.1")
 164       .to_string(),
 165   )
 166 }
 167
 168 pub fn clean_url_params(mut url: Url) -> Url {
 169   if url.query().is_some() {
 170     let new_query = url
 171       .query_pairs()
 172       .filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
 173       .map(|q| format!("{}={}", q.0, q.1))
 174       .join("&");
 175     url.set_query(Some(&new_query));
 176   }
 177   url
 178 }
 179
 180 pub fn clean_optional_text(text: &Option<String>) -> Option<String> {
 181   if let Some(text) = text {
 182     let trimmed = text.trim();
 183     if trimmed.is_empty() {
 184       None
 185     } else {
 186       Some(trimmed.to_owned())
 187     }
 188   } else {
 189     None
 190   }
 191 }
 192
 193 #[cfg(test)]
 194 mod tests {
 195   use crate::utils::{clean_url_params, is_valid_post_title};
 196   use url::Url;
 197
 198   #[test]
 199   fn test_clean_url_params() {
 200     let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
 201     let cleaned = clean_url_params(url);
 202     let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
 203     assert_eq!(expected.to_string(), cleaned.to_string());
 204
 205     let url = Url::parse("https://example.com/path/123").unwrap();
 206     let cleaned = clean_url_params(url.clone());
 207     assert_eq!(url.to_string(), cleaned.to_string());
 208   }
 209
 210   #[test]
 211   fn regex_checks() {
 212     assert!(!is_valid_post_title("hi"));
 213     assert!(is_valid_post_title("him"));
 214     assert!(!is_valid_post_title("n\n\n\n\nanother"));
 215     assert!(!is_valid_post_title("hello there!\n this is a test."));
 216     assert!(is_valid_post_title("hello there! this is a test."));
 217   }
 218 }