crates/utils/src/utils.rs

   1 use crate::{ApiError, IpAddr};
   2 use actix_web::dev::ConnectionInfo;
   3 use chrono::{DateTime, FixedOffset, NaiveDateTime};
   4 use itertools::Itertools;
   5 use rand::{distributions::Alphanumeric, thread_rng, Rng};
   6 use regex::Regex;
   7 use url::Url;
   8
   9 lazy_static! {
  10   static ref EMAIL_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$").expect("compile regex");
  11
  12   static ref USERNAME_MATCHES_REGEX: Regex = Regex::new(r"/u/[a-zA-Z][0-9a-zA-Z_]*").expect("compile regex");
  13   // TODO keep this old one, it didn't work with port well tho
  14   // static ref MENTIONS_REGEX: Regex = Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)").expect("compile regex");
  15   static ref MENTIONS_REGEX: Regex = Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex");
  16   static ref VALID_ACTOR_NAME_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex");
  17   static ref VALID_POST_TITLE_REGEX: Regex = Regex::new(r".*\S.*").expect("compile regex");
  18   static ref VALID_MATRIX_ID_REGEX: Regex = Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex");
  19   // taken from https://en.wikipedia.org/wiki/UTM_parameters
  20   static ref CLEAN_URL_PARAMS_REGEX: Regex = Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$").expect("compile regex");
  21 }
  22
  23 pub fn naive_from_unix(time: i64) -> NaiveDateTime {
  24   NaiveDateTime::from_timestamp(time, 0)
  25 }
  26
  27 pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
  28   DateTime::<FixedOffset>::from_utc(datetime, FixedOffset::east(0))
  29 }
  30
  31 pub fn remove_slurs(test: &str, slur_regex: &Regex) -> String {
  32   slur_regex.replace_all(test, "*removed*").to_string()
  33 }
  34
  35 pub(crate) fn slur_check<'a>(test: &'a str, slur_regex: &'a Regex) -> Result<(), Vec<&'a str>> {
  36   let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
  37
  38   // Unique
  39   matches.sort_unstable();
  40   matches.dedup();
  41
  42   if matches.is_empty() {
  43     Ok(())
  44   } else {
  45     Err(matches)
  46   }
  47 }
  48
  49 pub fn check_slurs(text: &str, slur_regex: &Regex) -> Result<(), ApiError> {
  50   slur_check(text, slur_regex)
  51     .map_err(|slurs| ApiError::err_plain(&slurs_vec_to_str(slurs.clone())))
  52 }
  53
  54 pub fn check_slurs_opt(text: &Option<String>, slur_regex: &Regex) -> Result<(), ApiError> {
  55   match text {
  56     Some(t) => check_slurs(t, slur_regex),
  57     None => Ok(()),
  58   }
  59 }
  60
  61 pub(crate) fn slurs_vec_to_str(slurs: Vec<&str>) -> String {
  62   let start = "No slurs - ";
  63   let combined = &slurs.join(", ");
  64   [start, combined].concat()
  65 }
  66
  67 pub fn generate_random_string() -> String {
  68   thread_rng()
  69     .sample_iter(&Alphanumeric)
  70     .map(char::from)
  71     .take(30)
  72     .collect()
  73 }
  74
  75 pub fn markdown_to_html(text: &str) -> String {
  76   comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
  77 }
  78
  79 // TODO nothing is done with community / group webfingers yet, so just ignore those for now
  80 #[derive(Clone, PartialEq, Eq, Hash)]
  81 pub struct MentionData {
  82   pub name: String,
  83   pub domain: String,
  84 }
  85
  86 impl MentionData {
  87   pub fn is_local(&self, hostname: &str) -> bool {
  88     hostname.eq(&self.domain)
  89   }
  90   pub fn full_name(&self) -> String {
  91     format!("@{}@{}", &self.name, &self.domain)
  92   }
  93 }
  94
  95 pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
  96   let mut out: Vec<MentionData> = Vec::new();
  97   for caps in MENTIONS_REGEX.captures_iter(text) {
  98     out.push(MentionData {
  99       name: caps["name"].to_string(),
 100       domain: caps["domain"].to_string(),
 101     });
 102   }
 103   out.into_iter().unique().collect()
 104 }
 105
 106 pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
 107   name.chars().count() <= actor_name_max_length && VALID_ACTOR_NAME_REGEX.is_match(name)
 108 }
 109
 110 // Can't do a regex here, reverse lookarounds not supported
 111 pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
 112   !name.starts_with('@')
 113     && !name.starts_with('\u{200b}')
 114     && name.chars().count() >= 3
 115     && name.chars().count() <= actor_name_max_length
 116 }
 117
 118 pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
 119   VALID_MATRIX_ID_REGEX.is_match(matrix_id)
 120 }
 121
 122 pub fn is_valid_post_title(title: &str) -> bool {
 123   VALID_POST_TITLE_REGEX.is_match(title)
 124 }
 125
 126 pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
 127   IpAddr(
 128     conn_info
 129       .realip_remote_addr()
 130       .unwrap_or("127.0.0.1:12345")
 131       .split(':')
 132       .next()
 133       .unwrap_or("127.0.0.1")
 134       .to_string(),
 135   )
 136 }
 137
 138 pub fn clean_url_params(mut url: Url) -> Url {
 139   if url.query().is_some() {
 140     let new_query = url
 141       .query_pairs()
 142       .filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
 143       .map(|q| format!("{}={}", q.0, q.1))
 144       .join("&");
 145     url.set_query(Some(&new_query));
 146   }
 147   url
 148 }
 149
 150 #[cfg(test)]
 151 mod tests {
 152   use crate::utils::clean_url_params;
 153   use url::Url;
 154
 155   #[test]
 156   fn test_clean_url_params() {
 157     let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
 158     let cleaned = clean_url_params(url);
 159     let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
 160     assert_eq!(expected.to_string(), cleaned.to_string());
 161
 162     let url = Url::parse("https://example.com/path/123").unwrap();
 163     let cleaned = clean_url_params(url.clone());
 164     assert_eq!(url.to_string(), cleaned.to_string());
 165   }
 166 }