1 use crate::{error::LemmyError, IpAddr};
2 use actix_web::dev::ConnectionInfo;
3 use chrono::{DateTime, FixedOffset, NaiveDateTime};
4 use itertools::Itertools;
5 use once_cell::sync::Lazy;
6 use rand::{distributions::Alphanumeric, thread_rng, Rng};
/// Matches mentions of the form `@name@domain`.
///
/// The `name` capture group accepts word characters (`\w`) and dots; the
/// `domain` capture group accepts ASCII letters, digits, `.`, `_`, `:` and `-`.
/// The pattern is a literal, so the `expect` only fires if the literal itself
/// is edited into something invalid.
10 static MENTIONS_REGEX: Lazy<Regex> = Lazy::new(|| {
11 Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex")
/// Accepts names made up solely of ASCII letters, digits and underscores, with
/// a minimum length of three characters (the pattern sets no upper bound).
13 static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
14 Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
/// Accepts text that contains a run of at least three consecutive
/// non-whitespace characters somewhere in it.
15 static VALID_POST_TITLE_REGEX: Lazy<Regex> =
16 Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
/// Accepts Matrix user IDs shaped like `@localpart:server.tld`.
///
/// The localpart may contain ASCII letters, digits, `.`, `_`, `=` and `-`; the
/// server part may contain ASCII letters, digits, `.` and `-`, and must end in
/// a dot followed by at least two ASCII letters.
17 static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
18 Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
20 // taken from https://en.wikipedia.org/wiki/UTM_parameters
21 static CLEAN_URL_PARAMS_REGEX: Lazy<Regex> = Lazy::new(|| {
22 Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
23 .expect("compile regex")
26 pub fn naive_from_unix(time: i64) -> NaiveDateTime {
27 NaiveDateTime::from_timestamp(time, 0)
30 pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
31 DateTime::<FixedOffset>::from_utc(datetime, FixedOffset::east(0))
/// Replaces every match of `slur_regex` in `test` with the literal text
/// `*removed*` and returns the result as an owned `String`.
// NOTE(review): the branch taken when `slur_regex` is `None` is not visible in
// this excerpt — presumably the text is returned unchanged; confirm.
34 pub fn remove_slurs(test: &str, slur_regex: &Option<Regex>) -> String {
35 if let Some(slur_regex) = slur_regex {
// `replace_all` substitutes all non-overlapping matches, not just the first.
36 slur_regex.replace_all(test, "*removed*").to_string()
/// Searches the input text for matches of `slur_regex` and reports them
/// through the `Err` variant as string slices.
// NOTE(review): one parameter line and both result branches are not visible in
// this excerpt. Presumably the missing parameter is the `test` text used below,
// an empty match list yields `Ok(())`, and a missing regex also yields
// `Ok(())` — confirm against the full file.
42 pub(crate) fn slur_check<'a>(
44 slur_regex: &'a Option<Regex>,
45 ) -> Result<(), Vec<&'a str>> {
46 if let Some(slur_regex) = slur_regex {
// Collect the matched text of every hit as a slice of the input.
47 let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
// Sorting places identical matches next to each other.
50 matches.sort_unstable();
53 if matches.is_empty() {
/// Runs `slur_check` on `text` and, when it reports matches, turns them into a
/// `LemmyError`.
///
/// The wrapped `anyhow` error carries the message built by `slurs_vec_to_str`.
// NOTE(review): the remaining arguments to `from_error_message` and the branch
// taken when no slurs are found are not visible here — presumably `Ok(())`;
// confirm.
63 pub fn check_slurs(text: &str, slur_regex: &Option<Regex>) -> Result<(), LemmyError> {
64 if let Err(slurs) = slur_check(text, slur_regex) {
65 Err(LemmyError::from_error_message(
66 anyhow::anyhow!("{}", slurs_vec_to_str(slurs)),
/// Variant of `check_slurs` for optional text: when a text is present it is
/// passed to `check_slurs` together with `slur_regex`.
// NOTE(review): the handling of a `None` text is not visible in this excerpt —
// presumably it counts as passing the check; confirm.
74 pub fn check_slurs_opt(
75 text: &Option<String>,
76 slur_regex: &Option<Regex>,
77 ) -> Result<(), LemmyError> {
79 Some(t) => check_slurs(t, slur_regex),
/// Builds the message that lists the slurs which were found.
///
/// The result is the fixed prefix `"No slurs - "` followed by the entries of
/// `slurs` joined with `", "`. An empty `slurs` yields just the prefix.
pub(crate) fn slurs_vec_to_str(slurs: Vec<&str>) -> String {
    format!("No slurs - {}", slurs.join(", "))
}
/// Produces a random string whose characters are sampled from rand's
/// `Alphanumeric` distribution.
// NOTE(review): the RNG the samples are drawn from, the length of the result
// and how the samples are collected are not visible in this excerpt — confirm
// against the full file.
90 pub fn generate_random_string() -> String {
92 .sample_iter(&Alphanumeric)
98 pub fn markdown_to_html(text: &str) -> String {
99 comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
102 // TODO nothing is done with community / group webfingers yet, so just ignore those for now
/// A single `@name@domain` mention extracted from text.
///
/// Deriving `Eq` and `Hash` lets mentions be de-duplicated.
// NOTE(review): the field declarations are not visible in this excerpt; other
// code in this file reads and writes `name` and `domain`.
103 #[derive(Clone, PartialEq, Eq, Hash)]
104 pub struct MentionData {
110 pub fn is_local(&self, hostname: &str) -> bool {
111 hostname.eq(&self.domain)
113 pub fn full_name(&self) -> String {
114 format!("@{}@{}", &self.name, &self.domain)
118 pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
119 let mut out: Vec<MentionData> = Vec::new();
120 for caps in MENTIONS_REGEX.captures_iter(text) {
121 out.push(MentionData {
122 name: caps["name"].to_string(),
123 domain: caps["domain"].to_string(),
126 out.into_iter().unique().collect()
/// Private predicate used by the `is_valid_*` validators in this file; each of
/// them rejects its input when this returns `true`.
// NOTE(review): the body is not visible in this excerpt — presumably it checks
// whether `name` contains a `'\n'`; confirm.
129 fn has_newline(name: &str) -> bool {
133 pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
134 name.chars().count() <= actor_name_max_length
135 && VALID_ACTOR_NAME_REGEX.is_match(name)
136 && !has_newline(name)
139 // Can't do a regex here, reverse lookarounds not supported
140 pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
141 !name.starts_with('@')
142 && !name.starts_with('\u{200b}')
143 && name.chars().count() >= 3
144 && name.chars().count() <= actor_name_max_length
145 && !has_newline(name)
148 pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
149 VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id)
152 pub fn is_valid_post_title(title: &str) -> bool {
153 VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title)
/// Derives the client's `IpAddr` from the connection info of a request.
// NOTE(review): most of this call chain is not visible in this excerpt (the
// receiver, the steps between the two fallbacks, and the final conversion into
// `IpAddr`) — confirm the details against the full file.
156 pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
// Start from the address actix reports as the real remote address; when none
// is available, fall back to a loopback address with a placeholder port.
159 .realip_remote_addr()
160 .unwrap_or("127.0.0.1:12345")
// Second fallback: bare loopback address when the preceding (unseen) step
// yields nothing.
163 .unwrap_or("127.0.0.1")
/// Returns a copy of `url` whose query string no longer contains parameters
/// whose names match `CLEAN_URL_PARAMS_REGEX`. A URL without a query is
/// returned as an unmodified copy.
// NOTE(review): the lines that iterate the query pairs and join the surviving
// ones into `new_query` are not visible in this excerpt — confirm the source
// of the pairs and the separator against the full file.
168 pub fn clean_url_params(url: &Url) -> Url {
// Work on an owned copy so the caller's URL is left untouched.
169 let mut url_out = url.to_owned();
// Only rewrite the query when the URL actually has one.
170 if url.query().is_some() {
// Keep only the pairs whose key does not match the tracking-parameter regex.
173 .filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
// Re-serialise each surviving pair as `key=value`.
174 .map(|q| format!("{}={}", q.0, q.1))
176 url_out.set_query(Some(&new_query));
183 use crate::utils::{clean_url_params, is_valid_post_title};
// Verifies that tracking parameters are stripped while other parameters are
// kept, and that a URL without a query is returned unchanged.
187 fn test_clean_url_params() {
// Mixed query: two tracking parameters followed by two ordinary ones.
188 let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
189 let cleaned = clean_url_params(&url);
190 let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
191 assert_eq!(expected.to_string(), cleaned.to_string());
// No query at all: the URL must round-trip untouched.
193 let url = Url::parse("https://example.com/path/123").unwrap();
194 let cleaned = clean_url_params(&url);
195 assert_eq!(url.to_string(), cleaned.to_string());
// Two characters: shorter than the three non-whitespace characters required.
200 assert!(!is_valid_post_title("hi"));
// Exactly three non-whitespace characters is the minimum accepted.
201 assert!(is_valid_post_title("him"));
// Titles containing newlines are expected to be rejected, even when they
// contain a long enough run of non-whitespace characters.
202 assert!(!is_valid_post_title("n\n\n\n\nanother"));
203 assert!(!is_valid_post_title("hello there!\n this is a test."));
// The same text on a single line is accepted.
204 assert!(is_valid_post_title("hello there! this is a test."));