From: Felix Ableitner Date: Fri, 18 Jun 2021 18:38:34 +0000 (+0200) Subject: Remove tracking params from post url (fixes #768) X-Git-Url: http://these/git/%7B%60%24%7BwebArchiveUrl%7D/%22%7B%7D/%22https:/nerdica.net/%7Biframely.url%7D?a=commitdiff_plain;h=f289374e70ca6cbd67c226292cd2f2521d04a182;p=lemmy.git Remove tracking params from post url (fixes #768) --- diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index 835518a1..4ce7c064 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -13,7 +13,7 @@ use lemmy_db_schema::source::post::*; use lemmy_db_views::post_view::PostView; use lemmy_utils::{ request::fetch_iframely_and_pictrs_data, - utils::{check_slurs, check_slurs_opt, is_valid_post_title}, + utils::{check_slurs, check_slurs_opt, clean_url_params, is_valid_post_title}, ApiError, ConnectionId, LemmyError, @@ -48,7 +48,7 @@ impl PerformCrud for CreatePost { let post_form = PostForm { name: data.name.trim().to_owned(), - url: data_url.map(|u| u.to_owned().into()), + url: data_url.map(|u| clean_url_params(u.to_owned()).into()), body: data.body.to_owned(), community_id: data.community_id, creator_id: local_user_view.person.id, diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index ca7634d5..59f8e446 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -7,7 +7,7 @@ use lemmy_db_schema::{naive_now, source::post::*}; use lemmy_db_views::post_view::PostView; use lemmy_utils::{ request::fetch_iframely_and_pictrs_data, - utils::{check_slurs_opt, is_valid_post_title}, + utils::{check_slurs_opt, clean_url_params, is_valid_post_title}, ApiError, ConnectionId, LemmyError, @@ -59,7 +59,7 @@ impl PerformCrud for EditPost { creator_id: orig_post.creator_id.to_owned(), community_id: orig_post.community_id, name: data.name.to_owned().unwrap_or(orig_post.name), - url: data_url.map(|u| u.to_owned().into()), + url: data_url.map(|u| clean_url_params(u.to_owned()).into()), body: data.body.to_owned(), nsfw: data.nsfw, updated: Some(naive_now()), diff --git a/crates/utils/src/utils.rs b/crates/utils/src/utils.rs index 31936788..c6f1aaaf 100644 --- a/crates/utils/src/utils.rs +++ b/crates/utils/src/utils.rs @@ -4,6 +4,7 @@ use chrono::{DateTime, FixedOffset, NaiveDateTime}; use itertools::Itertools; use rand::{distributions::Alphanumeric, thread_rng, Rng}; use regex::{Regex, RegexBuilder}; +use url::Url; lazy_static! { static ref EMAIL_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$").expect("compile regex"); @@ -25,6 +26,8 @@ lazy_static! { static ref VALID_COMMUNITY_NAME_REGEX: Regex = Regex::new(r"^[a-z0-9_]{3,20}$").expect("compile regex"); static ref VALID_POST_TITLE_REGEX: Regex = Regex::new(r".*\S.*").expect("compile regex"); static ref VALID_MATRIX_ID_REGEX: Regex = Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex"); + // taken from https://en.wikipedia.org/wiki/UTM_parameters + static ref CLEAN_URL_PARAMS_REGEX: Regex = Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$").expect("compile regex"); } pub fn naive_from_unix(time: i64) -> NaiveDateTime { @@ -148,3 +151,27 @@ pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr { .to_string(), ) } + +pub fn clean_url_params(mut url: Url) -> Url { + let new_query = url + .query_pairs() + .filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0)) + .map(|q| format!("{}={}", q.0, q.1)) + .join("&"); + url.set_query(Some(&new_query)); + url +} + +#[cfg(test)] +mod tests { + use crate::utils::clean_url_params; + use url::Url; + + #[test] + fn test_clean_url_params() { + let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap(); + let cleaned = clean_url_params(url); + let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap(); + assert_eq!(expected.to_string(), cleaned.to_string()); + } +}