Untitled Git - lemmy.git/commitdiff
Merge pull request #1637 from LemmyNet/remove-url-tracking-params
author Dessalines <dessalines@users.noreply.github.com>
Thu, 24 Jun 2021 18:54:02 +0000 (14:54 -0400)
committer GitHub <noreply@github.com>
Thu, 24 Jun 2021 18:54:02 +0000 (14:54 -0400)
Remove tracking params from post url (fixes #768)

crates/api_crud/src/post/create.rs
crates/api_crud/src/post/update.rs
crates/utils/src/utils.rs

index 835518a14b517569f687b4f5eb440c32ad11d675..4ce7c0644607e9da3676b5b9ca19feae0b5b8a79 100644 (file)
@@ -13,7 +13,7 @@ use lemmy_db_schema::source::post::*;
 use lemmy_db_views::post_view::PostView;
 use lemmy_utils::{
   request::fetch_iframely_and_pictrs_data,
-  utils::{check_slurs, check_slurs_opt, is_valid_post_title},
+  utils::{check_slurs, check_slurs_opt, clean_url_params, is_valid_post_title},
   ApiError,
   ConnectionId,
   LemmyError,
@@ -48,7 +48,7 @@ impl PerformCrud for CreatePost {
 
     let post_form = PostForm {
       name: data.name.trim().to_owned(),
-      url: data_url.map(|u| u.to_owned().into()),
+      url: data_url.map(|u| clean_url_params(u.to_owned()).into()),
       body: data.body.to_owned(),
       community_id: data.community_id,
       creator_id: local_user_view.person.id,
index ca7634d5ab8a2eed7d499a67504698a6168942c2..59f8e446a170ad3a22c2dfaa24c1b2705a462d23 100644 (file)
@@ -7,7 +7,7 @@ use lemmy_db_schema::{naive_now, source::post::*};
 use lemmy_db_views::post_view::PostView;
 use lemmy_utils::{
   request::fetch_iframely_and_pictrs_data,
-  utils::{check_slurs_opt, is_valid_post_title},
+  utils::{check_slurs_opt, clean_url_params, is_valid_post_title},
   ApiError,
   ConnectionId,
   LemmyError,
@@ -59,7 +59,7 @@ impl PerformCrud for EditPost {
       creator_id: orig_post.creator_id.to_owned(),
       community_id: orig_post.community_id,
       name: data.name.to_owned().unwrap_or(orig_post.name),
-      url: data_url.map(|u| u.to_owned().into()),
+      url: data_url.map(|u| clean_url_params(u.to_owned()).into()),
       body: data.body.to_owned(),
       nsfw: data.nsfw,
       updated: Some(naive_now()),
index 31936788a7e276dd06fd96cf5bb1c0890ddc8b26..c6f1aaaf0647b455c7e09321f589a0a6b97d54b6 100644 (file)
@@ -4,6 +4,7 @@ use chrono::{DateTime, FixedOffset, NaiveDateTime};
 use itertools::Itertools;
 use rand::{distributions::Alphanumeric, thread_rng, Rng};
 use regex::{Regex, RegexBuilder};
+use url::Url;
 
 lazy_static! {
   static ref EMAIL_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$").expect("compile regex");
@@ -25,6 +26,8 @@ lazy_static! {
   static ref VALID_COMMUNITY_NAME_REGEX: Regex = Regex::new(r"^[a-z0-9_]{3,20}$").expect("compile regex");
   static ref VALID_POST_TITLE_REGEX: Regex = Regex::new(r".*\S.*").expect("compile regex");
   static ref VALID_MATRIX_ID_REGEX: Regex = Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex");
+  // taken from https://en.wikipedia.org/wiki/UTM_parameters
+  static ref CLEAN_URL_PARAMS_REGEX: Regex = Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$").expect("compile regex");
 }
 
 pub fn naive_from_unix(time: i64) -> NaiveDateTime {
@@ -148,3 +151,27 @@ pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
       .to_string(),
   )
 }
+
+pub fn clean_url_params(mut url: Url) -> Url {
+  let new_query = url
+    .query_pairs()
+    .filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
+    .map(|q| format!("{}={}", q.0, q.1))
+    .join("&");
+  url.set_query(Some(&new_query));
+  url
+}
+
+#[cfg(test)]
+mod tests {
+  use crate::utils::clean_url_params;
+  use url::Url;
+
+  #[test]
+  fn test_clean_url_params() {
+    let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
+    let cleaned = clean_url_params(url);
+    let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
+    assert_eq!(expected.to_string(), cleaned.to_string());
+  }
+}