Untitled Git - lemmy.git/commitdiff
Dont federate embeds, but refetch them for security (#106)
author nutomic <nutomic@noreply.yerbamate.dev>
Thu, 24 Sep 2020 17:43:42 +0000 (17:43 +0000)
committer dessalines <dessalines@noreply.yerbamate.dev>
Thu, 24 Sep 2020 17:43:42 +0000 (17:43 +0000)
Dont federate embeds, but refetch them for security (#106)

Co-authored-by: Felix Ableitner <me@nutomic.com>
Reviewed-on: https://yerbamate.dev/LemmyNet/lemmy/pulls/106

Cargo.lock
lemmy_api/Cargo.toml
lemmy_api/src/lib.rs
lemmy_api/src/post.rs
lemmy_apub/src/post.rs
lemmy_utils/Cargo.toml
lemmy_utils/src/request.rs

index fe119382e5cc1e9d40f7d53985210cd86426f16b..7c709ffa6288af3891b64ed811f2cfaea0dabde9 100644 (file)
@@ -1833,7 +1833,6 @@ dependencies = [
  "lemmy_websocket",
  "log",
  "openssl",
- "percent-encoding",
  "rand 0.7.3",
  "reqwest",
  "serde 1.0.116",
@@ -1984,6 +1983,7 @@ dependencies = [
  "lettre_email",
  "log",
  "openssl",
+ "percent-encoding",
  "rand 0.7.3",
  "regex",
  "reqwest",
index b302854f1ab4dd9bbb443dd2de8c0b6ffe30b9fe..e8f0aa863eb3ad1ee4003ad35751840e104d8479 100644 (file)
@@ -31,7 +31,6 @@ strum_macros = "0.19"
 jsonwebtoken = "7.0"
 lazy_static = "1.3"
 url = { version = "2.1", features = ["serde"] }
-percent-encoding = "2.1"
 openssl = "0.10"
 http = "0.2"
 http-signature-normalization-actix = { version = "0.4", default-features = false, features = ["sha-2"] }
index 11ec4b34ffbab610fbebb337ec7d3d4649e89159..1b9222f8c59799eadd4c00ee24d276ecd040677c 100644 (file)
@@ -1,6 +1,5 @@
 use crate::claims::Claims;
 use actix_web::{web, web::Data};
-use anyhow::anyhow;
 use lemmy_db::{
   community::Community,
   community_view::CommunityUserBanView,
@@ -10,18 +9,8 @@ use lemmy_db::{
   DbPool,
 };
 use lemmy_structs::{blocking, comment::*, community::*, post::*, site::*, user::*};
-use lemmy_utils::{
-  apub::get_apub_protocol_string,
-  request::{retry, RecvError},
-  settings::Settings,
-  APIError,
-  ConnectionId,
-  LemmyError,
-};
+use lemmy_utils::{settings::Settings, APIError, ConnectionId, LemmyError};
 use lemmy_websocket::{serialize_websocket_message, LemmyContext, UserOperation};
-use log::error;
-use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
-use reqwest::Client;
 use serde::Deserialize;
 use std::process::Command;
 use url::Url;
@@ -361,179 +350,12 @@ pub(crate) fn espeak_wav_base64(text: &str) -> Result<String, LemmyError> {
   Ok(base64)
 }
 
-#[derive(Deserialize, Debug)]
-pub(crate) struct IframelyResponse {
-  title: Option<String>,
-  description: Option<String>,
-  thumbnail_url: Option<String>,
-  html: Option<String>,
-}
-
-pub(crate) async fn fetch_iframely(
-  client: &Client,
-  url: &str,
-) -> Result<IframelyResponse, LemmyError> {
-  let fetch_url = format!("http://iframely/oembed?url={}", url);
-
-  let response = retry(|| client.get(&fetch_url).send()).await?;
-
-  let res: IframelyResponse = response
-    .json()
-    .await
-    .map_err(|e| RecvError(e.to_string()))?;
-  Ok(res)
-}
-
-#[derive(Deserialize, Debug, Clone)]
-pub(crate) struct PictrsResponse {
-  files: Vec<PictrsFile>,
-  msg: String,
-}
-
-#[derive(Deserialize, Debug, Clone)]
-pub(crate) struct PictrsFile {
-  file: String,
-  delete_token: String,
-}
-
-pub(crate) async fn fetch_pictrs(
-  client: &Client,
-  image_url: &str,
-) -> Result<PictrsResponse, LemmyError> {
-  is_image_content_type(client, image_url).await?;
-
-  let fetch_url = format!(
-    "http://pictrs:8080/image/download?url={}",
-    utf8_percent_encode(image_url, NON_ALPHANUMERIC) // TODO this might not be needed
-  );
-
-  let response = retry(|| client.get(&fetch_url).send()).await?;
-
-  let response: PictrsResponse = response
-    .json()
-    .await
-    .map_err(|e| RecvError(e.to_string()))?;
-
-  if response.msg == "ok" {
-    Ok(response)
-  } else {
-    Err(anyhow!("{}", &response.msg).into())
-  }
-}
-
-async fn fetch_iframely_and_pictrs_data(
-  client: &Client,
-  url: Option<String>,
-) -> (
-  Option<String>,
-  Option<String>,
-  Option<String>,
-  Option<String>,
-) {
-  match &url {
-    Some(url) => {
-      // Fetch iframely data
-      let (iframely_title, iframely_description, iframely_thumbnail_url, iframely_html) =
-        match fetch_iframely(client, url).await {
-          Ok(res) => (res.title, res.description, res.thumbnail_url, res.html),
-          Err(e) => {
-            error!("iframely err: {}", e);
-            (None, None, None, None)
-          }
-        };
-
-      // Fetch pictrs thumbnail
-      let pictrs_hash = match iframely_thumbnail_url {
-        Some(iframely_thumbnail_url) => match fetch_pictrs(client, &iframely_thumbnail_url).await {
-          Ok(res) => Some(res.files[0].file.to_owned()),
-          Err(e) => {
-            error!("pictrs err: {}", e);
-            None
-          }
-        },
-        // Try to generate a small thumbnail if iframely is not supported
-        None => match fetch_pictrs(client, &url).await {
-          Ok(res) => Some(res.files[0].file.to_owned()),
-          Err(e) => {
-            error!("pictrs err: {}", e);
-            None
-          }
-        },
-      };
-
-      // The full urls are necessary for federation
-      let pictrs_thumbnail = if let Some(pictrs_hash) = pictrs_hash {
-        Some(format!(
-          "{}://{}/pictrs/image/{}",
-          get_apub_protocol_string(),
-          Settings::get().hostname,
-          pictrs_hash
-        ))
-      } else {
-        None
-      };
-
-      (
-        iframely_title,
-        iframely_description,
-        iframely_html,
-        pictrs_thumbnail,
-      )
-    }
-    None => (None, None, None, None),
-  }
-}
-
-pub(crate) async fn is_image_content_type(client: &Client, test: &str) -> Result<(), LemmyError> {
-  let response = retry(|| client.get(test).send()).await?;
-
-  if response
-    .headers()
-    .get("Content-Type")
-    .ok_or_else(|| anyhow!("No Content-Type header"))?
-    .to_str()?
-    .starts_with("image/")
-  {
-    Ok(())
-  } else {
-    Err(anyhow!("Not an image type.").into())
-  }
-}
-
 #[cfg(test)]
 mod tests {
-  use crate::{captcha_espeak_wav_base64, is_image_content_type};
-
-  #[test]
-  fn test_image() {
-    actix_rt::System::new("tset_image").block_on(async move {
-      let client = reqwest::Client::default();
-      assert!(is_image_content_type(&client, "https://1734811051.rsc.cdn77.org/data/images/full/365645/as-virus-kills-navajos-in-their-homes-tribal-women-provide-lifeline.jpg?w=600?w=650").await.is_ok());
-      assert!(is_image_content_type(&client,
-                                    "https://twitter.com/BenjaminNorton/status/1259922424272957440?s=20"
-      )
-        .await.is_err()
-      );
-    });
-  }
+  use crate::captcha_espeak_wav_base64;
 
   #[test]
   fn test_espeak() {
     assert!(captcha_espeak_wav_base64("WxRt2l").is_ok())
   }
-
-  // These helped with testing
-  // #[test]
-  // fn test_iframely() {
-  //   let res = fetch_iframely(client, "https://www.redspark.nu/?p=15341").await;
-  //   assert!(res.is_ok());
-  // }
-
-  // #[test]
-  // fn test_pictshare() {
-  //   let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
-  //   assert!(res.is_ok());
-  //   let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
-  //   assert!(res_other.is_err());
-  // }
 }
index e8fb9d984216b85d4c5a3926b0fb47ef0bd23ae3..b3fbe8c99a7af69471837cef8e539562909f8b2b 100644 (file)
@@ -1,6 +1,5 @@
 use crate::{
   check_community_ban,
-  fetch_iframely_and_pictrs_data,
   get_user_from_jwt,
   get_user_from_jwt_opt,
   is_mod_or_admin,
@@ -25,6 +24,7 @@ use lemmy_db::{
 use lemmy_structs::{blocking, post::*};
 use lemmy_utils::{
   apub::{make_apub_endpoint, EndpointType},
+  request::fetch_iframely_and_pictrs_data,
   utils::{check_slurs, check_slurs_opt, is_valid_post_title},
   APIError,
   ConnectionId,
index 07ecb8f703acfa4822aeb8b305bc51ff0df1553a..8f5ffbcb8f4310fd481b256ee4e5a36b68232552 100644 (file)
@@ -24,7 +24,7 @@ use activitystreams::{
     Undo,
     Update,
   },
-  object::{kind::PageType, Image, Object, Page, Tombstone},
+  object::{kind::PageType, Image, Page, Tombstone},
   prelude::*,
   public,
 };
@@ -41,6 +41,7 @@ use lemmy_db::{
 use lemmy_structs::blocking;
 use lemmy_utils::{
   location_info,
+  request::fetch_iframely_and_pictrs_data,
   utils::{check_slurs, convert_datetime, remove_slurs},
   LemmyError,
 };
@@ -104,24 +105,6 @@ impl ToApub for Post {
     let url = self.url.as_ref().filter(|u| !u.is_empty());
     if let Some(u) = url {
       page.set_url(u.to_owned());
-
-      // Embeds
-      let mut page_preview = Page::new();
-      page_preview.set_url(u.to_owned());
-
-      if let Some(embed_title) = &self.embed_title {
-        page_preview.set_name(embed_title.to_owned());
-      }
-
-      if let Some(embed_description) = &self.embed_description {
-        page_preview.set_summary(embed_description.to_owned());
-      }
-
-      if let Some(embed_html) = &self.embed_html {
-        page_preview.set_content(embed_html.to_owned());
-      }
-
-      page.set_preview(page_preview.into_any_base()?);
     }
 
     if let Some(thumbnail_url) = &self.thumbnail_url {
@@ -147,50 +130,6 @@ impl ToApub for Post {
   }
 }
 
-struct EmbedType {
-  title: Option<String>,
-  description: Option<String>,
-  html: Option<String>,
-}
-
-fn extract_embed_from_apub(
-  page: &Ext1<Object<PageType>, PageExtension>,
-) -> Result<EmbedType, LemmyError> {
-  match page.inner.preview() {
-    Some(preview) => {
-      let preview_page = Page::from_any_base(preview.one().context(location_info!())?.to_owned())?
-        .context(location_info!())?;
-      let title = preview_page
-        .name()
-        .map(|n| n.one())
-        .flatten()
-        .map(|s| s.as_xsd_string())
-        .flatten()
-        .map(|s| s.to_string());
-      let description = preview_page
-        .summary()
-        .map(|s| s.as_single_xsd_string())
-        .flatten()
-        .map(|s| s.to_string());
-      let html = preview_page
-        .content()
-        .map(|c| c.as_single_xsd_string())
-        .flatten()
-        .map(|s| s.to_string());
-      Ok(EmbedType {
-        title,
-        description,
-        html,
-      })
-    }
-    None => Ok(EmbedType {
-      title: None,
-      description: None,
-      html: None,
-    }),
-  }
-}
-
 #[async_trait::async_trait(?Send)]
 impl FromApub for PostForm {
   type ApubType = PageExt;
@@ -237,8 +176,19 @@ impl FromApub for PostForm {
       .map(|u| u.to_string()),
       None => None,
     };
+    let url = page
+      .inner
+      .url()
+      .map(|u| u.as_single_xsd_any_uri())
+      .flatten()
+      .map(|s| s.to_string());
 
-    let embed = extract_embed_from_apub(page)?;
+    let (iframely_title, iframely_description, iframely_html, pictrs_thumbnail) =
+      if let Some(url) = &url {
+        fetch_iframely_and_pictrs_data(context.client(), Some(url.to_owned())).await
+      } else {
+        (None, None, None, thumbnail_url)
+      };
 
     let name = page
       .inner
@@ -248,12 +198,6 @@ impl FromApub for PostForm {
       .as_single_xsd_string()
       .context(location_info!())?
       .to_string();
-    let url = page
-      .inner
-      .url()
-      .map(|u| u.as_single_xsd_any_uri())
-      .flatten()
-      .map(|s| s.to_string());
     let body = page
       .inner
       .content()
@@ -284,10 +228,10 @@ impl FromApub for PostForm {
       deleted: None,
       nsfw: ext.sensitive,
       stickied: Some(ext.stickied),
-      embed_title: embed.title,
-      embed_description: embed.description,
-      embed_html: embed.html,
-      thumbnail_url,
+      embed_title: iframely_title,
+      embed_description: iframely_description,
+      embed_html: iframely_html,
+      thumbnail_url: pictrs_thumbnail,
       ap_id: Some(check_actor_domain(page, expected_domain)?),
       local: false,
     })
index a575bb18a20340476bf2da94762720dec3808376..25744b17d43c5c46ce4f16c73f6c076f7bc86c3e 100644 (file)
@@ -16,6 +16,7 @@ lettre_email = "0.9"
 log = "0.4"
 itertools = "0.9"
 rand = "0.7"
+percent-encoding = "2.1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0", features = ["preserve_order"]}
 thiserror = "1.0"
index 490609e7dc0be31954e5fd961e79e12579119f4e..4aa70c6fd42a4655b978754a41404479b2ee0f75 100644 (file)
@@ -1,5 +1,9 @@
-use crate::LemmyError;
+use crate::{apub::get_apub_protocol_string, settings::Settings, LemmyError};
 use anyhow::anyhow;
+use log::error;
+use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
+use reqwest::Client;
+use serde::Deserialize;
 use std::future::Future;
 use thiserror::Error;
 
@@ -41,3 +45,175 @@ where
 
   response
 }
+
+#[derive(Deserialize, Debug)]
+pub(crate) struct IframelyResponse {
+  title: Option<String>,
+  description: Option<String>,
+  thumbnail_url: Option<String>,
+  html: Option<String>,
+}
+
+pub(crate) async fn fetch_iframely(
+  client: &Client,
+  url: &str,
+) -> Result<IframelyResponse, LemmyError> {
+  let fetch_url = format!("http://iframely/oembed?url={}", url);
+
+  let response = retry(|| client.get(&fetch_url).send()).await?;
+
+  let res: IframelyResponse = response
+    .json()
+    .await
+    .map_err(|e| RecvError(e.to_string()))?;
+  Ok(res)
+}
+
+#[derive(Deserialize, Debug, Clone)]
+pub(crate) struct PictrsResponse {
+  files: Vec<PictrsFile>,
+  msg: String,
+}
+
+#[derive(Deserialize, Debug, Clone)]
+pub(crate) struct PictrsFile {
+  file: String,
+  delete_token: String,
+}
+
+pub(crate) async fn fetch_pictrs(
+  client: &Client,
+  image_url: &str,
+) -> Result<PictrsResponse, LemmyError> {
+  is_image_content_type(client, image_url).await?;
+
+  let fetch_url = format!(
+    "http://pictrs:8080/image/download?url={}",
+    utf8_percent_encode(image_url, NON_ALPHANUMERIC) // TODO this might not be needed
+  );
+
+  let response = retry(|| client.get(&fetch_url).send()).await?;
+
+  let response: PictrsResponse = response
+    .json()
+    .await
+    .map_err(|e| RecvError(e.to_string()))?;
+
+  if response.msg == "ok" {
+    Ok(response)
+  } else {
+    Err(anyhow!("{}", &response.msg).into())
+  }
+}
+
+pub async fn fetch_iframely_and_pictrs_data(
+  client: &Client,
+  url: Option<String>,
+) -> (
+  Option<String>,
+  Option<String>,
+  Option<String>,
+  Option<String>,
+) {
+  match &url {
+    Some(url) => {
+      // Fetch iframely data
+      let (iframely_title, iframely_description, iframely_thumbnail_url, iframely_html) =
+        match fetch_iframely(client, url).await {
+          Ok(res) => (res.title, res.description, res.thumbnail_url, res.html),
+          Err(e) => {
+            error!("iframely err: {}", e);
+            (None, None, None, None)
+          }
+        };
+
+      // Fetch pictrs thumbnail
+      let pictrs_hash = match iframely_thumbnail_url {
+        Some(iframely_thumbnail_url) => match fetch_pictrs(client, &iframely_thumbnail_url).await {
+          Ok(res) => Some(res.files[0].file.to_owned()),
+          Err(e) => {
+            error!("pictrs err: {}", e);
+            None
+          }
+        },
+        // Try to generate a small thumbnail if iframely is not supported
+        None => match fetch_pictrs(client, &url).await {
+          Ok(res) => Some(res.files[0].file.to_owned()),
+          Err(e) => {
+            error!("pictrs err: {}", e);
+            None
+          }
+        },
+      };
+
+      // The full urls are necessary for federation
+      let pictrs_thumbnail = if let Some(pictrs_hash) = pictrs_hash {
+        Some(format!(
+          "{}://{}/pictrs/image/{}",
+          get_apub_protocol_string(),
+          Settings::get().hostname,
+          pictrs_hash
+        ))
+      } else {
+        None
+      };
+
+      (
+        iframely_title,
+        iframely_description,
+        iframely_html,
+        pictrs_thumbnail,
+      )
+    }
+    None => (None, None, None, None),
+  }
+}
+
+async fn is_image_content_type(client: &Client, test: &str) -> Result<(), LemmyError> {
+  let response = retry(|| client.get(test).send()).await?;
+
+  if response
+    .headers()
+    .get("Content-Type")
+    .ok_or_else(|| anyhow!("No Content-Type header"))?
+    .to_str()?
+    .starts_with("image/")
+  {
+    Ok(())
+  } else {
+    Err(anyhow!("Not an image type.").into())
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  use crate::request::is_image_content_type;
+
+  #[test]
+  fn test_image() {
+    actix_rt::System::new("tset_image").block_on(async move {
+      let client = reqwest::Client::default();
+      assert!(is_image_content_type(&client, "https://1734811051.rsc.cdn77.org/data/images/full/365645/as-virus-kills-navajos-in-their-homes-tribal-women-provide-lifeline.jpg?w=600?w=650").await.is_ok());
+      assert!(is_image_content_type(&client,
+                                    "https://twitter.com/BenjaminNorton/status/1259922424272957440?s=20"
+      )
+        .await.is_err()
+      );
+    });
+  }
+
+  // These helped with testing
+  // #[test]
+  // fn test_iframely() {
+  //   let res = fetch_iframely(client, "https://www.redspark.nu/?p=15341").await;
+  //   assert!(res.is_ok());
+  // }
+
+  // #[test]
+  // fn test_pictshare() {
+  //   let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
+  //   assert!(res.is_ok());
+  //   let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
+  //   assert!(res_other.is_err());
+  // }
+}