From: nutomic Date: Thu, 24 Sep 2020 17:43:42 +0000 (+0000) Subject: Dont federate embeds, but refetch them for security (#106) X-Git-Url: http://these/git/%7B%60%24%7BwebArchiveUrl%7D/%7BtotpUrl%7D?a=commitdiff_plain;h=bfed8a8be4a1c4ca65f9faa37780a1a89e8095c1;p=lemmy.git Dont federate embeds, but refetch them for security (#106) Dont federate embeds, but refetch them for security (#ref 647) Co-authored-by: Felix Ableitner Reviewed-on: https://yerbamate.dev/LemmyNet/lemmy/pulls/106 --- diff --git a/Cargo.lock b/Cargo.lock index fe119382..7c709ffa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1833,7 +1833,6 @@ dependencies = [ "lemmy_websocket", "log", "openssl", - "percent-encoding", "rand 0.7.3", "reqwest", "serde 1.0.116", @@ -1984,6 +1983,7 @@ dependencies = [ "lettre_email", "log", "openssl", + "percent-encoding", "rand 0.7.3", "regex", "reqwest", diff --git a/lemmy_api/Cargo.toml b/lemmy_api/Cargo.toml index b302854f..e8f0aa86 100644 --- a/lemmy_api/Cargo.toml +++ b/lemmy_api/Cargo.toml @@ -31,7 +31,6 @@ strum_macros = "0.19" jsonwebtoken = "7.0" lazy_static = "1.3" url = { version = "2.1", features = ["serde"] } -percent-encoding = "2.1" openssl = "0.10" http = "0.2" http-signature-normalization-actix = { version = "0.4", default-features = false, features = ["sha-2"] } diff --git a/lemmy_api/src/lib.rs b/lemmy_api/src/lib.rs index 11ec4b34..1b9222f8 100644 --- a/lemmy_api/src/lib.rs +++ b/lemmy_api/src/lib.rs @@ -1,6 +1,5 @@ use crate::claims::Claims; use actix_web::{web, web::Data}; -use anyhow::anyhow; use lemmy_db::{ community::Community, community_view::CommunityUserBanView, @@ -10,18 +9,8 @@ use lemmy_db::{ DbPool, }; use lemmy_structs::{blocking, comment::*, community::*, post::*, site::*, user::*}; -use lemmy_utils::{ - apub::get_apub_protocol_string, - request::{retry, RecvError}, - settings::Settings, - APIError, - ConnectionId, - LemmyError, -}; +use lemmy_utils::{settings::Settings, APIError, ConnectionId, LemmyError}; use lemmy_websocket::{serialize_websocket_message, LemmyContext, UserOperation}; -use log::error; -use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; -use reqwest::Client; use serde::Deserialize; use std::process::Command; use url::Url; @@ -361,179 +350,12 @@ pub(crate) fn espeak_wav_base64(text: &str) -> Result { Ok(base64) } -#[derive(Deserialize, Debug)] -pub(crate) struct IframelyResponse { - title: Option, - description: Option, - thumbnail_url: Option, - html: Option, -} - -pub(crate) async fn fetch_iframely( - client: &Client, - url: &str, -) -> Result { - let fetch_url = format!("http://iframely/oembed?url={}", url); - - let response = retry(|| client.get(&fetch_url).send()).await?; - - let res: IframelyResponse = response - .json() - .await - .map_err(|e| RecvError(e.to_string()))?; - Ok(res) -} - -#[derive(Deserialize, Debug, Clone)] -pub(crate) struct PictrsResponse { - files: Vec, - msg: String, -} - -#[derive(Deserialize, Debug, Clone)] -pub(crate) struct PictrsFile { - file: String, - delete_token: String, -} - -pub(crate) async fn fetch_pictrs( - client: &Client, - image_url: &str, -) -> Result { - is_image_content_type(client, image_url).await?; - - let fetch_url = format!( - "http://pictrs:8080/image/download?url={}", - utf8_percent_encode(image_url, NON_ALPHANUMERIC) // TODO this might not be needed - ); - - let response = retry(|| client.get(&fetch_url).send()).await?; - - let response: PictrsResponse = response - .json() - .await - .map_err(|e| RecvError(e.to_string()))?; - - if response.msg == "ok" { - Ok(response) - } else { - Err(anyhow!("{}", &response.msg).into()) - } -} - -async fn fetch_iframely_and_pictrs_data( - client: &Client, - url: Option, -) -> ( - Option, - Option, - Option, - Option, -) { - match &url { - Some(url) => { - // Fetch iframely data - let (iframely_title, iframely_description, iframely_thumbnail_url, iframely_html) = - match fetch_iframely(client, url).await { - Ok(res) => (res.title, res.description, res.thumbnail_url, res.html), - Err(e) => { - error!("iframely err: {}", e); - (None, None, None, None) - } - }; - - // Fetch pictrs thumbnail - let pictrs_hash = match iframely_thumbnail_url { - Some(iframely_thumbnail_url) => match fetch_pictrs(client, &iframely_thumbnail_url).await { - Ok(res) => Some(res.files[0].file.to_owned()), - Err(e) => { - error!("pictrs err: {}", e); - None - } - }, - // Try to generate a small thumbnail if iframely is not supported - None => match fetch_pictrs(client, &url).await { - Ok(res) => Some(res.files[0].file.to_owned()), - Err(e) => { - error!("pictrs err: {}", e); - None - } - }, - }; - - // The full urls are necessary for federation - let pictrs_thumbnail = if let Some(pictrs_hash) = pictrs_hash { - Some(format!( - "{}://{}/pictrs/image/{}", - get_apub_protocol_string(), - Settings::get().hostname, - pictrs_hash - )) - } else { - None - }; - - ( - iframely_title, - iframely_description, - iframely_html, - pictrs_thumbnail, - ) - } - None => (None, None, None, None), - } -} - -pub(crate) async fn is_image_content_type(client: &Client, test: &str) -> Result<(), LemmyError> { - let response = retry(|| client.get(test).send()).await?; - - if response - .headers() - .get("Content-Type") - .ok_or_else(|| anyhow!("No Content-Type header"))? - .to_str()? - .starts_with("image/") - { - Ok(()) - } else { - Err(anyhow!("Not an image type.").into()) - } -} - #[cfg(test)] mod tests { - use crate::{captcha_espeak_wav_base64, is_image_content_type}; - - #[test] - fn test_image() { - actix_rt::System::new("tset_image").block_on(async move { - let client = reqwest::Client::default(); - assert!(is_image_content_type(&client, "https://1734811051.rsc.cdn77.org/data/images/full/365645/as-virus-kills-navajos-in-their-homes-tribal-women-provide-lifeline.jpg?w=600?w=650").await.is_ok()); - assert!(is_image_content_type(&client, - "https://twitter.com/BenjaminNorton/status/1259922424272957440?s=20" - ) - .await.is_err() - ); - }); - } + use crate::captcha_espeak_wav_base64; #[test] fn test_espeak() { assert!(captcha_espeak_wav_base64("WxRt2l").is_ok()) } - - // These helped with testing - // #[test] - // fn test_iframely() { - // let res = fetch_iframely(client, "https://www.redspark.nu/?p=15341").await; - // assert!(res.is_ok()); - // } - - // #[test] - // fn test_pictshare() { - // let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg"); - // assert!(res.is_ok()); - // let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu"); - // assert!(res_other.is_err()); - // } } diff --git a/lemmy_api/src/post.rs b/lemmy_api/src/post.rs index e8fb9d98..b3fbe8c9 100644 --- a/lemmy_api/src/post.rs +++ b/lemmy_api/src/post.rs @@ -1,6 +1,5 @@ use crate::{ check_community_ban, - fetch_iframely_and_pictrs_data, get_user_from_jwt, get_user_from_jwt_opt, is_mod_or_admin, @@ -25,6 +24,7 @@ use lemmy_db::{ use lemmy_structs::{blocking, post::*}; use lemmy_utils::{ apub::{make_apub_endpoint, EndpointType}, + request::fetch_iframely_and_pictrs_data, utils::{check_slurs, check_slurs_opt, is_valid_post_title}, APIError, ConnectionId, diff --git a/lemmy_apub/src/post.rs b/lemmy_apub/src/post.rs index 07ecb8f7..8f5ffbcb 100644 --- a/lemmy_apub/src/post.rs +++ b/lemmy_apub/src/post.rs @@ -24,7 +24,7 @@ use activitystreams::{ Undo, Update, }, - object::{kind::PageType, Image, Object, Page, Tombstone}, + object::{kind::PageType, Image, Page, Tombstone}, prelude::*, public, }; @@ -41,6 +41,7 @@ use lemmy_db::{ use lemmy_structs::blocking; use lemmy_utils::{ location_info, + request::fetch_iframely_and_pictrs_data, utils::{check_slurs, convert_datetime, remove_slurs}, LemmyError, }; @@ -104,24 +105,6 @@ impl ToApub for Post { let url = self.url.as_ref().filter(|u| !u.is_empty()); if let Some(u) = url { page.set_url(u.to_owned()); - - // Embeds - let mut page_preview = Page::new(); - page_preview.set_url(u.to_owned()); - - if let Some(embed_title) = &self.embed_title { - page_preview.set_name(embed_title.to_owned()); - } - - if let Some(embed_description) = &self.embed_description { - page_preview.set_summary(embed_description.to_owned()); - } - - if let Some(embed_html) = &self.embed_html { - page_preview.set_content(embed_html.to_owned()); - } - - page.set_preview(page_preview.into_any_base()?); } if let Some(thumbnail_url) = &self.thumbnail_url { @@ -147,50 +130,6 @@ impl ToApub for Post { } } -struct EmbedType { - title: Option, - description: Option, - html: Option, -} - -fn extract_embed_from_apub( - page: &Ext1, PageExtension>, -) -> Result { - match page.inner.preview() { - Some(preview) => { - let preview_page = Page::from_any_base(preview.one().context(location_info!())?.to_owned())? - .context(location_info!())?; - let title = preview_page - .name() - .map(|n| n.one()) - .flatten() - .map(|s| s.as_xsd_string()) - .flatten() - .map(|s| s.to_string()); - let description = preview_page - .summary() - .map(|s| s.as_single_xsd_string()) - .flatten() - .map(|s| s.to_string()); - let html = preview_page - .content() - .map(|c| c.as_single_xsd_string()) - .flatten() - .map(|s| s.to_string()); - Ok(EmbedType { - title, - description, - html, - }) - } - None => Ok(EmbedType { - title: None, - description: None, - html: None, - }), - } -} - #[async_trait::async_trait(?Send)] impl FromApub for PostForm { type ApubType = PageExt; @@ -237,8 +176,19 @@ impl FromApub for PostForm { .map(|u| u.to_string()), None => None, }; + let url = page + .inner + .url() + .map(|u| u.as_single_xsd_any_uri()) + .flatten() + .map(|s| s.to_string()); - let embed = extract_embed_from_apub(page)?; + let (iframely_title, iframely_description, iframely_html, pictrs_thumbnail) = + if let Some(url) = &url { + fetch_iframely_and_pictrs_data(context.client(), Some(url.to_owned())).await + } else { + (None, None, None, thumbnail_url) + }; let name = page .inner @@ -248,12 +198,6 @@ impl FromApub for PostForm { .as_single_xsd_string() .context(location_info!())? .to_string(); - let url = page - .inner - .url() - .map(|u| u.as_single_xsd_any_uri()) - .flatten() - .map(|s| s.to_string()); let body = page .inner .content() @@ -284,10 +228,10 @@ impl FromApub for PostForm { deleted: None, nsfw: ext.sensitive, stickied: Some(ext.stickied), - embed_title: embed.title, - embed_description: embed.description, - embed_html: embed.html, - thumbnail_url, + embed_title: iframely_title, + embed_description: iframely_description, + embed_html: iframely_html, + thumbnail_url: pictrs_thumbnail, ap_id: Some(check_actor_domain(page, expected_domain)?), local: false, }) diff --git a/lemmy_utils/Cargo.toml b/lemmy_utils/Cargo.toml index a575bb18..25744b17 100644 --- a/lemmy_utils/Cargo.toml +++ b/lemmy_utils/Cargo.toml @@ -16,6 +16,7 @@ lettre_email = "0.9" log = "0.4" itertools = "0.9" rand = "0.7" +percent-encoding = "2.1" serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", features = ["preserve_order"]} thiserror = "1.0" diff --git a/lemmy_utils/src/request.rs b/lemmy_utils/src/request.rs index 490609e7..4aa70c6f 100644 --- a/lemmy_utils/src/request.rs +++ b/lemmy_utils/src/request.rs @@ -1,5 +1,9 @@ -use crate::LemmyError; +use crate::{apub::get_apub_protocol_string, settings::Settings, LemmyError}; use anyhow::anyhow; +use log::error; +use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; +use reqwest::Client; +use serde::Deserialize; use std::future::Future; use thiserror::Error; @@ -41,3 +45,175 @@ where response } + +#[derive(Deserialize, Debug)] +pub(crate) struct IframelyResponse { + title: Option, + description: Option, + thumbnail_url: Option, + html: Option, +} + +pub(crate) async fn fetch_iframely( + client: &Client, + url: &str, +) -> Result { + let fetch_url = format!("http://iframely/oembed?url={}", url); + + let response = retry(|| client.get(&fetch_url).send()).await?; + + let res: IframelyResponse = response + .json() + .await + .map_err(|e| RecvError(e.to_string()))?; + Ok(res) +} + +#[derive(Deserialize, Debug, Clone)] +pub(crate) struct PictrsResponse { + files: Vec, + msg: String, +} + +#[derive(Deserialize, Debug, Clone)] +pub(crate) struct PictrsFile { + file: String, + delete_token: String, +} + +pub(crate) async fn fetch_pictrs( + client: &Client, + image_url: &str, +) -> Result { + is_image_content_type(client, image_url).await?; + + let fetch_url = format!( + "http://pictrs:8080/image/download?url={}", + utf8_percent_encode(image_url, NON_ALPHANUMERIC) // TODO this might not be needed + ); + + let response = retry(|| client.get(&fetch_url).send()).await?; + + let response: PictrsResponse = response + .json() + .await + .map_err(|e| RecvError(e.to_string()))?; + + if response.msg == "ok" { + Ok(response) + } else { + Err(anyhow!("{}", &response.msg).into()) + } +} + +pub async fn fetch_iframely_and_pictrs_data( + client: &Client, + url: Option, +) -> ( + Option, + Option, + Option, + Option, +) { + match &url { + Some(url) => { + // Fetch iframely data + let (iframely_title, iframely_description, iframely_thumbnail_url, iframely_html) = + match fetch_iframely(client, url).await { + Ok(res) => (res.title, res.description, res.thumbnail_url, res.html), + Err(e) => { + error!("iframely err: {}", e); + (None, None, None, None) + } + }; + + // Fetch pictrs thumbnail + let pictrs_hash = match iframely_thumbnail_url { + Some(iframely_thumbnail_url) => match fetch_pictrs(client, &iframely_thumbnail_url).await { + Ok(res) => Some(res.files[0].file.to_owned()), + Err(e) => { + error!("pictrs err: {}", e); + None + } + }, + // Try to generate a small thumbnail if iframely is not supported + None => match fetch_pictrs(client, &url).await { + Ok(res) => Some(res.files[0].file.to_owned()), + Err(e) => { + error!("pictrs err: {}", e); + None + } + }, + }; + + // The full urls are necessary for federation + let pictrs_thumbnail = if let Some(pictrs_hash) = pictrs_hash { + Some(format!( + "{}://{}/pictrs/image/{}", + get_apub_protocol_string(), + Settings::get().hostname, + pictrs_hash + )) + } else { + None + }; + + ( + iframely_title, + iframely_description, + iframely_html, + pictrs_thumbnail, + ) + } + None => (None, None, None, None), + } +} + +async fn is_image_content_type(client: &Client, test: &str) -> Result<(), LemmyError> { + let response = retry(|| client.get(test).send()).await?; + + if response + .headers() + .get("Content-Type") + .ok_or_else(|| anyhow!("No Content-Type header"))? + .to_str()? + .starts_with("image/") + { + Ok(()) + } else { + Err(anyhow!("Not an image type.").into()) + } +} + +#[cfg(test)] +mod tests { + use crate::request::is_image_content_type; + + #[test] + fn test_image() { + actix_rt::System::new("tset_image").block_on(async move { + let client = reqwest::Client::default(); + assert!(is_image_content_type(&client, "https://1734811051.rsc.cdn77.org/data/images/full/365645/as-virus-kills-navajos-in-their-homes-tribal-women-provide-lifeline.jpg?w=600?w=650").await.is_ok()); + assert!(is_image_content_type(&client, + "https://twitter.com/BenjaminNorton/status/1259922424272957440?s=20" + ) + .await.is_err() + ); + }); + } + + // These helped with testing + // #[test] + // fn test_iframely() { + // let res = fetch_iframely(client, "https://www.redspark.nu/?p=15341").await; + // assert!(res.is_ok()); + // } + + // #[test] + // fn test_pictshare() { + // let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg"); + // assert!(res.is_ok()); + // let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu"); + // assert!(res_other.is_err()); + // } +}