X-Git-Url: http://these/git/?a=blobdiff_plain;f=crates%2Fapi_common%2Fsrc%2Frequest.rs;h=b62514c02f22b3139ec4e3c268ff0ce15ba1bd5e;hb=70fae9d68d65b1e4d153e30d3c065cc315b75eaf;hp=37bb39a0c0c41815c9029bbc548b2a5939821440;hpb=4e12e25c59beef296c750fec640b0b80c4c11de9;p=lemmy.git diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 37bb39a0..b62514c0 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -2,7 +2,7 @@ use crate::post::SiteMetadata; use encoding::{all::encodings, DecoderTrap}; use lemmy_db_schema::newtypes::DbUrl; use lemmy_utils::{ - error::LemmyError, + error::{LemmyError, LemmyErrorType}, settings::structs::Settings, version::VERSION, REQWEST_TIMEOUT, @@ -27,27 +27,24 @@ pub async fn fetch_site_metadata( // https://github.com/LemmyNet/lemmy/issues/1964 let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec(); - let tags = html_to_site_metadata(&html_bytes)?; + let tags = html_to_site_metadata(&html_bytes, url)?; Ok(tags) } -fn html_to_site_metadata(html_bytes: &[u8]) -> Result { +fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result { let html = String::from_utf8_lossy(html_bytes); // Make sure the first line is doctype html let first_line = html .trim_start() .lines() - .into_iter() .next() - .ok_or_else(|| LemmyError::from_message("No lines in html"))? + .ok_or(LemmyErrorType::NoLinesInHtml)? .to_lowercase(); if !first_line.starts_with("") { - return Err(LemmyError::from_message( - "Site metadata page fetch is not DOCTYPE html", - )); + Err(LemmyErrorType::SiteMetadataPageIsNotDoctypeHtml)?; } let mut page = HTML::from_string(html.to_string(), None)?; @@ -72,22 +69,24 @@ fn html_to_site_metadata(html_bytes: &[u8]) -> Result .opengraph .properties .get("description") - .map(|t| t.to_string()); + .map(std::string::ToString::to_string); let og_title = page .opengraph .properties .get("title") - .map(|t| t.to_string()); + .map(std::string::ToString::to_string); let og_image = page .opengraph .images - .get(0) - .and_then(|ogo| Url::parse(&ogo.url).ok()); + .first() + // join also works if the target URL is absolute + .and_then(|ogo| url.join(&ogo.url).ok()); let og_embed_url = page .opengraph .videos .first() - .and_then(|v| Url::parse(&v.url).ok()); + // join also works if the target URL is absolute + .and_then(|v| url.join(&v.url).ok()); Ok(SiteMetadata { title: og_title.or(page_title), @@ -125,7 +124,7 @@ pub(crate) async fn fetch_pictrs( is_image_content_type(client, image_url).await?; let fetch_url = format!( - "{}/image/download?url={}", + "{}image/download?url={}", pictrs_config.url, utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed ); @@ -141,7 +140,7 @@ pub(crate) async fn fetch_pictrs( if response.msg == "ok" { Ok(response) } else { - Err(LemmyError::from_message(&response.msg)) + Err(LemmyErrorType::PictrsResponseError(response.msg))? } } @@ -160,16 +159,19 @@ pub async fn purge_image_from_pictrs( let alias = image_url .path_segments() - .ok_or_else(|| LemmyError::from_message("Image URL missing path segments"))? + .ok_or(LemmyErrorType::ImageUrlMissingPathSegments)? .next_back() - .ok_or_else(|| LemmyError::from_message("Image URL missing last path segment"))?; + .ok_or(LemmyErrorType::ImageUrlMissingLastPathSegment)?; let purge_url = format!("{}/internal/purge?alias={}", pictrs_config.url, alias); + let pictrs_api_key = pictrs_config + .api_key + .ok_or(LemmyErrorType::PictrsApiKeyNotProvided)?; let response = client .post(&purge_url) .timeout(REQWEST_TIMEOUT) - .header("x-api-token", pictrs_config.api_key) + .header("x-api-token", pictrs_api_key) .send() .await?; @@ -178,7 +180,7 @@ pub async fn purge_image_from_pictrs( if response.msg == "ok" { Ok(()) } else { - Err(LemmyError::from_message(&response.msg)) + Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))? } } @@ -189,6 +191,7 @@ pub async fn fetch_site_data( client: &ClientWithMiddleware, settings: &Settings, url: Option<&Url>, + include_image: bool, ) -> (Option, Option) { match &url { Some(url) => { @@ -196,6 +199,12 @@ pub async fn fetch_site_data( // Ignore errors, since it may be an image, or not have the data. // Warning, this may ignore SSL errors let metadata_option = fetch_site_metadata(client, url).await.ok(); + if !include_image { + return (metadata_option, None); + } + + let missing_pictrs_file = + |r: PictrsResponse| r.files.first().expect("missing pictrs file").file.clone(); // Fetch pictrs thumbnail let pictrs_hash = match &metadata_option { @@ -204,16 +213,16 @@ pub async fn fetch_site_data( // Try to generate a small thumbnail if there's a full sized one from post-links Some(metadata_image) => fetch_pictrs(client, settings, metadata_image) .await - .map(|r| r.files[0].file.to_owned()), + .map(missing_pictrs_file), // Metadata, but no image None => fetch_pictrs(client, settings, url) .await - .map(|r| r.files[0].file.to_owned()), + .map(missing_pictrs_file), }, // No metadata, try to fetch the URL as an image None => fetch_pictrs(client, settings, url) .await - .map(|r| r.files[0].file.to_owned()), + .map(missing_pictrs_file), }; // The full urls are necessary for federation @@ -241,13 +250,13 @@ async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Resu if response .headers() .get("Content-Type") - .ok_or_else(|| LemmyError::from_message("No Content-Type header"))? + .ok_or(LemmyErrorType::NoContentTypeHeader)? .to_str()? .starts_with("image/") { Ok(()) } else { - Err(LemmyError::from_message("Not an image type.")) + Err(LemmyErrorType::NotAnImageType)? } } @@ -261,16 +270,24 @@ pub fn build_user_agent(settings: &Settings) -> String { #[cfg(test)] mod tests { - use crate::request::{build_user_agent, fetch_site_metadata, SiteMetadata}; - use lemmy_utils::settings::structs::Settings; + #![allow(clippy::unwrap_used)] + #![allow(clippy::indexing_slicing)] + + use crate::request::{ + build_user_agent, + fetch_site_metadata, + html_to_site_metadata, + SiteMetadata, + }; + use lemmy_utils::settings::SETTINGS; use url::Url; // These helped with testing - #[actix_rt::test] + #[tokio::test] async fn test_site_metadata() { - let settings = Settings::init().unwrap(); + let settings = &SETTINGS.clone(); let client = reqwest::Client::builder() - .user_agent(build_user_agent(&settings)) + .user_agent(build_user_agent(settings)) .build() .unwrap() .into(); @@ -300,4 +317,46 @@ mod tests { // let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu"); // assert!(res_other.is_err()); // } + + #[test] + fn test_resolve_image_url() { + // url that lists the opengraph fields + let url = Url::parse("https://example.com/one/two.html").unwrap(); + + // root relative url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some(Url::parse("https://example.com/image.jpg").unwrap().into()) + ); + + // base relative url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some( + Url::parse("https://example.com/one/image.jpg") + .unwrap() + .into() + ) + ); + + // absolute url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some(Url::parse("https://cdn.host.com/image.jpg").unwrap().into()) + ); + + // protocol relative url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some(Url::parse("https://example.com/image.jpg").unwrap().into()) + ); + } }