X-Git-Url: http://these/git/?a=blobdiff_plain;f=crates%2Fapi_common%2Fsrc%2Frequest.rs;h=b62514c02f22b3139ec4e3c268ff0ce15ba1bd5e;hb=70fae9d68d65b1e4d153e30d3c065cc315b75eaf;hp=260abce1bb0649ee9061a3b30e0103e9afa5c49a;hpb=8c0c1628e08f8db4f602f1b69a7f94ba60bfc2d5;p=lemmy.git diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 260abce1..b62514c0 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -2,7 +2,7 @@ use crate::post::SiteMetadata; use encoding::{all::encodings, DecoderTrap}; use lemmy_db_schema::newtypes::DbUrl; use lemmy_utils::{ - error::LemmyError, + error::{LemmyError, LemmyErrorType}, settings::structs::Settings, version::VERSION, REQWEST_TIMEOUT, @@ -27,27 +27,24 @@ pub async fn fetch_site_metadata( // https://github.com/LemmyNet/lemmy/issues/1964 let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec(); - let tags = html_to_site_metadata(&html_bytes)?; + let tags = html_to_site_metadata(&html_bytes, url)?; Ok(tags) } -fn html_to_site_metadata(html_bytes: &[u8]) -> Result { +fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result { let html = String::from_utf8_lossy(html_bytes); // Make sure the first line is doctype html let first_line = html .trim_start() .lines() - .into_iter() .next() - .ok_or_else(|| LemmyError::from_message("No lines in html"))? + .ok_or(LemmyErrorType::NoLinesInHtml)? .to_lowercase(); if !first_line.starts_with("") { - return Err(LemmyError::from_message( - "Site metadata page fetch is not DOCTYPE html", - )); + Err(LemmyErrorType::SiteMetadataPageIsNotDoctypeHtml)?; } let mut page = HTML::from_string(html.to_string(), None)?; @@ -82,12 +79,14 @@ fn html_to_site_metadata(html_bytes: &[u8]) -> Result .opengraph .images .first() - .and_then(|ogo| Url::parse(&ogo.url).ok()); + // join also works if the target URL is absolute + .and_then(|ogo| url.join(&ogo.url).ok()); let og_embed_url = page .opengraph .videos .first() - .and_then(|v| Url::parse(&v.url).ok()); + // join also works if the target URL is absolute + .and_then(|v| url.join(&v.url).ok()); Ok(SiteMetadata { title: og_title.or(page_title), @@ -141,7 +140,7 @@ pub(crate) async fn fetch_pictrs( if response.msg == "ok" { Ok(response) } else { - Err(LemmyError::from_message(&response.msg)) + Err(LemmyErrorType::PictrsResponseError(response.msg))? } } @@ -160,15 +159,15 @@ pub async fn purge_image_from_pictrs( let alias = image_url .path_segments() - .ok_or_else(|| LemmyError::from_message("Image URL missing path segments"))? + .ok_or(LemmyErrorType::ImageUrlMissingPathSegments)? .next_back() - .ok_or_else(|| LemmyError::from_message("Image URL missing last path segment"))?; + .ok_or(LemmyErrorType::ImageUrlMissingLastPathSegment)?; let purge_url = format!("{}/internal/purge?alias={}", pictrs_config.url, alias); let pictrs_api_key = pictrs_config .api_key - .ok_or_else(|| LemmyError::from_message("pictrs_api_key_not_provided"))?; + .ok_or(LemmyErrorType::PictrsApiKeyNotProvided)?; let response = client .post(&purge_url) .timeout(REQWEST_TIMEOUT) @@ -181,7 +180,7 @@ pub async fn purge_image_from_pictrs( if response.msg == "ok" { Ok(()) } else { - Err(LemmyError::from_message(&response.msg)) + Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))? } } @@ -192,6 +191,7 @@ pub async fn fetch_site_data( client: &ClientWithMiddleware, settings: &Settings, url: Option<&Url>, + include_image: bool, ) -> (Option, Option) { match &url { Some(url) => { @@ -199,6 +199,9 @@ pub async fn fetch_site_data( // Ignore errors, since it may be an image, or not have the data. // Warning, this may ignore SSL errors let metadata_option = fetch_site_metadata(client, url).await.ok(); + if !include_image { + return (metadata_option, None); + } let missing_pictrs_file = |r: PictrsResponse| r.files.first().expect("missing pictrs file").file.clone(); @@ -247,13 +250,13 @@ async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Resu if response .headers() .get("Content-Type") - .ok_or_else(|| LemmyError::from_message("No Content-Type header"))? + .ok_or(LemmyErrorType::NoContentTypeHeader)? .to_str()? .starts_with("image/") { Ok(()) } else { - Err(LemmyError::from_message("Not an image type.")) + Err(LemmyErrorType::NotAnImageType)? } } @@ -267,12 +270,20 @@ pub fn build_user_agent(settings: &Settings) -> String { #[cfg(test)] mod tests { - use crate::request::{build_user_agent, fetch_site_metadata, SiteMetadata}; + #![allow(clippy::unwrap_used)] + #![allow(clippy::indexing_slicing)] + + use crate::request::{ + build_user_agent, + fetch_site_metadata, + html_to_site_metadata, + SiteMetadata, + }; use lemmy_utils::settings::SETTINGS; use url::Url; // These helped with testing - #[actix_rt::test] + #[tokio::test] async fn test_site_metadata() { let settings = &SETTINGS.clone(); let client = reqwest::Client::builder() @@ -306,4 +317,46 @@ mod tests { // let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu"); // assert!(res_other.is_err()); // } + + #[test] + fn test_resolve_image_url() { + // url that lists the opengraph fields + let url = Url::parse("https://example.com/one/two.html").unwrap(); + + // root relative url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some(Url::parse("https://example.com/image.jpg").unwrap().into()) + ); + + // base relative url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some( + Url::parse("https://example.com/one/image.jpg") + .unwrap() + .into() + ) + ); + + // absolute url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some(Url::parse("https://cdn.host.com/image.jpg").unwrap().into()) + ); + + // protocol relative url + let html_bytes = b""; + let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + assert_eq!( + metadata.image, + Some(Url::parse("https://example.com/image.jpg").unwrap().into()) + ); + } }