use crate::post::SiteMetadata;
use encoding::{all::encodings, DecoderTrap};
-use lemmy_utils::{settings::structs::Settings, version::VERSION, LemmyError, REQWEST_TIMEOUT};
+use lemmy_db_schema::newtypes::DbUrl;
+use lemmy_utils::{
+ error::{LemmyError, LemmyErrorType},
+ settings::structs::Settings,
+ version::VERSION,
+ REQWEST_TIMEOUT,
+};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest_middleware::ClientWithMiddleware;
use serde::Deserialize;
url: &Url,
) -> Result<SiteMetadata, LemmyError> {
info!("Fetching site metadata for url: {}", url);
- let response = client
- .get(url.as_str())
- .timeout(REQWEST_TIMEOUT)
- .send()
- .await?;
+ let response = client.get(url.as_str()).send().await?;
// Can't use .text() here, because it only checks the content header, not the actual bytes
// https://github.com/LemmyNet/lemmy/issues/1964
let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec();
- let tags = html_to_site_metadata(&html_bytes)?;
+ let tags = html_to_site_metadata(&html_bytes, url)?;
Ok(tags)
}
-fn html_to_site_metadata(html_bytes: &[u8]) -> Result<SiteMetadata, LemmyError> {
+fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result<SiteMetadata, LemmyError> {
let html = String::from_utf8_lossy(html_bytes);
// Make sure the first line is doctype html
let first_line = html
.trim_start()
.lines()
- .into_iter()
.next()
- .ok_or_else(|| LemmyError::from_message("No lines in html"))?
+ .ok_or(LemmyErrorType::NoLinesInHtml)?
.to_lowercase();
if !first_line.starts_with("<!doctype html>") {
- return Err(LemmyError::from_message(
- "Site metadata page fetch is not DOCTYPE html",
- ));
+ Err(LemmyErrorType::SiteMetadataPageIsNotDoctypeHtml)?;
}
let mut page = HTML::from_string(html.to_string(), None)?;
.opengraph
.properties
.get("description")
- .map(|t| t.to_string());
+ .map(std::string::ToString::to_string);
let og_title = page
.opengraph
.properties
.get("title")
- .map(|t| t.to_string());
+ .map(std::string::ToString::to_string);
let og_image = page
.opengraph
.images
- .get(0)
- .and_then(|ogo| Url::parse(&ogo.url).ok());
-
- let title = og_title.or(page_title);
- let description = og_description.or(page_description);
- let image = og_image;
+ .first()
+ // join also works if the target URL is absolute
+ .and_then(|ogo| url.join(&ogo.url).ok());
+ let og_embed_url = page
+ .opengraph
+ .videos
+ .first()
+ // join also works if the target URL is absolute
+ .and_then(|v| url.join(&v.url).ok());
Ok(SiteMetadata {
- title,
- description,
- image,
- html: None,
+ title: og_title.or(page_title),
+ description: og_description.or(page_description),
+ image: og_image.map(Into::into),
+ embed_video_url: og_embed_url.map(Into::into),
})
}
delete_token: String,
}
+/// Body of the response pictrs sends back for a purge request; it is deserialized from JSON
+/// in `purge_image_from_pictrs`.
+#[derive(Deserialize, Debug, Clone)]
+pub(crate) struct PictrsPurgeResponse {
+ /// Status text: `"ok"` is treated as success, any other value is passed on as the error.
+ msg: String,
+}
+
+/// Asks pictrs to download `image_url` through its `image/download` endpoint and returns the
+/// parsed response.
+///
+/// Before contacting pictrs, `image_url` is checked with `is_image_content_type`. The image
+/// url is percent-encoded (`NON_ALPHANUMERIC`) before being placed in the `url` query
+/// parameter.
+///
+/// # Errors
+/// - `settings.pictrs_config()` fails
+/// - `image_url` does not pass the image content type check
+/// - the request to pictrs fails, or its body cannot be parsed as a `PictrsResponse`
+/// - pictrs answers with a `msg` other than `"ok"` (`PictrsResponseError` carrying that `msg`)
#[tracing::instrument(skip_all)]
pub(crate) async fn fetch_pictrs(
client: &ClientWithMiddleware,
settings: &Settings,
image_url: &Url,
) -> Result<PictrsResponse, LemmyError> {
- if let Some(pictrs_url) = settings.pictrs_url.to_owned() {
- is_image_content_type(client, image_url).await?;
+ let pictrs_config = settings.pictrs_config()?;
+ is_image_content_type(client, image_url).await?;
- let fetch_url = format!(
- "{}/image/download?url={}",
- pictrs_url,
- utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed
- );
+ // NOTE(review): no `/` is inserted after `pictrs_config.url`, so it presumably already
+ // ends with one - confirm against the settings type.
+ let fetch_url = format!(
+ "{}image/download?url={}",
+ pictrs_config.url,
+ utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed
+ );
- let response = client
- .get(&fetch_url)
- .timeout(REQWEST_TIMEOUT)
- .send()
- .await?;
+ let response = client
+ .get(&fetch_url)
+ .timeout(REQWEST_TIMEOUT)
+ .send()
+ .await?;
- let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?;
+ let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?;
- if response.msg == "ok" {
- Ok(response)
- } else {
- Err(LemmyError::from_message(&response.msg))
- }
+ // pictrs reports success in the body's `msg` field
+ if response.msg == "ok" {
+ Ok(response)
+ } else {
+ Err(LemmyErrorType::PictrsResponseError(response.msg))?
+ }
+}
+
+/// Purges an image from pictrs.
+///
+/// The last path segment of `image_url` is used as the pictrs alias, and a POST carrying the
+/// configured api key in the `x-api-token` header is sent to pictrs' `internal/purge`
+/// endpoint. `Ok(())` is returned only when pictrs answers with `msg == "ok"`.
+///
+/// Note: This should often be coerced from a Result to .ok() in order to fail softly, because:
+/// - It might fail due to image being not local
+/// - It might not be an image
+/// - Pictrs might not be set up
+///
+/// # Errors
+/// - `settings.pictrs_config()` fails
+/// - `image_url` does not pass the image content type check
+/// - `image_url` has no path segments, or no last path segment
+/// - no pictrs api key is configured (`PictrsApiKeyNotProvided`)
+/// - the request fails, or its body cannot be parsed as a `PictrsPurgeResponse`
+/// - pictrs answers with a `msg` other than `"ok"` (`PictrsPurgeResponseError`)
+pub async fn purge_image_from_pictrs(
+ client: &ClientWithMiddleware,
+ settings: &Settings,
+ image_url: &Url,
+) -> Result<(), LemmyError> {
+ let pictrs_config = settings.pictrs_config()?;
+ is_image_content_type(client, image_url).await?;
+
+ let alias = image_url
+ .path_segments()
+ .ok_or(LemmyErrorType::ImageUrlMissingPathSegments)?
+ .next_back()
+ .ok_or(LemmyErrorType::ImageUrlMissingLastPathSegment)?;
+
+ // Built the same way as the download url in `fetch_pictrs`: no extra `/` after
+ // `pictrs_config.url`, otherwise the request would target `//internal/purge`.
+ let purge_url = format!("{}internal/purge?alias={}", pictrs_config.url, alias);
+
+ let pictrs_api_key = pictrs_config
+ .api_key
+ .ok_or(LemmyErrorType::PictrsApiKeyNotProvided)?;
+ let response = client
+ .post(&purge_url)
+ .timeout(REQWEST_TIMEOUT)
+ .header("x-api-token", pictrs_api_key)
+ .send()
+ .await?;
+
+ let response: PictrsPurgeResponse = response.json().await.map_err(LemmyError::from)?;
+
+ if response.msg == "ok" {
+ Ok(())
} else {
- Err(LemmyError::from_message("pictrs_url not set up in config"))
+ Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))?
}
}
client: &ClientWithMiddleware,
settings: &Settings,
url: Option<&Url>,
-) -> (Option<SiteMetadata>, Option<Url>) {
+ include_image: bool,
+) -> (Option<SiteMetadata>, Option<DbUrl>) {
match &url {
Some(url) => {
// Fetch metadata
// Ignore errors, since it may be an image, or not have the data.
// Warning, this may ignore SSL errors
let metadata_option = fetch_site_metadata(client, url).await.ok();
+ if !include_image {
+ return (metadata_option, None);
+ }
+
+ let missing_pictrs_file =
+ |r: PictrsResponse| r.files.first().expect("missing pictrs file").file.clone();
// Fetch pictrs thumbnail
let pictrs_hash = match &metadata_option {
// Try to generate a small thumbnail if there's a full sized one from post-links
Some(metadata_image) => fetch_pictrs(client, settings, metadata_image)
.await
- .map(|r| r.files[0].file.to_owned()),
+ .map(missing_pictrs_file),
// Metadata, but no image
None => fetch_pictrs(client, settings, url)
.await
- .map(|r| r.files[0].file.to_owned()),
+ .map(missing_pictrs_file),
},
// No metadata, try to fetch the URL as an image
None => fetch_pictrs(client, settings, url)
.await
- .map(|r| r.files[0].file.to_owned()),
+ .map(missing_pictrs_file),
};
// The full urls are necessary for federation
.ok()
.flatten();
- (metadata_option, pictrs_thumbnail)
+ (metadata_option, pictrs_thumbnail.map(Into::into))
}
None => (None, None),
}
+/// Checks that `url` answers a GET request with a `Content-Type` header starting with
+/// `image/`.
+///
+/// Only the response headers are inspected.
+///
+/// # Errors
+/// - the request fails
+/// - the response has no `Content-Type` header (`NoContentTypeHeader`)
+/// - the header value cannot be converted with `to_str()`
+/// - the header value does not start with `image/` (`NotAnImageType`)
#[tracing::instrument(skip_all)]
async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> {
- let response = client
- .get(url.as_str())
- .timeout(REQWEST_TIMEOUT)
- .send()
- .await?;
+ // NOTE(review): the per-request timeout is dropped here; presumably `client` is built
+ // with a default timeout - confirm where the client is constructed.
+ let response = client.get(url.as_str()).send().await?;
if response
.headers()
.get("Content-Type")
- .ok_or_else(|| LemmyError::from_message("No Content-Type header"))?
+ .ok_or(LemmyErrorType::NoContentTypeHeader)?
.to_str()?
.starts_with("image/")
{
Ok(())
} else {
- Err(LemmyError::from_message("Not an image type."))
+ Err(LemmyErrorType::NotAnImageType)?
}
}
#[cfg(test)]
mod tests {
- use crate::request::{build_user_agent, fetch_site_metadata, SiteMetadata};
- use lemmy_utils::settings::structs::Settings;
+ #![allow(clippy::unwrap_used)]
+ #![allow(clippy::indexing_slicing)]
+
+ use crate::request::{
+ build_user_agent,
+ fetch_site_metadata,
+ html_to_site_metadata,
+ SiteMetadata,
+ };
+ use lemmy_utils::settings::SETTINGS;
use url::Url;
// These helped with testing
- #[actix_rt::test]
+ #[tokio::test]
async fn test_site_metadata() {
- let settings = Settings::init().unwrap();
+ let settings = &SETTINGS.clone();
let client = reqwest::Client::builder()
- .user_agent(build_user_agent(&settings))
+ .user_agent(build_user_agent(settings))
.build()
.unwrap()
.into();
image: Some(
Url::parse("https://gitlab.com/uploads/-/system/project/avatar/4877469/iod_logo.png")
.unwrap()
+ .into()
),
- html: None,
+ embed_video_url: None,
},
sample_res
);
// let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
// assert!(res_other.is_err());
// }
+
+ #[test]
+ fn test_resolve_image_url() {
+ // page that carries the opengraph tags; every og:image value is resolved against it
+ let page_url = Url::parse("https://example.com/one/two.html").unwrap();
+
+ // parses the given document and hands back only the resolved image url
+ let resolved_image = |html: &[u8]| {
+ html_to_site_metadata(html, &page_url)
+ .expect("Unable to parse metadata")
+ .image
+ };
+
+ // og:image given relative to the site root
+ assert_eq!(
+ resolved_image(b"<!DOCTYPE html><html><head><meta property='og:image' content='/image.jpg'></head><body></body></html>"),
+ Some(Url::parse("https://example.com/image.jpg").unwrap().into())
+ );
+
+ // og:image given relative to the page's own directory
+ assert_eq!(
+ resolved_image(b"<!DOCTYPE html><html><head><meta property='og:image' content='image.jpg'></head><body></body></html>"),
+ Some(
+ Url::parse("https://example.com/one/image.jpg")
+ .unwrap()
+ .into()
+ )
+ );
+
+ // og:image that is already absolute
+ assert_eq!(
+ resolved_image(b"<!DOCTYPE html><html><head><meta property='og:image' content='https://cdn.host.com/image.jpg'></head><body></body></html>"),
+ Some(Url::parse("https://cdn.host.com/image.jpg").unwrap().into())
+ );
+
+ // og:image without a scheme (protocol relative)
+ assert_eq!(
+ resolved_image(b"<!DOCTYPE html><html><head><meta property='og:image' content='//example.com/image.jpg'></head><body></body></html>"),
+ Some(Url::parse("https://example.com/image.jpg").unwrap().into())
+ );
+ }
}