1 use crate::{settings::structs::Settings, LemmyError};
4 use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
6 use serde::{Deserialize, Serialize};
7 use std::future::Future;
/// Error wrapping a message about a request that could not be sent.
/// The `#[error]` attribute below supplies its message template:
/// "Error sending request, " followed by the wrapped string.
// NOTE(review): this type is module-private and nothing in the visible part of
// this file constructs it — confirm it is still needed.
12 #[derive(Clone, Debug, Error)]
13 #[error("Error sending request, {0}")]
14 struct SendError(pub String);
/// Error wrapping a message about a response that could not be received.
/// The `#[error]` attribute below supplies its message template:
/// "Error receiving response, " followed by the wrapped string.
///
/// Within this file it is produced through
/// `map_err(|e| RecvError(e.to_string()))`, i.e. it carries the text of the
/// underlying error.
16 #[derive(Clone, Debug, Error)]
17 #[error("Error receiving response, {0}")]
18 pub struct RecvError(pub String);
/// Runs the request future produced by `f` through `retry_custom`.
///
/// Each result of `f` is wrapped in `Ok`, which adapts it to the nested
/// `Result<Result<T, _>, _>` output that `retry_custom` expects from its
/// closure.
// NOTE(review): the retry policy itself lives in `retry_custom`, and only part
// of that function is visible in this view.
20 pub async fn retry<F, Fut, T>(f: F) -> Result<T, reqwest::Error>
23 Fut: Future<Output = Result<T, reqwest::Error>>,
25 retry_custom(|| async { Ok((f)().await) }).await
/// Drives the closure `f`, whose future yields a nested `Result`, and returns
/// the inner outcome.
// NOTE(review): several lines of this function are missing from this view, so
// the exact retry conditions and the number of attempts cannot be stated here.
28 async fn retry_custom<F, Fut, T>(f: F) -> Result<T, reqwest::Error>
31 Fut: Future<Output = Result<Result<T, reqwest::Error>, reqwest::Error>>,
// Holds an error stored by the match arm below; starts out as `None`.
33 let mut response: Option<Result<T, reqwest::Error>> = None;
// A successful inner result ends the function immediately.
37 Ok(t) => return Ok(t),
// Store the error in `response` (what follows this assignment is not visible
// in this view).
40 response = Some(Err(e));
// NOTE(review): this panics with "retry http request" if `response` is still
// `None` at this point — confirm every path reaching here has stored an error.
48 response.expect("retry http request")
/// Metadata collected for a linked page; each visible field is optional.
// NOTE(review): code elsewhere in this file reads an `image` member
// (`metadata_res.image`); its declaration is not visible in this view.
51 #[derive(Deserialize, Serialize, Debug, PartialEq, Clone)]
52 pub struct SiteMetadata {
/// Title text, when one is available.
53 pub title: Option<String>,
/// Description text, when one is available.
54 pub description: Option<String>,
// NOTE(review): no visible code in this file populates `html` — confirm its
// purpose against the callers.
56 pub html: Option<String>,
59 /// Fetches the post link html tags (like title, description, image, etc)
///
/// # Errors
/// Propagates a failure of the GET request, a `RecvError` produced while
/// handling the response, or a failure of `html_to_site_metadata`.
60 pub async fn fetch_site_metadata(client: &Client, url: &Url) -> Result<SiteMetadata, LemmyError> {
// Issue the GET through `retry`.
61 let response = retry(|| client.get(url.as_str()).send()).await?;
// The error of the preceding step (not visible in this view) is converted into
// a `RecvError` carrying its text.
66 .map_err(|e| RecvError(e.to_string()))?;
// Extract the metadata from the page markup.
68 let tags = html_to_site_metadata(&html)?;
/// Parses `html` and derives the title and description used for a
/// `SiteMetadata`.
///
/// The `og_*` values are preferred; the page-level `title` / `description`
/// are used when the corresponding `og_*` value is `None`.
// NOTE(review): `og_` presumably stands for OpenGraph; the accessor lines are
// not visible in this view — confirm.
73 fn html_to_site_metadata(html: &str) -> Result<SiteMetadata, LemmyError> {
// `from_string` is handed an owned copy of `html`; a parse failure is returned
// via `?`.
74 let page = HTML::from_string(html.to_string(), None)?;
76 let page_title = page.title;
77 let page_description = page.description;
79 let og_description = page
83 .map(|t| t.to_string());
88 .map(|t| t.to_string());
// A URL string that fails to parse yields `None` here rather than an error.
93 .map(|ogo| Url::parse(&ogo.url).ok())
// Prefer the `og_*` value, falling back to the page-level one.
96 let title = og_title.or(page_title);
97 let description = og_description.or(page_description);
/// Response type returned by `fetch_pictrs`; it derives `Deserialize`.
// NOTE(review): `fetch_pictrs` also reads a `msg` member and compares it to
// "ok"; its declaration is not visible in this view.
108 #[derive(Deserialize, Debug, Clone)]
109 pub(crate) struct PictrsResponse {
/// Files reported in the response; `fetch_site_data` reads the first entry.
110 files: Vec<PictrsFile>,
/// One entry of `PictrsResponse::files`.
// NOTE(review): callers read a `file` member (`r.files[0].file`); its
// declaration is not visible in this view.
114 #[derive(Deserialize, Debug, Clone)]
115 pub(crate) struct PictrsFile {
// NOTE(review): presumably the token needed to delete this file later; no
// visible code in this file reads it — confirm.
118 delete_token: String,
/// Requests `<pictrs_url>/image/download?url=<image_url>` and returns the
/// resulting `PictrsResponse`.
///
/// # Errors
/// - `settings.pictrs_url` is `None` ("pictrs_url not set up in config").
/// - `is_image_content_type` rejects `image_url`.
/// - The download request fails, or handling its response fails (`RecvError`).
/// - An error built from the response's `msg`; the `msg == "ok"` check guards
///   the success path.
// NOTE(review): the parameter list is not visible in this view; the body uses
// `client`, `settings` and `image_url`.
121 pub(crate) async fn fetch_pictrs(
125 ) -> Result<PictrsResponse, LemmyError> {
126 if let Some(pictrs_url) = settings.pictrs_url.to_owned() {
// Bail out early unless the target passes the image content-type check.
127 is_image_content_type(client, image_url).await?;
// Build the download endpoint URL, passing the image URL as the `url` query
// parameter.
129 let fetch_url = format!(
130 "{}/image/download?url={}",
// Every non-alphanumeric byte of the image URL is percent-encoded.
132 utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed
135 let response = retry(|| client.get(&fetch_url).send()).await?;
137 let response: PictrsResponse = response
140 .map_err(|e| RecvError(e.to_string()))?;
142 if response.msg == "ok" {
// The service's own message becomes the error text.
145 Err(anyhow!("{}", &response.msg).into())
148 Err(anyhow!("pictrs_url not set up in config").into())
152 /// Both are options, since the URL might be either an html page, or an image
153 /// Returns the SiteMetadata, and a Pictrs URL, if there is a picture associated
// NOTE(review): the parameter list is not visible in this view; the body uses
// `client`, `settings` and `url`.
154 pub async fn fetch_site_data(
158 ) -> (Option<SiteMetadata>, Option<Url>) {
162 // Ignore errors, since it may be an image, or not have the data.
163 // Warning, this may ignore SSL errors
164 let metadata_option = fetch_site_metadata(client, url).await.ok();
166 // Fetch pictrs thumbnail
167 let pictrs_hash = match &metadata_option {
168 Some(metadata_res) => match &metadata_res.image {
169 // Metadata, with image
170 // Try to generate a small thumbnail if there's a full sized one from post-links
171 Some(metadata_image) => fetch_pictrs(client, settings, metadata_image)
// NOTE(review): `r.files[0]` panics when `files` is empty; `.first()` would
// avoid that. The same applies to the two arms below.
173 .map(|r| r.files[0].file.to_owned()),
174 // Metadata, but no image
175 None => fetch_pictrs(client, settings, url)
177 .map(|r| r.files[0].file.to_owned()),
// NOTE(review): this arm issues the same `fetch_pictrs(client, settings, url)`
// call as the "Metadata, but no image" arm above.
179 // No metadata, try to fetch the URL as an image
180 None => fetch_pictrs(client, settings, url)
182 .map(|r| r.files[0].file.to_owned()),
185 // The full urls are necessary for federation
186 let pictrs_thumbnail = pictrs_hash
// Thumbnail URL layout: this instance's protocol and hostname, then
// `/pictrs/image/`, then one more formatted value (not visible in this view).
189 "{}/pictrs/image/{}",
190 settings.get_protocol_and_hostname(),
198 (metadata_option, pictrs_thumbnail)
// NOTE(review): the `match` that this `None` arm belongs to is not visible in
// this view; the arm yields neither metadata nor a thumbnail.
200 None => (None, None),
/// Requests `test` and checks for a content type starting with "image/".
///
/// # Errors
/// Fails when the request fails, with "No Content-Type header" when that
/// header is absent, or with "Not an image type." when the check does not pass.
// NOTE(review): this performs a full GET to inspect a header, and `fetch_pictrs`
// then asks pict-rs to download the same URL again; a HEAD request might
// suffice — confirm before changing.
204 async fn is_image_content_type(client: &Client, test: &Url) -> Result<(), LemmyError> {
// `to_owned` clones the `Url` each time the closure runs.
205 let response = retry(|| client.get(test.to_owned()).send()).await?;
209 .ok_or_else(|| anyhow!("No Content-Type header"))?
211 .starts_with("image/")
215 Err(anyhow!("Not an image type.").into())
221 use crate::request::fetch_site_metadata;
224 use super::SiteMetadata;
226 // These helped with testing
// NOTE(review): this test contacts live third-party sites (redspark.nu and
// youtube.com) and the expected values below mirror those pages' content, so
// it needs network access and can break whenever the pages change.
228 async fn test_site_metadata() {
229 let client = reqwest::Client::default();
230 let sample_url = Url::parse("https://www.redspark.nu/en/peoples-war/district-leader-of-chand-led-cpn-arrested-in-bhojpur/").unwrap();
231 let sample_res = fetch_site_metadata(&client, &sample_url).await.unwrap();
234 title: Some("District Leader Of Chand Led CPN Arrested In Bhojpur - Redspark".to_string()),
235 description: Some("BHOJPUR: A district leader of the outlawed Netra Bikram Chand alias Biplav-led outfit has been arrested. According to District Police".to_string()),
236 image: Some(Url::parse("https://www.redspark.nu/wp-content/uploads/2020/03/netra-bikram-chand-attends-program-1272019033653-1000x0-845x653-1.jpg").unwrap()),
// Second sample: a video page instead of an article.
240 let youtube_url = Url::parse("https://www.youtube.com/watch?v=IquO_TcMZIQ").unwrap();
241 let youtube_res = fetch_site_metadata(&client, &youtube_url).await.unwrap();
244 title: Some("A Hard Look at Rent and Rent Seeking with Michael Hudson & Pepe Escobar".to_string()),
245 description: Some("An interactive discussion on wealth inequality and the “Great Game” on the control of natural resources.In this webinar organized jointly by the Henry George...".to_string()),
246 image: Some(Url::parse("https://i.ytimg.com/vi/IquO_TcMZIQ/maxresdefault.jpg").unwrap()),
252 // fn test_pictshare() {
253 // let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
254 // assert!(res.is_ok());
255 // let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
256 // assert!(res_other.is_err());