X-Git-Url: http://these/git/?a=blobdiff_plain;f=crates%2Fapub%2Fsrc%2Fobjects%2Fpost.rs;h=48b573d30ab0dfdfd58398cdd2159ce7ee266a85;hb=92568956353f21649ed9aff68b42699c9d036f30;hp=19bfe8aabae631405bf32a71b2286bef86f68a39;hpb=f54209c451d1ef749c371a03b4eb84f67f78ada4;p=lemmy.git diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index 19bfe8aa..48b573d3 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -1,243 +1,328 @@ use crate::{ - check_is_apub_id_valid, - extensions::{context::lemmy_context, page_extension::PageExtension}, - fetcher::person::get_or_fetch_and_upsert_person, - get_community_from_to_or_cc, - objects::{ - check_object_domain, - check_object_for_community_or_site_ban, - create_tombstone, - get_object_from_apub, - get_source_markdown_value, - set_content_and_source, - FromApub, - FromApubToForm, - ToApub, + activities::{verify_is_public, verify_person_in_community}, + check_apub_id_valid_with_strictness, + local_site_data_cached, + objects::{read_from_string_or_source_opt, verify_is_remote_object}, + protocol::{ + objects::{ + page::{Attachment, AttributedTo, Page, PageType}, + LanguageTag, + }, + ImageObject, + InCommunity, + Source, }, - PageExt, }; -use activitystreams::{ - object::{kind::PageType, ApObject, Image, Page, Tombstone}, - prelude::*, - public, +use activitypub_federation::{ + config::Data, + kinds::public, + protocol::{values::MediaTypeMarkdownOrHtml, verification::verify_domains_match}, + traits::Object, +}; +use anyhow::anyhow; +use chrono::NaiveDateTime; +use html2md::parse_html; +use lemmy_api_common::{ + context::LemmyContext, + request::fetch_site_data, + utils::{is_mod_or_admin, local_site_opt_to_sensitive, local_site_opt_to_slur_regex}, }; -use activitystreams_ext::Ext1; -use anyhow::Context; -use lemmy_api_common::blocking; -use lemmy_db_queries::{Crud, DbPool}; use lemmy_db_schema::{ self, source::{ community::Community, + local_site::LocalSite, + moderator::{ModLockPost, ModLockPostForm}, person::Person, - post::{Post, PostForm}, + post::{Post, PostInsertForm, PostUpdateForm}, }, + traits::Crud, }; use lemmy_utils::{ - location_info, - request::fetch_iframely_and_pictrs_data, - utils::{check_slurs, convert_datetime, remove_slurs}, - LemmyError, + error::LemmyError, + utils::{ + markdown::markdown_to_html, + slurs::{check_slurs_opt, remove_slurs}, + time::convert_datetime, + validation::check_url_scheme, + }, }; -use lemmy_websocket::LemmyContext; +use std::ops::Deref; use url::Url; -#[async_trait::async_trait(?Send)] -impl ToApub for Post { - type ApubType = PageExt; +const MAX_TITLE_LENGTH: usize = 200; - // Turn a Lemmy post into an ActivityPub page that can be sent out over the network. - async fn to_apub(&self, pool: &DbPool) -> Result { - let mut page = ApObject::new(Page::new()); +#[derive(Clone, Debug)] +pub struct ApubPost(pub(crate) Post); - let creator_id = self.creator_id; - let creator = blocking(pool, move |conn| Person::read(conn, creator_id)).await??; +impl Deref for ApubPost { + type Target = Post; + fn deref(&self) -> &Self::Target { + &self.0 + } +} - let community_id = self.community_id; - let community = blocking(pool, move |conn| Community::read(conn, community_id)).await??; - - page - // Not needed when the Post is embedded in a collection (like for community outbox) - // TODO: need to set proper context defining sensitive/commentsEnabled fields - // https://git.asonix.dog/Aardwolf/activitystreams/issues/5 - .set_many_contexts(lemmy_context()?) - .set_id(self.ap_id.to_owned().into_inner()) - .set_name(self.name.to_owned()) - // `summary` field for compatibility with lemmy v0.9.9 and older, - // TODO: remove this after some time - .set_summary(self.name.to_owned()) - .set_published(convert_datetime(self.published)) - .set_many_tos(vec![community.actor_id.into_inner(), public()]) - .set_attributed_to(creator.actor_id.into_inner()); - - if let Some(body) = &self.body { - set_content_and_source(&mut page, body)?; - } +impl From for ApubPost { + fn from(p: Post) -> Self { + ApubPost(p) + } +} - if let Some(url) = &self.url { - page.set_url::(url.to_owned().into()); - } +#[async_trait::async_trait] +impl Object for ApubPost { + type DataType = LemmyContext; + type Kind = Page; + type Error = LemmyError; - if let Some(thumbnail_url) = &self.thumbnail_url { - let mut image = Image::new(); - image.set_url::(thumbnail_url.to_owned().into()); - page.set_image(image.into_any_base()?); - } + fn last_refreshed_at(&self) -> Option { + None + } - if let Some(u) = self.updated { - page.set_updated(convert_datetime(u)); + #[tracing::instrument(skip_all)] + async fn read_from_id( + object_id: Url, + context: &Data, + ) -> Result, LemmyError> { + Ok( + Post::read_from_apub_id(&mut context.pool(), object_id) + .await? + .map(Into::into), + ) + } + + #[tracing::instrument(skip_all)] + async fn delete(self, context: &Data) -> Result<(), LemmyError> { + if !self.deleted { + let form = PostUpdateForm::builder().deleted(Some(true)).build(); + Post::update(&mut context.pool(), self.id, &form).await?; } + Ok(()) + } + + // Turn a Lemmy post into an ActivityPub page that can be sent out over the network. + #[tracing::instrument(skip_all)] + async fn into_json(self, context: &Data) -> Result { + let creator_id = self.creator_id; + let creator = Person::read(&mut context.pool(), creator_id).await?; + let community_id = self.community_id; + let community = Community::read(&mut context.pool(), community_id).await?; + let language = LanguageTag::new_single(self.language_id, &mut context.pool()).await?; - let ext = PageExtension { + let page = Page { + kind: PageType::Page, + id: self.ap_id.clone().into(), + attributed_to: AttributedTo::Lemmy(creator.actor_id.into()), + to: vec![community.actor_id.clone().into(), public()], + cc: vec![], + name: Some(self.name.clone()), + content: self.body.as_ref().map(|b| markdown_to_html(b)), + media_type: Some(MediaTypeMarkdownOrHtml::Html), + source: self.body.clone().map(Source::new), + attachment: self.url.clone().map(Attachment::new).into_iter().collect(), + image: self.thumbnail_url.clone().map(ImageObject::new), comments_enabled: Some(!self.locked), sensitive: Some(self.nsfw), - stickied: Some(self.stickied), + language, + published: Some(convert_datetime(self.published)), + updated: self.updated.map(convert_datetime), + audience: Some(community.actor_id.into()), + in_reply_to: None, }; - Ok(Ext1::new(page, ext)) + Ok(page) } - fn to_tombstone(&self) -> Result { - create_tombstone( - self.deleted, - self.ap_id.to_owned().into(), - self.updated, - PageType::Page, - ) - } -} + #[tracing::instrument(skip_all)] + async fn verify( + page: &Page, + expected_domain: &Url, + context: &Data, + ) -> Result<(), LemmyError> { + // We can't verify the domain in case of mod action, because the mod may be on a different + // instance from the post author. + if !page.is_mod_action(context).await? { + verify_domains_match(page.id.inner(), expected_domain)?; + verify_is_remote_object(page.id.inner(), context.settings())?; + }; -#[async_trait::async_trait(?Send)] -impl FromApub for Post { - type ApubType = PageExt; - - /// Converts a `PageExt` to `PostForm`. - /// - /// If the post's community or creator are not known locally, these are also fetched. - async fn from_apub( - page: &PageExt, - context: &LemmyContext, - expected_domain: Url, - request_counter: &mut i32, - mod_action_allowed: bool, - ) -> Result { - let post: Post = get_object_from_apub( - page, - context, - expected_domain, - request_counter, - mod_action_allowed, - ) - .await?; - check_object_for_community_or_site_ban(page, post.community_id, context, request_counter) - .await?; - Ok(post) + let community = page.community(context).await?; + check_apub_id_valid_with_strictness(page.id.inner(), community.local, context).await?; + verify_person_in_community(&page.creator()?, &community, context).await?; + + let local_site_data = local_site_data_cached(&mut context.pool()).await?; + let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site); + check_slurs_opt(&page.name, slur_regex)?; + + verify_domains_match(page.creator()?.inner(), page.id.inner())?; + verify_is_public(&page.to, &page.cc)?; + Ok(()) } -} -#[async_trait::async_trait(?Send)] -impl FromApubToForm for PostForm { - async fn from_apub( - page: &PageExt, - context: &LemmyContext, - expected_domain: Url, - request_counter: &mut i32, - mod_action_allowed: bool, - ) -> Result { - let community = get_community_from_to_or_cc(page, context, request_counter).await?; - let ap_id = if mod_action_allowed { - let id = page.id_unchecked().context(location_info!())?; - check_is_apub_id_valid(id, community.local)?; - id.to_owned().into() + #[tracing::instrument(skip_all)] + async fn from_json(page: Page, context: &Data) -> Result { + let creator = page.creator()?.dereference(context).await?; + let community = page.community(context).await?; + if community.posting_restricted_to_mods { + is_mod_or_admin(&mut context.pool(), creator.id, community.id).await?; + } + let mut name = page + .name + .clone() + .or_else(|| { + page + .content + .clone() + .as_ref() + .and_then(|c| parse_html(c).lines().next().map(ToString::to_string)) + }) + .ok_or_else(|| anyhow!("Object must have name or content"))?; + if name.chars().count() > MAX_TITLE_LENGTH { + name = name.chars().take(MAX_TITLE_LENGTH).collect(); + } + + // read existing, local post if any (for generating mod log) + let old_post = page.id.dereference_local(context).await; + + let form = if !page.is_mod_action(context).await? { + let first_attachment = page.attachment.into_iter().map(Attachment::url).next(); + let url = if first_attachment.is_some() { + first_attachment + } else if page.kind == PageType::Video { + // we cant display videos directly, so insert a link to external video page + Some(page.id.inner().clone()) + } else { + None + }; + check_url_scheme(&url)?; + + let local_site = LocalSite::read(&mut context.pool()).await.ok(); + let allow_sensitive = local_site_opt_to_sensitive(&local_site); + let page_is_sensitive = page.sensitive.unwrap_or(false); + let include_image = allow_sensitive || !page_is_sensitive; + + // Only fetch metadata if the post has a url and was not seen previously. We dont want to + // waste resources by fetching metadata for the same post multiple times. + // Additionally, only fetch image if content is not sensitive or is allowed on local site. + let (metadata_res, thumbnail) = match &url { + Some(url) if old_post.is_err() => { + fetch_site_data( + context.client(), + context.settings(), + Some(url), + include_image, + ) + .await + } + _ => (None, None), + }; + // If no image was included with metadata, use post image instead when available. + let thumbnail_url = thumbnail.or_else(|| page.image.map(|i| i.url.into())); + + let (embed_title, embed_description, embed_video_url) = metadata_res + .map(|u| (u.title, u.description, u.embed_video_url)) + .unwrap_or_default(); + let slur_regex = &local_site_opt_to_slur_regex(&local_site); + + let body_slurs_removed = + read_from_string_or_source_opt(&page.content, &page.media_type, &page.source) + .map(|s| remove_slurs(&s, slur_regex)); + let language_id = + LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; + + PostInsertForm { + name, + url: url.map(Into::into), + body: body_slurs_removed, + creator_id: creator.id, + community_id: community.id, + removed: None, + locked: page.comments_enabled.map(|e| !e), + published: page.published.map(|u| u.naive_local()), + updated: page.updated.map(|u| u.naive_local()), + deleted: Some(false), + nsfw: page.sensitive, + embed_title, + embed_description, + embed_video_url, + thumbnail_url, + ap_id: Some(page.id.clone().into()), + local: Some(false), + language_id, + featured_community: None, + featured_local: None, + } } else { - check_object_domain(page, expected_domain, community.local)? + // if is mod action, only update locked/stickied fields, nothing else + PostInsertForm::builder() + .name(name) + .creator_id(creator.id) + .community_id(community.id) + .ap_id(Some(page.id.clone().into())) + .locked(page.comments_enabled.map(|e| !e)) + .updated(page.updated.map(|u| u.naive_local())) + .build() }; - let ext = &page.ext_one; - let creator_actor_id = page - .inner - .attributed_to() - .as_ref() - .context(location_info!())? - .as_single_xsd_any_uri() - .context(location_info!())?; - - let creator = - get_or_fetch_and_upsert_person(creator_actor_id, context, request_counter).await?; - - let thumbnail_url: Option = match &page.inner.image() { - Some(any_image) => Image::from_any_base( - any_image - .to_owned() - .as_one() - .context(location_info!())? - .to_owned(), - )? - .context(location_info!())? - .url() - .context(location_info!())? - .as_single_xsd_any_uri() - .map(|url| url.to_owned()), - None => None, - }; - let url = page - .inner - .url() - .map(|u| u.as_single_xsd_any_uri()) - .flatten() - .map(|u| u.to_owned()); - - let (iframely_title, iframely_description, iframely_html, pictrs_thumbnail) = - if let Some(url) = &url { - fetch_iframely_and_pictrs_data(context.client(), Some(url)).await - } else { - (None, None, None, thumbnail_url) + + let post = Post::create(&mut context.pool(), &form).await?; + + // write mod log entry for lock + if Page::is_locked_changed(&old_post, &page.comments_enabled) { + let form = ModLockPostForm { + mod_person_id: creator.id, + post_id: post.id, + locked: Some(post.locked), }; + ModLockPost::create(&mut context.pool(), &form).await?; + } + + Ok(post.into()) + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::unwrap_used)] + #![allow(clippy::indexing_slicing)] + + use super::*; + use crate::{ + objects::{ + community::tests::parse_lemmy_community, + person::tests::parse_lemmy_person, + post::ApubPost, + tests::init_context, + }, + protocol::tests::file_to_json_object, + }; + use lemmy_db_schema::source::site::Site; + use serial_test::serial; + + #[tokio::test] + #[serial] + async fn test_parse_lemmy_post() { + let context = init_context().await; + let (person, site) = parse_lemmy_person(&context).await; + let community = parse_lemmy_community(&context).await; + + let json = file_to_json_object("assets/lemmy/objects/page.json").unwrap(); + let url = Url::parse("https://enterprise.lemmy.ml/post/55143").unwrap(); + ApubPost::verify(&json, &url, &context).await.unwrap(); + let post = ApubPost::from_json(json, &context).await.unwrap(); + + assert_eq!(post.ap_id, url.into()); + assert_eq!(post.name, "Post title"); + assert!(post.body.is_some()); + assert_eq!(post.body.as_ref().unwrap().len(), 45); + assert!(!post.locked); + assert!(!post.featured_community); + assert_eq!(context.request_count(), 0); - let name = page - .inner - .name() - // The following is for compatibility with lemmy v0.9.9 and older - // TODO: remove it after some time (along with the map above) - .or_else(|| page.inner.summary()) - .context(location_info!())? - .as_single_xsd_string() - .context(location_info!())? - .to_string(); - let body = get_source_markdown_value(page)?; - - // TODO: expected_domain is wrong in this case, because it simply takes the domain of the actor - // maybe we need to take id_unchecked() if the activity is from community to user? - // why did this work before? -> i dont think it did? - // -> try to make expected_domain optional and set it null if it is a mod action - - check_slurs(&name)?; - let body_slurs_removed = body.map(|b| remove_slurs(&b)); - Ok(PostForm { - name, - url: url.map(|u| u.into()), - body: body_slurs_removed, - creator_id: creator.id, - community_id: community.id, - removed: None, - locked: ext.comments_enabled.map(|e| !e), - published: page - .inner - .published() - .as_ref() - .map(|u| u.to_owned().naive_local()), - updated: page - .inner - .updated() - .as_ref() - .map(|u| u.to_owned().naive_local()), - deleted: None, - nsfw: ext.sensitive, - stickied: ext.stickied, - embed_title: iframely_title, - embed_description: iframely_description, - embed_html: iframely_html, - thumbnail_url: pictrs_thumbnail.map(|u| u.into()), - ap_id: Some(ap_id), - local: Some(false), - }) + Post::delete(&mut context.pool(), post.id).await.unwrap(); + Person::delete(&mut context.pool(), person.id) + .await + .unwrap(); + Community::delete(&mut context.pool(), community.id) + .await + .unwrap(); + Site::delete(&mut context.pool(), site.id).await.unwrap(); } }