Untitled Git - lemmy.git/blobdiff - crates/apub/src/objects/post.rs
Cache & Optimize Woodpecker CI (#3450)
[lemmy.git] / crates / apub / src / objects / post.rs
index 341b428b6def03e257da0d34e880a6917d912896..48b573d30ab0dfdfd58398cdd2159ce7ee266a85 100644 (file)
 use crate::{
-  activities::{extract_community, verify_person_in_community},
-  extensions::context::lemmy_context,
-  fetcher::object_id::ObjectId,
-  objects::{create_tombstone, FromApub, ImageObject, Source, ToApub},
-  ActorType,
-};
-use activitystreams::{
-  base::AnyBase,
-  object::{
-    kind::{ImageType, PageType},
-    Tombstone,
+  activities::{verify_is_public, verify_person_in_community},
+  check_apub_id_valid_with_strictness,
+  local_site_data_cached,
+  objects::{read_from_string_or_source_opt, verify_is_remote_object},
+  protocol::{
+    objects::{
+      page::{Attachment, AttributedTo, Page, PageType},
+      LanguageTag,
+    },
+    ImageObject,
+    InCommunity,
+    Source,
   },
-  primitives::OneOrMany,
-  public,
-  unparsed::Unparsed,
 };
-use chrono::{DateTime, FixedOffset};
-use lemmy_api_common::blocking;
-use lemmy_apub_lib::{
-  values::{MediaTypeHtml, MediaTypeMarkdown},
-  verify_domains_match,
+use activitypub_federation::{
+  config::Data,
+  kinds::public,
+  protocol::{values::MediaTypeMarkdownOrHtml, verification::verify_domains_match},
+  traits::Object,
+};
+use anyhow::anyhow;
+use chrono::NaiveDateTime;
+use html2md::parse_html;
+use lemmy_api_common::{
+  context::LemmyContext,
+  request::fetch_site_data,
+  utils::{is_mod_or_admin, local_site_opt_to_sensitive, local_site_opt_to_slur_regex},
 };
-use lemmy_db_queries::{source::post::Post_, ApubObject, Crud, DbPool};
 use lemmy_db_schema::{
   self,
   source::{
     community::Community,
+    local_site::LocalSite,
+    moderator::{ModLockPost, ModLockPostForm},
     person::Person,
-    post::{Post, PostForm},
+    post::{Post, PostInsertForm, PostUpdateForm},
   },
+  traits::Crud,
 };
 use lemmy_utils::{
-  request::fetch_site_data,
-  utils::{check_slurs, convert_datetime, markdown_to_html, remove_slurs},
-  LemmyError,
+  error::LemmyError,
+  utils::{
+    markdown::markdown_to_html,
+    slurs::{check_slurs_opt, remove_slurs},
+    time::convert_datetime,
+    validation::check_url_scheme,
+  },
 };
-use lemmy_websocket::LemmyContext;
-use serde::{Deserialize, Serialize};
-use serde_with::skip_serializing_none;
+use std::ops::Deref;
 use url::Url;
 
-#[skip_serializing_none]
-#[derive(Clone, Debug, Deserialize, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct Page {
-  #[serde(rename = "@context")]
-  context: OneOrMany<AnyBase>,
-  r#type: PageType,
-  id: Url,
-  pub(crate) attributed_to: ObjectId<Person>,
-  to: [Url; 2],
-  name: String,
-  content: Option<String>,
-  media_type: MediaTypeHtml,
-  source: Option<Source>,
-  url: Option<Url>,
-  image: Option<ImageObject>,
-  pub(crate) comments_enabled: Option<bool>,
-  sensitive: Option<bool>,
-  pub(crate) stickied: Option<bool>,
-  published: DateTime<FixedOffset>,
-  updated: Option<DateTime<FixedOffset>>,
-  #[serde(flatten)]
-  unparsed: Unparsed,
-}
+const MAX_TITLE_LENGTH: usize = 200;
+
+#[derive(Clone, Debug)]
+pub struct ApubPost(pub(crate) Post);
 
-impl Page {
-  pub(crate) fn id_unchecked(&self) -> &Url {
-    &self.id
+impl Deref for ApubPost {
+  type Target = Post;
+  fn deref(&self) -> &Self::Target {
+    &self.0
   }
-  pub(crate) fn id(&self, expected_domain: &Url) -> Result<&Url, LemmyError> {
-    verify_domains_match(&self.id, expected_domain)?;
-    Ok(&self.id)
+}
+
+impl From<Post> for ApubPost {
+  fn from(p: Post) -> Self {
+    ApubPost(p)
   }
+}
 
-  /// Only mods can change the post's stickied/locked status. So if either of these is changed from
-  /// the current value, it is a mod action and needs to be verified as such.
-  ///
-  /// Both stickied and locked need to be false on a newly created post (verified in [[CreatePost]].
-  pub(crate) async fn is_mod_action(&self, pool: &DbPool) -> Result<bool, LemmyError> {
-    let post_id = self.id.clone();
-    let old_post = blocking(pool, move |conn| {
-      Post::read_from_apub_id(conn, &post_id.into())
-    })
-    .await?;
-
-    let is_mod_action = if let Ok(old_post) = old_post {
-      self.stickied != Some(old_post.stickied) || self.comments_enabled != Some(!old_post.locked)
-    } else {
-      false
-    };
-    Ok(is_mod_action)
+#[async_trait::async_trait]
+impl Object for ApubPost {
+  type DataType = LemmyContext;
+  type Kind = Page;
+  type Error = LemmyError;
+
+  fn last_refreshed_at(&self) -> Option<NaiveDateTime> {
+    None
   }
 
-  pub(crate) async fn verify(
-    &self,
-    context: &LemmyContext,
-    request_counter: &mut i32,
-  ) -> Result<(), LemmyError> {
-    let community = extract_community(&self.to, context, request_counter).await?;
-
-    check_slurs(&self.name)?;
-    verify_domains_match(self.attributed_to.inner(), &self.id)?;
-    verify_person_in_community(
-      &self.attributed_to,
-      &ObjectId::new(community.actor_id()),
-      context,
-      request_counter,
+  #[tracing::instrument(skip_all)]
+  async fn read_from_id(
+    object_id: Url,
+    context: &Data<Self::DataType>,
+  ) -> Result<Option<Self>, LemmyError> {
+    Ok(
+      Post::read_from_apub_id(&mut context.pool(), object_id)
+        .await?
+        .map(Into::into),
     )
-    .await?;
-    Ok(())
   }
-}
 
-#[async_trait::async_trait(?Send)]
-impl ToApub for Post {
-  type ApubType = Page;
+  #[tracing::instrument(skip_all)]
+  async fn delete(self, context: &Data<Self::DataType>) -> Result<(), LemmyError> {
+    if !self.deleted {
+      let form = PostUpdateForm::builder().deleted(Some(true)).build();
+      Post::update(&mut context.pool(), self.id, &form).await?;
+    }
+    Ok(())
+  }
 
   // Turn a Lemmy post into an ActivityPub page that can be sent out over the network.
-  async fn to_apub(&self, pool: &DbPool) -> Result<Page, LemmyError> {
+  #[tracing::instrument(skip_all)]
+  async fn into_json(self, context: &Data<Self::DataType>) -> Result<Page, LemmyError> {
     let creator_id = self.creator_id;
-    let creator = blocking(pool, move |conn| Person::read(conn, creator_id)).await??;
+    let creator = Person::read(&mut context.pool(), creator_id).await?;
     let community_id = self.community_id;
-    let community = blocking(pool, move |conn| Community::read(conn, community_id)).await??;
-
-    let source = self.body.clone().map(|body| Source {
-      content: body,
-      media_type: MediaTypeMarkdown::Markdown,
-    });
-    let image = self.thumbnail_url.clone().map(|thumb| ImageObject {
-      kind: ImageType::Image,
-      url: thumb.into(),
-    });
+    let community = Community::read(&mut context.pool(), community_id).await?;
+    let language = LanguageTag::new_single(self.language_id, &mut context.pool()).await?;
 
     let page = Page {
-      context: lemmy_context(),
-      r#type: PageType::Page,
+      kind: PageType::Page,
       id: self.ap_id.clone().into(),
-      attributed_to: ObjectId::new(creator.actor_id),
-      to: [community.actor_id.into(), public()],
-      name: self.name.clone(),
+      attributed_to: AttributedTo::Lemmy(creator.actor_id.into()),
+      to: vec![community.actor_id.clone().into(), public()],
+      cc: vec![],
+      name: Some(self.name.clone()),
       content: self.body.as_ref().map(|b| markdown_to_html(b)),
-      media_type: MediaTypeHtml::Html,
-      source,
-      url: self.url.clone().map(|u| u.into()),
-      image,
+      media_type: Some(MediaTypeMarkdownOrHtml::Html),
+      source: self.body.clone().map(Source::new),
+      attachment: self.url.clone().map(Attachment::new).into_iter().collect(),
+      image: self.thumbnail_url.clone().map(ImageObject::new),
       comments_enabled: Some(!self.locked),
       sensitive: Some(self.nsfw),
-      stickied: Some(self.stickied),
-      published: convert_datetime(self.published),
+      language,
+      published: Some(convert_datetime(self.published)),
       updated: self.updated.map(convert_datetime),
-      unparsed: Default::default(),
+      audience: Some(community.actor_id.into()),
+      in_reply_to: None,
     };
     Ok(page)
   }
 
-  fn to_tombstone(&self) -> Result<Tombstone, LemmyError> {
-    create_tombstone(
-      self.deleted,
-      self.ap_id.to_owned().into(),
-      self.updated,
-      PageType::Page,
-    )
-  }
-}
-
-#[async_trait::async_trait(?Send)]
-impl FromApub for Post {
-  type ApubType = Page;
-
-  async fn from_apub(
+  #[tracing::instrument(skip_all)]
+  async fn verify(
     page: &Page,
-    context: &LemmyContext,
     expected_domain: &Url,
-    request_counter: &mut i32,
-  ) -> Result<Post, LemmyError> {
+    context: &Data<Self::DataType>,
+  ) -> Result<(), LemmyError> {
     // We can't verify the domain in case of mod action, because the mod may be on a different
     // instance from the post author.
-    let ap_id = if page.is_mod_action(context.pool()).await? {
-      page.id_unchecked()
-    } else {
-      page.id(expected_domain)?
+    if !page.is_mod_action(context).await? {
+      verify_domains_match(page.id.inner(), expected_domain)?;
+      verify_is_remote_object(page.id.inner(), context.settings())?;
     };
-    let ap_id = Some(ap_id.clone().into());
-    let creator = page
-      .attributed_to
-      .dereference(context, request_counter)
-      .await?;
-    let community = extract_community(&page.to, context, request_counter).await?;
-
-    let thumbnail_url: Option<Url> = page.image.clone().map(|i| i.url);
-    let (metadata_res, pictrs_thumbnail) = if let Some(url) = &page.url {
-      fetch_site_data(context.client(), Some(url)).await
+
+    let community = page.community(context).await?;
+    check_apub_id_valid_with_strictness(page.id.inner(), community.local, context).await?;
+    verify_person_in_community(&page.creator()?, &community, context).await?;
+
+    let local_site_data = local_site_data_cached(&mut context.pool()).await?;
+    let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
+    check_slurs_opt(&page.name, slur_regex)?;
+
+    verify_domains_match(page.creator()?.inner(), page.id.inner())?;
+    verify_is_public(&page.to, &page.cc)?;
+    Ok(())
+  }
+
+  #[tracing::instrument(skip_all)]
+  async fn from_json(page: Page, context: &Data<Self::DataType>) -> Result<ApubPost, LemmyError> {
+    let creator = page.creator()?.dereference(context).await?;
+    let community = page.community(context).await?;
+    if community.posting_restricted_to_mods {
+      is_mod_or_admin(&mut context.pool(), creator.id, community.id).await?;
+    }
+    let mut name = page
+      .name
+      .clone()
+      .or_else(|| {
+        page
+          .content
+          .clone()
+          .as_ref()
+          .and_then(|c| parse_html(c).lines().next().map(ToString::to_string))
+      })
+      .ok_or_else(|| anyhow!("Object must have name or content"))?;
+    if name.chars().count() > MAX_TITLE_LENGTH {
+      name = name.chars().take(MAX_TITLE_LENGTH).collect();
+    }
+
+    // read existing, local post if any (for generating mod log)
+    let old_post = page.id.dereference_local(context).await;
+
+    let form = if !page.is_mod_action(context).await? {
+      let first_attachment = page.attachment.into_iter().map(Attachment::url).next();
+      let url = if first_attachment.is_some() {
+        first_attachment
+      } else if page.kind == PageType::Video {
+        // we can't display videos directly, so insert a link to the external video page
+        Some(page.id.inner().clone())
+      } else {
+        None
+      };
+      check_url_scheme(&url)?;
+
+      let local_site = LocalSite::read(&mut context.pool()).await.ok();
+      let allow_sensitive = local_site_opt_to_sensitive(&local_site);
+      let page_is_sensitive = page.sensitive.unwrap_or(false);
+      let include_image = allow_sensitive || !page_is_sensitive;
+
+      // Only fetch metadata if the post has a URL and was not seen previously. We don't want to
+      // waste resources by fetching metadata for the same post multiple times.
+      // Additionally, only fetch the image if content is not sensitive or is allowed on local site.
+      let (metadata_res, thumbnail) = match &url {
+        Some(url) if old_post.is_err() => {
+          fetch_site_data(
+            context.client(),
+            context.settings(),
+            Some(url),
+            include_image,
+          )
+          .await
+        }
+        _ => (None, None),
+      };
+      // If no image was included with metadata, use post image instead when available.
+      let thumbnail_url = thumbnail.or_else(|| page.image.map(|i| i.url.into()));
+
+      let (embed_title, embed_description, embed_video_url) = metadata_res
+        .map(|u| (u.title, u.description, u.embed_video_url))
+        .unwrap_or_default();
+      let slur_regex = &local_site_opt_to_slur_regex(&local_site);
+
+      let body_slurs_removed =
+        read_from_string_or_source_opt(&page.content, &page.media_type, &page.source)
+          .map(|s| remove_slurs(&s, slur_regex));
+      let language_id =
+        LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;
+
+      PostInsertForm {
+        name,
+        url: url.map(Into::into),
+        body: body_slurs_removed,
+        creator_id: creator.id,
+        community_id: community.id,
+        removed: None,
+        locked: page.comments_enabled.map(|e| !e),
+        published: page.published.map(|u| u.naive_local()),
+        updated: page.updated.map(|u| u.naive_local()),
+        deleted: Some(false),
+        nsfw: page.sensitive,
+        embed_title,
+        embed_description,
+        embed_video_url,
+        thumbnail_url,
+        ap_id: Some(page.id.clone().into()),
+        local: Some(false),
+        language_id,
+        featured_community: None,
+        featured_local: None,
+      }
     } else {
-      (None, thumbnail_url)
+      // if this is a mod action, only update locked/stickied fields, nothing else
+      PostInsertForm::builder()
+        .name(name)
+        .creator_id(creator.id)
+        .community_id(community.id)
+        .ap_id(Some(page.id.clone().into()))
+        .locked(page.comments_enabled.map(|e| !e))
+        .updated(page.updated.map(|u| u.naive_local()))
+        .build()
     };
-    let (embed_title, embed_description, embed_html) = metadata_res
-      .map(|u| (u.title, u.description, u.html))
-      .unwrap_or((None, None, None));
-
-    let body_slurs_removed = page.source.as_ref().map(|s| remove_slurs(&s.content));
-    let form = PostForm {
-      name: page.name.clone(),
-      url: page.url.clone().map(|u| u.into()),
-      body: body_slurs_removed,
-      creator_id: creator.id,
-      community_id: community.id,
-      removed: None,
-      locked: page.comments_enabled.map(|e| !e),
-      published: Some(page.published.naive_local()),
-      updated: page.updated.map(|u| u.naive_local()),
-      deleted: None,
-      nsfw: page.sensitive,
-      stickied: page.stickied,
-      embed_title,
-      embed_description,
-      embed_html,
-      thumbnail_url: pictrs_thumbnail.map(|u| u.into()),
-      ap_id,
-      local: Some(false),
-    };
-    Ok(blocking(context.pool(), move |conn| Post::upsert(conn, &form)).await??)
+
+    let post = Post::create(&mut context.pool(), &form).await?;
+
+    // write mod log entry for lock
+    if Page::is_locked_changed(&old_post, &page.comments_enabled) {
+      let form = ModLockPostForm {
+        mod_person_id: creator.id,
+        post_id: post.id,
+        locked: Some(post.locked),
+      };
+      ModLockPost::create(&mut context.pool(), &form).await?;
+    }
+
+    Ok(post.into())
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  #![allow(clippy::unwrap_used)]
+  #![allow(clippy::indexing_slicing)]
+
+  use super::*;
+  use crate::{
+    objects::{
+      community::tests::parse_lemmy_community,
+      person::tests::parse_lemmy_person,
+      post::ApubPost,
+      tests::init_context,
+    },
+    protocol::tests::file_to_json_object,
+  };
+  use lemmy_db_schema::source::site::Site;
+  use serial_test::serial;
+
+  #[tokio::test]
+  #[serial]
+  async fn test_parse_lemmy_post() {
+    let context = init_context().await;
+    let (person, site) = parse_lemmy_person(&context).await;
+    let community = parse_lemmy_community(&context).await;
+
+    let json = file_to_json_object("assets/lemmy/objects/page.json").unwrap();
+    let url = Url::parse("https://enterprise.lemmy.ml/post/55143").unwrap();
+    ApubPost::verify(&json, &url, &context).await.unwrap();
+    let post = ApubPost::from_json(json, &context).await.unwrap();
+
+    assert_eq!(post.ap_id, url.into());
+    assert_eq!(post.name, "Post title");
+    assert!(post.body.is_some());
+    assert_eq!(post.body.as_ref().unwrap().len(), 45);
+    assert!(!post.locked);
+    assert!(!post.featured_community);
+    assert_eq!(context.request_count(), 0);
+
+    Post::delete(&mut context.pool(), post.id).await.unwrap();
+    Person::delete(&mut context.pool(), person.id)
+      .await
+      .unwrap();
+    Community::delete(&mut context.pool(), community.id)
+      .await
+      .unwrap();
+    Site::delete(&mut context.pool(), site.id).await.unwrap();
   }
 }