Untitled Git - lemmy.git/blobdiff - crates/apub/src/objects/comment.rs
Sanitize html (#3708)
[lemmy.git] / crates / apub / src / objects / comment.rs
index b5f72ca38b7f04a2c69dc57665264c8a1ef08cbf..3b05ed3946e0bb6bc00f4959d70905e6155bcbf8 100644 (file)
@@ -1,46 +1,44 @@
-use std::ops::Deref;
-
-use activitystreams::{object::kind::NoteType, public};
-use anyhow::anyhow;
+use crate::{
+  activities::{verify_is_public, verify_person_in_community},
+  check_apub_id_valid_with_strictness,
+  mentions::collect_non_local_mentions,
+  objects::{read_from_string_or_source, verify_is_remote_object},
+  protocol::{
+    objects::{note::Note, LanguageTag},
+    InCommunity,
+    Source,
+  },
+};
+use activitypub_federation::{
+  config::Data,
+  kinds::{object::NoteType, public},
+  protocol::{values::MediaTypeMarkdownOrHtml, verification::verify_domains_match},
+  traits::Object,
+};
 use chrono::NaiveDateTime;
-use html2md::parse_html;
-use url::Url;
-
-use lemmy_api_common::blocking;
-use lemmy_apub_lib::{
-  traits::ApubObject,
-  values::{MediaTypeHtml, MediaTypeMarkdown},
+use lemmy_api_common::{
+  context::LemmyContext,
+  utils::{local_site_opt_to_slur_regex, sanitize_html},
 };
 use lemmy_db_schema::{
   source::{
-    comment::{Comment, CommentForm},
+    comment::{Comment, CommentInsertForm, CommentUpdateForm},
     community::Community,
+    local_site::LocalSite,
     person::Person,
     post::Post,
   },
   traits::Crud,
 };
 use lemmy_utils::{
-  utils::{convert_datetime, remove_slurs},
-  LemmyError,
-};
-use lemmy_websocket::LemmyContext;
-
-use crate::{
-  activities::verify_person_in_community,
-  fetcher::object_id::ObjectId,
-  protocol::{
-    objects::{
-      note::{Note, SourceCompat},
-      tombstone::Tombstone,
-    },
-    Source,
-  },
-  PostOrComment,
+  error::{LemmyError, LemmyErrorType},
+  utils::{markdown::markdown_to_html, slurs::remove_slurs, time::convert_datetime},
 };
+use std::ops::Deref;
+use url::Url;
 
 #[derive(Clone, Debug)]
-pub struct ApubComment(Comment);
+pub struct ApubComment(pub(crate) Comment);
 
 impl Deref for ApubComment {
   type Target = Comment;
@@ -51,225 +49,246 @@ impl Deref for ApubComment {
 
 impl From<Comment> for ApubComment {
   fn from(c: Comment) -> Self {
-    ApubComment { 0: c }
+    ApubComment(c)
   }
 }
 
-#[async_trait::async_trait(?Send)]
-impl ApubObject for ApubComment {
+#[async_trait::async_trait]
+impl Object for ApubComment {
   type DataType = LemmyContext;
-  type ApubType = Note;
-  type TombstoneType = Tombstone;
+  type Kind = Note;
+  type Error = LemmyError;
 
   fn last_refreshed_at(&self) -> Option<NaiveDateTime> {
     None
   }
 
-  async fn read_from_apub_id(
+  #[tracing::instrument(skip_all)]
+  async fn read_from_id(
     object_id: Url,
-    context: &LemmyContext,
+    context: &Data<Self::DataType>,
   ) -> Result<Option<Self>, LemmyError> {
     Ok(
-      blocking(context.pool(), move |conn| {
-        Comment::read_from_apub_id(conn, object_id)
-      })
-      .await??
-      .map(Into::into),
+      Comment::read_from_apub_id(&mut context.pool(), object_id)
+        .await?
+        .map(Into::into),
     )
   }
 
-  async fn delete(self, context: &LemmyContext) -> Result<(), LemmyError> {
-    blocking(context.pool(), move |conn| {
-      Comment::update_deleted(conn, self.id, true)
-    })
-    .await??;
+  #[tracing::instrument(skip_all)]
+  async fn delete(self, context: &Data<Self::DataType>) -> Result<(), LemmyError> {
+    if !self.deleted {
+      let form = CommentUpdateForm::builder().deleted(Some(true)).build();
+      Comment::update(&mut context.pool(), self.id, &form).await?;
+    }
     Ok(())
   }
 
-  async fn to_apub(&self, context: &LemmyContext) -> Result<Note, LemmyError> {
+  #[tracing::instrument(skip_all)]
+  async fn into_json(self, context: &Data<Self::DataType>) -> Result<Note, LemmyError> {
     let creator_id = self.creator_id;
-    let creator = blocking(context.pool(), move |conn| Person::read(conn, creator_id)).await??;
+    let creator = Person::read(&mut context.pool(), creator_id).await?;
 
     let post_id = self.post_id;
-    let post = blocking(context.pool(), move |conn| Post::read(conn, post_id)).await??;
+    let post = Post::read(&mut context.pool(), post_id).await?;
+    let community_id = post.community_id;
+    let community = Community::read(&mut context.pool(), community_id).await?;
 
-    let in_reply_to = if let Some(comment_id) = self.parent_id {
-      let parent_comment =
-        blocking(context.pool(), move |conn| Comment::read(conn, comment_id)).await??;
-      ObjectId::<PostOrComment>::new(parent_comment.ap_id.into_inner())
+    let in_reply_to = if let Some(comment_id) = self.parent_comment_id() {
+      let parent_comment = Comment::read(&mut context.pool(), comment_id).await?;
+      parent_comment.ap_id.into()
     } else {
-      ObjectId::<PostOrComment>::new(post.ap_id.into_inner())
+      post.ap_id.into()
     };
+    let language = LanguageTag::new_single(self.language_id, &mut context.pool()).await?;
+    let maa = collect_non_local_mentions(&self, community.actor_id.clone().into(), context).await?;
 
     let note = Note {
       r#type: NoteType::Note,
-      id: self.ap_id.to_owned().into_inner(),
-      attributed_to: ObjectId::new(creator.actor_id),
+      id: self.ap_id.clone().into(),
+      attributed_to: creator.actor_id.into(),
       to: vec![public()],
-      content: self.content.clone(),
-      media_type: Some(MediaTypeHtml::Html),
-      source: SourceCompat::Lemmy(Source {
-        content: self.content.clone(),
-        media_type: MediaTypeMarkdown::Markdown,
-      }),
+      cc: maa.ccs,
+      content: markdown_to_html(&self.content),
+      media_type: Some(MediaTypeMarkdownOrHtml::Html),
+      source: Some(Source::new(self.content.clone())),
       in_reply_to,
       published: Some(convert_datetime(self.published)),
       updated: self.updated.map(convert_datetime),
-      unparsed: Default::default(),
+      tag: maa.tags,
+      distinguished: Some(self.distinguished),
+      language,
+      audience: Some(community.actor_id.into()),
     };
 
     Ok(note)
   }
 
-  fn to_tombstone(&self) -> Result<Tombstone, LemmyError> {
-    Ok(Tombstone::new(
-      NoteType::Note,
-      self.updated.unwrap_or(self.published),
-    ))
+  #[tracing::instrument(skip_all)]
+  async fn verify(
+    note: &Note,
+    expected_domain: &Url,
+    context: &Data<LemmyContext>,
+  ) -> Result<(), LemmyError> {
+    verify_domains_match(note.id.inner(), expected_domain)?;
+    verify_domains_match(note.attributed_to.inner(), note.id.inner())?;
+    verify_is_public(&note.to, &note.cc)?;
+    let community = note.community(context).await?;
+
+    check_apub_id_valid_with_strictness(note.id.inner(), community.local, context).await?;
+    verify_is_remote_object(note.id.inner(), context.settings())?;
+    verify_person_in_community(&note.attributed_to, &community, context).await?;
+    let (post, _) = note.get_parents(context).await?;
+    if post.locked {
+      return Err(LemmyErrorType::PostIsLocked)?;
+    }
+    Ok(())
   }
 
   /// Converts a `Note` to `Comment`.
   ///
   /// If the parent community, post and comment(s) are not known locally, these are also fetched.
-  async fn from_apub(
-    note: &Note,
-    context: &LemmyContext,
-    expected_domain: &Url,
-    request_counter: &mut i32,
-  ) -> Result<ApubComment, LemmyError> {
-    let ap_id = Some(note.id(expected_domain)?.clone().into());
-    let creator = note
-      .attributed_to
-      .dereference(context, request_counter)
-      .await?;
-    let (post, parent_comment_id) = note.get_parents(context, request_counter).await?;
-    let community_id = post.community_id;
-    let community = blocking(context.pool(), move |conn| {
-      Community::read(conn, community_id)
-    })
-    .await??;
-    verify_person_in_community(
-      &note.attributed_to,
-      &community.into(),
-      context,
-      request_counter,
-    )
-    .await?;
-    if post.locked {
-      return Err(anyhow!("Post is locked").into());
-    }
+  #[tracing::instrument(skip_all)]
+  async fn from_json(note: Note, context: &Data<LemmyContext>) -> Result<ApubComment, LemmyError> {
+    let creator = note.attributed_to.dereference(context).await?;
+    let (post, parent_comment) = note.get_parents(context).await?;
 
-    let content = if let SourceCompat::Lemmy(source) = &note.source {
-      source.content.clone()
-    } else {
-      parse_html(&note.content)
-    };
-    let content_slurs_removed = remove_slurs(&content, &context.settings().slur_regex());
+    let content = read_from_string_or_source(&note.content, &note.media_type, &note.source);
+
+    let local_site = LocalSite::read(&mut context.pool()).await.ok();
+    let slur_regex = &local_site_opt_to_slur_regex(&local_site);
+    let content = remove_slurs(&content, slur_regex);
+    let content = sanitize_html(&content);
+    let language_id =
+      LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?;
 
-    let form = CommentForm {
+    let form = CommentInsertForm {
       creator_id: creator.id,
       post_id: post.id,
-      parent_id: parent_comment_id,
-      content: content_slurs_removed,
+      content,
       removed: None,
-      read: None,
-      published: note.published.map(|u| u.to_owned().naive_local()),
-      updated: note.updated.map(|u| u.to_owned().naive_local()),
-      deleted: None,
-      ap_id,
+      published: note.published.map(|u| u.naive_local()),
+      updated: note.updated.map(|u| u.naive_local()),
+      deleted: Some(false),
+      ap_id: Some(note.id.into()),
+      distinguished: note.distinguished,
       local: Some(false),
+      language_id,
     };
-    let comment = blocking(context.pool(), move |conn| Comment::upsert(conn, &form)).await??;
+    let parent_comment_path = parent_comment.map(|t| t.0.path);
+    let comment = Comment::create(&mut context.pool(), &form, parent_comment_path.as_ref()).await?;
     Ok(comment.into())
   }
 }
 
 #[cfg(test)]
 pub(crate) mod tests {
+  #![allow(clippy::unwrap_used)]
+  #![allow(clippy::indexing_slicing)]
+
   use super::*;
-  use crate::objects::{
-    community::{tests::parse_lemmy_community, ApubCommunity},
-    person::{tests::parse_lemmy_person, ApubPerson},
-    post::ApubPost,
-    tests::{file_to_json_object, init_context},
+  use crate::{
+    objects::{
+      community::{tests::parse_lemmy_community, ApubCommunity},
+      instance::ApubSite,
+      person::{tests::parse_lemmy_person, ApubPerson},
+      post::ApubPost,
+      tests::init_context,
+    },
+    protocol::tests::file_to_json_object,
   };
   use assert_json_diff::assert_json_include;
+  use html2md::parse_html;
+  use lemmy_db_schema::source::site::Site;
   use serial_test::serial;
 
   async fn prepare_comment_test(
     url: &Url,
-    context: &LemmyContext,
-  ) -> (ApubPerson, ApubCommunity, ApubPost) {
-    let person = parse_lemmy_person(context).await;
-    let community = parse_lemmy_community(context).await;
-    let post_json = file_to_json_object("assets/lemmy/objects/page.json");
-    let post = ApubPost::from_apub(&post_json, context, url, &mut 0)
-      .await
-      .unwrap();
-    (person, community, post)
+    context: &Data<LemmyContext>,
+  ) -> (ApubPerson, ApubCommunity, ApubPost, ApubSite) {
+    // use separate counter so this doesn't affect tests
+    let context2 = context.reset_request_count();
+    let (person, site) = parse_lemmy_person(&context2).await;
+    let community = parse_lemmy_community(&context2).await;
+    let post_json = file_to_json_object("assets/lemmy/objects/page.json").unwrap();
+    ApubPost::verify(&post_json, url, &context2).await.unwrap();
+    let post = ApubPost::from_json(post_json, &context2).await.unwrap();
+    (person, community, post, site)
   }
 
-  fn cleanup(data: (ApubPerson, ApubCommunity, ApubPost), context: &LemmyContext) {
-    Post::delete(&*context.pool().get().unwrap(), data.2.id).unwrap();
-    Community::delete(&*context.pool().get().unwrap(), data.1.id).unwrap();
-    Person::delete(&*context.pool().get().unwrap(), data.0.id).unwrap();
+  async fn cleanup(data: (ApubPerson, ApubCommunity, ApubPost, ApubSite), context: &LemmyContext) {
+    Post::delete(&mut context.pool(), data.2.id).await.unwrap();
+    Community::delete(&mut context.pool(), data.1.id)
+      .await
+      .unwrap();
+    Person::delete(&mut context.pool(), data.0.id)
+      .await
+      .unwrap();
+    Site::delete(&mut context.pool(), data.3.id).await.unwrap();
+    LocalSite::delete(&mut context.pool()).await.unwrap();
   }
 
-  #[actix_rt::test]
+  #[tokio::test]
   #[serial]
   pub(crate) async fn test_parse_lemmy_comment() {
-    let context = init_context();
+    let context = init_context().await;
     let url = Url::parse("https://enterprise.lemmy.ml/comment/38741").unwrap();
     let data = prepare_comment_test(&url, &context).await;
 
-    let json = file_to_json_object("assets/lemmy/objects/note.json");
-    let mut request_counter = 0;
-    let comment = ApubComment::from_apub(&json, &context, &url, &mut request_counter)
+    let json: Note = file_to_json_object("assets/lemmy/objects/note.json").unwrap();
+    ApubComment::verify(&json, &url, &context).await.unwrap();
+    let comment = ApubComment::from_json(json.clone(), &context)
       .await
       .unwrap();
 
-    assert_eq!(comment.ap_id.clone().into_inner(), url);
+    assert_eq!(comment.ap_id, url.into());
     assert_eq!(comment.content.len(), 14);
     assert!(!comment.local);
-    assert_eq!(request_counter, 0);
+    assert_eq!(context.request_count(), 0);
 
-    let to_apub = comment.to_apub(&context).await.unwrap();
+    let comment_id = comment.id;
+    let to_apub = comment.into_json(&context).await.unwrap();
     assert_json_include!(actual: json, expected: to_apub);
 
-    Comment::delete(&*context.pool().get().unwrap(), comment.id).unwrap();
-    cleanup(data, &context);
+    Comment::delete(&mut context.pool(), comment_id)
+      .await
+      .unwrap();
+    cleanup(data, &context).await;
   }
 
-  #[actix_rt::test]
+  #[tokio::test]
   #[serial]
   async fn test_parse_pleroma_comment() {
-    let context = init_context();
+    let context = init_context().await;
     let url = Url::parse("https://enterprise.lemmy.ml/comment/38741").unwrap();
     let data = prepare_comment_test(&url, &context).await;
 
     let pleroma_url =
       Url::parse("https://queer.hacktivis.me/objects/8d4973f4-53de-49cd-8c27-df160e16a9c2")
         .unwrap();
-    let person_json = file_to_json_object("assets/pleroma/objects/person.json");
-    ApubPerson::from_apub(&person_json, &context, &pleroma_url, &mut 0)
+    let person_json = file_to_json_object("assets/pleroma/objects/person.json").unwrap();
+    ApubPerson::verify(&person_json, &pleroma_url, &context)
       .await
       .unwrap();
-    let json = file_to_json_object("assets/pleroma/objects/note.json");
-    let mut request_counter = 0;
-    let comment = ApubComment::from_apub(&json, &context, &pleroma_url, &mut request_counter)
+    ApubPerson::from_json(person_json, &context).await.unwrap();
+    let json = file_to_json_object("assets/pleroma/objects/note.json").unwrap();
+    ApubComment::verify(&json, &pleroma_url, &context)
       .await
       .unwrap();
+    let comment = ApubComment::from_json(json, &context).await.unwrap();
 
-    assert_eq!(comment.ap_id.clone().into_inner(), pleroma_url);
+    assert_eq!(comment.ap_id, pleroma_url.into());
     assert_eq!(comment.content.len(), 64);
     assert!(!comment.local);
-    assert_eq!(request_counter, 0);
+    assert_eq!(context.request_count(), 1);
 
-    Comment::delete(&*context.pool().get().unwrap(), comment.id).unwrap();
-    cleanup(data, &context);
+    Comment::delete(&mut context.pool(), comment.id)
+      .await
+      .unwrap();
+    cleanup(data, &context).await;
   }
 
-  #[actix_rt::test]
+  #[tokio::test]
   #[serial]
   async fn test_html_to_markdown_sanitize() {
     let parsed = parse_html("<script></script><b>hello</b>");