X-Git-Url: http://these/git/?a=blobdiff_plain;f=crates%2Fapub%2Fsrc%2Flib.rs;h=d4e34f9e740fd2bb3b444b759ec7e985c9841c93;hb=70fae9d68d65b1e4d153e30d3c065cc315b75eaf;hp=e758e706db23e753e5731a3be98a5416db357729;hpb=a2a594b7635db2241602be56250f7d9bf992f7b9;p=lemmy.git diff --git a/crates/apub/src/lib.rs b/crates/apub/src/lib.rs index e758e706..d4e34f9e 100644 --- a/crates/apub/src/lib.rs +++ b/crates/apub/src/lib.rs @@ -1,20 +1,20 @@ use crate::fetcher::post_or_comment::PostOrComment; -use activitypub_federation::{ - core::{inbox::ActorPublicKey, signatures::PublicKey}, - InstanceSettingsBuilder, - LocalInstance, +use activitypub_federation::config::{Data, UrlVerifier}; +use async_trait::async_trait; +use lemmy_api_common::context::LemmyContext; +use lemmy_db_schema::{ + source::{activity::ReceivedActivity, instance::Instance, local_site::LocalSite}, + utils::{ActualDbPool, DbPool}, }; -use anyhow::Context; -use lemmy_api_common::utils::blocking; -use lemmy_db_schema::{newtypes::DbUrl, source::activity::Activity, utils::DbPool}; -use lemmy_utils::{error::LemmyError, location_info, settings::structs::Settings}; -use lemmy_websocket::LemmyContext; -use once_cell::sync::{Lazy, OnceCell}; -use std::env; -use url::{ParseError, Url}; +use lemmy_utils::error::{LemmyError, LemmyErrorType, LemmyResult}; +use moka::future::Cache; +use once_cell::sync::Lazy; +use std::{sync::Arc, time::Duration}; +use url::Url; pub mod activities; pub(crate) mod activity_lists; +pub mod api; pub(crate) mod collections; pub mod fetcher; pub mod http; @@ -22,27 +22,43 @@ pub(crate) mod mentions; pub mod objects; pub mod protocol; +pub const FEDERATION_HTTP_FETCH_LIMIT: u32 = 50; +/// All incoming and outgoing federation actions read the blocklist/allowlist and slur filters +/// multiple times. This causes a huge number of database reads if we hit the db directly. So we +/// cache these values for a short time, which will already make a huge difference and ensures that +/// changes take effect quickly. +const BLOCKLIST_CACHE_DURATION: Duration = Duration::from_secs(60); + static CONTEXT: Lazy> = Lazy::new(|| { serde_json::from_str(include_str!("../assets/lemmy/context.json")).expect("parse context") }); -// TODO: store this in context? but its only used in this crate, no need to expose it elsewhere -fn local_instance(context: &LemmyContext) -> &'static LocalInstance { - static LOCAL_INSTANCE: OnceCell = OnceCell::new(); - LOCAL_INSTANCE.get_or_init(|| { - let settings = InstanceSettingsBuilder::default() - .http_fetch_retry_limit(context.settings().http_fetch_retry_limit) - .worker_count(context.settings().federation.worker_count) - .testing_send_sync(env::var("APUB_TESTING_SEND_SYNC").is_ok()) - .verify_url_function(|url| check_apub_id_valid(url, &Settings::get())) - .build() - .expect("configure federation"); - LocalInstance::new( - context.settings().hostname, - context.client().clone(), - settings, - ) - }) +#[derive(Clone)] +pub struct VerifyUrlData(pub ActualDbPool); + +#[async_trait] +impl UrlVerifier for VerifyUrlData { + async fn verify(&self, url: &Url) -> Result<(), &'static str> { + let local_site_data = local_site_data_cached(&mut (&self.0).into()) + .await + .expect("read local site data"); + check_apub_id_valid(url, &local_site_data).map_err(|err| match err { + LemmyError { + error_type: LemmyErrorType::FederationDisabled, + .. + } => "Federation disabled", + LemmyError { + error_type: LemmyErrorType::DomainBlocked(_), + .. + } => "Domain is blocked", + LemmyError { + error_type: LemmyErrorType::DomainNotInAllowList(_), + .. + } => "Domain is not in allowlist", + _ => "Failed validating apub id", + })?; + Ok(()) + } } /// Checks if the ID is allowed for sending or receiving. @@ -52,172 +68,141 @@ fn local_instance(context: &LemmyContext) -> &'static LocalInstance { /// - the correct scheme (either http or https) /// - URL being in the allowlist (if it is active) /// - URL not being in the blocklist (if it is active) -/// -/// `use_strict_allowlist` should be true only when parsing a remote community, or when parsing a -/// post/comment in a local community. -#[tracing::instrument(skip(settings))] -fn check_apub_id_valid(apub_id: &Url, settings: &Settings) -> Result<(), &'static str> { +#[tracing::instrument(skip(local_site_data))] +fn check_apub_id_valid(apub_id: &Url, local_site_data: &LocalSiteData) -> Result<(), LemmyError> { let domain = apub_id.domain().expect("apud id has domain").to_string(); - let local_instance = settings - .get_hostname_without_port() - .expect("local hostname is valid"); - if domain == local_instance { - return Ok(()); - } - if !settings.federation.enabled { - return Err("Federation disabled"); + if !local_site_data + .local_site + .as_ref() + .map(|l| l.federation_enabled) + .unwrap_or(true) + { + Err(LemmyErrorType::FederationDisabled)?; } - if apub_id.scheme() != settings.get_protocol_string() { - return Err("Invalid protocol scheme"); + if local_site_data + .blocked_instances + .iter() + .any(|i| domain.eq(&i.domain)) + { + Err(LemmyErrorType::DomainBlocked(domain.clone()))?; } - if let Some(blocked) = settings.to_owned().federation.blocked_instances { - if blocked.contains(&domain) { - return Err("Domain is blocked"); - } - } - - if let Some(allowed) = settings.to_owned().federation.allowed_instances { - if !allowed.contains(&domain) { - return Err("Domain is not in allowlist"); - } + // Only check this if there are instances in the allowlist + if !local_site_data.allowed_instances.is_empty() + && !local_site_data + .allowed_instances + .iter() + .any(|i| domain.eq(&i.domain)) + { + Err(LemmyErrorType::DomainNotInAllowList(domain))?; } Ok(()) } -#[tracing::instrument(skip(settings))] -pub(crate) fn check_apub_id_valid_with_strictness( +#[derive(Clone)] +pub(crate) struct LocalSiteData { + local_site: Option, + allowed_instances: Vec, + blocked_instances: Vec, +} + +pub(crate) async fn local_site_data_cached( + pool: &mut DbPool<'_>, +) -> LemmyResult> { + static CACHE: Lazy>> = Lazy::new(|| { + Cache::builder() + .max_capacity(1) + .time_to_live(BLOCKLIST_CACHE_DURATION) + .build() + }); + Ok( + CACHE + .try_get_with((), async { + let (local_site, allowed_instances, blocked_instances) = + lemmy_db_schema::try_join_with_pool!(pool => ( + // LocalSite may be missing + |pool| async { + Ok(LocalSite::read(pool).await.ok()) + }, + Instance::allowlist, + Instance::blocklist + ))?; + + Ok::<_, diesel::result::Error>(Arc::new(LocalSiteData { + local_site, + allowed_instances, + blocked_instances, + })) + }) + .await?, + ) +} + +pub(crate) async fn check_apub_id_valid_with_strictness( apub_id: &Url, is_strict: bool, - settings: &Settings, + context: &LemmyContext, ) -> Result<(), LemmyError> { - check_apub_id_valid(apub_id, settings).map_err(LemmyError::from_message)?; let domain = apub_id.domain().expect("apud id has domain").to_string(); - let local_instance = settings + let local_instance = context + .settings() .get_hostname_without_port() .expect("local hostname is valid"); if domain == local_instance { return Ok(()); } - if let Some(mut allowed) = settings.to_owned().federation.allowed_instances { - // Only check allowlist if this is a community, or strict allowlist is enabled. - let strict_allowlist = settings.to_owned().federation.strict_allowlist; - if is_strict || strict_allowlist { - // need to allow this explicitly because apub receive might contain objects from our local - // instance. - allowed.push(local_instance); - - if !allowed.contains(&domain) { - return Err(LemmyError::from_message( - "Federation forbidden by strict allowlist", - )); - } + let local_site_data = local_site_data_cached(&mut context.pool()).await?; + check_apub_id_valid(apub_id, &local_site_data)?; + + // Only check allowlist if this is a community, and there are instances in the allowlist + if is_strict && !local_site_data.allowed_instances.is_empty() { + // need to allow this explicitly because apub receive might contain objects from our local + // instance. + let mut allowed_and_local = local_site_data + .allowed_instances + .iter() + .map(|i| i.domain.clone()) + .collect::>(); + let local_instance = context + .settings() + .get_hostname_without_port() + .expect("local hostname is valid"); + allowed_and_local.push(local_instance); + + let domain = apub_id.domain().expect("apud id has domain").to_string(); + if !allowed_and_local.contains(&domain) { + return Err(LemmyErrorType::FederationDisabledByStrictAllowList)?; } } Ok(()) } -pub enum EndpointType { - Community, - Person, - Post, - Comment, - PrivateMessage, -} - -/// Generates an apub endpoint for a given domain, IE xyz.tld -pub fn generate_local_apub_endpoint( - endpoint_type: EndpointType, - name: &str, - domain: &str, -) -> Result { - let point = match endpoint_type { - EndpointType::Community => "c", - EndpointType::Person => "u", - EndpointType::Post => "post", - EndpointType::Comment => "comment", - EndpointType::PrivateMessage => "private_message", - }; - - Ok(Url::parse(&format!("{}/{}/{}", domain, point, name))?.into()) -} - -pub fn generate_followers_url(actor_id: &DbUrl) -> Result { - Ok(Url::parse(&format!("{}/followers", actor_id))?.into()) -} - -pub fn generate_inbox_url(actor_id: &DbUrl) -> Result { - Ok(Url::parse(&format!("{}/inbox", actor_id))?.into()) -} - -pub fn generate_site_inbox_url(actor_id: &DbUrl) -> Result { - let mut actor_id: Url = actor_id.clone().into(); - actor_id.set_path("site_inbox"); - Ok(actor_id.into()) -} - -pub fn generate_shared_inbox_url(actor_id: &DbUrl) -> Result { - let actor_id: Url = actor_id.clone().into(); - let url = format!( - "{}://{}{}/inbox", - &actor_id.scheme(), - &actor_id.host_str().context(location_info!())?, - if let Some(port) = actor_id.port() { - format!(":{}", port) - } else { - "".to_string() - }, - ); - Ok(Url::parse(&url)?.into()) -} - -pub fn generate_outbox_url(actor_id: &DbUrl) -> Result { - Ok(Url::parse(&format!("{}/outbox", actor_id))?.into()) -} - -fn generate_moderators_url(community_id: &DbUrl) -> Result { - Ok(Url::parse(&format!("{}/moderators", community_id))?.into()) -} - -/// Store a sent or received activity in the database, for logging purposes. These records are not -/// persistent. -#[tracing::instrument(skip(pool))] -async fn insert_activity( +/// Store received activities in the database. +/// +/// This ensures that the same activity doesnt get received and processed more than once, which +/// would be a waste of resources. +#[tracing::instrument(skip(data))] +async fn insert_received_activity( ap_id: &Url, - activity: serde_json::Value, - local: bool, - sensitive: bool, - pool: &DbPool, -) -> Result { - let ap_id = ap_id.to_owned().into(); - Ok( - blocking(pool, move |conn| { - Activity::insert(conn, ap_id, activity, local, sensitive) - }) - .await??, - ) + data: &Data, +) -> Result<(), LemmyError> { + ReceivedActivity::create(&mut data.pool(), &ap_id.clone().into()).await?; + Ok(()) } -/// Common methods provided by ActivityPub actors (community and person). Not all methods are -/// implemented by all actors. -pub trait ActorType: ActorPublicKey { - fn actor_id(&self) -> Url; - - fn private_key(&self) -> Option; - - fn inbox_url(&self) -> Url; - - fn shared_inbox_url(&self) -> Option; - - fn shared_inbox_or_inbox_url(&self) -> Url { - self.shared_inbox_url().unwrap_or_else(|| self.inbox_url()) - } +#[async_trait::async_trait] +pub trait SendActivity: Sync { + type Response: Sync + Send + Clone; - fn get_public_key(&self) -> PublicKey { - PublicKey::new_main_key(self.actor_id(), self.public_key().to_string()) + async fn send_activity( + _request: &Self, + _response: &Self::Response, + _context: &Data, + ) -> Result<(), LemmyError> { + Ok(()) } }