* Adding hot_rank columns in place of function sorting.
- Creates hot_rank columns for post, comment, and community.
- Fixes #2932
* Updating all hot ranks on startup.
* Fixing post.url migration.
* Removing update_instance_software from startup.
* Adding post_rank query
pub published: chrono::NaiveDateTime,
/// The total number of children in this comment branch.
pub child_count: i32,
+ pub hot_rank: i32,
}
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
pub users_active_month: i64,
/// The number of users with any activity in the last year.
pub users_active_half_year: i64,
+ pub hot_rank: i32,
}
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone, Default)]
pub featured_community: bool,
/// If the post is featured on the site / to local.
pub featured_local: bool,
+ pub hot_rank: i32,
+ pub hot_rank_active: i32,
}
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
downvotes -> Int8,
published -> Timestamp,
child_count -> Int4,
+ hot_rank -> Int4,
}
}
users_active_week -> Int8,
users_active_month -> Int8,
users_active_half_year -> Int8,
+ hot_rank -> Int4,
}
}
id -> Int4,
#[max_length = 200]
name -> Varchar,
- url -> Nullable<Text>,
+ #[max_length = 512]
+ url -> Nullable<Varchar>,
body -> Nullable<Text>,
creator_id -> Int4,
community_id -> Int4,
newest_comment_time -> Timestamp,
featured_community -> Bool,
featured_local -> Bool,
+ hot_rank -> Int4,
+ hot_rank_active -> Int4,
}
}
downvotes: 0,
published: agg.published,
child_count: 0,
+ hot_rank: 1728,
},
my_vote: None,
resolver: None,
post::Post,
},
traits::JoinView,
- utils::{functions::hot_rank, fuzzy_search, get_conn, limit_and_offset_unlimited, DbPool},
+ utils::{fuzzy_search, get_conn, limit_and_offset_unlimited, DbPool},
CommentSortType,
ListingType,
};
};
query = match self.sort.unwrap_or(CommentSortType::Hot) {
- CommentSortType::Hot => query
- .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc())
- .then_order_by(comment_aggregates::published.desc()),
+ CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()),
CommentSortType::New => query.then_order_by(comment::published.desc()),
CommentSortType::Old => query.then_order_by(comment::published.asc()),
CommentSortType::Top => query.order_by(comment_aggregates::score.desc()),
downvotes: 0,
published: agg.published,
child_count: 5,
+ hot_rank: 1728,
},
}
}
newest_comment_time: inserted_post.published,
featured_community: false,
featured_local: false,
+ hot_rank: 1728,
+ hot_rank_active: 1728,
},
resolver: None,
};
post::{Post, PostRead, PostSaved},
},
traits::JoinView,
- utils::{functions::hot_rank, fuzzy_search, get_conn, limit_and_offset, DbPool},
+ utils::{fuzzy_search, get_conn, limit_and_offset, DbPool},
ListingType,
SortType,
};
}
query = match self.sort.unwrap_or(SortType::Hot) {
- SortType::Active => query
- .then_order_by(
- hot_rank(
- post_aggregates::score,
- post_aggregates::newest_comment_time_necro,
- )
- .desc(),
- )
- .then_order_by(post_aggregates::newest_comment_time_necro.desc()),
- SortType::Hot => query
- .then_order_by(hot_rank(post_aggregates::score, post_aggregates::published).desc())
- .then_order_by(post_aggregates::published.desc()),
+ SortType::Active => query.then_order_by(post_aggregates::hot_rank_active.desc()),
+ SortType::Hot => query.then_order_by(post_aggregates::hot_rank.desc()),
SortType::New => query.then_order_by(post_aggregates::published.desc()),
SortType::Old => query.then_order_by(post_aggregates::published.asc()),
SortType::NewComments => query.then_order_by(post_aggregates::newest_comment_time.desc()),
newest_comment_time: inserted_post.published,
featured_community: false,
featured_local: false,
+ hot_rank: 1728,
+ hot_rank_active: 1728,
},
subscribed: SubscribedType::NotSubscribed,
read: false,
post::Post,
},
traits::JoinView,
- utils::{functions::hot_rank, get_conn, limit_and_offset, DbPool},
+ utils::{get_conn, limit_and_offset, DbPool},
CommentSortType,
};
use typed_builder::TypedBuilder;
};
query = match self.sort.unwrap_or(CommentSortType::New) {
- CommentSortType::Hot => query
- .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc())
- .then_order_by(comment_aggregates::published.desc()),
+ CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()),
CommentSortType::New => query.then_order_by(comment_reply::published.desc()),
CommentSortType::Old => query.then_order_by(comment_reply::published.asc()),
CommentSortType::Top => query.order_by(comment_aggregates::score.desc()),
SortType::TopAll => query = query.order_by(community_aggregates::subscribers.desc()),
SortType::TopMonth => query = query.order_by(community_aggregates::users_active_month.desc()),
SortType::Hot => {
- query = query.order_by(community_aggregates::users_active_month.desc());
+ query = query.order_by(community_aggregates::hot_rank.desc());
// Don't show hidden communities in Hot (trending)
query = query.filter(
community::hidden
post::Post,
},
traits::JoinView,
- utils::{functions::hot_rank, get_conn, limit_and_offset, DbPool},
+ utils::{get_conn, limit_and_offset, DbPool},
CommentSortType,
};
use typed_builder::TypedBuilder;
};
query = match self.sort.unwrap_or(CommentSortType::Hot) {
- CommentSortType::Hot => query
- .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc())
- .then_order_by(comment_aggregates::published.desc()),
+ CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()),
CommentSortType::New => query.then_order_by(comment::published.desc()),
CommentSortType::Old => query.then_order_by(comment::published.asc()),
CommentSortType::Top => query.order_by(comment_aggregates::score.desc()),
+-- Change back the column type
+alter table post alter column url type text;
+
+-- Drop the index
drop index idx_post_url;
+-- Make a hard limit of 512 for the post.url column
+-- Truncate existing long rows.
+update post set url = left(url, 512) where length(url) > 512;
+
+-- Enforce the limit
+alter table post alter column url type varchar (512);
+
+-- Add the index
create index idx_post_url on post(url);
--- /dev/null
+
+-- Remove the new columns
+
+alter table post_aggregates drop column hot_rank;
+alter table post_aggregates drop column hot_rank_active;
+
+alter table comment_aggregates drop column hot_rank;
+
+alter table community_aggregates drop column hot_rank;
+
+-- Drop some new indexes
+drop index idx_post_aggregates_score;
+drop index idx_post_aggregates_published;
+drop index idx_post_aggregates_newest_comment_time;
+drop index idx_post_aggregates_newest_comment_time_necro;
+drop index idx_post_aggregates_featured_community;
+drop index idx_post_aggregates_featured_local;
+
+-- Recreate the old indexes
+CREATE INDEX idx_post_aggregates_featured_local_newest_comment_time ON public.post_aggregates USING btree (featured_local DESC, newest_comment_time DESC);
+CREATE INDEX idx_post_aggregates_featured_community_newest_comment_time ON public.post_aggregates USING btree (featured_community DESC, newest_comment_time DESC);
+CREATE INDEX idx_post_aggregates_featured_local_comments ON public.post_aggregates USING btree (featured_local DESC, comments DESC);
+CREATE INDEX idx_post_aggregates_featured_community_comments ON public.post_aggregates USING btree (featured_community DESC, comments DESC);
+CREATE INDEX idx_post_aggregates_featured_local_hot ON public.post_aggregates USING btree (featured_local DESC, hot_rank((score)::numeric, published) DESC, published DESC);
+CREATE INDEX idx_post_aggregates_featured_community_hot ON public.post_aggregates USING btree (featured_community DESC, hot_rank((score)::numeric, published) DESC, published DESC);
+CREATE INDEX idx_post_aggregates_featured_local_score ON public.post_aggregates USING btree (featured_local DESC, score DESC);
+CREATE INDEX idx_post_aggregates_featured_community_score ON public.post_aggregates USING btree (featured_community DESC, score DESC);
+CREATE INDEX idx_post_aggregates_featured_local_published ON public.post_aggregates USING btree (featured_local DESC, published DESC);
+CREATE INDEX idx_post_aggregates_featured_community_published ON public.post_aggregates USING btree (featured_community DESC, published DESC);
+CREATE INDEX idx_post_aggregates_featured_local_active ON public.post_aggregates USING btree (featured_local DESC, hot_rank((score)::numeric, newest_comment_time_necro) DESC, newest_comment_time_necro DESC);
+CREATE INDEX idx_post_aggregates_featured_community_active ON public.post_aggregates USING btree (featured_community DESC, hot_rank((score)::numeric, newest_comment_time_necro) DESC, newest_comment_time_necro DESC);
+
+CREATE INDEX idx_comment_aggregates_hot ON public.comment_aggregates USING btree (hot_rank((score)::numeric, published) DESC, published DESC);
+
+CREATE INDEX idx_community_aggregates_hot ON public.community_aggregates USING btree (hot_rank((subscribers)::numeric, published) DESC, published DESC);
--- /dev/null
+-- This converts the old hot_rank functions, to columns
+
+-- Remove the old compound indexes
+DROP INDEX idx_post_aggregates_featured_local_newest_comment_time;
+DROP INDEX idx_post_aggregates_featured_community_newest_comment_time;
+DROP INDEX idx_post_aggregates_featured_local_comments;
+DROP INDEX idx_post_aggregates_featured_community_comments;
+DROP INDEX idx_post_aggregates_featured_local_hot;
+DROP INDEX idx_post_aggregates_featured_community_hot;
+DROP INDEX idx_post_aggregates_featured_local_score;
+DROP INDEX idx_post_aggregates_featured_community_score;
+DROP INDEX idx_post_aggregates_featured_local_published;
+DROP INDEX idx_post_aggregates_featured_community_published;
+DROP INDEX idx_post_aggregates_featured_local_active;
+DROP INDEX idx_post_aggregates_featured_community_active;
+
+DROP INDEX idx_comment_aggregates_hot;
+
+DROP INDEX idx_community_aggregates_hot;
+
+-- Add the new hot rank columns for post and comment aggregates
+-- Note: 1728 is the result of the hot_rank function, with a score of 1, posted now
+-- hot_rank = 10000*log10(1 + 3)/Power(2, 1.8)
+alter table post_aggregates add column hot_rank integer not null default 1728;
+alter table post_aggregates add column hot_rank_active integer not null default 1728;
+
+alter table comment_aggregates add column hot_rank integer not null default 1728;
+
+alter table community_aggregates add column hot_rank integer not null default 1728;
+
+-- Populate them initially
+-- Note: After initial population, these are updated in a periodic scheduled job,
+-- with only the last week being updated.
+update post_aggregates set hot_rank_active = hot_rank(score::numeric, newest_comment_time_necro);
+update post_aggregates set hot_rank = hot_rank(score::numeric, published);
+update comment_aggregates set hot_rank = hot_rank(score::numeric, published);
+update community_aggregates set hot_rank = hot_rank(subscribers::numeric, published);
+
+-- Create single column indexes
+create index idx_post_aggregates_score on post_aggregates (score desc);
+create index idx_post_aggregates_published on post_aggregates (published desc);
+create index idx_post_aggregates_newest_comment_time on post_aggregates (newest_comment_time desc);
+create index idx_post_aggregates_newest_comment_time_necro on post_aggregates (newest_comment_time_necro desc);
+create index idx_post_aggregates_featured_community on post_aggregates (featured_community desc);
+create index idx_post_aggregates_featured_local on post_aggregates (featured_local desc);
+create index idx_post_aggregates_hot on post_aggregates (hot_rank desc);
+create index idx_post_aggregates_active on post_aggregates (hot_rank_active desc);
+
+create index idx_comment_aggregates_hot on comment_aggregates (hot_rank desc);
+
+create index idx_community_aggregates_hot on community_aggregates (hot_rank desc);
--- /dev/null
+#!/bin/bash
+
+sudo docker exec -i docker-postgres-1 psql -Ulemmy -c "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) SELECT post.id, post.name, post.url, post.body, post.creator_id, post.community_id, post.removed, post.locked, post.published, post.updated, post.deleted, post.nsfw, post.embed_title, post.embed_description, post.embed_video_url, post.thumbnail_url, post.ap_id, post.local, post.language_id, post.featured_community, post.featured_local, person.id, person.name, person.display_name, person.avatar, person.banned, person.published, person.updated, person.actor_id, person.bio, person.local, person.banner, person.deleted, person.inbox_url, person.shared_inbox_url, person.matrix_user_id, person.admin, person.bot_account, person.ban_expires, person.instance_id, community.id, community.name, community.title, community.description, community.removed, community.published, community.updated, community.deleted, community.nsfw, community.actor_id, community.local, community.icon, community.banner, community.hidden, community.posting_restricted_to_mods, community.instance_id, community_person_ban.id, community_person_ban.community_id, community_person_ban.person_id, community_person_ban.published, community_person_ban.expires, post_aggregates.id, post_aggregates.post_id, post_aggregates.comments, post_aggregates.score, post_aggregates.upvotes, post_aggregates.downvotes, post_aggregates.published, post_aggregates.newest_comment_time_necro, post_aggregates.newest_comment_time, post_aggregates.featured_community, post_aggregates.featured_local, community_follower.id, community_follower.community_id, community_follower.person_id, community_follower.published, community_follower.pending, post_saved.id, post_saved.post_id, post_saved.person_id, post_saved.published, post_read.id, post_read.post_id, post_read.person_id, post_read.published, person_block.id, person_block.person_id, person_block.target_id, person_block.published, post_like.score, coalesce((post_aggregates.comments - person_post_aggregates.read_comments), post_aggregates.comments) FROM ((((((((((((post INNER JOIN person ON (post.creator_id = person.id)) INNER JOIN community ON (post.community_id = community.id)) LEFT OUTER JOIN community_person_ban ON (((post.community_id = community_person_ban.community_id) AND (community_person_ban.person_id = post.creator_id)) AND ((community_person_ban.expires IS NULL) OR (community_person_ban.expires > CURRENT_TIMESTAMP)))) INNER JOIN post_aggregates ON (post_aggregates.post_id = post.id)) LEFT OUTER JOIN community_follower ON ((post.community_id = community_follower.community_id) AND (community_follower.person_id = '33517'))) LEFT OUTER JOIN post_saved ON ((post.id = post_saved.post_id) AND (post_saved.person_id = '33517'))) LEFT OUTER JOIN post_read ON ((post.id = post_read.post_id) AND (post_read.person_id = '33517'))) LEFT OUTER JOIN person_block ON ((post.creator_id = person_block.target_id) AND (person_block.person_id = '33517'))) LEFT OUTER JOIN community_block ON ((community.id = community_block.community_id) AND (community_block.person_id = '33517'))) LEFT OUTER JOIN post_like ON ((post.id = post_like.post_id) AND (post_like.person_id = '33517'))) LEFT OUTER JOIN person_post_aggregates ON ((post.id = person_post_aggregates.post_id) AND (person_post_aggregates.person_id = '33517'))) LEFT OUTER JOIN local_user_language ON ((post.language_id = local_user_language.language_id) AND (local_user_language.local_user_id = '11402'))) WHERE ((((((((((community_follower.person_id IS NOT NULL) AND (post.nsfw = 'f')) AND (community.nsfw = 'f')) AND (local_user_language.language_id IS NOT NULL)) AND (community_block.person_id IS NULL)) AND (person_block.person_id IS NULL)) AND (post.removed = 'f')) AND (post.deleted = 'f')) AND (community.removed = 'f')) AND (community.deleted = 'f')) ORDER BY post_aggregates.featured_local DESC , post_aggregates.hot_rank DESC LIMIT '40' OFFSET '0';" > query_results.json
// Import week days and WeekDay
use diesel::{sql_query, PgConnection, RunQueryDsl};
use lemmy_db_schema::{
- schema::{activity, community_person_ban, instance, person},
+ schema::{
+ activity,
+ comment_aggregates,
+ community_aggregates,
+ community_person_ban,
+ instance,
+ person,
+ post_aggregates,
+ },
source::instance::{Instance, InstanceForm},
- utils::naive_now,
+ utils::{functions::hot_rank, naive_now},
};
use lemmy_routes::nodeinfo::NodeInfo;
use lemmy_utils::{error::LemmyError, REQWEST_TIMEOUT};
// Setup the connections
let mut scheduler = Scheduler::new();
- let mut conn = PgConnection::establish(&db_url).expect("could not establish connection");
-
+ let mut conn_1 = PgConnection::establish(&db_url).expect("could not establish connection");
let mut conn_2 = PgConnection::establish(&db_url).expect("could not establish connection");
+ let mut conn_3 = PgConnection::establish(&db_url).expect("could not establish connection");
+ let mut conn_4 = PgConnection::establish(&db_url).expect("could not establish connection");
- active_counts(&mut conn);
- update_banned_when_expired(&mut conn);
+ // Run on startup
+ active_counts(&mut conn_1);
+ update_hot_ranks(&mut conn_1, false);
+ update_banned_when_expired(&mut conn_1);
+ clear_old_activities(&mut conn_1);
- // On startup, reindex the tables non-concurrently
- // TODO remove this for now, since it slows down startup a lot on lemmy.ml
- reindex_aggregates_tables(&mut conn, true);
+ // Update active counts every hour
scheduler.every(CTimeUnits::hour(1)).run(move || {
- let conn = &mut PgConnection::establish(&db_url)
- .unwrap_or_else(|_| panic!("Error connecting to {db_url}"));
- active_counts(conn);
- update_banned_when_expired(conn);
- reindex_aggregates_tables(conn, true);
- drop_ccnew_indexes(conn);
+ active_counts(&mut conn_1);
+ update_banned_when_expired(&mut conn_1);
+ });
+
+ // Update hot ranks every 5 minutes
+ scheduler.every(CTimeUnits::minutes(5)).run(move || {
+ update_hot_ranks(&mut conn_2, true);
});
- clear_old_activities(&mut conn);
+ // Clear old activities every week
scheduler.every(CTimeUnits::weeks(1)).run(move || {
- clear_old_activities(&mut conn);
+ clear_old_activities(&mut conn_3);
});
- update_instance_software(&mut conn_2, &user_agent);
scheduler.every(CTimeUnits::days(1)).run(move || {
- update_instance_software(&mut conn_2, &user_agent);
+ update_instance_software(&mut conn_4, &user_agent);
});
// Manually run the scheduler in an event loop
}
}
-/// Reindex the aggregates tables every one hour
-/// This is necessary because hot_rank is actually a mutable function:
-/// https://dba.stackexchange.com/questions/284052/how-to-create-an-index-based-on-a-time-based-function-in-postgres?noredirect=1#comment555727_284052
-fn reindex_aggregates_tables(conn: &mut PgConnection, concurrently: bool) {
- for table_name in &[
- "post_aggregates",
- "comment_aggregates",
- "community_aggregates",
- ] {
- reindex_table(conn, table_name, concurrently);
+/// Update the hot_rank columns for the aggregates tables
+fn update_hot_ranks(conn: &mut PgConnection, last_week_only: bool) {
+ let mut post_update = diesel::update(post_aggregates::table).into_boxed();
+ let mut comment_update = diesel::update(comment_aggregates::table).into_boxed();
+ let mut community_update = diesel::update(community_aggregates::table).into_boxed();
+
+ // Only update for the last week of content
+ if last_week_only {
+ info!("Updating hot ranks for last week...");
+ let last_week = now - diesel::dsl::IntervalDsl::weeks(1);
+
+ post_update = post_update.filter(post_aggregates::published.gt(last_week));
+ comment_update = comment_update.filter(comment_aggregates::published.gt(last_week));
+ community_update = community_update.filter(community_aggregates::published.gt(last_week));
+ } else {
+ info!("Updating hot ranks for all history...");
}
-}
-fn reindex_table(conn: &mut PgConnection, table_name: &str, concurrently: bool) {
- let concurrently_str = if concurrently { "concurrently" } else { "" };
- info!("Reindexing table {} {} ...", concurrently_str, table_name);
- let query = format!("reindex table {concurrently_str} {table_name}");
- sql_query(query).execute(conn).expect("reindex table");
+ post_update
+ .set((
+ post_aggregates::hot_rank.eq(hot_rank(post_aggregates::score, post_aggregates::published)),
+ post_aggregates::hot_rank_active.eq(hot_rank(
+ post_aggregates::score,
+ post_aggregates::newest_comment_time_necro,
+ )),
+ ))
+ .execute(conn)
+ .expect("update post_aggregate hot_ranks");
+
+ comment_update
+ .set(comment_aggregates::hot_rank.eq(hot_rank(
+ comment_aggregates::score,
+ comment_aggregates::published,
+ )))
+ .execute(conn)
+ .expect("update comment_aggregate hot_ranks");
+
+ community_update
+ .set(community_aggregates::hot_rank.eq(hot_rank(
+ community_aggregates::subscribers,
+ community_aggregates::published,
+ )))
+ .execute(conn)
+ .expect("update community_aggregate hot_ranks");
info!("Done.");
}
.expect("remove community_ban expired rows");
}
-/// Drops the phantom CCNEW indexes created by postgres
-/// https://github.com/LemmyNet/lemmy/issues/2431
-fn drop_ccnew_indexes(conn: &mut PgConnection) {
- info!("Dropping phantom ccnew indexes...");
- let drop_stmt = "select drop_ccnew_indexes()";
- sql_query(drop_stmt)
- .execute(conn)
- .expect("drop ccnew indexes");
-}
-
/// Updates the instance software and version
fn update_instance_software(conn: &mut PgConnection, user_agent: &str) {
info!("Updating instances software and versions...");