From: Dessalines Date: Wed, 6 Jan 2021 04:42:48 +0000 (-0500) Subject: Halfway done with hot rank indexes. X-Git-Url: http://these/git/readmes/README.ja.md?a=commitdiff_plain;h=d227000de31ecb641d678711cb94fe7f9fcad351;p=lemmy.git Halfway done with hot rank indexes. --- diff --git a/.gitignore b/.gitignore index 4b9715b4..2a7b3a1d 100644 --- a/.gitignore +++ b/.gitignore @@ -15,8 +15,7 @@ volumes # local build files target env_setup.sh -query_testing/*.json -query_testing/*.json.old +query_testing/**/reports/*.json # API tests api_tests/node_modules diff --git a/lemmy_db_queries/src/aggregates/comment_aggregates.rs b/lemmy_db_queries/src/aggregates/comment_aggregates.rs index f6da44b0..cab81261 100644 --- a/lemmy_db_queries/src/aggregates/comment_aggregates.rs +++ b/lemmy_db_queries/src/aggregates/comment_aggregates.rs @@ -10,6 +10,7 @@ pub struct CommentAggregates { pub score: i64, pub upvotes: i64, pub downvotes: i64, + pub published: chrono::NaiveDateTime, } impl CommentAggregates { diff --git a/lemmy_db_queries/src/aggregates/community_aggregates.rs b/lemmy_db_queries/src/aggregates/community_aggregates.rs index f34bd88b..3fb891c1 100644 --- a/lemmy_db_queries/src/aggregates/community_aggregates.rs +++ b/lemmy_db_queries/src/aggregates/community_aggregates.rs @@ -10,6 +10,7 @@ pub struct CommunityAggregates { pub subscribers: i64, pub posts: i64, pub comments: i64, + pub published: chrono::NaiveDateTime, } impl CommunityAggregates { diff --git a/lemmy_db_queries/src/aggregates/post_aggregates.rs b/lemmy_db_queries/src/aggregates/post_aggregates.rs index 5cfe0fdc..6c1dbed2 100644 --- a/lemmy_db_queries/src/aggregates/post_aggregates.rs +++ b/lemmy_db_queries/src/aggregates/post_aggregates.rs @@ -11,6 +11,7 @@ pub struct PostAggregates { pub score: i64, pub upvotes: i64, pub downvotes: i64, + pub published: chrono::NaiveDateTime, pub newest_comment_time: chrono::NaiveDateTime, } diff --git a/lemmy_db_schema/src/schema.rs b/lemmy_db_schema/src/schema.rs index f0aca2db..fa5d8c21 100644 --- a/lemmy_db_schema/src/schema.rs +++ b/lemmy_db_schema/src/schema.rs @@ -41,6 +41,7 @@ table! { score -> Int8, upvotes -> Int8, downvotes -> Int8, + published -> Timestamp, } } @@ -108,6 +109,7 @@ table! { subscribers -> Int8, posts -> Int8, comments -> Int8, + published -> Timestamp, } } @@ -280,6 +282,7 @@ table! { score -> Int8, upvotes -> Int8, downvotes -> Int8, + published -> Timestamp, newest_comment_time -> Timestamp, } } diff --git a/lemmy_db_views/src/comment_view.rs b/lemmy_db_views/src/comment_view.rs index 4afca279..951e2f61 100644 --- a/lemmy_db_views/src/comment_view.rs +++ b/lemmy_db_views/src/comment_view.rs @@ -365,8 +365,8 @@ impl<'a> CommentQueryBuilder<'a> { query = match self.sort { SortType::Hot | SortType::Active => query - .order_by(hot_rank(comment_aggregates::score, comment::published).desc()) - .then_order_by(comment::published.desc()), + .order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) + .then_order_by(comment_aggregates::published.desc()), SortType::New => query.order_by(comment::published.desc()), SortType::TopAll => query.order_by(comment_aggregates::score.desc()), SortType::TopYear => query @@ -614,6 +614,7 @@ mod tests { score: 1, upvotes: 1, downvotes: 0, + published: agg.published, }, }; diff --git a/lemmy_db_views/src/post_view.rs b/lemmy_db_views/src/post_view.rs index 4caa1f02..703ab167 100644 --- a/lemmy_db_views/src/post_view.rs +++ b/lemmy_db_views/src/post_view.rs @@ -349,10 +349,10 @@ impl<'a> PostQueryBuilder<'a> { .then_order_by( hot_rank(post_aggregates::score, post_aggregates::newest_comment_time).desc(), ) - .then_order_by(post::published.desc()), + .then_order_by(post_aggregates::newest_comment_time.desc()), SortType::Hot => query - .then_order_by(hot_rank(post_aggregates::score, post::published).desc()) - .then_order_by(post::published.desc()), + .then_order_by(hot_rank(post_aggregates::score, post_aggregates::published).desc()) + .then_order_by(post_aggregates::published.desc()), SortType::New => query.then_order_by(post::published.desc()), SortType::TopAll => query.then_order_by(post_aggregates::score.desc()), SortType::TopYear => query @@ -601,6 +601,7 @@ mod tests { score: 1, upvotes: 1, downvotes: 0, + published: agg.published, newest_comment_time: inserted_post.published, }, subscribed: false, diff --git a/lemmy_db_views_actor/src/community_view.rs b/lemmy_db_views_actor/src/community_view.rs index 0c0421b9..05dc3789 100644 --- a/lemmy_db_views_actor/src/community_view.rs +++ b/lemmy_db_views_actor/src/community_view.rs @@ -186,9 +186,14 @@ impl<'a> CommunityQueryBuilder<'a> { // Covers all other sorts, including hot _ => { query = query - // TODO do custom sql function for hot_rank, make sure this works - .order_by(hot_rank(community_aggregates::subscribers, community::published).desc()) - .then_order_by(community_aggregates::subscribers.desc()) + .order_by( + hot_rank( + community_aggregates::subscribers, + community_aggregates::published, + ) + .desc(), + ) + .then_order_by(community_aggregates::published.desc()) } }; diff --git a/lemmy_db_views_actor/src/user_mention_view.rs b/lemmy_db_views_actor/src/user_mention_view.rs index 05ec705b..1e9e74ee 100644 --- a/lemmy_db_views_actor/src/user_mention_view.rs +++ b/lemmy_db_views_actor/src/user_mention_view.rs @@ -268,8 +268,8 @@ impl<'a> UserMentionQueryBuilder<'a> { query = match self.sort { SortType::Hot | SortType::Active => query - .order_by(hot_rank(comment_aggregates::score, comment::published).desc()) - .then_order_by(comment::published.desc()), + .order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) + .then_order_by(comment_aggregates::published.desc()), SortType::New => query.order_by(comment::published.desc()), SortType::TopAll => query.order_by(comment_aggregates::score.desc()), SortType::TopYear => query diff --git a/migrations/2020-12-04-183345_create_community_aggregates/up.sql b/migrations/2020-12-04-183345_create_community_aggregates/up.sql index 18a62298..129b58c0 100644 --- a/migrations/2020-12-04-183345_create_community_aggregates/up.sql +++ b/migrations/2020-12-04-183345_create_community_aggregates/up.sql @@ -5,15 +5,17 @@ create table community_aggregates ( subscribers bigint not null default 0, posts bigint not null default 0, comments bigint not null default 0, + published timestamp not null default now(), unique (community_id) ); -insert into community_aggregates (community_id, subscribers, posts, comments) +insert into community_aggregates (community_id, subscribers, posts, comments, published) select c.id, coalesce(cf.subs, 0) as subscribers, coalesce(cd.posts, 0) as posts, - coalesce(cd.comments, 0) as comments + coalesce(cd.comments, 0) as comments, + c.published from community c left join ( select diff --git a/migrations/2020-12-10-152350_create_post_aggregates/up.sql b/migrations/2020-12-10-152350_create_post_aggregates/up.sql index b3dc6278..784f33e2 100644 --- a/migrations/2020-12-10-152350_create_post_aggregates/up.sql +++ b/migrations/2020-12-10-152350_create_post_aggregates/up.sql @@ -6,17 +6,19 @@ create table post_aggregates ( score bigint not null default 0, upvotes bigint not null default 0, downvotes bigint not null default 0, + published timestamp not null default now(), newest_comment_time timestamp not null default now(), unique (post_id) ); -insert into post_aggregates (post_id, comments, score, upvotes, downvotes, newest_comment_time) +insert into post_aggregates (post_id, comments, score, upvotes, downvotes, published, newest_comment_time) select p.id, coalesce(ct.comments, 0::bigint) as comments, coalesce(pl.score, 0::bigint) as score, coalesce(pl.upvotes, 0::bigint) as upvotes, coalesce(pl.downvotes, 0::bigint) as downvotes, + p.published, greatest(ct.recent_comment_time, p.published) as newest_activity_time from post p left join ( @@ -64,7 +66,9 @@ begin update post_aggregates pa set comments = comments + 1, newest_comment_time = NEW.published - where pa.post_id = NEW.post_id; + where pa.post_id = NEW.post_id + -- A 2 day necro-bump limit + and published > ('now'::timestamp - '2 days'::interval); ELSIF (TG_OP = 'DELETE') THEN -- Join to post because that post may not exist anymore update post_aggregates pa diff --git a/migrations/2020-12-14-020038_create_comment_aggregates/up.sql b/migrations/2020-12-14-020038_create_comment_aggregates/up.sql index 1a168bec..f9cae6b3 100644 --- a/migrations/2020-12-14-020038_create_comment_aggregates/up.sql +++ b/migrations/2020-12-14-020038_create_comment_aggregates/up.sql @@ -5,15 +5,17 @@ create table comment_aggregates ( score bigint not null default 0, upvotes bigint not null default 0, downvotes bigint not null default 0, + published timestamp not null default now(), unique (comment_id) ); -insert into comment_aggregates (comment_id, score, upvotes, downvotes) +insert into comment_aggregates (comment_id, score, upvotes, downvotes, published) select c.id, COALESCE(cl.total, 0::bigint) AS score, COALESCE(cl.up, 0::bigint) AS upvotes, - COALESCE(cl.down, 0::bigint) AS downvotes + COALESCE(cl.down, 0::bigint) AS downvotes, + c.published from comment c left join ( select l.comment_id as id, sum(l.score) as total, diff --git a/migrations/2021-01-05-200932_add_hot_rank_indexes/down.sql b/migrations/2021-01-05-200932_add_hot_rank_indexes/down.sql new file mode 100644 index 00000000..2ec455a5 --- /dev/null +++ b/migrations/2021-01-05-200932_add_hot_rank_indexes/down.sql @@ -0,0 +1,25 @@ +-- Rank = ScaleFactor * sign(Score) * log(1 + abs(Score)) / (Time + 2)^Gravity +create or replace function hot_rank( + score numeric, + published timestamp without time zone) +returns integer as $$ +begin + -- hours_diff:=EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600 + return floor(10000*log(greatest(1,score+3)) / power(((EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600) + 2), 1.8))::integer; +end; $$ +LANGUAGE plpgsql; + +drop index + idx_post_published, + idx_post_stickied, + idx_post_aggregates_hot, + idx_post_aggregates_active, + idx_post_aggregates_score, + idx_comment_published, + idx_comment_aggregates_hot, + idx_comment_aggregates_score, + idx_user_published, + idx_user_aggregates_comment_score, + idx_community_published, + idx_community_aggregates_hot, + idx_community_aggregates_subscribers; diff --git a/migrations/2021-01-05-200932_add_hot_rank_indexes/up.sql b/migrations/2021-01-05-200932_add_hot_rank_indexes/up.sql new file mode 100644 index 00000000..a6c45234 --- /dev/null +++ b/migrations/2021-01-05-200932_add_hot_rank_indexes/up.sql @@ -0,0 +1,45 @@ +-- Need to add immutable to the hot_rank function in order to index by it + +-- Rank = ScaleFactor * sign(Score) * log(1 + abs(Score)) / (Time + 2)^Gravity +create or replace function hot_rank( + score numeric, + published timestamp without time zone) +returns integer as $$ +begin + -- hours_diff:=EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600 + return floor(10000*log(greatest(1,score+3)) / power(((EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600) + 2), 1.8))::integer; +end; $$ +LANGUAGE plpgsql +IMMUTABLE; + +-- Post +create index idx_post_published on post (published desc); +create index idx_post_stickied on post (stickied desc); + +-- Post_aggregates +create index idx_post_aggregates_hot on post_aggregates (hot_rank(score, published) desc, published desc); +create index idx_post_aggregates_active on post_aggregates (hot_rank(score, newest_comment_time) desc, newest_comment_time desc); +create index idx_post_aggregates_score on post_aggregates (score desc); + +-- Comment +create index idx_comment_published on comment (published desc); + +-- Comment_aggregates +create index idx_comment_aggregates_hot on comment_aggregates (hot_rank(score, published) desc, published desc); +create index idx_comment_aggregates_score on comment_aggregates (score desc); + +-- User +create index idx_user_published on user_ (published desc); + +-- User_aggregates +create index idx_user_aggregates_comment_score on user_aggregates (comment_score desc); + +-- Community +create index idx_community_published on community (published desc); + +-- Community_aggregates +create index idx_community_aggregates_hot on community_aggregates (hot_rank(subscribers, published) desc, published desc); +create index idx_community_aggregates_subscribers on community_aggregates (subscribers desc); + + + diff --git a/query_testing/after.out b/query_testing/after.out deleted file mode 100644 index 2dbc5d1e..00000000 --- a/query_testing/after.out +++ /dev/null @@ -1,13 +0,0 @@ -comment_fast_view.json: "Execution Time": 400.841 -comment_view.json: "Execution Time": 2312.899 -community_fast_view.json: "Execution Time": 0.272 -community_view.json: "Execution Time": 36.572 -post_fast_view.json: "Execution Time": 128.839 -post_view.json: "Execution Time": 970.671 -private_message_view.json: "Execution Time": 1.426 -reply_fast_view.json: "Execution Time": 426.179 -site_view.json: "Execution Time": 2.453 -user_fast.json: "Execution Time": 0.400 -user_mention_fast_view.json: "Execution Time": 0.179 -user_mention_view.json: "Execution Time": 95.815 -user_view.json: "Execution Time": 44.692 diff --git a/query_testing/before.out b/query_testing/before.out deleted file mode 100644 index 579dd127..00000000 --- a/query_testing/before.out +++ /dev/null @@ -1,13 +0,0 @@ -comment_fast_view.json: "Execution Time": 3.501 -comment_view.json: "Execution Time": 2312.899 -community_fast_view.json: "Execution Time": 0.372 -community_view.json: "Execution Time": 36.572 -post_fast_view.json: "Execution Time": 78.920 -post_view.json: "Execution Time": 970.671 -private_message_view.json: "Execution Time": 1.426 -reply_fast_view.json: "Execution Time": 32.875 -site_view.json: "Execution Time": 2.593 -user_fast.json: "Execution Time": 0.155 -user_mention_fast_view.json: "Execution Time": 0.171 -user_mention_view.json: "Execution Time": 1468.291 -user_view.json: "Execution Time": 44.692 diff --git a/query_testing/generate_explain_reports.sh b/query_testing/generate_explain_reports.sh deleted file mode 100755 index 439b46a7..00000000 --- a/query_testing/generate_explain_reports.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -set -e - -# You can import these to http://tatiyants.com/pev/#/plans/new - -# Do the views first - -echo "explain (analyze, format json) select * from user_fast" > explain.sql -psql -qAt -U lemmy -f explain.sql > user_fast.json - -echo "explain (analyze, format json) select * from post_view where user_id is null order by hot_rank desc, published desc" > explain.sql -psql -qAt -U lemmy -f explain.sql > post_view.json - -echo "explain (analyze, format json) select * from post_fast_view where user_id is null order by hot_rank desc, published desc" > explain.sql -psql -qAt -U lemmy -f explain.sql > post_fast_view.json - -echo "explain (analyze, format json) select * from comment_view where user_id is null" > explain.sql -psql -qAt -U lemmy -f explain.sql > comment_view.json - -echo "explain (analyze, format json) select * from comment_fast_view where user_id is null" > explain.sql -psql -qAt -U lemmy -f explain.sql > comment_fast_view.json - -echo "explain (analyze, format json) select * from community_view where user_id is null order by hot_rank desc" > explain.sql -psql -qAt -U lemmy -f explain.sql > community_view.json - -echo "explain (analyze, format json) select * from community_fast_view where user_id is null order by hot_rank desc" > explain.sql -psql -qAt -U lemmy -f explain.sql > community_fast_view.json - -echo "explain (analyze, format json) select * from site_view limit 1" > explain.sql -psql -qAt -U lemmy -f explain.sql > site_view.json - -echo "explain (analyze, format json) select * from reply_fast_view where user_id = 34 and recipient_id = 34" > explain.sql -psql -qAt -U lemmy -f explain.sql > reply_fast_view.json - -echo "explain (analyze, format json) select * from user_mention_view where user_id = 34 and recipient_id = 34" > explain.sql -psql -qAt -U lemmy -f explain.sql > user_mention_view.json - -echo "explain (analyze, format json) select * from user_mention_fast_view where user_id = 34 and recipient_id = 34" > explain.sql -psql -qAt -U lemmy -f explain.sql > user_mention_fast_view.json - -grep "Execution Time" *.json - -rm explain.sql diff --git a/query_testing/views_old/generate_reports.sh b/query_testing/views_old/generate_reports.sh new file mode 100755 index 00000000..8d2b4a05 --- /dev/null +++ b/query_testing/views_old/generate_reports.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -e + +# You can import these to http://tatiyants.com/pev/#/plans/new + +pushd reports + +# Do the views first + +PSQL_CMD="docker exec -i dev_postgres_1 psql -qAt -U lemmy" + +echo "explain (analyze, format json) select * from user_fast limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > user_fast.json + +echo "explain (analyze, format json) select * from post_view where user_id is null order by hot_rank desc, published desc limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > post_view.json + +echo "explain (analyze, format json) select * from post_fast_view where user_id is null order by hot_rank desc, published desc limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > post_fast_view.json + +echo "explain (analyze, format json) select * from comment_view where user_id is null limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > comment_view.json + +echo "explain (analyze, format json) select * from comment_fast_view where user_id is null limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > comment_fast_view.json + +echo "explain (analyze, format json) select * from community_view where user_id is null order by hot_rank desc limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > community_view.json + +echo "explain (analyze, format json) select * from community_fast_view where user_id is null order by hot_rank desc limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > community_fast_view.json + +echo "explain (analyze, format json) select * from site_view limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > site_view.json + +echo "explain (analyze, format json) select * from reply_fast_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > reply_fast_view.json + +echo "explain (analyze, format json) select * from user_mention_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > user_mention_view.json + +echo "explain (analyze, format json) select * from user_mention_fast_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > user_mention_fast_view.json + +grep "Execution Time" *.json > ../timings-`date +%Y-%m-%d_%H-%M-%S`.out + +rm explain.sql + +popd diff --git a/query_testing/views_old/timings-2021-01-05_21-06-37.out b/query_testing/views_old/timings-2021-01-05_21-06-37.out new file mode 100644 index 00000000..cd89f501 --- /dev/null +++ b/query_testing/views_old/timings-2021-01-05_21-06-37.out @@ -0,0 +1,11 @@ +comment_fast_view.json: "Execution Time": 93.165 +comment_view.json: "Execution Time": 4513.485 +community_fast_view.json: "Execution Time": 3.998 +community_view.json: "Execution Time": 561.814 +post_fast_view.json: "Execution Time": 1604.543 +post_view.json: "Execution Time": 11630.471 +reply_fast_view.json: "Execution Time": 85.708 +site_view.json: "Execution Time": 27.264 +user_fast.json: "Execution Time": 0.135 +user_mention_fast_view.json: "Execution Time": 6.665 +user_mention_view.json: "Execution Time": 4996.688 diff --git a/query_testing/views_to_diesel_migration/generate_reports.sh b/query_testing/views_to_diesel_migration/generate_reports.sh new file mode 100755 index 00000000..c6ae773b --- /dev/null +++ b/query_testing/views_to_diesel_migration/generate_reports.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -e + +# You can import these to http://tatiyants.com/pev/#/plans/new + +pushd reports + +PSQL_CMD="docker exec -i dev_postgres_1 psql -qAt -U lemmy" + +echo "explain (analyze, format json) select * from user_ limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > user_.json + +echo "explain (analyze, format json) select * from post p limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > post.json + +echo "explain (analyze, format json) select * from post p, post_aggregates pa where p.id = pa.post_id order by hot_rank(pa.score, pa.published) desc limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > post_ordered_by_rank.json + +echo "explain (analyze, format json) select * from comment limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > comment.json + +echo "explain (analyze, format json) select * from community limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > community.json + +echo "explain (analyze, format json) select * from community c, community_aggregates ca where c.id = ca.community_id order by hot_rank(ca.subscribers, ca.published) desc limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > community_ordered_by_subscribers.json + +echo "explain (analyze, format json) select * from site s" > explain.sql +cat explain.sql | $PSQL_CMD > site.json + +echo "explain (analyze, format json) select * from user_mention limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > user_mention.json + +echo "explain (analyze, format json) select * from private_message limit 100" > explain.sql +cat explain.sql | $PSQL_CMD > private_message.json + +grep "Execution Time" *.json > ../timings-`date +%Y-%m-%d_%H-%M-%S`.out + +rm explain.sql + +popd diff --git a/query_testing/views_to_diesel_migration/timings-1.out b/query_testing/views_to_diesel_migration/timings-1.out new file mode 100644 index 00000000..2df77431 --- /dev/null +++ b/query_testing/views_to_diesel_migration/timings-1.out @@ -0,0 +1,9 @@ +comment.json: "Execution Time": 12.263 +community.json: "Execution Time": 1.225 +community_ordered_by_subscribers.json: "Execution Time": 170.255 +post.json: "Execution Time": 5.373 +post_ordered_by_rank.json: "Execution Time": 1458.801 +private_message.json: "Execution Time": 0.306 +site.json: "Execution Time": 0.064 +user_.json: "Execution Time": 2.606 +user_mention.json: "Execution Time": 0.135 diff --git a/query_testing/views_to_diesel_migration/timings-2021-01-05_21-32-54.out b/query_testing/views_to_diesel_migration/timings-2021-01-05_21-32-54.out new file mode 100644 index 00000000..93d7f60b --- /dev/null +++ b/query_testing/views_to_diesel_migration/timings-2021-01-05_21-32-54.out @@ -0,0 +1,9 @@ +comment.json: "Execution Time": 0.136 +community.json: "Execution Time": 0.157 +community_ordered_by_subscribers.json: "Execution Time": 16.036 +post.json: "Execution Time": 0.129 +post_ordered_by_rank.json: "Execution Time": 15.969 +private_message.json: "Execution Time": 0.133 +site.json: "Execution Time": 0.056 +user_.json: "Execution Time": 0.300 +user_mention.json: "Execution Time": 0.122