From: self <self@awful.systems>
Date: Mon, 31 Jul 2023 22:23:13 +0000 (-0700)
Subject: Initial commit
X-Git-Url: http://these/git/%22%7B%7D/static/%7B%7D/%24%7Bargs.thread.url%7D?a=commitdiff_plain;h=refs%2Fheads%2Fmain;p=sneer-archive-data.git

Initial commit
---

262b488c58c37aa5428a465e1847609656c94236
diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..a4f3544
--- /dev/null
+++ b/.envrc
@@ -0,0 +1,7 @@
+if ! has nix_direnv_version || ! nix_direnv_version 2.3.0; then  # bootstrap nix-direnv when it is missing or fails the 2.3.0 version check
+    source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/2.3.0/direnvrc" "sha256-Dmd+j63L84wuzgyjITIfSxSD57Tx7v51DMxVZOsiUD8="  # second argument is the integrity hash for the downloaded script
+fi
+use flake  # load the environment provided by this repository's flake
+
+source_env_if_exists .envrc.private  # optional local overrides; .envrc.private is listed in .gitignore
+watch_file template.nix  # have direnv re-evaluate when template.nix changes
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bda639a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+.DS_Store
+.idea
+*.log
+tmp/
+
+result
+.envrc.private
+.direnv/
diff --git a/SneerClub_comments.jsonl.zst b/SneerClub_comments.jsonl.zst
new file mode 100644
index 0000000..1154538
Binary files /dev/null and b/SneerClub_comments.jsonl.zst differ
diff --git a/SneerClub_submissions.jsonl.zst b/SneerClub_submissions.jsonl.zst
new file mode 100644
index 0000000..58f4880
Binary files /dev/null and b/SneerClub_submissions.jsonl.zst differ
diff --git a/bdfr.tar.zst b/bdfr.tar.zst
new file mode 100644
index 0000000..a3fa14f
Binary files /dev/null and b/bdfr.tar.zst differ
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..3dfe0e6
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,59 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1689068808,
+        "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1689078114,
+        "narHash": "sha256-osG8BrX5RpKJ7wH+vI6auOU+ctvNOblT4XXCgknK47c=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "b6cc7ff8fee93789bc871a267ab876c3fca042cb",
+        "type": "github"
+      },
+      "original": {
+        "id": "nixpkgs",
+        "type": "indirect"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..cf6708b
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,60 @@
+{
+  description = "r/SneerClub's contents preserved as JSON files";
+
+  inputs = { flake-utils.url = "github:numtide/flake-utils"; };
+
+  outputs = { self, nixpkgs, flake-utils }:
+    flake-utils.lib.eachDefaultSystem (system:
+      let
+        pkgs = nixpkgs.legacyPackages."${system}";
+        process-markdown = pkgs.writeText "process-markdown.mlr" ''
+          # Miller DSL applied to each thread record: renders Markdown to HTML with
+          # pandoc and adds a readable UTC created_date to the post and every comment.
+          # The here-document delimiter is quoted so the shell hands the text to pandoc
+          # verbatim; unquoted, dollar signs and backticks in a body would be expanded.
+          func render_markdown(text) {
+              return system("${pkgs.pandoc}/bin/pandoc -f markdown <<'__EOF__'\n" . text . "\n__EOF__");
+          }
+          func readable_date(utc) {
+              return system("date -d @" . utc . " -u +'%B %d, %Y %I:%M %p'");
+          }
+          # Returns comment list r with each body rendered and created_date set,
+          # recursing into replies. The caller assigns the result back into the record.
+          func process_replies(r) {
+              for (i, c in r) {
+                  html = render_markdown(c["body"]);
+                  if (!is_error(html)) {
+                      r[i]["body"] = html;
+                  }
+                  r[i]["created_date"] = readable_date(c["created_utc"]);
+                  r[i]["replies"] = process_replies(c["replies"]);
+              }
+              return r;
+          }
+          $selftext = render_markdown($selftext);
+          $created_date = readable_date($created_utc);
+          $comments = process_replies($comments);
+        '';
+      in {
+        packages.json-threads = pkgs.runCommand "process-json" { } ''
+          # Unpacks bdfr.tar.zst and writes three JSON listings into $out.
+          mlr=${pkgs.miller}/bin/mlr
+          # globstar is needed for the recursive ** pattern used below.
+          shopt -s globstar
+          mkdir -p $out
+          tar -I ${pkgs.zstd}/bin/zstd -xf ${./bdfr.tar.zst}
+          # Every thread, run through the process-markdown script, newest first.
+          $mlr --json put -f ${process-markdown} then sort -nr created_utc \
+            ./bdfr/**/*.json > $out/threads-newest.json
+          # Same records minus comments and selftext, highest score first.
+          $mlr --json cut -x -f comments,selftext then sort -nr score \
+            $out/threads-newest.json > $out/submissions-bestest.json
+          # Same records minus comments and selftext, most comments first.
+          $mlr --json cut -x -f comments,selftext then sort -nr num_comments \
+            $out/threads-newest.json > $out/submissions-longest.json
+        '';
+
+        packages.default = self.packages."${system}".json-threads;
+
+      });
+}