Untitled Git - sneer-archive-data.git/blob - flake.nix
Initial commit
[sneer-archive-data.git] / flake.nix
{
  description = "r/SneerClub's contents preserved as JSON files";

  # NOTE: `nixpkgs` is used by `outputs` below but is not declared here, so Nix
  # resolves it as an indirect input through the flake registry.
  inputs = { flake-utils.url = "github:numtide/flake-utils"; };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};

        # Miller DSL program applied to every archived thread record.
        # It renders the Markdown in `selftext` and in every comment body
        # (at any reply depth) to HTML with pandoc, and adds a human-readable
        # `created_date` next to each `created_utc` timestamp.
        process-markdown = pkgs.writeText "process-markdown.mlr" ''
          # Render a Markdown string to HTML with pandoc.
          # The heredoc delimiter is quoted so that the shell passes the text
          # through verbatim: with an unquoted delimiter, backticks and dollar
          # signs in the archived text would be expanded or executed.
          # On failure the original text is returned unchanged.
          func render_markdown(text) {
              html = system("${pkgs.pandoc}/bin/pandoc -f markdown <<'__EOF__'\n" . text . "\n__EOF__\n");
              if (is_error(html)) {
                  return text;
              }
              return html;
          }

          # Format epoch seconds as e.g. "January 02, 2023 03:04 PM" (UTC).
          func format_utc(utc) {
              return system("date -d @" . utc . " -u +'%B %d, %Y %I:%M %p'");
          }

          # Return a copy of a comment list with every body rendered and every
          # comment dated, recursing into each comment's "replies".
          # The result is returned rather than modified in place so that it
          # does not depend on how Miller passes collections to functions.
          # Anything that is not a collection (e.g. a missing "replies" key)
          # is returned untouched.
          func process_comments(comments) {
              if (!is_array(comments) && !is_map(comments)) {
                  return comments;
              }
              out = comments;
              for (i, c in comments) {
                  out[i]["body"] = render_markdown(c["body"]);
                  out[i]["created_date"] = format_utc(c["created_utc"]);
                  out[i]["replies"] = process_comments(c["replies"]);
              }
              return out;
          }

          $selftext = render_markdown($selftext);
          $created_date = format_utc($created_utc);
          $comments = process_comments($comments);
        '';
      in {
        # Unpacks the archived dump and produces three JSON files in $out:
        #   threads-newest.json      full threads, newest first
        #   submissions-bestest.json submissions only, highest score first
        #   submissions-longest.json submissions only, most comments first
        packages.json-threads = pkgs.runCommand "process-json" { } ''
          # globstar makes ** below match nested directories.
          shopt -s globstar
          mkdir -p $out
          tar -I ${pkgs.zstd}/bin/zstd -xf ${./bdfr.tar.zst}

          # Full threads with rendered bodies, newest first.
          ${pkgs.miller}/bin/mlr --json \
                                 put -f ${process-markdown} then \
                                 sort -nr created_utc \
                                 ./bdfr/**/*.json > $out/threads-newest.json

          # Submission listings: drop the bulky fields, then rank.
          ${pkgs.miller}/bin/mlr --json \
                                 cut -x -f comments,selftext then \
                                 sort -nr score \
                                 $out/threads-newest.json > $out/submissions-bestest.json
          ${pkgs.miller}/bin/mlr --json \
                                 cut -x -f comments,selftext then \
                                 sort -nr num_comments \
                                 $out/threads-newest.json > $out/submissions-longest.json
        '';

        packages.default = self.packages.${system}.json-threads;

      });
}