From 8a7295dd8ebc6309f1c4d9795cb97e1f46b08e81 Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Sat, 20 Jul 2024 20:43:21 +0200
Subject: [PATCH] archwiki: cache at the web server level

---
 roles/archwiki/files/nginx-cache-purge        | 30 +++++++++++++++++++
 roles/archwiki/tasks/main.yml                 |  8 +++--
 roles/archwiki/templates/LocalSettings.php.j2 |  6 ++--
 .../templates/archwiki-prune-cache.service.j2 | 21 -------------
 .../templates/archwiki-prune-cache.timer.j2   |  8 -----
 .../templates/nginx-cache-purge.service.j2    | 11 +++++++
 roles/archwiki/templates/nginx.d.conf.j2      |  7 ++++-
 7 files changed, 55 insertions(+), 36 deletions(-)
 create mode 100644 roles/archwiki/files/nginx-cache-purge
 delete mode 100644 roles/archwiki/templates/archwiki-prune-cache.service.j2
 delete mode 100644 roles/archwiki/templates/archwiki-prune-cache.timer.j2
 create mode 100644 roles/archwiki/templates/nginx-cache-purge.service.j2

diff --git a/roles/archwiki/files/nginx-cache-purge b/roles/archwiki/files/nginx-cache-purge
new file mode 100644
index 000000000..d1bc7262e
--- /dev/null
+++ b/roles/archwiki/files/nginx-cache-purge
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+import hashlib
+import http.server
+import pathlib
+import socketserver
+import urllib.parse
+
+socketserver.ThreadingTCPServer.allow_reuse_address = True
+
+
+class Handler(http.server.BaseHTTPRequestHandler):
+    def do_PURGE(self):
+        self.send_response(http.HTTPStatus.OK)
+        self.end_headers()
+        o = urllib.parse.urlparse(self.path)
+        for method in ["GET", "HEAD"]:
+            # Key format must match "fastcgi_cache_key" in nginx.d.conf.j2 (the scheme is hardcoded to https)
+            if o.query:
+                cache_key = f"https{method}{o.netloc}{o.path}?{o.query}"
+            else:
+                cache_key = f"https{method}{o.netloc}{o.path}"
+            hash = hashlib.md5(cache_key.encode("utf-8")).hexdigest()
+            # Directory layout must match "fastcgi_cache_path" (path and levels=1:2) in nginx.d.conf.j2
+            pathlib.Path(
+                f"/var/lib/nginx/cache/{hash[-1]}/{hash[-3:-1]}/{hash}"
+            ).unlink(missing_ok=True)
+
+
+httpd = http.server.ThreadingHTTPServer(("127.0.0.1", 1080), Handler)
+httpd.serve_forever()
diff --git a/roles/archwiki/tasks/main.yml b/roles/archwiki/tasks/main.yml
index 7ab910b99..123a945c8 100644
--- a/roles/archwiki/tasks/main.yml
+++ b/roles/archwiki/tasks/main.yml
@@ -98,16 +98,18 @@
 - name: Start and enable memcached service
   systemd: name=memcached@archwiki.service state=started enabled=true daemon_reload=true
 
+- name: Install nginx-cache-purge script
+  copy: src=nginx-cache-purge dest=/usr/local/bin/nginx-cache-purge owner=root group=root mode=0755
+
 - name: Install systemd services/timers
   template: src="{{ item }}.j2" dest="/etc/systemd/system/{{ item }}" owner=root group=root mode=0644
   loop:
     - archwiki-runjobs.service
     - archwiki-runjobs-wait.service
     - archwiki-runjobs.timer
-    - archwiki-prune-cache.service
-    - archwiki-prune-cache.timer
     - archwiki-question-updater.service
     - archwiki-question-updater.timer
+    - nginx-cache-purge.service
 
 - name: Start and enable archwiki timers and services
   systemd:
@@ -117,9 +119,9 @@
     daemon_reload: true
   with_items:
     - archwiki-runjobs.timer
-    - archwiki-prune-cache.timer
     - archwiki-runjobs-wait.service
     - archwiki-question-updater.timer
+    - nginx-cache-purge.service
 
 - name: Create question answer file
   systemd:
diff --git a/roles/archwiki/templates/LocalSettings.php.j2 b/roles/archwiki/templates/LocalSettings.php.j2
index 70b55bb3d..d5b049494 100644
--- a/roles/archwiki/templates/LocalSettings.php.j2
+++ b/roles/archwiki/templates/LocalSettings.php.j2
@@ -147,9 +147,9 @@ $wgMemCachedServers = [ "unix://{{ archwiki_memcached_socket }}" ];
 ## be publicly accessible from the web.
 $wgCacheDirectory = "$IP/../cache/data";
 $wgEnableSidebarCache = true;
-$wgUseFileCache = true;
-$wgFileCacheDirectory = "$IP/../cache/html";
-$wgUseGzip = true;
+$wgUseCdn = true;
+$wgCdnServers = [ '127.0.0.1' ];
+$wgInternalServer = 'http://wiki.archlinux.org';
 
 # CSS-based preferences supposedly cause about 20 times slower page loads
 # https://phabricator.wikimedia.org/rSVN63707
diff --git a/roles/archwiki/templates/archwiki-prune-cache.service.j2 b/roles/archwiki/templates/archwiki-prune-cache.service.j2
deleted file mode 100644
index 458800531..000000000
--- a/roles/archwiki/templates/archwiki-prune-cache.service.j2
+++ /dev/null
@@ -1,21 +0,0 @@
-[Unit]
-Description=Archwiki Prune Cache Service
-
-[Service]
-Type=oneshot
-User={{ archwiki_user }}
-WorkingDirectory={{ archwiki_dir }}
-ExecStart=/usr/bin/php {{ archwiki_dir }}/public/maintenance/run.php pruneFileCache -q --agedays 1
-
-NoNewPrivileges=yes
-PrivateTmp=yes
-PrivateDevices=yes
-PrivateNetwork=true
-ProtectSystem=full
-ProtectHome=true
-ProtectControlGroups=yes
-ProtectKernelModules=yes
-ProtectKernelTunables=yes
-
-[Install]
-WantedBy=multi-user.target
diff --git a/roles/archwiki/templates/archwiki-prune-cache.timer.j2 b/roles/archwiki/templates/archwiki-prune-cache.timer.j2
deleted file mode 100644
index 95200a190..000000000
--- a/roles/archwiki/templates/archwiki-prune-cache.timer.j2
+++ /dev/null
@@ -1,8 +0,0 @@
-[Unit]
-Description=Archwiki Prune Cache timer
-
-[Timer]
-OnCalendar=*-*-* 04:12:00
-
-[Install]
-WantedBy=timers.target
diff --git a/roles/archwiki/templates/nginx-cache-purge.service.j2 b/roles/archwiki/templates/nginx-cache-purge.service.j2
new file mode 100644
index 000000000..a8d619fec
--- /dev/null
+++ b/roles/archwiki/templates/nginx-cache-purge.service.j2
@@ -0,0 +1,11 @@
+[Unit]
+Description=nginx cache PURGE service
+
+[Service]
+User=http
+ProtectSystem=strict
+ReadWritePaths=/var/lib/nginx/cache
+ExecStart=/usr/local/bin/nginx-cache-purge
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/archwiki/templates/nginx.d.conf.j2 b/roles/archwiki/templates/nginx.d.conf.j2
index 6003fd321..ce395d1a9 100644
--- a/roles/archwiki/templates/nginx.d.conf.j2
+++ b/roles/archwiki/templates/nginx.d.conf.j2
@@ -1,4 +1,6 @@
-fastcgi_cache_path /var/lib/nginx/cache levels=1:2 keys_zone=wiki:100m inactive=60m;
+# Please keep "path" and "levels" in sync with the nginx-cache-purge script
+fastcgi_cache_path /var/lib/nginx/cache levels=1:2 keys_zone=wiki:100m inactive=720m;
+# Please keep in sync with "cache_key" in the nginx-cache-purge script
 fastcgi_cache_key "$scheme$request_method$host$request_uri";
 
 # rate limit API endpoint
@@ -110,6 +112,9 @@ server {
         fastcgi_index  index.php;
         include        fastcgi.conf;
 
+        fastcgi_cache wiki;
+        add_header X-Cache $upstream_cache_status;
+
         limit_req zone=archwikilimit burst=10 nodelay;
     }
 
-- 
GitLab