From 9aefd872582254b7bb5e2cbb1e716c8948dcf0d2 Mon Sep 17 00:00:00 2001 From: Kristian Klausen <kristian@klausen.dk> Date: Mon, 22 Jul 2024 21:05:03 +0200 Subject: [PATCH] archwiki: Add simple challenge for Chinese IP addresses The wiki has been hammered with requests from some stupid Chinese bots/crawlers. Adding a simple challenge (requiring a cookie to be set) seems to be enough to throw them off. This was initially added for all pages, but as that could affect Chinese search engines (concern raised on the forum[1]), it was changed to only affect "action views", which search engines are not supposed to crawl. [1] https://bbs.archlinux.org/viewtopic.php?pid=2185963#p2185963 --- host_vars/wiki.archlinux.org/misc | 2 ++ roles/archwiki/defaults/main.yml | 1 + roles/archwiki/templates/nginx.d.conf.j2 | 31 ++++++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/host_vars/wiki.archlinux.org/misc b/host_vars/wiki.archlinux.org/misc index 6dfb7cfbf..728880621 100644 --- a/host_vars/wiki.archlinux.org/misc +++ b/host_vars/wiki.archlinux.org/misc @@ -2,3 +2,5 @@ filesystem: btrfs memcached_socket: "/run/memcached/archwiki.sock" wireguard_address: 10.0.0.22 wireguard_public_key: bZeNWMLtyNDaFR7jjWr06nNZt/vV/OKNleV7XZZs+lc= +nginx_extra_modules: + - name: geoip2 diff --git a/roles/archwiki/defaults/main.yml b/roles/archwiki/defaults/main.yml index 6813ba68f..72bb56bb9 100644 --- a/roles/archwiki/defaults/main.yml +++ b/roles/archwiki/defaults/main.yml @@ -1,6 +1,7 @@ archwiki_dir: '/srv/http/archwiki' archwiki_domain: 'wiki.archlinux.org' archwiki_nginx_conf: '/etc/nginx/nginx.d/archwiki.conf' +archwiki_nginx_challenge_value: '41ce6c6' archwiki_user: 'archwiki' archwiki_repository: 'https://gitlab.archlinux.org/archlinux/archwiki.git' archwiki_version: '1.42.1-2' diff --git a/roles/archwiki/templates/nginx.d.conf.j2 b/roles/archwiki/templates/nginx.d.conf.j2 index 6003fd321..86bc74263 100644 --- a/roles/archwiki/templates/nginx.d.conf.j2 +++ 
b/roles/archwiki/templates/nginx.d.conf.j2
@@ -13,6 +13,32 @@ upstream archwiki {
     server unix://{{ archwiki_socket }};
 }
 
+# Challenge the client if the cookie "challenge" is not set to
+# the value of "archwiki_nginx_challenge_value".
+map $cookie_challenge $challenge_required2 {
+    default 1;
+    {{ archwiki_nginx_challenge_value }} 0;
+}
+
+# Challenge the client if it is requesting an "action view" and
+# $challenge_required2 is true.
+map $request_uri $challenge_required {
+    default 0;
+    ~^/index\.php\? $challenge_required2;
+}
+
+geoip2 /var/lib/GeoIP/GeoLite2-Country.mmdb {
+    auto_reload 60m;
+    $geoip2_data_country_iso_code country iso_code;
+}
+
+# Challenge the client if it is from China and $challenge_required is
+# true. This is enough to "throw off" some bots/crawlers from China.
+map $geoip2_data_country_iso_code $challenge {
+    default 0;
+    CN $challenge_required;
+}
+
 server {
     listen 80;
     listen [::]:80;
@@ -103,6 +129,11 @@ server {
 
     # normal PHP FastCGI handler
     location ~ ^/[^/]+\.php$ {
+        if ($challenge) {
+            add_header Set-Cookie "challenge={{ archwiki_nginx_challenge_value }}; SameSite=Strict";
+            return 303 $scheme://$server_name$request_uri; # $request_uri already starts with "/"; an extra "/" would yield "//index.php?..." and miss the map above
+        }
+
         try_files $uri =404;
         access_log /var/log/nginx/{{ archwiki_domain }}/access.log main;
         access_log /var/log/nginx/{{ archwiki_domain }}/access.log.json json_main;
-- 
GitLab