From a56c7109506f1e8f54c8b3dfff1e2ac97abf2585 Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Sat, 4 Jan 2025 16:37:04 +0100
Subject: [PATCH] feat(db-functions): Check the archive index in
 check_reproducible

With the archive and repos no longer hosted on the same server[1], the
existence of "dependent packages" can no longer be determined solely by
checking whether the file exists in the local archive directory, which
now contains at most a few days' worth of packages.

To solve this, the archive index[2], which contains all archived
packages, must also be checked. We still need to check locally first, as
the archive index is only updated once a day.

[1] https://lists.archlinux.org/archives/list/arch-dev-public@lists.archlinux.org/thread/B66SQUW4HOJAN2WHG4RLEBLYWNWOSF3Y/
[2] https://archive.archlinux.org/packages/.all/index.0.xz
---
 config                  |  1 +
 cron-jobs/archive-index | 16 ++++++++++++++++
 db-functions            |  5 +++++
 3 files changed, 22 insertions(+)
 create mode 100755 cron-jobs/archive-index

diff --git a/config b/config
index 842d684..b8c2924 100644
--- a/config
+++ b/config
@@ -2,6 +2,7 @@
 
 FTP_BASE="/srv/ftp"
 ARCHIVE_BASE="/srv/archive"
+ARCHIVE_INDEX_URL="https://archive.archlinux.org/packages/.all/index.0.xz"
 ARCHIVEUSER='archive'
 PKGREPOS=()
 DEBUGREPOS=()
diff --git a/cron-jobs/archive-index b/cron-jobs/archive-index
new file mode 100755
index 0000000..93370ac
--- /dev/null
+++ b/cron-jobs/archive-index
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Refresh ${ARCHIVE_BASE}/index.0 from ${ARCHIVE_INDEX_URL} when a new copy is served.
+set -eo pipefail
+. "$(dirname "$(readlink -e "$0")")/../config"
+. /usr/share/makepkg/util.sh
+
+index="${ARCHIVE_BASE}/index.0" staging="${ARCHIVE_BASE}/.index.0"
+# conditional download (--time-cond on the current index); anything but 200 means nothing to do
+status="$(curl --silent --show-error --fail --write-out "%{http_code}" --time-cond "${index}" --remote-time --output "${staging}.xz" "${ARCHIVE_INDEX_URL}")"
+(( status == 200 )) || exit 0
+msg "Updating ${index}"
+rm -f "${staging}"
+xz --decompress --keep "${staging}.xz"
+touch --reference "${staging}.xz" "${staging}" --time mtime
+rm "${staging}.xz"
+mv "${staging}" "${index}"
diff --git a/db-functions b/db-functions
index 84a0b09..786c043 100644
--- a/db-functions
+++ b/db-functions
@@ -790,6 +790,11 @@ check_reproducible() {
 		# fast lookup with none glob file exists check using default PKGEXT
 		[[ -f "${FTP_BASE}"/pool/packages/${dependency}${PKGEXT_DEFAULT} ]] && continue
 		[[ -f "${ARCHIVE_BASE}/packages/${pkgname:0:1}/${pkgname}/${dependency}${PKGEXT_DEFAULT}" ]] && continue
+		# the local archive directory only holds a few days' worth of
+		# packages and the archive index is only refreshed once a day,
+		# so both must be checked. Match the whole line literally:
+		# versions contain regex metacharacters such as '.'
+		grep --quiet --no-messages --max-count=1 --fixed-strings --line-regexp -- "${dependency}" "${ARCHIVE_BASE}/index.0" && continue
 
 		# fallback lookup in csae fast lookup with default pkgext fails
 		# shellcheck disable=2086
-- 
GitLab