From 89cdabda53f9706bb828a596cfdeb07b2d1c5790 Mon Sep 17 00:00:00 2001
From: Kevin Morris <kevr@0cost.org>
Date: Mon, 1 Nov 2021 09:10:58 -0700
Subject: [PATCH] fix(mkpkglists): revert to iterating over cur.fetchall() once

This change removes the "mapping" key that was added in a recent
commit; dropping it lets us iterate over cur.fetchall() exactly once
and simplifies generation.
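
Consumers who relied on "mapping" for O(1) name lookups can rebuild
an equivalent index in a single pass over "data". A minimal sketch
(illustrative only; the archive filename and package name here are
assumptions, not something this patch defines):

    import gzip
    import json

    with gzip.open("packages-meta-v1.json.gz", "rt") as f:
        meta = json.load(f)

    # name -> index into meta["data"], as "mapping" used to provide
    mapping = {pkg["Name"]: i for i, pkg in enumerate(meta["data"])}
    package_data = meta["data"][mapping["some-package"]]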

Additionally, we make the cache handling a lot cleaner: each archive
now tracks its source table's last-seen auto_increment value in its
own state file under /tmp.

Signed-off-by: Kevin Morris <kevr@0cost.org>
---
 aurweb/scripts/mkpkglists.py | 96 +++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 48 deletions(-)
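
The new state handling is a simple compare-and-write around MySQL's
auto_increment counter. A minimal sketch of the intended flow, using
names from this patch (regenerate_package_archives() is a
hypothetical stand-in for the real generation code):

    updated, update_time = should_update(packages_state, "Packages")
    if not updated:
        # The counter moved, or no state file exists yet, so the
        # archives are regenerated before the counter is saved.
        regenerate_package_archives()  # hypothetical stand-in
        update_state(packages_state, update_time)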

diff --git a/aurweb/scripts/mkpkglists.py b/aurweb/scripts/mkpkglists.py
index 73c0dcec9..d2f7216ad 100755
--- a/aurweb/scripts/mkpkglists.py
+++ b/aurweb/scripts/mkpkglists.py
@@ -5,37 +5,51 @@ import gzip
 import json
 import os
 
+from typing import Tuple
+
 import aurweb.config
 import aurweb.db
 
+
+def state_path(archive: str) -> str:
+    # State files live in a hard-coded /tmp directory for now.
+    # TODO: Use a Redis cache to store this state once we merge
+    # FastAPI into master and remove PHP from the tree.
+    return os.path.join("/tmp", os.path.basename(archive) + ".state")
+
+
 packagesfile = aurweb.config.get('mkpkglists', 'packagesfile')
 packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile')
+packages_state = state_path(packagesfile)
+
 pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
+pkgbases_state = state_path(pkgbasefile)
+
 userfile = aurweb.config.get('mkpkglists', 'userfile')
+users_state = state_path(userfile)
 
 
-def should_update(tablename: str) -> int:
+def should_update(state: str, tablename: str) -> Tuple[bool, int]:
     if aurweb.config.get("database", "backend") != "mysql":
-        return False
+        return (False, 0)
 
-    conn = aurweb.db.Connection()
-
     db_name = aurweb.config.get("database", "name")
+    conn = aurweb.db.Connection()
     cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
                        "WHERE table_schema = ? AND table_name = ?",
                        (db_name, tablename,))
     update_time = cur.fetchone()[0]
 
-    cached_update_time = 0
-    if os.path.exists(f"/tmp/{tablename}.update-time.cache"):
-        with open(f"/tmp/{tablename}.update-time.cache") as f:
-            cached_update_time = int(f.read().strip())
+    saved_update_time = 0
+    if os.path.exists(state):
+        with open(state) as f:
+            saved_update_time = int(f.read().strip())
 
-    return (cached_update_time == update_time, update_time)
+    return (saved_update_time == update_time, update_time)
 
 
-def update_cache(tablename: str, update_time: int) -> None:
-    with open(f"/tmp/{tablename}.update-time.cache", "w") as f:
+def update_state(state: str, update_time: int) -> None:
+    with open(state, "w") as f:
         f.write(str(update_time))
 
 
@@ -47,7 +61,7 @@ def main():
     pkgbaselist_header = "# AUR package base list, generated on " + datestr
     userlist_header = "# AUR user name list, generated on " + datestr
 
-    updated, update_time = should_update("Packages")
+    updated, update_time = should_update(packages_state, "Packages")
     if not updated:
         print("Updating Packages...")
         columns = ("Packages.ID, PackageBaseID, Packages.Name, "
@@ -56,29 +70,15 @@ def main():
                            "INNER JOIN PackageBases "
                            "ON PackageBases.ID = Packages.PackageBaseID "
                            "WHERE PackageBases.PackagerUID IS NOT NULL")
-        results = cur.fetchall()
-
-        with gzip.open(packagesfile, "w") as f:
-            f.write(bytes(pkglist_header + "\n", "UTF-8"))
-            f.writelines([bytes(x[2] + "\n", "UTF-8") for x in results])
 
+        # Store JSON-ready rows in `output` so they can be reused for
+        # the simpler packagesfile generation afterward.
+        output = []
         with gzip.open(packagesmetafile, "wt") as f:
             """ The output "data" json key points to a list of dictionaries,
             each representing a single result, filled with column names as
             keys and column values as values.
 
-            The output "mapping" json key points to a dictionary of Package
-            name key -> "data"-list index pairs. This provides users of
-            the meta archive a way to perform O(1) searches based on a
-            package name, while still providing a sequential list for
-            loopability.
-
-            i = json_data["mapping"]["package_name"]
-            package_data = json_data["data"][i]
-
-            name = package_data.get("Name")
-            version = package_data.get("Version")
-
             Example:
                 {
                     "data": [
@@ -91,31 +91,31 @@ def main():
                             "URL": "https://some.url"
                         },
                         ...
-                    ],
-                    "mapping": {
-                        "package_name": 0,
-                        ...
-                    }
+                    ]
                 }
             """
+            output = [{
+                column[0]: result[i]
+                for i, column in enumerate(cur.description)
+            } for result in cur.fetchall()]
             json.dump({
                 "warning": ("This is a experimental! It can be removed "
                             "or modified without warning!"),
-                "mapping": {
-                    result[2]: i
-                    for i, result in enumerate(results)
-                },
-                "data": [{
-                    column[0]: result[i]
-                    for i, column in enumerate(cur.description)
-                } for result in results]
+                "data": output
             }, f)
 
-        update_cache("Packages", update_time)
+        with gzip.open(packagesfile, "w") as f:
+            f.write(bytes(pkglist_header + "\n", "UTF-8"))
+            f.writelines([
+                bytes(x.get("Name") + "\n", "UTF-8")
+                for x in output
+            ])
+
+        update_state(packages_state, update_time)
     else:
         print("Packages have not been updated; skipping.")
 
-    updated, update_time = should_update("PackageBases")
+    updated, update_time = should_update(pkgbases_state, "PackageBases")
     if not updated:
         print("Updating PackageBases...")
         with gzip.open(pkgbasefile, "w") as f:
@@ -123,18 +123,18 @@ def main():
             cur = conn.execute("SELECT Name FROM PackageBases " +
                                "WHERE PackagerUID IS NOT NULL")
             f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
-        update_cache("PackageBases", update_time)
+        update_state(pkgbases_state, update_time)
     else:
         print("PackageBases have not been updated; skipping.")
 
-    updated, update_time = should_update("Users")
+    updated, update_time = should_update(users_state, "Users")
     if not updated:
         print("Updating Users...")
         with gzip.open(userfile, "w") as f:
             f.write(bytes(userlist_header + "\n", "UTF-8"))
             cur = conn.execute("SELECT UserName FROM Users")
             f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
-        update_cache("Users", update_time)
+        update_state(users_state, update_time)
     else:
         print("Users have not been updated; skipping.")
 
-- 
GitLab