Verified Commit 5b4931d9 authored by Kevin Morris

feat(mkpkglists): cache update states of mkpkglists


This commit changes mkpkglists so that it does not regenerate
archives that do not need updating, by utilizing InnoDB's
`information_schema.tables` AUTO_INCREMENT column.

Signed-off-by: Kevin Morris <kevr@0cost.org>
parent 928ef295
This commit is part of merge request !233.
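The mechanism behind the change: for InnoDB tables, `information_schema.tables` exposes the table's next AUTO_INCREMENT value, which only moves forward as rows are inserted. Comparing that counter against a value cached from the previous run is therefore a cheap proxy for "has this table gained rows since the archive was last generated". A minimal standalone sketch of the same check, not aurweb's API: the DB-API cursor `cur`, the schema name, and the paramstyle `%s` are all assumptions here (the real script reads the schema name from aurweb's config and goes through aurweb.db.Connection):

    def table_counter(cur, schema: str, table: str) -> int:
        # AUTO_INCREMENT is the next id InnoDB will hand out for this
        # table; it grows with every INSERT, so it doubles as a cheap
        # table "version" that is readable without scanning the table.
        cur.execute(
            "SELECT auto_increment FROM information_schema.tables "
            "WHERE table_schema = %s AND table_name = %s",
            (schema, table))
        return cur.fetchone()[0]

    # Usage sketch: regenerate only when the counter has moved since the
    # value cached on the previous run (cache path is hypothetical).
    # previous = int(open("/tmp/Users.update-time.cache").read().strip())
    # if table_counter(cur, "aurweb", "Users") != previous:
    #     ...  # rebuild the archive, then write the new counter back

Note the limitation this approach accepts: the counter advances only on inserts, so it detects new rows rather than edits to existing ones.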
@@ -3,6 +3,7 @@

import datetime
import gzip
import json
import os

import aurweb.config
import aurweb.db

@@ -13,6 +14,28 @@

pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
userfile = aurweb.config.get('mkpkglists', 'userfile')


def should_update(tablename: str) -> int:
    conn = aurweb.db.Connection()

    db_name = aurweb.config.get("database", "name")
    cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
                       "WHERE table_schema = ? AND table_name = ?",
                       (db_name, tablename,))
    update_time = cur.fetchone()[0]

    cached_update_time = 0
    if os.path.exists(f"/tmp/{tablename}.update-time.cache"):
        with open(f"/tmp/{tablename}.update-time.cache") as f:
            cached_update_time = int(f.read().strip())

    return (cached_update_time == update_time, update_time)


def update_cache(tablename: str, update_time: int) -> None:
    with open(f"/tmp/{tablename}.update-time.cache", "w") as f:
        f.write(str(update_time))


def main():
    conn = aurweb.db.Connection()

@@ -21,77 +44,96 @@ def main():

    pkgbaselist_header = "# AUR package base list, generated on " + datestr
    userlist_header = "# AUR user name list, generated on " + datestr

    updated, update_time = should_update("Packages")
    if not updated:
        print("Updating Packages...")
        columns = ("Packages.ID, PackageBaseID, Packages.Name, "
                   "Version, Description, URL")
        cur = conn.execute(f"SELECT {columns} FROM Packages "
                           "INNER JOIN PackageBases "
                           "ON PackageBases.ID = Packages.PackageBaseID "
                           "WHERE PackageBases.PackagerUID IS NOT NULL")
        results = cur.fetchall()

        with gzip.open(packagesfile, "w") as f:
            f.write(bytes(pkglist_header + "\n", "UTF-8"))
            f.writelines([bytes(x[2] + "\n", "UTF-8") for x in results])

        with gzip.open(packagesmetafile, "wt") as f:
            """ The output "data" json key points to a list of dictionaries,
            each representing a single result, filled with column names as
            keys and column values as values.

            The output "mapping" json key points to a dictionary of Package
            name key -> "data"-list index pairs. This provides users of
            the meta archive a way to perform O(1) searches based on a
            package name, while still providing a sequential list for
            loopability.

            i = json_data["mapping"]["package_name"]
            package_data = json_data["data"][i]
            name = package_data.get("Name")
            version = package_data.get("Version")

            Example:
            {
                "data": [
                    {
                        "ID": 123,
                        "Name": "package_name",
                        "PackageBaseID": 234,
                        "Version": "0.1.1",
                        "Description": "Some description...",
                        "URL": "https://some.url"
                    },
                    ...
                ],
                "mapping": {
                    "package_name": 0,
                    ...
                }
            }
            """
            json.dump({
                "warning": ("This is a experimental! It can be removed "
                            "or modified without warning!"),
                "mapping": {
                    result[2]: i
                    for i, result in enumerate(results)
                },
                "data": [{
                    column[0]: result[i]
                    for i, column in enumerate(cur.description)
                } for result in results]
            }, f)

        update_cache("Packages", update_time)
    else:
        print("Packages have not been updated; skipping.")

    updated, update_time = should_update("PackageBases")
    if not updated:
        print("Updating PackageBases...")
        with gzip.open(pkgbasefile, "w") as f:
            f.write(bytes(pkgbaselist_header + "\n", "UTF-8"))
            cur = conn.execute("SELECT Name FROM PackageBases " +
                               "WHERE PackagerUID IS NOT NULL")
            f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])

        update_cache("PackageBases", update_time)
    else:
        print("PackageBases have not been updated; skipping.")

    updated, update_time = should_update("Users")
    if not updated:
        print("Updating Users...")
        with gzip.open(userfile, "w") as f:
            f.write(bytes(userlist_header + "\n", "UTF-8"))
            cur = conn.execute("SELECT UserName FROM Users")
            f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])

        update_cache("Users", update_time)
    else:
        print("Users have not been updated; skipping.")

    conn.close()
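For readers of the new meta archive, the docstring in the diff above defines the layout; here is a short consumer-side sketch following that lookup pattern. The local filename and "package_name" are placeholders (the real output path comes from the `packagesmetafile` config option):

    import gzip
    import json

    # Hypothetical local copy of the meta archive written by mkpkglists.
    with gzip.open("packages-meta.json.gz", "rt") as f:
        json_data = json.load(f)

    # O(1) lookup: "mapping" gives the index of the package's record
    # inside the sequential "data" list.
    i = json_data["mapping"]["package_name"]
    package_data = json_data["data"][i]
    print(package_data.get("Name"), package_data.get("Version"))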