From 89cdabda53f9706bb828a596cfdeb07b2d1c5790 Mon Sep 17 00:00:00 2001
From: Kevin Morris <kevr@0cost.org>
Date: Mon, 1 Nov 2021 09:10:58 -0700
Subject: [PATCH] fix(mkpkglists): revert to iterating over cur.fetchall()
 once

This change removes the "mapping" key that was added in a recent commit
to simplify generation. Additionally, we make cache handling a lot
cleaner.

Signed-off-by: Kevin Morris <kevr@0cost.org>
---
 aurweb/scripts/mkpkglists.py | 92 ++++++++++++++++++------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/aurweb/scripts/mkpkglists.py b/aurweb/scripts/mkpkglists.py
index 73c0dcec9..d2f7216ad 100755
--- a/aurweb/scripts/mkpkglists.py
+++ b/aurweb/scripts/mkpkglists.py
@@ -5,37 +5,51 @@ import gzip
 import json
 import os
 
+from typing import Tuple
+
 import aurweb.config
 import aurweb.db
 
+
+def state_path(archive: str) -> str:
+    # A hard-coded /tmp state directory.
+    # TODO: Use Redis cache to store this state after we merge
+    # FastAPI into master and removed PHP from the tree.
+    return os.path.join("/tmp", os.path.basename(archive) + ".state")
+
+
 packagesfile = aurweb.config.get('mkpkglists', 'packagesfile')
 packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile')
+packages_state = state_path(packagesfile)
+
 pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
+pkgbases_state = state_path(pkgbasefile)
+
 userfile = aurweb.config.get('mkpkglists', 'userfile')
+users_state = state_path(userfile)
 
 
-def should_update(tablename: str) -> int:
+def should_update(state: str, tablename: str) -> Tuple[bool, int]:
     if aurweb.config.get("database", "backend") != "mysql":
         return False
 
-    conn = aurweb.db.Connection()
-
     db_name = aurweb.config.get("database", "name")
+    conn = aurweb.db.Connection()
     cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
                        "WHERE table_schema = ? AND table_name = ?",
                        (db_name, tablename,))
     update_time = cur.fetchone()[0]
 
-    cached_update_time = 0
-    if os.path.exists(f"/tmp/{tablename}.update-time.cache"):
-        with open(f"/tmp/{tablename}.update-time.cache") as f:
-            cached_update_time = int(f.read().strip())
+    saved_update_time = 0
+    if os.path.exists(state):
+        with open(state) as f:
+            saved_update_time = int(f.read().strip())
 
-    return (cached_update_time == update_time, update_time)
+    return (saved_update_time == update_time, update_time)
 
 
-def update_cache(tablename: str, update_time: int) -> None:
-    with open(f"/tmp/{tablename}.update-time.cache", "w") as f:
+def update_state(state: str, update_time: int) -> None:
+    with open(state, "w") as f:
         f.write(str(update_time))
 
 
@@ -47,7 +61,7 @@ def main():
     pkgbaselist_header = "# AUR package base list, generated on " + datestr
     userlist_header = "# AUR user name list, generated on " + datestr
 
-    updated, update_time = should_update("Packages")
+    updated, update_time = should_update(packages_state, "Packages")
     if not updated:
         print("Updating Packages...")
         columns = ("Packages.ID, PackageBaseID, Packages.Name, "
@@ -56,29 +70,15 @@ def main():
                            "INNER JOIN PackageBases "
                            "ON PackageBases.ID = Packages.PackageBaseID "
                            "WHERE PackageBases.PackagerUID IS NOT NULL")
-        results = cur.fetchall()
-
-        with gzip.open(packagesfile, "w") as f:
-            f.write(bytes(pkglist_header + "\n", "UTF-8"))
-            f.writelines([bytes(x[2] + "\n", "UTF-8") for x in results])
 
+        # Store JSON-data in `output`, which can be reused for the
+        # more basic packagesfile generation afterward.
+        output = dict()
         with gzip.open(packagesmetafile, "wt") as f:
             """ The output "data" json key points to a list of dictionaries,
             each representing a single result, filled with column names as
             keys and column values as values.
 
-            The output "mapping" json key points to a dictionary of Package
-            name key -> "data"-list index pairs. This provides users of
-            the meta archive a way to perform O(1) searches based on a
-            package name, while still providing a sequential list for
-            loopability.
-
-                i = json_data["mapping"]["package_name"]
-                package_data = json_data["data"][i]
-
-                name = package_data.get("Name")
-                version = package_data.get("Version")
-
             Example:
                 {
                     "data": [
@@ -91,31 +91,31 @@ def main():
                             "URL": "https://some.url"
                         },
                         ...
-                    ],
-                    "mapping": {
-                        "package_name": 0,
-                        ...
-                    }
+                    ]
                 }
             """
+            output = [{
+                column[0]: result[i]
+                for i, column in enumerate(cur.description)
+            } for result in cur.fetchall()]
             json.dump({
                 "warning": ("This is a experimental! It can be removed "
                             "or modified without warning!"),
-                "mapping": {
-                    result[2]: i
-                    for i, result in enumerate(results)
-                },
-                "data": [{
-                    column[0]: result[i]
-                    for i, column in enumerate(cur.description)
-                } for result in results]
+                "data": output
             }, f)
 
-        update_cache("Packages", update_time)
+        with gzip.open(packagesfile, "w") as f:
+            f.write(bytes(pkglist_header + "\n", "UTF-8"))
+            f.writelines([
+                bytes(x.get("Name") + "\n", "UTF-8")
+                for x in output
+            ])
+
+        update_state(packages_state, update_time)
     else:
         print("Packages have not been updated; skipping.")
 
-    updated, update_time = should_update("PackageBases")
+    updated, update_time = should_update(pkgbases_state, "PackageBases")
    if not updated:
         print("Updating PackageBases...")
         with gzip.open(pkgbasefile, "w") as f:
@@ -123,18 +123,18 @@ def main():
             cur = conn.execute("SELECT Name FROM PackageBases " +
                                "WHERE PackagerUID IS NOT NULL")
             f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
-        update_cache("PackageBases", update_time)
+        update_state(pkgbases_state, update_time)
     else:
         print("PackageBases have not been updated; skipping.")
 
-    updated, update_time = should_update("Users")
+    updated, update_time = should_update(users_state, "Users")
     if not updated:
         print("Updating Users...")
         with gzip.open(userfile, "w") as f:
             f.write(bytes(userlist_header + "\n", "UTF-8"))
             cur = conn.execute("SELECT UserName FROM Users")
             f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
-        update_cache("Users", update_time)
+        update_state(users_state, update_time)
     else:
         print("Users have not been updated; skipping.")
 
-- 
GitLab
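For reference, a minimal consumer-side sketch (not part of the patch) of how the
packages-meta archive might be read after this change, assuming it is the gzipped
JSON file written to the configured packagesmetafile path and that it follows the
"data" layout documented in the docstring above; the file path used here is a
placeholder, not a path defined by the patch:

    import gzip
    import json

    # Hypothetical location of the generated meta archive.
    packagesmetafile = "/srv/http/aurweb/packages-meta-v1.json.gz"

    # The archive is gzipped JSON; open in text mode and parse it.
    with gzip.open(packagesmetafile, "rt") as f:
        meta = json.load(f)

    # With the "mapping" key removed, name-based lookups are built
    # client-side by iterating the "data" list once.
    by_name = {pkg["Name"]: pkg for pkg in meta["data"]}

    pkg = by_name.get("some-package")
    if pkg is not None:
        print(pkg["Version"], pkg.get("URL"))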