Verified Commit 5b4931d9 authored by Kevin Morris

feat(mkpkglists): cache update states of mkpkglists


This commit changes mkpkglists so that it does not regenerate
archives that do not need updating, by utilizing InnoDB's
`information_schema.tables` AUTO_INCREMENT column.

Signed-off-by: Kevin Morris <kevr@0cost.org>
parent 928ef295
This commit is part of merge request !233.
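The mechanism behind the change: for InnoDB tables, `information_schema.tables` exposes the table's next AUTO_INCREMENT value, which only moves forward as rows are inserted. Comparing that counter against a value cached from the previous run is therefore a cheap proxy for "has this table gained rows since the archive was last generated". A minimal standalone sketch of the same check, not aurweb's API: the DB-API cursor `cur`, the schema name, and the paramstyle `%s` are all assumptions here (the real script reads the schema name from aurweb's config and goes through aurweb.db.Connection):

    def table_counter(cur, schema: str, table: str) -> int:
        # AUTO_INCREMENT is the next id InnoDB will hand out for this
        # table; it grows with every INSERT, so it doubles as a cheap
        # table "version" that is readable without scanning the table.
        cur.execute(
            "SELECT auto_increment FROM information_schema.tables "
            "WHERE table_schema = %s AND table_name = %s",
            (schema, table))
        return cur.fetchone()[0]

    # Usage sketch: regenerate only when the counter has moved since the
    # value cached on the previous run (cache path is hypothetical).
    # previous = int(open("/tmp/Users.update-time.cache").read().strip())
    # if table_counter(cur, "aurweb", "Users") != previous:
    #     ...  # rebuild the archive, then write the new counter back

Note the limitation this approach accepts: the counter advances only on inserts, so it detects new rows rather than edits to existing ones.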
@@ -3,6 +3,7 @@

import datetime
import gzip
import json
import os

import aurweb.config
import aurweb.db

@@ -13,6 +14,28 @@

pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
userfile = aurweb.config.get('mkpkglists', 'userfile')


def should_update(tablename: str) -> int:
    conn = aurweb.db.Connection()

    db_name = aurweb.config.get("database", "name")
    cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
                       "WHERE table_schema = ? AND table_name = ?",
                       (db_name, tablename,))
    update_time = cur.fetchone()[0]

    cached_update_time = 0
    if os.path.exists(f"/tmp/{tablename}.update-time.cache"):
        with open(f"/tmp/{tablename}.update-time.cache") as f:
            cached_update_time = int(f.read().strip())

    return (cached_update_time == update_time, update_time)


def update_cache(tablename: str, update_time: int) -> None:
    with open(f"/tmp/{tablename}.update-time.cache", "w") as f:
        f.write(str(update_time))


def main():
    conn = aurweb.db.Connection()

@@ -21,77 +44,96 @@ def main():

    pkgbaselist_header = "# AUR package base list, generated on " + datestr
    userlist_header = "# AUR user name list, generated on " + datestr

    updated, update_time = should_update("Packages")
    if not updated:
        print("Updating Packages...")
        columns = ("Packages.ID, PackageBaseID, Packages.Name, "
                   "Version, Description, URL")
        cur = conn.execute(f"SELECT {columns} FROM Packages "
                           "INNER JOIN PackageBases "
                           "ON PackageBases.ID = Packages.PackageBaseID "
                           "WHERE PackageBases.PackagerUID IS NOT NULL")
        results = cur.fetchall()

        with gzip.open(packagesfile, "w") as f:
            f.write(bytes(pkglist_header + "\n", "UTF-8"))
            f.writelines([bytes(x[2] + "\n", "UTF-8") for x in results])

        with gzip.open(packagesmetafile, "wt") as f:
            """ The output "data" json key points to a list of dictionaries,
            each representing a single result, filled with column names as
            keys and column values as values.

            The output "mapping" json key points to a dictionary of Package
            name key -> "data"-list index pairs. This provides users of
            the meta archive a way to perform O(1) searches based on a
            package name, while still providing a sequential list for
            loopability.

            i = json_data["mapping"]["package_name"]
            package_data = json_data["data"][i]
            name = package_data.get("Name")
            version = package_data.get("Version")

            Example:
            {
                "data": [
                    {
                        "ID": 123,
                        "Name": "package_name",
                        "PackageBaseID": 234,
                        "Version": "0.1.1",
                        "Description": "Some description...",
                        "URL": "https://some.url"
                    },
                    ...
                ],
                "mapping": {
                    "package_name": 0,
                    ...
                }
            }
            """
            json.dump({
                "warning": ("This is a experimental! It can be removed "
                            "or modified without warning!"),
                "mapping": {
                    result[2]: i
                    for i, result in enumerate(results)
                },
                "data": [{
                    column[0]: result[i]
                    for i, column in enumerate(cur.description)
                } for result in results]
            }, f)

        update_cache("Packages", update_time)
    else:
        print("Packages have not been updated; skipping.")

    updated, update_time = should_update("PackageBases")
    if not updated:
        print("Updating PackageBases...")
        with gzip.open(pkgbasefile, "w") as f:
            f.write(bytes(pkgbaselist_header + "\n", "UTF-8"))
            cur = conn.execute("SELECT Name FROM PackageBases " +
                               "WHERE PackagerUID IS NOT NULL")
            f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])

        update_cache("PackageBases", update_time)
    else:
        print("PackageBases have not been updated; skipping.")

    updated, update_time = should_update("Users")
    if not updated:
        print("Updating Users...")
        with gzip.open(userfile, "w") as f:
            f.write(bytes(userlist_header + "\n", "UTF-8"))
            cur = conn.execute("SELECT UserName FROM Users")
            f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])

        update_cache("Users", update_time)
    else:
        print("Users have not been updated; skipping.")

    conn.close()
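For readers of the new meta archive, the docstring in the diff above defines the layout; here is a short consumer-side sketch following that lookup pattern. The local filename and "package_name" are placeholders (the real output path comes from the `packagesmetafile` config option):

    import gzip
    import json

    # Hypothetical local copy of the meta archive written by mkpkglists.
    with gzip.open("packages-meta.json.gz", "rt") as f:
        json_data = json.load(f)

    # O(1) lookup: "mapping" gives the index of the package's record
    # inside the sequential "data" list.
    i = json_data["mapping"]["package_name"]
    package_data = json_data["data"][i]
    print(package_data.get("Name"), package_data.get("Version"))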