mkpkglists improvements

Merged: Kevin Morris requested to merge kevr/aurweb:teapot into master
All threads resolved!
1 file changed: +46 -46
@@ -5,37 +5,51 @@ import gzip
import json
import os
from typing import Tuple
import aurweb.config
import aurweb.db
def state_path(archive: str) -> str:
# A hard-coded /tmp state directory.
# TODO: Use Redis cache to store this state after we merge
# FastAPI into master and removed PHP from the tree.
return os.path.join("/tmp", os.path.basename(archive) + ".state")
packagesfile = aurweb.config.get('mkpkglists', 'packagesfile')
packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile')
packages_state = state_path(packagesfile)
pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
pkgbases_state = state_path(pkgbasefile)
userfile = aurweb.config.get('mkpkglists', 'userfile')
users_state = state_path(userfile)
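Aside: state_path() keys each state file off the archive's basename, so every configured archive gets its own marker under /tmp. A quick illustration (the archive location below is made up; in practice it comes from the [mkpkglists] packagesfile option):

import os

def state_path(archive: str) -> str:
    # Same helper as in the diff above, repeated so the snippet runs standalone.
    return os.path.join("/tmp", os.path.basename(archive) + ".state")

# Hypothetical archive path; only the basename matters for the state file.
print(state_path("/srv/http/aurweb/web/html/packages.gz"))
# -> /tmp/packages.gz.state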
def should_update(tablename: str) -> int:
def should_update(state: str, tablename: str) -> Tuple[bool, int]:
if aurweb.config.get("database", "backend") != "mysql":
return False
conn = aurweb.db.Connection()
db_name = aurweb.config.get("database", "name")
conn = aurweb.db.Connection()
cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
"WHERE table_schema = ? AND table_name = ?",
(db_name, tablename,))
update_time = cur.fetchone()[0]
cached_update_time = 0
if os.path.exists(f"/tmp/{tablename}.update-time.cache"):
with open(f"/tmp/{tablename}.update-time.cache") as f:
cached_update_time = int(f.read().strip())
saved_update_time = 0
if os.path.exists(state):
with open(state) as f:
saved_update_time = int(f.read().strip())
return (cached_update_time == update_time, update_time)
return (saved_update_time == update_time, update_time)
def update_cache(tablename: str, update_time: int) -> None:
with open(f"/tmp/{tablename}.update-time.cache", "w") as f:
def update_state(state: str, update_time: int) -> None:
with open(state, "w") as f:
f.write(str(update_time))
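The TODO above points at replacing these /tmp state files with Redis once FastAPI is merged. A rough sketch of what that swap could look like, assuming redis-py and a local Redis instance; the key naming and connection settings are invented for illustration and are not part of this change:

import redis

# Hypothetical Redis-backed state storage mentioned in the TODO.
r = redis.Redis(host="localhost", port=6379, db=0)

def get_saved_update_time(archive: str) -> int:
    value = r.get(f"mkpkglists:{archive}:update_time")
    return int(value) if value is not None else 0

def save_update_time(archive: str, update_time: int) -> None:
    r.set(f"mkpkglists:{archive}:update_time", update_time)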
@@ -47,7 +61,7 @@ def main():
pkgbaselist_header = "# AUR package base list, generated on " + datestr
userlist_header = "# AUR user name list, generated on " + datestr
updated, update_time = should_update("Packages")
updated, update_time = should_update(packages_state, "Packages")
if not updated:
print("Updating Packages...")
columns = ("Packages.ID, PackageBaseID, Packages.Name, "
@@ -56,29 +70,15 @@ def main():
"INNER JOIN PackageBases "
"ON PackageBases.ID = Packages.PackageBaseID "
"WHERE PackageBases.PackagerUID IS NOT NULL")
results = cur.fetchall()
with gzip.open(packagesfile, "w") as f:
f.write(bytes(pkglist_header + "\n", "UTF-8"))
f.writelines([bytes(x[2] + "\n", "UTF-8") for x in results])
# Store JSON-data in `output`, which can be reused for the
# more basic packagesfile generation afterward.
output = dict()
with gzip.open(packagesmetafile, "wt") as f:
""" The output "data" json key points to a list of dictionaries,
each representing a single result, filled with column names as
keys and column values as values.
The output "mapping" json key points to a dictionary of Package
name key -> "data"-list index pairs. This provides users of
the meta archive a way to perform O(1) searches based on a
package name, while still providing a sequential list for
loopability.
i = json_data["mapping"]["package_name"]
package_data = json_data["data"][i]
name = package_data.get("Name")
version = package_data.get("Version")
Example:
{
"data": [
@@ -91,31 +91,31 @@ def main():
"URL": "https://some.url"
},
...
],
"mapping": {
"package_name": 0,
...
}
]
}
"""
output = [{
column[0]: result[i]
for i, column in enumerate(cur.description)
} for result in cur.fetchall()]
json.dump({
"warning": ("This is a experimental! It can be removed "
"or modified without warning!"),
"mapping": {
result[2]: i
for i, result in enumerate(results)
},
"data": [{
column[0]: result[i]
for i, column in enumerate(cur.description)
} for result in results]
"data": output
}, f)
update_cache("Packages", update_time)
with gzip.open(packagesfile, "w") as f:
f.write(bytes(pkglist_header + "\n", "UTF-8"))
f.writelines([
bytes(x.get("Name") + "\n", "UTF-8")
for x in output
])
update_state(packages_state, update_time)
else:
print("Packages have not been updated; skipping.")
updated, update_time = should_update("PackageBases")
updated, update_time = should_update(pkgbases_state, "PackageBases")
if not updated:
print("Updating PackageBases...")
with gzip.open(pkgbasefile, "w") as f:
@@ -123,18 +123,18 @@ def main():
cur = conn.execute("SELECT Name FROM PackageBases " +
"WHERE PackagerUID IS NOT NULL")
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
update_cache("PackageBases", update_time)
update_state(pkgbases_state, update_time)
else:
print("PackageBases have not been updated; skipping.")
updated, update_time = should_update("Users")
updated, update_time = should_update(users_state, "Users")
if not updated:
print("Updating Users...")
with gzip.open(userfile, "w") as f:
f.write(bytes(userlist_header + "\n", "UTF-8"))
cur = conn.execute("SELECT UserName FROM Users")
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
update_cache("Users", update_time)
update_state(users_state, update_time)
else:
print("Users have not been updated; skipping.")