mkpkglists improvements

Merged Kevin Morris requested to merge kevr/aurweb:teapot into master
All threads resolved!
2 files changed: +139 -51
@@ -5,40 +5,117 @@ import gzip
import json
import os
from collections import defaultdict
from decimal import Decimal
from typing import Tuple
import aurweb.config
import aurweb.db
def state_path(archive: str) -> str:
# A hard-coded /tmp state directory.
# TODO: Use Redis cache to store this state after we merge
# FastAPI into master and removed PHP from the tree.
return os.path.join("/tmp", os.path.basename(archive) + ".state")
packagesfile = aurweb.config.get('mkpkglists', 'packagesfile')
packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile')
packages_state = state_path(packagesfile)
pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
pkgbases_state = state_path(pkgbasefile)
userfile = aurweb.config.get('mkpkglists', 'userfile')
users_state = state_path(userfile)
-def should_update(tablename: str) -> int:
+def should_update(state: str, tablename: str) -> Tuple[bool, int]:
if aurweb.config.get("database", "backend") != "mysql":
-return False
-conn = aurweb.db.Connection()
+return (False, 0)
db_name = aurweb.config.get("database", "name")
+conn = aurweb.db.Connection()
cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
"WHERE table_schema = ? AND table_name = ?",
(db_name, tablename,))
update_time = cur.fetchone()[0]
-cached_update_time = 0
-if os.path.exists(f"/tmp/{tablename}.update-time.cache"):
-with open(f"/tmp/{tablename}.update-time.cache") as f:
-cached_update_time = int(f.read().strip())
+saved_update_time = 0
+if os.path.exists(state):
+with open(state) as f:
+saved_update_time = int(f.read().strip())
-return (cached_update_time == update_time, update_time)
+return (saved_update_time == update_time, update_time)
-def update_cache(tablename: str, update_time: int) -> None:
-with open(f"/tmp/{tablename}.update-time.cache", "w") as f:
+def update_state(state: str, update_time: int) -> None:
+with open(state, "w") as f:
f.write(str(update_time))
TYPE_MAP = {
"depends": "Depends",
"makedepends": "MakeDepends",
"checkdepends": "CheckDepends",
"optdepends": "OptDepends",
"conflicts": "Conflicts",
"provides": "Provides",
"replaces": "Replaces",
"groups": "Groups",
"license": "License",
"keyword": "Keywords"
}
def get_extended_fields(package_id: int, pkgbase_id: int):
query = """
SELECT DependencyTypes.Name AS Type,
PackageDepends.DepName AS Name,
PackageDepends.DepCondition AS Cond
FROM PackageDepends
LEFT JOIN DependencyTypes
ON DependencyTypes.ID = PackageDepends.DepTypeID
WHERE PackageDepends.PackageID = ?
UNION SELECT RelationTypes.Name AS Type,
PackageRelations.RelName AS Name,
PackageRelations.RelCondition AS Cond
FROM PackageRelations
LEFT JOIN RelationTypes
ON RelationTypes.ID = PackageRelations.RelTypeID
WHERE PackageRelations.PackageID = ?
UNION SELECT 'groups' AS Type, Groups.Name, '' AS Cond
FROM Groups INNER JOIN PackageGroups
ON PackageGroups.PackageID = ?
AND PackageGroups.GroupID = Groups.ID
UNION SELECT 'license' AS Type, Licenses.Name, '' as Cond
FROM Licenses INNER JOIN PackageLicenses
ON PackageLicenses.PackageID = ?
AND PackageLicenses.LicenseID = Licenses.ID
UNION SELECT 'keyword' AS Type, PackageKeywords.Keyword AS Name, '' as Cond
FROM PackageKeywords WHERE PackageBaseID = ?
"""
conn = aurweb.db.Connection()
args = [package_id] * 4 + [pkgbase_id]
cursor = conn.execute(query, args)
data = defaultdict(list)
data["License"] = []
data["Keywords"] = []
for result in cursor.fetchall():
key = TYPE_MAP.get(result[0])
output = result[1]
if result[2]:
output += result[2]
data[key].append(output)
conn.close()
return data
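# Illustration only (hypothetical values, not part of this change): for a
# package whose only rows are a depends entry ("glibc", ">=2.33"), a license
# ("GPL") and a keyword ("cli"), get_extended_fields() would return roughly
#   {"Depends": ["glibc>=2.33"], "License": ["GPL"], "Keywords": ["cli"]}
# since a non-empty Cond is appended to the name, and "License"/"Keywords"
# are pre-seeded so both keys always appear in the result.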
def main():
conn = aurweb.db.Connection()
@@ -47,38 +124,36 @@ def main():
pkgbaselist_header = "# AUR package base list, generated on " + datestr
userlist_header = "# AUR user name list, generated on " + datestr
updated, update_time = should_update("Packages")
updated, update_time = should_update(packages_state, "Packages")
if not updated:
print("Updating Packages...")
-columns = ("Packages.ID, PackageBaseID, Packages.Name, "
-"Version, Description, URL")
+# Query columns; copied from RPC.
+columns = ("Packages.ID, Packages.Name, "
+"PackageBases.ID AS PackageBaseID, "
+"PackageBases.Name AS PackageBase, "
+"Version, Description, URL, NumVotes, "
+"Popularity, OutOfDateTS AS OutOfDate, "
+"Users.UserName AS Maintainer, "
+"SubmittedTS AS FirstSubmitted, "
+"ModifiedTS AS LastModified")
# Perform query.
cur = conn.execute(f"SELECT {columns} FROM Packages "
"INNER JOIN PackageBases "
"LEFT JOIN PackageBases "
"ON PackageBases.ID = Packages.PackageBaseID "
"LEFT JOIN Users "
"ON PackageBases.MaintainerUID = Users.ID "
"WHERE PackageBases.PackagerUID IS NOT NULL")
results = cur.fetchall()
-with gzip.open(packagesfile, "w") as f:
-f.write(bytes(pkglist_header + "\n", "UTF-8"))
-f.writelines([bytes(x[2] + "\n", "UTF-8") for x in results])
+# Store JSON-data in `output`, which can be reused for the
+# more basic packagesfile generation afterward.
+output = dict()
with gzip.open(packagesmetafile, "wt") as f:
""" The output "data" json key points to a list of dictionaries,
each representing a single result, filled with column names as
keys and column values as values.
The output "mapping" json key points to a dictionary of Package
name key -> "data"-list index pairs. This provides users of
the meta archive a way to perform O(1) searches based on a
package name, while still providing a sequential list for
loopability.
i = json_data["mapping"]["package_name"]
package_data = json_data["data"][i]
name = package_data.get("Name")
version = package_data.get("Version")
Example:
{
"data": [
@@ -91,31 +166,44 @@ def main():
"URL": "https://some.url"
},
...
],
"mapping": {
"package_name": 0,
...
}
]
}
"""
def is_decimal(column):
if isinstance(column, Decimal):
return float(column)
return column
output = []
for result in cur.fetchall():
extended_fields = get_extended_fields(result[0], result[2])
item = {
column[0]: is_decimal(result[i])
for i, column in enumerate(cur.description)
}
item.update(extended_fields)
output.append(item)
json.dump({
"warning": ("This is a experimental! It can be removed "
"or modified without warning!"),
"mapping": {
result[2]: i
for i, result in enumerate(results)
},
"data": [{
column[0]: result[i]
for i, column in enumerate(cur.description)
} for result in results]
"data": output
}, f)
update_cache("Packages", update_time)
with gzip.open(packagesfile, "w") as f:
f.write(bytes(pkglist_header + "\n", "UTF-8"))
f.writelines([
bytes(x.get("Name") + "\n", "UTF-8")
for x in output
])
update_state(packages_state, update_time)
else:
print("Packages have not been updated; skipping.")
-updated, update_time = should_update("PackageBases")
+updated, update_time = should_update(pkgbases_state, "PackageBases")
if not updated:
print("Updating PackageBases...")
with gzip.open(pkgbasefile, "w") as f:
@@ -123,18 +211,18 @@ def main():
cur = conn.execute("SELECT Name FROM PackageBases " +
"WHERE PackagerUID IS NOT NULL")
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
update_cache("PackageBases", update_time)
update_state(pkgbases_state, update_time)
else:
print("PackageBases have not been updated; skipping.")
-updated, update_time = should_update("Users")
+updated, update_time = should_update(users_state, "Users")
if not updated:
print("Updating Users...")
with gzip.open(userfile, "w") as f:
f.write(bytes(userlist_header + "\n", "UTF-8"))
cur = conn.execute("SELECT UserName FROM Users")
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
update_cache("Users", update_time)
update_state(users_state, update_time)
else:
print("Users have not been updated; skipping.")