Skip to content
Snippets Groups Projects

bugfix: gendummydata.py was producing invalid usernames

Merged Kevin Morris requested to merge kevr/aurweb:pu_gendummydata into pu
+ 18
9
@@ -98,11 +98,19 @@ if MAX_USERS > len(contents):
MAX_USERS = len(contents)
if MAX_PKGS > len(contents):
MAX_PKGS = len(contents)
if len(contents) - MAX_USERS > MAX_PKGS:
need_dupes = 0
else:
need_dupes = 0
if not len(contents) - MAX_USERS > MAX_PKGS:
need_dupes = 1
def normalize(unicode_data: str) -> str:
    """Return *unicode_data* with every non-ASCII character removed.

    We only accept ASCII for usernames.  The same filter is used to
    normalize package names, because our database utf8mb4 collations
    compare with Unicode equivalence.

    Characters that cannot be encoded as ASCII are dropped outright (the
    ``'ignore'`` error handler); they are not transliterated, so the
    result may be shorter than the input or even empty.
    """
    return unicode_data.encode('ascii', 'ignore').decode('ascii')
# select random usernames
#
log.debug("Generating random user names...")
@@ -110,12 +118,13 @@ user_id = USER_ID
while len(seen_users) < MAX_USERS:
user = random.randrange(0, len(contents))
word = contents[user].replace("'", "").replace(".", "").replace(" ", "_")
word = word.strip().lower()
word = normalize(word.strip().lower())
if word not in seen_users:
seen_users[word] = user_id
user_id += 1
user_keys = list(seen_users.keys())
# select random package names
#
log.debug("Generating random package names...")
@@ -123,7 +132,7 @@ num_pkgs = PKG_ID
while len(seen_pkgs) < MAX_PKGS:
pkg = random.randrange(0, len(contents))
word = contents[pkg].replace("'", "").replace(".", "").replace(" ", "_")
word = word.strip().lower()
word = normalize(word.strip().lower())
if not need_dupes:
if word not in seen_pkgs and word not in seen_users:
seen_pkgs[word] = num_pkgs
@@ -285,10 +294,10 @@ for p in seen_pkgs_keys:
for i in range(num_sources):
src_file = user_keys[random.randrange(0, len(user_keys))]
src = "%s%s.%s/%s/%s-%s.tar.gz" % (
RANDOM_URL[random.randrange(0, len(RANDOM_URL))],
p, RANDOM_TLDS[random.randrange(0, len(RANDOM_TLDS))],
RANDOM_LOCS[random.randrange(0, len(RANDOM_LOCS))],
src_file, genVersion())
RANDOM_URL[random.randrange(0, len(RANDOM_URL))],
p, RANDOM_TLDS[random.randrange(0, len(RANDOM_TLDS))],
RANDOM_LOCS[random.randrange(0, len(RANDOM_LOCS))],
src_file, genVersion())
s = "INSERT INTO PackageSources(PackageID, Source) VALUES (%d, '%s');\n"
s = s % (seen_pkgs[p], src)
out.write(s)
Loading