Commit b44411fb authored by Lukas Fleischer
Browse files

Use gitnamespaces for efficient storage



Instead of using one Git repository per package, use a single large
object store for space efficiency. The refs of the individual package
bases are separated using gitnamespaces(7), which makes it easy to expose
each namespace as an independent repository. Also, git-serve is
modified to create a branch for each package, making it possible to
browse the large repository with cgit.
Helped-by: Florian Pritz <bluewind@xinu.at>
Helped-by: Johannes Löthberg <johannes@kyriasis.com>
Signed-off-by: Lukas Fleischer <lfleischer@archlinux.org>
parent ecfa27e4
...@@ -19,13 +19,17 @@ Setup on Arch Linux ...@@ -19,13 +19,17 @@ Setup on Arch Linux
$ mysql -uaur -p AUR </srv/http/aurweb/schema/aur-schema.sql $ mysql -uaur -p AUR </srv/http/aurweb/schema/aur-schema.sql
5) Generate templates for new Git repositories: 5) Create a new user:
$ /srv/http/aurweb/scripts/git-integration/gen-templates.py # useradd -U -d /srv/http/aurweb -c 'AUR user' aur
6) Create a new user: 6) Initialize the Git repository:
# useradd -U -d /srv/http/aurweb -c 'AUR user' aur # mkdir /srv/http/aurweb/aur.git/
# cd /srv/http/aurweb/aur.git/
# git init --bare
# ln -s ../../scripts/git-integration/git-update.py hooks/update
# chown -R aur .
7) Install the git-auth wrapper script: 7) Install the git-auth wrapper script:
...@@ -42,3 +46,24 @@ Setup on Arch Linux ...@@ -42,3 +46,24 @@ Setup on Arch Linux
AuthorizedKeysCommand /usr/local/bin/aur-git-auth "%t" "%k" AuthorizedKeysCommand /usr/local/bin/aur-git-auth "%t" "%k"
AuthorizedKeysCommandUser aur AuthorizedKeysCommandUser aur
9) If you want to enable smart HTTP support with nginx and uWSGI, you can use
the following directives:
location ~ ^/([a-z0-9][a-z0-9.+_-]*)\.git/(.*)$ {
include uwsgi_params;
uwsgi_modifier1 9;
uwsgi_param PATH_INFO /aur.git/$2;
uwsgi_param GIT_NAMESPACE $1;
uwsgi_pass unix:/run/uwsgi/smarthttp/aurweb.sock;
}
For the uWSGI configuration, the following template can be used:
[uwsgi]
plugins = cgi
uid = aur
processes = 1
threads = 8
env = GIT_HTTP_EXPORT_ALL=
env = GIT_PROJECT_ROOT=/srv/http/aurweb
cgi = /usr/lib/git-core/git-http-backend
...@@ -26,4 +26,6 @@ max-blob-size=2048 ...@@ -26,4 +26,6 @@ max-blob-size=2048
max-stats=year max-stats=year
enable-http-clone=1 enable-http-clone=1
scan-path=/srv/http/aurweb/repos/ repo.url=aur.git
repo.path=/srv/http/aurweb/aur.git
repo.desc=AUR Package Repositories
...@@ -18,7 +18,7 @@ persistent_cookie_timeout = 2592000 ...@@ -18,7 +18,7 @@ persistent_cookie_timeout = 2592000
max_filesize_uncompressed = 8388608 max_filesize_uncompressed = 8388608
disable_http_login = 1 disable_http_login = 1
aur_location = https://aur.archlinux.org aur_location = https://aur.archlinux.org
cgit_uri = https://aur.archlinux.org/cgit/ cgit_uri = https://aur.archlinux.org/cgit/aur.git
git_clone_uri_anon = https://aur.archlinux.org/cgit/%s.git/ git_clone_uri_anon = https://aur.archlinux.org/cgit/%s.git/
git_clone_uri_priv = ssh+git://aur@aur.archlinux.org/%s.git/ git_clone_uri_priv = ssh+git://aur@aur.archlinux.org/%s.git/
max_rpc_results = 5000 max_rpc_results = 5000
...@@ -34,7 +34,7 @@ git-serve-cmd = /srv/http/aurweb/scripts/git-integration/git-serve.py ...@@ -34,7 +34,7 @@ git-serve-cmd = /srv/http/aurweb/scripts/git-integration/git-serve.py
ssh-options = no-port-forwarding,no-X11-forwarding,no-pty ssh-options = no-port-forwarding,no-X11-forwarding,no-pty
[serve] [serve]
repo-base = /srv/http/aurweb/repos/ repo-path = /srv/http/aurweb/aur.git/
repo-regex = [a-z0-9][a-z0-9.+_-]*$ repo-regex = [a-z0-9][a-z0-9.+_-]*$
template-path = /srv/http/aurweb/scripts/git-integration/templates/ template-path = /srv/http/aurweb/scripts/git-integration/templates/
git-update-hook = /srv/http/aurweb/scripts/git-integration/git-update.py git-update-hook = /srv/http/aurweb/scripts/git-integration/git-update.py
......
#!/usr/bin/python3
import configparser
import os
import shutil
import sys
# The configuration file is located relative to this script's resolved
# location: two directories up, then conf/config.
_config_path = os.path.dirname(os.path.realpath(__file__)) + "/../../conf/config"

config = configparser.RawConfigParser()
config.read(_config_path)

# Values from the [serve] section used by the rest of the script.
template_path = config.get('serve', 'template-path')
git_update_hook = config.get('serve', 'git-update-hook')
def die(msg):
    """Write *msg* followed by a newline to stderr and exit with status 1."""
    sys.stderr.write("%s\n" % (msg))
    # Use sys.exit rather than the bare exit() helper, which is installed by
    # the site module for interactive convenience and is not always present.
    sys.exit(1)
# Start from a clean slate: remove any previously generated template tree.
if os.path.exists(template_path):
    shutil.rmtree(template_path)

# Recreate the template skeleton. After the chdir, the relative paths below
# resolve inside template_path.
os.mkdir(template_path)
os.chdir(template_path)
for subdir in ("branches", "hooks", "info"):
    os.mkdir(subdir)

# Link the update hook into the template. The destination is given relative
# to the current directory (template_path) so the result does not depend on
# whether the configured template-path ends with a slash.
os.symlink(git_update_hook, os.path.join('hooks', 'update'))

with open("description", 'w') as f:
    f.write("Unnamed repository; push to update the description.\n")
...@@ -17,28 +17,23 @@ aur_db_user = config.get('database', 'user') ...@@ -17,28 +17,23 @@ aur_db_user = config.get('database', 'user')
aur_db_pass = config.get('database', 'password') aur_db_pass = config.get('database', 'password')
aur_db_socket = config.get('database', 'socket') aur_db_socket = config.get('database', 'socket')
repo_base_path = config.get('serve', 'repo-base') repo_path = config.get('serve', 'repo-path')
repo_regex = config.get('serve', 'repo-regex') repo_regex = config.get('serve', 'repo-regex')
git_shell_cmd = config.get('serve', 'git-shell-cmd') git_shell_cmd = config.get('serve', 'git-shell-cmd')
ssh_cmdline = config.get('serve', 'ssh-cmdline') ssh_cmdline = config.get('serve', 'ssh-cmdline')
template_path = config.get('serve', 'template-path') template_path = config.get('serve', 'template-path')
def repo_path_validate(path): def pkgbase_exists(pkgbase):
if not path.startswith(repo_base_path): db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
return False passwd=aur_db_pass, db=aur_db_name,
if path.endswith('.git'): unix_socket=aur_db_socket)
repo = path[len(repo_base_path):-4] cur = db.cursor()
elif path.endswith('.git/'):
repo = path[len(repo_base_path):-5] cur.execute("SELECT COUNT(*) FROM PackageBases WHERE Name = %s ",
else: [pkgbase])
return False
return re.match(repo_regex, repo) db.close()
return (cur.fetchone()[0] > 0)
def repo_path_get_pkgbase(path):
pkgbase = path.rstrip('/').rpartition('/')[2]
if pkgbase.endswith('.git'):
pkgbase = pkgbase[:-4]
return pkgbase
def list_repos(user): def list_repos(user):
db = mysql.connector.connect(host=aur_db_host, user=aur_db_user, db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
...@@ -57,19 +52,17 @@ def list_repos(user): ...@@ -57,19 +52,17 @@ def list_repos(user):
print((' ' if row[1] else '*') + row[0]) print((' ' if row[1] else '*') + row[0])
db.close() db.close()
def setup_repo(repo, user): def setup_repo(pkgbase, user):
if not re.match(repo_regex, repo): if not re.match(repo_regex, pkgbase):
die('%s: invalid repository name: %s' % (action, repo)) die('%s: invalid repository name: %s' % (action, pkgbase))
if pkgbase_exists(pkgbase):
die('%s: package base already exists: %s' % (action, pkgbase))
db = mysql.connector.connect(host=aur_db_host, user=aur_db_user, db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
passwd=aur_db_pass, db=aur_db_name, passwd=aur_db_pass, db=aur_db_name,
unix_socket=aur_db_socket) unix_socket=aur_db_socket)
cur = db.cursor() cur = db.cursor()
cur.execute("SELECT COUNT(*) FROM PackageBases WHERE Name = %s ", [repo])
if cur.fetchone()[0] > 0:
die('%s: package base already exists: %s' % (action, repo))
cur.execute("SELECT ID FROM Users WHERE Username = %s ", [user]) cur.execute("SELECT ID FROM Users WHERE Username = %s ", [user])
userid = cur.fetchone()[0] userid = cur.fetchone()[0]
if userid == 0: if userid == 0:
...@@ -77,7 +70,7 @@ def setup_repo(repo, user): ...@@ -77,7 +70,7 @@ def setup_repo(repo, user):
cur.execute("INSERT INTO PackageBases (Name, SubmittedTS, ModifiedTS, " + cur.execute("INSERT INTO PackageBases (Name, SubmittedTS, ModifiedTS, " +
"SubmitterUID, MaintainerUID) VALUES (%s, UNIX_TIMESTAMP(), " + "SubmitterUID, MaintainerUID) VALUES (%s, UNIX_TIMESTAMP(), " +
"UNIX_TIMESTAMP(), %s, %s)", [repo, userid, userid]) "UNIX_TIMESTAMP(), %s, %s)", [pkgbase, userid, userid])
pkgbase_id = cur.lastrowid pkgbase_id = cur.lastrowid
cur.execute("INSERT INTO CommentNotify (PackageBaseID, UserID) " + cur.execute("INSERT INTO CommentNotify (PackageBaseID, UserID) " +
...@@ -86,8 +79,11 @@ def setup_repo(repo, user): ...@@ -86,8 +79,11 @@ def setup_repo(repo, user):
db.commit() db.commit()
db.close() db.close()
repo_path = repo_base_path + '/' + repo + '.git/' repo = pygit2.Repository(repo_path)
pygit2.init_repository(repo_path, True, 48, template_path=template_path) repo.create_reference('refs/heads/' + pkgbase,
'refs/namespaces/' + pkgbase + '/refs/heads/master')
repo.create_reference('refs/namespaces/' + pkgbase + '/HEAD',
'refs/namespaces/' + pkgbase + '/refs/heads/master')
def check_permissions(pkgbase, user): def check_permissions(pkgbase, user):
db = mysql.connector.connect(host=aur_db_host, user=aur_db_user, db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
...@@ -125,19 +121,25 @@ action = cmdargv[0] ...@@ -125,19 +121,25 @@ action = cmdargv[0]
if action == 'git-upload-pack' or action == 'git-receive-pack': if action == 'git-upload-pack' or action == 'git-receive-pack':
if len(cmdargv) < 2: if len(cmdargv) < 2:
die_with_help("%s: missing path" % (action)) die_with_help("%s: missing path" % (action))
path = repo_base_path.rstrip('/') + cmdargv[1]
if not repo_path_validate(path): path = cmdargv[1].rstrip('/')
if not path.startswith('/') or not path.endswith('.git'):
die('%s: invalid path: %s' % (action, path)) die('%s: invalid path: %s' % (action, path))
pkgbase = repo_path_get_pkgbase(path) pkgbase = path[1:-4]
if not os.path.exists(path): if not re.match(repo_regex, pkgbase):
die('%s: invalid repository name: %s' % (action, repo))
if not pkgbase_exists(pkgbase):
setup_repo(pkgbase, user) setup_repo(pkgbase, user)
if action == 'git-receive-pack': if action == 'git-receive-pack':
if not check_permissions(pkgbase, user): if not check_permissions(pkgbase, user):
die('%s: permission denied: %s' % (action, user)) die('%s: permission denied: %s' % (action, user))
os.environ["AUR_USER"] = user os.environ["AUR_USER"] = user
os.environ["AUR_GIT_DIR"] = path
os.environ["AUR_PKGBASE"] = pkgbase os.environ["AUR_PKGBASE"] = pkgbase
cmd = action + " '" + path + "'" os.environ["GIT_NAMESPACE"] = pkgbase
cmd = action + " '" + repo_path + "'"
os.execl(git_shell_cmd, git_shell_cmd, '-c', cmd) os.execl(git_shell_cmd, git_shell_cmd, '-c', cmd)
elif action == 'list-repos': elif action == 'list-repos':
if len(cmdargv) > 1: if len(cmdargv) > 1:
......
...@@ -19,6 +19,8 @@ aur_db_user = config.get('database', 'user') ...@@ -19,6 +19,8 @@ aur_db_user = config.get('database', 'user')
aur_db_pass = config.get('database', 'password') aur_db_pass = config.get('database', 'password')
aur_db_socket = config.get('database', 'socket') aur_db_socket = config.get('database', 'socket')
repo_path = config.get('serve', 'repo-path')
def extract_arch_fields(pkginfo, field): def extract_arch_fields(pkginfo, field):
values = [] values = []
...@@ -166,12 +168,11 @@ sha1_new = sys.argv[3] ...@@ -166,12 +168,11 @@ sha1_new = sys.argv[3]
user = os.environ.get("AUR_USER") user = os.environ.get("AUR_USER")
pkgbase = os.environ.get("AUR_PKGBASE") pkgbase = os.environ.get("AUR_PKGBASE")
git_dir = os.environ.get("AUR_GIT_DIR")
if refname != "refs/heads/master": if refname != "refs/heads/master":
die("pushing to a branch other than master is restricted") die("pushing to a branch other than master is restricted")
repo = pygit2.Repository(git_dir) repo = pygit2.Repository(repo_path)
walker = repo.walk(sha1_new, pygit2.GIT_SORT_TOPOLOGICAL) walker = repo.walk(sha1_new, pygit2.GIT_SORT_TOPOLOGICAL)
if sha1_old != "0000000000000000000000000000000000000000": if sha1_old != "0000000000000000000000000000000000000000":
walker.hide(sha1_old) walker.hide(sha1_old)
...@@ -245,6 +246,6 @@ db.close() ...@@ -245,6 +246,6 @@ db.close()
pkglist = list(srcinfo.GetPackageNames()) pkglist = list(srcinfo.GetPackageNames())
if len(pkglist) > 0: if len(pkglist) > 0:
with open(git_dir + '/description', 'w') as f: with open(repo_path + '/description', 'w') as f:
pkginfo = srcinfo.GetMergedPackage(pkglist[0]) pkginfo = srcinfo.GetMergedPackage(pkglist[0])
f.write(pkginfo['pkgdesc']) f.write(pkginfo['pkgdesc'])
#!/usr/bin/python3
import configparser
import mysql.connector
import os
import pygit2
import re
import shlex
import sys
# The configuration file is located relative to this script's resolved
# location: two directories up, then conf/config.
_config_path = os.path.dirname(os.path.realpath(__file__)) + "/../../conf/config"

config = configparser.RawConfigParser()
config.read(_config_path)

# Database connection settings, read from the [database] section in the
# order host, name, user, password, socket.
aur_db_host, aur_db_name, aur_db_user, aur_db_pass, aur_db_socket = (
    config.get('database', key)
    for key in ('host', 'name', 'user', 'password', 'socket')
)

# Repository settings, read from the [serve] section.
repo_base_path = config.get('serve', 'repo-base')
repo_regex = config.get('serve', 'repo-regex')
template_path = config.get('serve', 'template-path')
def die(msg):
    """Write *msg* followed by a newline to stderr and exit with status 1."""
    sys.stderr.write("%s\n" % (msg))
    # Use sys.exit rather than the bare exit() helper, which is installed by
    # the site module for interactive convenience and is not always present.
    sys.exit(1)
# Collect the name of every package base; each one gets its own repository.
db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
                             passwd=aur_db_pass, db=aur_db_name,
                             unix_socket=aur_db_socket)
try:
    cur = db.cursor()
    cur.execute("SELECT Name FROM PackageBases")
    repos = [row[0] for row in cur]
finally:
    # Close the connection even when the query or the fetch fails.
    db.close()
# Refuse to create anything if a package base name does not match the
# configured pattern; the first offending name is reported.
first_invalid = next(
    (name for name in repos if not re.match(repo_regex, name)),
    None,
)
if first_invalid is not None:
    die('invalid repository name: %s' % (first_invalid))
# Initialize one repository per package base, printing "[i/n] name" progress
# with the counter right-aligned to the width of the total.
n = len(repos)
width = len(str(n))
for i, repo in enumerate(repos, start=1):
    print("[%s/%d] %s" % (str(i).rjust(width), n, repo))
    repo_path = repo_base_path + '/' + repo + '.git/'
    # NOTE(review): the positional arguments are presumably bare=True and
    # flags=48 (MKPATH | EXTERNAL_TEMPLATE) -- confirm against pygit2/libgit2.
    pygit2.init_repository(repo_path, True, 48, template_path=template_path)
...@@ -9,18 +9,15 @@ afterwards. ...@@ -9,18 +9,15 @@ afterwards.
ALTER TABLE Users ADD COLUMN SSHPubKey VARCHAR(4096) NULL DEFAULT NULL; ALTER TABLE Users ADD COLUMN SSHPubKey VARCHAR(4096) NULL DEFAULT NULL;
---- ----
2. Create a new user and configure the sshd as described in INSTALL. 2. Create a new user and configure Git/SSH as described in INSTALL.
3. Run gen-templates.py to initialize the Git repository template. Create a 3. Reset the packager field of all package bases:
directory for the Git repositories and run init-repos.py to initialize them.
4. Reset the packager field of all package bases:
---- ----
UPDATE PackageBases SET PackagerUID = NULL; UPDATE PackageBases SET PackagerUID = NULL;
---- ----
5. Create a new table for package base co-maintainers: 4. Create a new table for package base co-maintainers:
---- ----
CREATE TABLE PackageComaintainers ( CREATE TABLE PackageComaintainers (
...@@ -34,4 +31,4 @@ CREATE TABLE PackageComaintainers ( ...@@ -34,4 +31,4 @@ CREATE TABLE PackageComaintainers (
) ENGINE = InnoDB; ) ENGINE = InnoDB;
---- ----
6. (optional) Setup cgit to browse the Git repositories via HTTP. 5. (optional) Setup cgit to browse the Git repositories via HTTP.
...@@ -82,10 +82,10 @@ $sources = pkg_sources($row["ID"]); ...@@ -82,10 +82,10 @@ $sources = pkg_sources($row["ID"]);
<h4><?= __('Package Actions') ?></h4> <h4><?= __('Package Actions') ?></h4>
<ul class="small"> <ul class="small">
<li> <li>
<a href="<?= $cgit_uri . $row['BaseName'] . '.git' ?>/tree/PKGBUILD"><?= __('View PKGBUILD') ?></a> / <a href="<?= $cgit_uri . '/tree/PKGBUILD?h=' . $row['BaseName'] ?>"><?= __('View PKGBUILD') ?></a> /
<a href="<?= $cgit_uri . $row['BaseName'] . '.git' ?>/log/"><?= __('View Changes') ?></a> <a href="<?= $cgit_uri . '/log/?h=' . $row['BaseName'] ?>"><?= __('View Changes') ?></a>
</li> </li>
<li><a href="<?= $cgit_uri . $row['BaseName'] . '.git' ?>/snapshot/master.tar.gz"><?= __('Download snapshot') ?></a></li> <li><a href="<?= $cgit_uri . '/snapshot/' . $row['BaseName'] . '.tar.gz' ?>"><?= __('Download snapshot') ?></a>
<li><a href="https://wiki.archlinux.org/index.php/Special:Search?search=<?= urlencode($row['Name']) ?>"><?= __('Search wiki') ?></a></li> <li><a href="https://wiki.archlinux.org/index.php/Special:Search?search=<?= urlencode($row['Name']) ?>"><?= __('Search wiki') ?></a></li>
<li><span class="flagged"><?php if ($row["OutOfDateTS"] !== NULL) { echo __('Flagged out-of-date')." (${out_of_date_time})"; } ?></span></li> <li><span class="flagged"><?php if ($row["OutOfDateTS"] !== NULL) { echo __('Flagged out-of-date')." (${out_of_date_time})"; } ?></span></li>
<?php if ($uid): ?> <?php if ($uid): ?>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment