Verified Commit fa43f6bc authored by Kevin Morris
Browse files

change(aurweb): add parallel tests and improve aurweb.db



This change utilizes pytest-xdist to perform a multiproc test
run and reworks aurweb.db's code. We no longer use a global
engine, session or Session, but we now use a memo of engines
and sessions as they are requested, based on the PYTEST_CURRENT_TEST
environment variable, which is available during testing.

Additionally, this change strips several SQLite components
out of the Python code-base.

SQLite is still compatible with PHP and sharness tests, but
not with our FastAPI implementation.

More changes:
------------
- Remove use of aurweb.db.session global in other code.
- Use new aurweb.db.name() dynamic db name function in env.py.
- Added 'addopts' to pytest.ini which utilizes multiprocessing.
    - Highly recommended to leave this be or modify `-n auto` to
      `-n {cpu_threads}` where cpu_threads is at least 2.

Signed-off-by: Kevin Morris <kevr@0cost.org>
parent 07aac768
import functools
import hashlib
import math
import os
import re
from typing import Iterable, NewType
from sqlalchemy import event
from sqlalchemy.orm import Query, scoped_session
import sqlalchemy
from sqlalchemy import create_engine, event
from sqlalchemy.engine.base import Engine
from sqlalchemy.engine.url import URL
from sqlalchemy.orm import Query, Session, SessionTransaction, scoped_session, sessionmaker
import aurweb.config
import aurweb.util
# See get_engine.
engine = None
from aurweb import logging
# ORM Session class.
Session = None
logger = logging.get_logger(__name__)
# Global ORM Session object.
session = None
DRIVERS = {
"mysql": "mysql+mysqldb"
}
# Global introspected object memo.
introspected = dict()
# A mocked up type.
Base = NewType("aurweb.models.declarative_base.Base", "Base")
# Some types we don't get access to in this module.
Base = NewType("Base", "aurweb.models.declarative_base.Base")
def make_random_value(table: str, column: str):
......@@ -56,14 +61,85 @@ def make_random_value(table: str, column: str):
length = col.type.length
string = aurweb.util.make_random_string(length)
while session.query(table).filter(column == string).first():
while query(table).filter(column == string).first():
string = aurweb.util.make_random_string(length)
return string
def get_session():
def test_name() -> str:
    """
    Return the unhashed database name.

    The unhashed database name is determined (lower = higher priority) by:
    -------------------------------------------
    1. {test_suite} portion of PYTEST_CURRENT_TEST
    2. aurweb.config.get("database", "name")

    During `pytest` runs, the PYTEST_CURRENT_TEST environment variable
    is set to the current test in the format `{test_suite}::{test_func}`.
    This allows tests to use a suite-specific database for its runs,
    which decouples database state from test suites.

    The configured database name is only looked up when
    PYTEST_CURRENT_TEST is not set; a test run therefore never
    depends on (or fails because of) the configured name.

    :return: Unhashed database name
    """
    db = os.environ.get("PYTEST_CURRENT_TEST")
    if db is None:
        # Not running under pytest: fall back to the configured name.
        db = aurweb.config.get("database", "name")

    # Keep only the text before the first ':' (the test suite path
    # when the value came from PYTEST_CURRENT_TEST).
    return db.split(":")[0]
def name() -> str:
    """
    Return a sanitized database name usable for tests or production.

    When test_name() begins with "test/", the returned value is the
    string 'db' followed by the hexadecimal SHA-1 digest of that name.
    Any other value of test_name() is returned unchanged.

    :return: 'db' + SHA-1 hex digest for "test/" names, otherwise the
             unhashed name from test_name()
    """
    raw = test_name()
    if raw.startswith("test/"):
        digest = hashlib.sha1(raw.encode()).hexdigest()
        return "db" + digest
    return raw
# Module-private global memo used to store SQLAlchemy sessions.
_sessions = dict()
def get_session(engine: Engine = None) -> Session:
""" Return aurweb.db's global session. """
return session
dbname = name()
global _sessions
if dbname not in _sessions:
if not engine: # pragma: no cover
engine = get_engine()
Session = scoped_session(
sessionmaker(autocommit=True, autoflush=False, bind=engine))
_sessions[dbname] = Session()
# If this is the first grab of this session, log out the
# database name used.
raw_dbname = test_name()
logger.debug(f"DBName({raw_dbname}): {dbname}")
return _sessions.get(dbname)
def pop_session(dbname: str) -> None:
    """
    Remove the Session stored under `dbname` from the private
    _sessions memo.

    :param dbname: Database name
    :raises KeyError: When `dbname` does not exist in the memo
    """
    # Deleting by key raises KeyError for an unknown name.
    del _sessions[dbname]
def refresh(model: Base) -> Base:
......@@ -121,41 +197,40 @@ def add(model: Base) -> Base:
return model
def begin():
def begin() -> SessionTransaction:
    """
    Begin an SQLAlchemy SessionTransaction.

    :return: The value returned by begin() on the session obtained
             from get_session()
    """
    return get_session().begin()
def get_sqlalchemy_url():
def get_sqlalchemy_url() -> URL:
"""
Build an SQLAlchemy for use with create_engine based on the aurweb configuration.
"""
import sqlalchemy
Build an SQLAlchemy URL for use with create_engine.
constructor = sqlalchemy.engine.url.URL
:return: sqlalchemy.engine.url.URL
"""
constructor = URL
parts = sqlalchemy.__version__.split('.')
major = int(parts[0])
minor = int(parts[1])
if major == 1 and minor >= 4: # pragma: no cover
constructor = sqlalchemy.engine.url.URL.create
constructor = URL.create
aur_db_backend = aurweb.config.get('database', 'backend')
if aur_db_backend == 'mysql':
if aurweb.config.get_with_fallback('database', 'port', fallback=None):
port = aurweb.config.get('database', 'port')
param_query = None
else:
port = None
param_query = {
'unix_socket': aurweb.config.get('database', 'socket')
}
param_query = {}
port = aurweb.config.get_with_fallback("database", "port", None)
if not port:
param_query["unix_socket"] = aurweb.config.get(
"database", "socket")
return constructor(
'mysql+mysqldb',
DRIVERS.get(aur_db_backend),
username=aurweb.config.get('database', 'user'),
password=aurweb.config.get('database', 'password'),
password=aurweb.config.get_with_fallback('database', 'password',
fallback=None),
host=aurweb.config.get('database', 'host'),
database=aurweb.config.get('database', 'name'),
database=name(),
port=port,
query=param_query
)
......@@ -168,58 +243,83 @@ def get_sqlalchemy_url():
raise ValueError('unsupported database backend')
def get_engine(echo: bool = False):
def sqlite_regexp(regex, item) -> bool:  # pragma: no cover
    """
    Method which mimics SQL's REGEXP for SQLite.

    :param regex: Regular expression pattern to search for
    :param item: Value to search in; it is converted with str() first
    :return: True when `regex` matches anywhere in `item`, otherwise
             False. Always False when either argument is None, so a
             NULL value is never stringified to "None" and matched.
    """
    if regex is None or item is None:
        return False
    return bool(re.search(regex, str(item)))
def setup_sqlite(engine: Engine) -> None:  # pragma: no cover
    """
    Perform setup for an SQLite engine.

    Attaches a "connect" event listener to `engine` which registers
    sqlite_regexp as the two-argument, deterministic REGEXP function
    on the connection handed to the listener.

    :param engine: Engine to attach the listener to
    """

    @event.listens_for(engine, "connect")
    def do_begin(conn, record):
        conn.create_function("REGEXP", 2, sqlite_regexp,
                             deterministic=True)
# Module-private global memo used to store SQLAlchemy engines.
_engines = dict()
def get_engine(dbname: str = None, echo: bool = False) -> Engine:
"""
Return the global SQLAlchemy engine.
Return the SQLAlchemy engine for `dbname`.
The engine is created on the first call to get_engine and then stored in the
`engine` global variable for the next calls.
"""
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
global engine, session, Session
:param dbname: Database name (default: aurweb.db.name())
:param echo: Flag passed through to sqlalchemy.create_engine
:return: SQLAlchemy Engine instance
"""
if not dbname:
dbname = name()
if engine is None:
global _engines
if dbname not in _engines:
db_backend = aurweb.config.get("database", "backend")
connect_args = dict()
db_backend = aurweb.config.get("database", "backend")
if db_backend == "sqlite":
# check_same_thread is for a SQLite technicality
# https://fastapi.tiangolo.com/tutorial/sql-databases/#note
is_sqlite = bool(db_backend == "sqlite")
if is_sqlite: # pragma: no cover
connect_args["check_same_thread"] = False
engine = create_engine(get_sqlalchemy_url(),
connect_args=connect_args,
echo=echo)
kwargs = {
"echo": echo,
"connect_args": connect_args
}
_engines[dbname] = create_engine(get_sqlalchemy_url(), **kwargs)
Session = scoped_session(
sessionmaker(autocommit=True, autoflush=False, bind=engine))
session = Session()
if is_sqlite: # pragma: no cover
setup_sqlite(_engines.get(dbname))
if db_backend == "sqlite":
# For SQLite, we need to add some custom functions as
# they are used in the reference graph method.
def regexp(regex, item):
return bool(re.search(regex, str(item)))
return _engines.get(dbname)
@event.listens_for(engine, "connect")
def do_begin(conn, record):
create_deterministic_function = functools.partial(
conn.create_function,
deterministic=True
)
create_deterministic_function("REGEXP", 2, regexp)
return engine
def pop_engine(dbname: str) -> None:
    """
    Remove the Engine stored under `dbname` from the private
    _engines memo.

    :param dbname: Database name
    :raises KeyError: When `dbname` does not exist in the memo
    """
    # Deleting by key raises KeyError for an unknown name.
    del _engines[dbname]
def kill_engine() -> None:
""" Close the current session and dispose of the engine. """
dbname = name()
def kill_engine():
global engine, Session, session
if engine:
session.close()
engine.dispose()
engine = Session = session = None
session = get_session()
session.close()
pop_session(dbname)
engine = get_engine()
engine.dispose()
pop_engine(dbname)
def connect():
......@@ -248,7 +348,9 @@ class ConnectionExecutor:
def paramstyle(self):
return self._paramstyle
def execute(self, query, params=()):
def execute(self, query, params=()): # pragma: no cover
# TODO: SQLite support has been removed in FastAPI. It remains
# here to maintain support for PHP until it is removed.
if self._paramstyle in ('format', 'pyformat'):
query = query.replace('%', '%%').replace('?', '%s')
elif self._paramstyle == 'qmark':
......@@ -278,16 +380,19 @@ class Connection:
if aur_db_backend == 'mysql':
import MySQLdb
aur_db_host = aurweb.config.get('database', 'host')
aur_db_name = aurweb.config.get('database', 'name')
aur_db_name = name()
aur_db_user = aurweb.config.get('database', 'user')
aur_db_pass = aurweb.config.get('database', 'password')
aur_db_pass = aurweb.config.get_with_fallback(
'database', 'password', str())
aur_db_socket = aurweb.config.get('database', 'socket')
self._conn = MySQLdb.connect(host=aur_db_host,
user=aur_db_user,
passwd=aur_db_pass,
db=aur_db_name,
unix_socket=aur_db_socket)
elif aur_db_backend == 'sqlite':
elif aur_db_backend == 'sqlite': # pragma: no cover
# TODO: SQLite support has been removed in FastAPI. It remains
# here to maintain support for PHP until it is removed.
import sqlite3
aur_db_name = aurweb.config.get('database', 'name')
self._conn = sqlite3.connect(aur_db_name)
......
......@@ -6,7 +6,7 @@ from fastapi.responses import HTMLResponse, RedirectResponse
import aurweb.config
from aurweb import cookies
from aurweb import cookies, db
from aurweb.auth import auth_required
from aurweb.l10n import get_translator_for_request
from aurweb.models import User
......@@ -45,9 +45,7 @@ async def login_post(request: Request,
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
detail=_("Bad Referer header."))
from aurweb.db import session
user = session.query(User).filter(User.Username == user).first()
user = db.query(User).filter(User.Username == user).first()
if not user:
return await login_template(request, next,
errors=["Bad username or password."])
......
......@@ -1014,12 +1014,12 @@ def pkgbase_disown_instance(request: Request, pkgbase: models.PackageBase):
models.PackageComaintainer.Priority.asc()
).limit(1).first()
if co:
with db.begin():
with db.begin():
if co:
pkgbase.Maintainer = co.User
db.delete(co)
else:
pkgbase.Maintainer = None
else:
pkgbase.Maintainer = None
notif.send()
......
......@@ -16,13 +16,13 @@ db_backend = aurweb.config.get("database", "backend")
@compiles(TINYINT, 'sqlite')
def compile_tinyint_sqlite(type_, compiler, **kw):
def compile_tinyint_sqlite(type_, compiler, **kw): # pragma: no cover
"""TINYINT is not supported on SQLite. Substitute it with INTEGER."""
return 'INTEGER'
@compiles(BIGINT, 'sqlite')
def compile_bigint_sqlite(type_, compiler, **kw):
def compile_bigint_sqlite(type_, compiler, **kw): # pragma: no cover
"""
For SQLite's AUTOINCREMENT to work on BIGINT columns, we need to map BIGINT
to INTEGER. Aside from that, BIGINT is the same as INTEGER for SQLite.
......
from itertools import chain
import aurweb.db
def references_graph(table):
    """
    Return the names produced by the `tables` query for `table`.

    Taken from Django's sqlite3/operations.py. The query starts from
    `table` and adds every sqlite_master entry whose SQL matches a
    REFERENCES pattern built around a name already collected.

    :param table: Name of the starting table
    :return: List containing the first column of every resulting row
    """
    sql = """
WITH tables AS (
SELECT :table name
UNION
SELECT sqlite_master.name
FROM sqlite_master
JOIN tables ON (sql REGEXP :regexp_1 || tables.name || :regexp_2)
) SELECT name FROM tables;
"""
    bindings = {
        "table": table,
        "regexp_1": r'(?i)\s+references\s+("|\')?',
        "regexp_2": r'("|\')?\s*\(',
    }
    result = aurweb.db.get_session().execute(sql, params=bindings)

    names = []
    for row in result.fetchall():
        names.append(row[0])
    return names
from aurweb import models
def setup_test_db(*args):
......@@ -47,22 +27,38 @@ def setup_test_db(*args):
aurweb.db.get_engine()
tables = list(args)
if not tables:
tables = [
models.AcceptedTerm.__tablename__,
models.ApiRateLimit.__tablename__,
models.Ban.__tablename__,
models.Group.__tablename__,
models.License.__tablename__,
models.OfficialProvider.__tablename__,
models.Package.__tablename__,
models.PackageBase.__tablename__,
models.PackageBlacklist.__tablename__,
models.PackageComaintainer.__tablename__,
models.PackageComment.__tablename__,
models.PackageDependency.__tablename__,
models.PackageGroup.__tablename__,
models.PackageKeyword.__tablename__,
models.PackageLicense.__tablename__,
models.PackageNotification.__tablename__,
models.PackageRelation.__tablename__,
models.PackageRequest.__tablename__,
models.PackageSource.__tablename__,
models.PackageVote.__tablename__,
models.Session.__tablename__,
models.SSHPubKey.__tablename__,
models.Term.__tablename__,
models.TUVote.__tablename__,
models.TUVoteInfo.__tablename__,
models.User.__tablename__,
]
db_backend = aurweb.config.get("database", "backend")
if db_backend != "sqlite": # pragma: no cover
aurweb.db.get_session().execute("SET FOREIGN_KEY_CHECKS = 0")
else:
# We're using sqlite, setup tables to be deleted without violating
# foreign key constraints by graphing references.
tables = set(chain.from_iterable(
references_graph(table) for table in tables))
aurweb.db.get_session().execute("SET FOREIGN_KEY_CHECKS = 0")
for table in tables:
aurweb.db.get_session().execute(f"DELETE FROM {table}")
if db_backend != "sqlite": # pragma: no cover
aurweb.db.get_session().execute("SET FOREIGN_KEY_CHECKS = 1")
# Expunge all objects from SQLAlchemy's IdentityMap.
aurweb.db.get_session().execute("SET FOREIGN_KEY_CHECKS = 1")
aurweb.db.get_session().expunge_all()
......@@ -41,8 +41,8 @@ def run_migrations_offline():
script output.
"""
db_name = aurweb.config.get("database", "name")
logging.info(f"Performing offline migration on database '{db_name}'.")
dbname = aurweb.db.name()
logging.info(f"Performing offline migration on database '{dbname}'.")
context.configure(
url=aurweb.db.get_sqlalchemy_url(),
target_metadata=target_metadata,
......@@ -61,8 +61,8 @@ def run_migrations_online():
and associate a connection with the context.
"""
db_name = aurweb.config.get("database", "name")
logging.info(f"Performing online migration on database '{db_name}'.")
dbname = aurweb.db.name()
logging.info(f"Performing online migration on database '{dbname}'.")
connectable = sqlalchemy.create_engine(
aurweb.db.get_sqlalchemy_url(),
poolclass=sqlalchemy.pool.NullPool,
......
......@@ -8,3 +8,9 @@
# https://bugs.python.org/issue45097
filterwarnings =
ignore::DeprecationWarning:asyncio.base_events
# Build in coverage and pytest-xdist multiproc testing.
addopts = --cov=aurweb --cov-append --dist load --dist loadfile -n auto
# Our pytest units are located in the ./test/ directory.
testpaths = test
"""
pytest configuration.
The conftest.py file is used to define pytest-global fixtures
or actions run before tests.
Module scoped fixtures:
----------------------
- setup_database
- db_session (depends: setup_database)
Function scoped fixtures:
------------------------
- db_test (depends: db_session)
Tests in aurweb which access the database **must** use the `db_test`
function fixture. Most database tests simply require this fixture in
an autouse=True setup fixture, or in the fixtures used by DB tests. For example:
# In scenarios in which there are no other database fixtures,
# or other database fixtures' dependency paths don't always
# hit `db_test`.
@pytest.fixture(autouse=True)
def setup(db_test):
return
# In scenarios where we can embed the `db_test` fixture in
# specific fixtures that already exist.
@pytest.fixture
def user(db_test):
with db.begin():
user = db.create(User, ...)
yield user
The `db_test` fixture triggers our module-level database fixtures,
then clears the database for each test function run in that module.
It is done this way because migration has a large cost; migrating
ahead of each function takes too long when compared to this method.
"""
import pytest
from filelock import FileLock
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.engine.base import Engine
from sqlalchemy.orm import scoped_session
import aurweb.config
import aurweb.db
from aurweb import initdb, logging, testing
logger = logging.get_logger(__name__)
def test_engine() -> Engine:
    """
    Return a privileged SQLAlchemy engine with no database.

    This method is particularly useful for providing an engine that
    can be used to create and drop databases from an SQL server.

    The `unix_socket` query parameter is only added to the URL when
    a socket is configured; an unset socket is left out instead of
    being passed along as None.

    :return: SQLAlchemy Engine instance (not connected to a database)
    """
    unix_socket = aurweb.config.get_with_fallback("database", "socket", None)
    kwargs = {
        "username": aurweb.config.get("database", "user"),
        "password": aurweb.config.get_with_fallback(
            "database", "password", None),
        "host": aurweb.config.get("database", "host"),
        "port": aurweb.config.get_with_fallback("database", "port", None),
    }

    # URL query values must be strings (or sequences of strings), so
    # only include the socket when one has actually been configured.
    if unix_socket is not None:
        kwargs["query"] = {"unix_socket": unix_socket}

    backend = aurweb.config.get("database", "backend")
    driver = aurweb.db.DRIVERS.get(backend)
    return create_engine(URL.create(driver, **kwargs))
class AlembicArgs:
    """
    Masquerade an ArgumentParser like structure.

    This structure is needed to pass conftest-specific arguments
    to initdb.run during database creation.
    """
    # NOTE(review): presumably read by initdb.run as its verbosity
    # flag -- confirm against aurweb.initdb.
    verbose = False
    # NOTE(review): presumably tells initdb.run to use alembic when
    # creating the schema -- confirm against aurweb.initdb.
    use_alembic = True
def _create_database(engine: Engine, dbname: str) -> None:
"""
Create a test database.
:param engine: Engine returned by test_engine()
:param dbname: Database name to create
"""
conn = engine.connect()
conn.execute(f"CREATE DATABASE {dbname}")
conn.close()
initdb.run(AlembicArgs)