Verified Commit 6f8cf758 authored by David Runge's avatar David Runge
Browse files

Change repo_management to be async

repo_management/*:
Remove the obsolete
`convert._transform_package_desc_to_output_package()` (the functionality
is now covered by pydantic models directly).
Change all relevant methods to be async.
Change file open commands to make use of aiofiles.

tests/*:
Remove tests for the obsolete
`convert._transform_package_desc_to_output_package()`.
Change all tests for async methods to test using pytest-asyncio.
parent ec78c4b3
Pipeline #6346 failed with stage
in 6 minutes and 40 seconds
import asyncio
from argparse import ArgumentTypeError from argparse import ArgumentTypeError
from sys import exit from sys import exit
...@@ -13,9 +14,11 @@ def db2json() -> None: ...@@ -13,9 +14,11 @@ def db2json() -> None:
try: try:
args = argparse.ArgParseFactory.db2json().parse_args() args = argparse.ArgParseFactory.db2json().parse_args()
operations.dump_db_to_json_files( asyncio.run(
input_path=args.db_file, operations.dump_db_to_json_files(
output_path=args.output_dir, input_path=args.db_file,
output_path=args.output_dir,
)
) )
except (errors.RepoManagementError, ArgumentTypeError) as e: except (errors.RepoManagementError, ArgumentTypeError) as e:
print(e) print(e)
...@@ -31,10 +34,12 @@ def json2db() -> None: ...@@ -31,10 +34,12 @@ def json2db() -> None:
try: try:
args = argparse.ArgParseFactory.json2db().parse_args() args = argparse.ArgParseFactory.json2db().parse_args()
operations.create_db_from_json_files( asyncio.run(
input_path=args.input_dir, operations.create_db_from_json_files(
output_path=args.db_file, input_path=args.input_dir,
db_type=defaults.RepoDbType.FILES if args.files else defaults.RepoDbType.DEFAULT, output_path=args.db_file,
db_type=defaults.RepoDbType.FILES if args.files else defaults.RepoDbType.DEFAULT,
)
) )
except (errors.RepoManagementError, ArgumentTypeError) as e: except (errors.RepoManagementError, ArgumentTypeError) as e:
print(e) print(e)
......
import io import io
from typing import Dict, List, Optional, Union from typing import Dict, List, Union
from jinja2 import Environment, PackageLoader from jinja2 import Environment, PackageLoader
from pydantic.error_wrappers import ValidationError from pydantic.error_wrappers import ValidationError
...@@ -7,7 +7,7 @@ from pydantic.error_wrappers import ValidationError ...@@ -7,7 +7,7 @@ from pydantic.error_wrappers import ValidationError
from repo_management import defaults, errors, models from repo_management import defaults, errors, models
def _files_data_to_model(data: io.StringIO) -> models.Files: async def _files_data_to_model(data: io.StringIO) -> models.Files:
"""Read the contents of a 'files' file (represented as an instance of """Read the contents of a 'files' file (represented as an instance of
io.StringIO) and convert it to a pydantic model io.StringIO) and convert it to a pydantic model
...@@ -46,7 +46,7 @@ def _files_data_to_model(data: io.StringIO) -> models.Files: ...@@ -46,7 +46,7 @@ def _files_data_to_model(data: io.StringIO) -> models.Files:
return models.Files(**output) return models.Files(**output)
def _desc_data_line_to_dicts( async def _desc_data_line_to_dicts(
current_header: str, current_header: str,
current_type: defaults.FieldType, current_type: defaults.FieldType,
line: str, line: str,
...@@ -88,7 +88,7 @@ def _desc_data_line_to_dicts( ...@@ -88,7 +88,7 @@ def _desc_data_line_to_dicts(
int_types[current_header] = int(line) int_types[current_header] = int(line)
def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc: async def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc:
"""Read the contents of a 'desc' file (represented as an instance of io.StringIO) and convert it to a pydantic model """Read the contents of a 'desc' file (represented as an instance of io.StringIO) and convert it to a pydantic model
Parameters Parameters
...@@ -126,7 +126,7 @@ def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc: ...@@ -126,7 +126,7 @@ def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc:
if current_header: if current_header:
try: try:
_desc_data_line_to_dicts( await _desc_data_line_to_dicts(
current_header=current_header, current_header=current_header,
current_type=current_type, current_type=current_type,
line=line, line=line,
...@@ -148,37 +148,6 @@ def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc: ...@@ -148,37 +148,6 @@ def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc:
) )
def _transform_package_desc_to_output_package(
desc: models.PackageDesc, files: Optional[models.Files]
) -> models.OutputPackage:
"""Transform a PackageDesc model and an accompanying Files model to an OutputPackage model
Parameters
----------
desc: models.PackageDesc
A pydantic model, that has all required attributes (apart from the list of files) to create an OutputPackage
model
files: models.Files:
A pydantic model, that represents the list of files, that belong to the package described by desc
Returns
-------
models.OutputPackage
A pydantic model, that describes a package and its list of files
"""
desc_dict = desc.dict()
# remove attributes, that are represented on the pkgbase level
for name in ["base", "makedepends", "packager", "version"]:
if desc_dict.get(name):
del desc_dict[name]
if files:
return models.OutputPackage(**desc_dict, **files.dict())
else:
return models.OutputPackage(**desc_dict)
class RepoDbFile: class RepoDbFile:
"""A class for handling templates for files used in repository database files (such as 'desc' or 'files') """A class for handling templates for files used in repository database files (such as 'desc' or 'files')
...@@ -189,7 +158,7 @@ class RepoDbFile: ...@@ -189,7 +158,7 @@ class RepoDbFile:
""" """
def __init__(self, enable_async: bool = False) -> None: def __init__(self, enable_async: bool = True) -> None:
"""Initialize an instance of RepDbFile """Initialize an instance of RepDbFile
Parameters Parameters
...@@ -205,7 +174,7 @@ class RepoDbFile: ...@@ -205,7 +174,7 @@ class RepoDbFile:
enable_async=enable_async, enable_async=enable_async,
) )
def render_desc_template(self, model: models.PackageDesc, output: io.StringIO) -> None: async def render_desc_template(self, model: models.PackageDesc, output: io.StringIO) -> None:
"""Use the 'desc' template to write a string to an output stream based on a model """Use the 'desc' template to write a string to an output stream based on a model
Parameters Parameters
...@@ -217,9 +186,9 @@ class RepoDbFile: ...@@ -217,9 +186,9 @@ class RepoDbFile:
""" """
template = self.env.get_template("desc.j2") template = self.env.get_template("desc.j2")
output.write(template.render(model.dict())) output.write(await template.render_async(model.dict()))
def render_files_template(self, model: models.Files, output: io.StringIO) -> None: async def render_files_template(self, model: models.Files, output: io.StringIO) -> None:
"""Use the 'files' template to write a string to an output stream based on a model """Use the 'files' template to write a string to an output stream based on a model
Parameters Parameters
...@@ -231,4 +200,4 @@ class RepoDbFile: ...@@ -231,4 +200,4 @@ class RepoDbFile:
""" """
template = self.env.get_template("files.j2") template = self.env.get_template("files.j2")
output.write(template.render(model.dict())) output.write(await template.render_async(model.dict()))
...@@ -3,15 +3,16 @@ import re ...@@ -3,15 +3,16 @@ import re
import tarfile import tarfile
import time import time
from pathlib import Path from pathlib import Path
from typing import Iterator from typing import AsyncIterator
import aiofiles
import orjson import orjson
from pydantic.error_wrappers import ValidationError from pydantic.error_wrappers import ValidationError
from repo_management import convert, defaults, errors, models from repo_management import convert, defaults, errors, models
def _read_db_file(db_path: Path, compression: str = "gz") -> tarfile.TarFile: async def _read_db_file(db_path: Path, compression: str = "gz") -> tarfile.TarFile:
"""Read a repository database file """Read a repository database file
Parameters Parameters
...@@ -39,7 +40,7 @@ def _read_db_file(db_path: Path, compression: str = "gz") -> tarfile.TarFile: ...@@ -39,7 +40,7 @@ def _read_db_file(db_path: Path, compression: str = "gz") -> tarfile.TarFile:
return tarfile.open(name=db_path, mode=f"r:{compression}") return tarfile.open(name=db_path, mode=f"r:{compression}")
def _extract_db_member_package_name(name: str) -> str: async def _extract_db_member_package_name(name: str) -> str:
"""Extract and return the package name from a repository database member name """Extract and return the package name from a repository database member name
Parameters Parameters
...@@ -55,9 +56,9 @@ def _extract_db_member_package_name(name: str) -> str: ...@@ -55,9 +56,9 @@ def _extract_db_member_package_name(name: str) -> str:
return "".join(re.split("(-)", re.sub("(/desc|/files)$", "", name))[:-4]) return "".join(re.split("(-)", re.sub("(/desc|/files)$", "", name))[:-4])
def _db_file_member_as_model( async def _db_file_member_as_model(
db_file: tarfile.TarFile, regex: str = "(/desc|/files)$" db_file: tarfile.TarFile, regex: str = "(/desc|/files)$"
) -> Iterator[models.RepoDbMemberData]: ) -> AsyncIterator[models.RepoDbMemberData]:
"""Iterate over the members of a database file, represented by an instance of tarfile.TarFile and yield the members """Iterate over the members of a database file, represented by an instance of tarfile.TarFile and yield the members
as instances of models.RepoDbMemberData as instances of models.RepoDbMemberData
...@@ -82,7 +83,7 @@ def _db_file_member_as_model( ...@@ -82,7 +83,7 @@ def _db_file_member_as_model(
yield models.RepoDbMemberData( yield models.RepoDbMemberData(
member_type=file_type, member_type=file_type,
name=_extract_db_member_package_name(name=name), name=await _extract_db_member_package_name(name=name),
data=io.StringIO( data=io.StringIO(
io.BytesIO( io.BytesIO(
db_file.extractfile(name).read(), # type: ignore db_file.extractfile(name).read(), # type: ignore
...@@ -93,7 +94,7 @@ def _db_file_member_as_model( ...@@ -93,7 +94,7 @@ def _db_file_member_as_model(
) )
def _json_files_in_directory(path: Path) -> Iterator[Path]: async def _json_files_in_directory(path: Path) -> AsyncIterator[Path]:
"""Yield JSON files found in a directory """Yield JSON files found in a directory
Parameters Parameters
...@@ -108,7 +109,7 @@ def _json_files_in_directory(path: Path) -> Iterator[Path]: ...@@ -108,7 +109,7 @@ def _json_files_in_directory(path: Path) -> Iterator[Path]:
Returns Returns
------- -------
Iterator[Path] AsyncIterator[Path]
An iterator over the files found in the directory defined by path An iterator over the files found in the directory defined by path
""" """
...@@ -120,7 +121,7 @@ def _json_files_in_directory(path: Path) -> Iterator[Path]: ...@@ -120,7 +121,7 @@ def _json_files_in_directory(path: Path) -> Iterator[Path]:
yield json_file yield json_file
def _read_pkgbase_json_file(path: Path) -> models.OutputPackageBase: async def _read_pkgbase_json_file(path: Path) -> models.OutputPackageBase:
"""Read a JSON file that represents a pkgbase and return it as models.OutputPackageBase """Read a JSON file that represents a pkgbase and return it as models.OutputPackageBase
Parameters Parameters
...@@ -141,16 +142,16 @@ def _read_pkgbase_json_file(path: Path) -> models.OutputPackageBase: ...@@ -141,16 +142,16 @@ def _read_pkgbase_json_file(path: Path) -> models.OutputPackageBase:
A pydantic model representing a pkgbase A pydantic model representing a pkgbase
""" """
with open(path, "r") as input_file: async with aiofiles.open(path, "r") as input_file:
try: try:
return models.OutputPackageBase(**orjson.loads(input_file.read())) return models.OutputPackageBase(**orjson.loads(await input_file.read()))
except orjson.JSONDecodeError as e: except orjson.JSONDecodeError as e:
raise errors.RepoManagementFileError(f"The JSON file '{path}' could not be decoded!\n{e}") raise errors.RepoManagementFileError(f"The JSON file '{path}' could not be decoded!\n{e}")
except ValidationError as e: except ValidationError as e:
raise errors.RepoManagementValidationError(f"The JSON file '{path}' could not be validated!\n{e}") raise errors.RepoManagementValidationError(f"The JSON file '{path}' could not be validated!\n{e}")
def _write_db_file(path: Path, compression: str = "gz") -> tarfile.TarFile: async def _write_db_file(path: Path, compression: str = "gz") -> tarfile.TarFile:
"""Open a repository database file for writing """Open a repository database file for writing
Parameters Parameters
...@@ -178,7 +179,7 @@ def _write_db_file(path: Path, compression: str = "gz") -> tarfile.TarFile: ...@@ -178,7 +179,7 @@ def _write_db_file(path: Path, compression: str = "gz") -> tarfile.TarFile:
return tarfile.open(name=path, mode=f"w:{compression}") return tarfile.open(name=path, mode=f"w:{compression}")
def _stream_package_base_to_db( async def _stream_package_base_to_db(
db: tarfile.TarFile, db: tarfile.TarFile,
model: models.OutputPackageBase, model: models.OutputPackageBase,
repodbfile: convert.RepoDbFile, repodbfile: convert.RepoDbFile,
...@@ -198,7 +199,7 @@ def _stream_package_base_to_db( ...@@ -198,7 +199,7 @@ def _stream_package_base_to_db(
The type of database to stream to The type of database to stream to
""" """
for (desc_model, files_model) in model.get_packages_as_models(): for (desc_model, files_model) in await model.get_packages_as_models():
dirname = f"{desc_model.name}-{model.version}" dirname = f"{desc_model.name}-{model.version}"
directory = tarfile.TarInfo(dirname) directory = tarfile.TarInfo(dirname)
directory.type = tarfile.DIRTYPE directory.type = tarfile.DIRTYPE
...@@ -209,7 +210,7 @@ def _stream_package_base_to_db( ...@@ -209,7 +210,7 @@ def _stream_package_base_to_db(
db.addfile(directory) db.addfile(directory)
desc_content = io.StringIO() desc_content = io.StringIO()
repodbfile.render_desc_template(model=desc_model, output=desc_content) await repodbfile.render_desc_template(model=desc_model, output=desc_content)
desc_file = tarfile.TarInfo(f"{dirname}/desc") desc_file = tarfile.TarInfo(f"{dirname}/desc")
desc_file.size = len(desc_content.getvalue().encode()) desc_file.size = len(desc_content.getvalue().encode())
desc_file.mtime = int(time.time()) desc_file.mtime = int(time.time())
...@@ -219,7 +220,7 @@ def _stream_package_base_to_db( ...@@ -219,7 +220,7 @@ def _stream_package_base_to_db(
db.addfile(desc_file, io.BytesIO(desc_content.getvalue().encode())) db.addfile(desc_file, io.BytesIO(desc_content.getvalue().encode()))
if db_type == defaults.RepoDbType.FILES: if db_type == defaults.RepoDbType.FILES:
files_content = io.StringIO() files_content = io.StringIO()
repodbfile.render_files_template(model=files_model, output=files_content) await repodbfile.render_files_template(model=files_model, output=files_content)
files_file = tarfile.TarInfo(f"{dirname}/files") files_file = tarfile.TarInfo(f"{dirname}/files")
files_file.size = len(files_content.getvalue().encode()) files_file.size = len(files_content.getvalue().encode())
files_file.mtime = int(time.time()) files_file.mtime = int(time.time())
......
...@@ -717,7 +717,7 @@ class OutputPackageBase( ...@@ -717,7 +717,7 @@ class OutputPackageBase(
packages: List[OutputPackage] packages: List[OutputPackage]
def get_packages_as_models(self) -> List[Tuple[PackageDesc, Files]]: async def get_packages_as_models(self) -> List[Tuple[PackageDesc, Files]]:
"""Return the list of packages as tuples of PackageDesc and Files models """Return the list of packages as tuples of PackageDesc and Files models
Returns Returns
......
from os.path import join from os.path import join
from pathlib import Path from pathlib import Path
from typing import Dict, Iterator, Tuple from typing import AsyncIterator, Dict, Tuple
import aiofiles
import orjson import orjson
from repo_management import convert, defaults, files, models from repo_management import convert, defaults, files, models
def db_file_as_models(db_path: Path, compression: str = "gz") -> Iterator[Tuple[str, models.OutputPackageBase]]: async def db_file_as_models(
db_path: Path, compression: str = "gz"
) -> AsyncIterator[Tuple[str, models.OutputPackageBase]]:
"""Read a repository database and yield the name of each pkgbase and the respective data (represented as an instance """Read a repository database and yield the name of each pkgbase and the respective data (represented as an instance
of models.OutputPackageBase) in a Tuple. of models.OutputPackageBase) in a Tuple.
...@@ -28,11 +31,13 @@ def db_file_as_models(db_path: Path, compression: str = "gz") -> Iterator[Tuple[ ...@@ -28,11 +31,13 @@ def db_file_as_models(db_path: Path, compression: str = "gz") -> Iterator[Tuple[
packages: Dict[str, models.OutputPackageBase] = {} packages: Dict[str, models.OutputPackageBase] = {}
package_descs: Dict[str, models.PackageDesc] = {} package_descs: Dict[str, models.PackageDesc] = {}
package_files: Dict[str, models.Files] = {} package_files: Dict[str, models.Files] = {}
for member in files._db_file_member_as_model(db_file=files._read_db_file(db_path=db_path, compression=compression)): async for member in files._db_file_member_as_model(
db_file=await files._read_db_file(db_path=db_path, compression=compression)
):
if member.member_type == defaults.RepoDbMemberType.DESC: if member.member_type == defaults.RepoDbMemberType.DESC:
package_descs.update({member.name: convert._desc_data_to_model(member.data)}) package_descs.update({member.name: await convert._desc_data_to_model(member.data)})
if member.member_type == defaults.RepoDbMemberType.FILES: if member.member_type == defaults.RepoDbMemberType.FILES:
package_files.update({member.name: convert._files_data_to_model(member.data)}) package_files.update({member.name: await convert._files_data_to_model(member.data)})
for (name, package_desc) in package_descs.items(): for (name, package_desc) in package_descs.items():
if packages.get(package_desc.base): if packages.get(package_desc.base):
...@@ -54,7 +59,7 @@ def db_file_as_models(db_path: Path, compression: str = "gz") -> Iterator[Tuple[ ...@@ -54,7 +59,7 @@ def db_file_as_models(db_path: Path, compression: str = "gz") -> Iterator[Tuple[
yield (name, package) yield (name, package)
def dump_db_to_json_files(input_path: Path, output_path: Path) -> None: async def dump_db_to_json_files(input_path: Path, output_path: Path) -> None:
"""Read a repository database file and dump each pkgbase contained in it to a separate JSON file below a defined """Read a repository database file and dump each pkgbase contained in it to a separate JSON file below a defined
output directory output directory
...@@ -66,16 +71,16 @@ def dump_db_to_json_files(input_path: Path, output_path: Path) -> None: ...@@ -66,16 +71,16 @@ def dump_db_to_json_files(input_path: Path, output_path: Path) -> None:
A directory in which to A directory in which to
""" """
for name, model in db_file_as_models(db_path=input_path): async for name, model in db_file_as_models(db_path=input_path):
with open(join(output_path, f"{name}.json"), "wb") as output_file: async with aiofiles.open(join(output_path, f"{name}.json"), "wb") as output_file:
output_file.write( await output_file.write(
orjson.dumps( orjson.dumps(
model.dict(), option=orjson.OPT_INDENT_2 | orjson.OPT_APPEND_NEWLINE | orjson.OPT_SORT_KEYS model.dict(), option=orjson.OPT_INDENT_2 | orjson.OPT_APPEND_NEWLINE | orjson.OPT_SORT_KEYS
) )
) )
def create_db_from_json_files( async def create_db_from_json_files(
input_path: Path, output_path: Path, db_type: defaults.RepoDbType = defaults.RepoDbType.DEFAULT input_path: Path, output_path: Path, db_type: defaults.RepoDbType = defaults.RepoDbType.DEFAULT
) -> None: ) -> None:
"""Create a repository database from a list of JSON files found in a directory """Create a repository database from a list of JSON files found in a directory
...@@ -93,10 +98,10 @@ def create_db_from_json_files( ...@@ -93,10 +98,10 @@ def create_db_from_json_files(
""" """
repodbfile = convert.RepoDbFile() repodbfile = convert.RepoDbFile()
database = files._write_db_file(path=output_path) database = await files._write_db_file(path=output_path)
for path in files._json_files_in_directory(path=input_path): async for path in files._json_files_in_directory(path=input_path):
model = files._read_pkgbase_json_file(path) model = await files._read_pkgbase_json_file(path)
files._stream_package_base_to_db( await files._stream_package_base_to_db(
db=database, db=database,
model=model, model=model,
repodbfile=repodbfile, repodbfile=repodbfile,
......
...@@ -21,6 +21,7 @@ def test__print_env(env: Optional[Dict[str, str]]) -> None: ...@@ -21,6 +21,7 @@ def test__print_env(env: Optional[Dict[str, str]]) -> None:
(["cd", "-f"], {"FOO": "BAR"}, True, False, False, True, None, raises(CalledProcessError)), (["cd", "-f"], {"FOO": "BAR"}, True, False, False, True, None, raises(CalledProcessError)),
], ],
) )
@mark.asyncio
def test_run_command( def test_run_command(
cmd: Union[str, List[str]], cmd: Union[str, List[str]],
env: Optional[Dict[str, str]], env: Optional[Dict[str, str]],
......
...@@ -20,12 +20,13 @@ RESOURCES = join(dirname(realpath(__file__)), "resources") ...@@ -20,12 +20,13 @@ RESOURCES = join(dirname(realpath(__file__)), "resources")
("usr/%FILES%\nusr/lib/\n", raises(RuntimeError)), ("usr/%FILES%\nusr/lib/\n", raises(RuntimeError)),
], ],
) )
def test__files_data_to_dict( @mark.asyncio
async def test__files_data_to_model(
file_data: str, file_data: str,
expectation: ContextManager[str], expectation: ContextManager[str],
) -> None: ) -> None:
with expectation: with expectation:
assert convert._files_data_to_model(data=io.StringIO(file_data)) assert await convert._files_data_to_model(data=io.StringIO(file_data))
@mark.parametrize( @mark.parametrize(
...@@ -99,81 +100,26 @@ def test__files_data_to_dict( ...@@ -99,81 +100,26 @@ def test__files_data_to_dict(
), ),
], ],
) )
def test__desc_data_to_dict( @mark.asyncio
async def test__desc_data_to_model(
file_data: str, file_data: str,
expectation: ContextManager[str], expectation: ContextManager[str],
) -> None: ) -> None:
with expectation: with expectation:
assert convert._desc_data_to_model(data=io.StringIO(file_data)) assert await convert._desc_data_to_model(data=io.StringIO(file_data))
@mark.parametrize(
"desc, files",
[
(
models.PackageDesc(
arch="foo",
base="foo",
builddate=1,
csize=1,
desc="foo",
filename="foo",
isize=1,
license=["foo"],
md5sum="foo",
name="foo",
packager="foo",
pgpsig="foo",
sha256sum="foo",
url="foo",
version="foo",
),
models.Files(files=["foo", "bar"]),
),
(
models.PackageDesc(
arch="foo",
base="foo",