Commit bb999eab authored by David Runge
Browse files

Merge branch 'issues/4' into 'master'

Add functionality to write a database file from a set of JSON files

Closes #4

See merge request !4
parents cf332096 c51b92b9
Pipeline #5833 passed with stage
in 45 seconds
......@@ -127,6 +127,28 @@ pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
requirements_deprecated_finder = ["pipreqs", "pip-api"]
colors = ["colorama (>=0.4.3,<0.5.0)"]
[[package]]
name = "jinja2"
version = "2.11.3"
description = "A very fast and expressive template engine."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.dependencies]
MarkupSafe = ">=0.23"
[package.extras]
i18n = ["Babel (>=0.8)"]
[[package]]
name = "markupsafe"
version = "1.1.1"
description = "Safely add untrusted strings to HTML/XML markup."
category = "main"
optional = false
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*"
[[package]]
name = "mccabe"
version = "0.6.1"
......@@ -369,7 +391,7 @@ testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)",
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "78aa3be6f0cc30958dc2a4bc9235071a90e2045667c5079431ce066e9a486b5f"
content-hash = "31bb6f8018746cb2480f660aa14f53948ccc3a3505642533a576f5c05ff09aaa"
[metadata.files]
appdirs = [
......@@ -469,6 +491,45 @@ isort = [
{file = "isort-5.7.0-py3-none-any.whl", hash = "sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc"},
{file = "isort-5.7.0.tar.gz", hash = "sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e"},
]
jinja2 = [
{file = "Jinja2-2.11.3-py2.py3-none-any.whl", hash = "sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419"},
{file = "Jinja2-2.11.3.tar.gz", hash = "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6"},
]
markupsafe = [
{file = "MarkupSafe-1.1.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161"},
{file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"},
{file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183"},
{file = "MarkupSafe-1.1.1-cp27-cp27m-win32.whl", hash = "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b"},
{file = "MarkupSafe-1.1.1-cp27-cp27m-win_amd64.whl", hash = "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e"},
{file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f"},
{file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1"},
{file = "MarkupSafe-1.1.1-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5"},
{file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1"},
{file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735"},
{file = "MarkupSafe-1.1.1-cp34-cp34m-win32.whl", hash = "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21"},
{file = "MarkupSafe-1.1.1-cp34-cp34m-win_amd64.whl", hash = "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235"},
{file = "MarkupSafe-1.1.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b"},
{file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f"},
{file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905"},
{file = "MarkupSafe-1.1.1-cp35-cp35m-win32.whl", hash = "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1"},
{file = "MarkupSafe-1.1.1-cp35-cp35m-win_amd64.whl", hash = "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d"},
{file = "MarkupSafe-1.1.1-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff"},
{file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473"},
{file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e"},
{file = "MarkupSafe-1.1.1-cp36-cp36m-win32.whl", hash = "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66"},
{file = "MarkupSafe-1.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5"},
{file = "MarkupSafe-1.1.1-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d"},
{file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e"},
{file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6"},
{file = "MarkupSafe-1.1.1-cp37-cp37m-win32.whl", hash = "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2"},
{file = "MarkupSafe-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c"},
{file = "MarkupSafe-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15"},
{file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2"},
{file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42"},
{file = "MarkupSafe-1.1.1-cp38-cp38-win32.whl", hash = "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b"},
{file = "MarkupSafe-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"},
{file = "MarkupSafe-1.1.1.tar.gz", hash = "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b"},
]
mccabe = [
{file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
{file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
......
......@@ -14,6 +14,7 @@ python = "^3.9"
pyalpm = "^0.9.2"
pydantic = "^1.8.1"
orjson = "^3.5.1"
Jinja2 = "^2.11.3"
[tool.poetry.dev-dependencies]
tox = "^3.23.0"
......@@ -27,6 +28,7 @@ mock = "^4.0.3"
[tool.poetry.scripts]
db2json = "repo_management.cli:db2json"
json2db = "repo_management.cli:json2db"
[tool.black]
line-length = 120
......
import argparse
import os
from pathlib import Path
......@@ -46,6 +47,78 @@ class ArgParseFactory:
return instance.parser
@classmethod
def json2db(self) -> argparse.ArgumentParser:
    """Create an ArgumentParser for the json2db script

    Returns
    -------
    argparse.ArgumentParser
        An ArgumentParser instance specific for the json2db script
    """
    instance = self(
        description="Read a set of JSON files from a directory and create a repository database from them."
    )
    instance.parser.add_argument(
        "-f",
        "--files",
        action="store_true",
        help="create a .files database instead of a .db database",
    )
    instance.parser.add_argument(
        "input_dir",
        # nargs="?" makes the positional optional so that the documented
        # default of "." can actually apply; without it argparse treats the
        # positional as required and ignores `default` entirely.
        nargs="?",
        type=self.string_to_dir_path,
        default=".",
        help="the directory from which to read the JSON files (defaults to current directory)",
    )
    instance.parser.add_argument(
        "db_file",
        type=self.string_to_writable_file_path,
        help="the repository database to write to (the parent directory needs to exist)",
    )
    return instance.parser
@classmethod
def string_to_writable_file_path(self, input_: str) -> Path:
    """Convert an input string into a Path pointing at a writable file location

    An existing path must be a writable regular file. A non-existing path must have an
    existing, writable parent directory.

    Parameters
    ----------
    input_: str
        A string that is used to create a Path

    Raises
    ------
    argparse.ArgumentTypeError:
        If the path exists but is not a file or is not writable, or if the path does not
        exist and its parent directory is missing, not a directory, or not writable

    Returns
    -------
    Path
        A Path instance created from input_
    """
    candidate = Path(input_)

    if candidate.exists():
        # existing target: must be a writable regular file
        if not candidate.is_file():
            raise argparse.ArgumentTypeError(f"not a file: '{input_}'")
        if not os.access(candidate, os.W_OK):
            raise argparse.ArgumentTypeError(f"the file '{input_}' is not writable")
        return candidate

    # non-existing target: validate the parent directory instead
    parent = candidate.parent
    if not parent.exists():
        raise argparse.ArgumentTypeError(f"the parent directory of '{input_}' does not exist")
    if not parent.is_dir():
        raise argparse.ArgumentTypeError(f"parent is not a directory: '{input_}'")
    if not os.access(parent, os.W_OK):
        raise argparse.ArgumentTypeError(f"the parent directory of '{input_}' is not writable")
    return candidate
@classmethod
def string_to_file_path(self, input_: str) -> Path:
"""Convert an input string into a Path to a file
......
from repo_management import argparse, operations
from repo_management import argparse, defaults, operations
def db2json() -> None:
......@@ -13,3 +13,18 @@ def db2json() -> None:
input_path=args.db_file,
output_path=args.output_dir,
)
def json2db() -> None:
    """The entry point for the json2db script

    Parses the command line arguments of the json2db script and delegates to
    operations.create_db_from_json_files(), which creates a repository database
    from a set of JSON files in a directory.
    """

    args = argparse.ArgParseFactory.json2db().parse_args()
    # the -f/--files flag selects a .files database over a default .db database
    if args.files:
        database_type = defaults.RepoDbType.FILES
    else:
        database_type = defaults.RepoDbType.DEFAULT
    operations.create_db_from_json_files(
        input_path=args.input_dir,
        output_path=args.db_file,
        db_type=database_type,
    )
import io
from typing import Dict, List, Optional, Union
from repo_management import defaults, models
from jinja2 import Environment, PackageLoader
from pydantic.error_wrappers import ValidationError
from repo_management import defaults, errors, models
def _files_data_to_model(data: io.StringIO) -> models.Files:
......@@ -43,6 +46,43 @@ def _files_data_to_model(data: io.StringIO) -> models.Files:
return models.Files(**output)
def _desc_data_line_to_dicts(
    current_header: str,
    current_type: defaults.FieldType,
    line: str,
    string_list_types: Dict[str, List[str]],
    string_types: Dict[str, str],
    int_types: Dict[str, int],
) -> None:
    """Add data retrieved from a line in a 'desc' file in a repository database to respective dicts for specific types

    The target dicts are mutated in place; nothing is returned.

    Parameters
    ----------
    current_header: str
        The current header under which the line is found
    current_type: defaults.FieldType
        The type by which the header is defined
    line: str
        The data
    string_list_types: Dict[str, List[str]]
        A dict for instances of type list string
    string_types: Dict[str, str]
        A dict for instances of type string
    int_types: Dict[str, int]
        A dict for instances of type int
    """
    if current_type == defaults.FieldType.STRING_LIST:
        # append to the header's list, creating it on first encounter
        string_list_types.setdefault(current_header, []).append(line)
    elif current_type == defaults.FieldType.STRING:
        string_types[current_header] = line
    elif current_type == defaults.FieldType.INT:
        int_types[current_header] = int(line)
def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc:
"""Read the contents of a 'desc' file (represented as an instance of io.StringIO) and convert it to a pydantic model
......@@ -81,19 +121,22 @@ def _desc_data_to_model(data: io.StringIO) -> models.PackageDesc:
continue
if current_header:
if current_type == defaults.FieldType.STRING_LIST:
if current_header in string_list_types.keys():
string_list_types[current_header] += [line]
else:
string_list_types[current_header] = [line]
if current_type == defaults.FieldType.STRING:
string_types[current_header] = line
if current_type == defaults.FieldType.INT:
int_types[current_header] = int(line)
_desc_data_line_to_dicts(
current_header=current_header,
current_type=current_type,
line=line,
string_list_types=string_list_types,
string_types=string_types,
int_types=int_types,
)
data.close()
merged_dict: Dict[str, Union[int, str, List[str]]] = {**int_types, **string_types, **string_list_types}
return models.PackageDesc(**merged_dict)
try:
return models.PackageDesc(**merged_dict)
except ValidationError as e:
raise errors.RepoManagementValidationError(
f"An error occured while validating the file: {data.getvalue()}\n{e}"
)
def _transform_package_desc_to_output_package(
......@@ -125,3 +168,58 @@ def _transform_package_desc_to_output_package(
return models.OutputPackage(**desc_dict, **files.dict())
else:
return models.OutputPackage(**desc_dict)
class RepoDbFile:
    """A class for handling templates for files used in repository database files (such as 'desc' or 'files')

    Attributes
    ----------
    env: jinja2.Environment
        A jinja2 Environment, that makes the templates available
    """

    def __init__(self, enable_async: bool = False) -> None:
        """Initialize an instance of RepoDbFile

        Parameters
        ----------
        enable_async: bool
            A bool indicating whether the jinja2.Environment is instantiated with enable_async (defaults to False)
        """
        self.env = Environment(
            loader=PackageLoader("repo_management", "templates"),
            trim_blocks=True,
            lstrip_blocks=True,
            enable_async=enable_async,
        )

    def _render(self, template_name: str, context: Dict, output: io.StringIO) -> None:
        """Render the named template with the given context and write the result to an output stream

        Parameters
        ----------
        template_name: str
            The name of a template known to self.env
        context: Dict
            The template context (a dict representation of a pydantic model)
        output: io.StringIO
            An output stream to write to
        """
        output.write(self.env.get_template(template_name).render(context))

    def render_desc_template(self, model: models.PackageDesc, output: io.StringIO) -> None:
        """Use the 'desc' template to write a string to an output stream based on a model

        Parameters
        ----------
        model: models.PackageDesc
            A pydantic model with the required attributes to properly render a template for a 'desc' file
        output: io.StringIO
            An output stream to write to
        """
        self._render("desc.j2", model.dict(), output)

    def render_files_template(self, model: models.Files, output: io.StringIO) -> None:
        """Use the 'files' template to write a string to an output stream based on a model

        Parameters
        ----------
        model: models.Files
            A pydantic model with the required attributes to properly render a template for a 'files' file
        output: io.StringIO
            An output stream to write to
        """
        self._render("files.j2", model.dict(), output)
from enum import IntEnum
from typing import Dict, Union
# Ownership defaults applied to members of generated repository database
# archives (used as tarfile.TarInfo uname/gname when streaming to a database).
DB_USER = "root"
DB_GROUP = "root"
# Permission modes as octal strings; consumers parse them with int(..., base=8).
DB_FILE_MODE = "0644"
DB_DIR_MODE = "0755"
class RepoDbMemberType(IntEnum):
UNKNOWN = 0
......@@ -8,6 +13,21 @@ class RepoDbMemberType(IntEnum):
FILES = 2
class RepoDbType(IntEnum):
    """An IntEnum to distinguish types of binary repository database files

    Attributes
    ----------
    DEFAULT: int
        Use this to identify .db files
    FILES: int
        Use this to identify .files files
    """

    DEFAULT = 0
    # NOTE(review): value 1 is skipped; FILES = 2 appears to mirror
    # RepoDbMemberType.FILES (also 2) — confirm this alignment is intentional.
    FILES = 2
class FieldType(IntEnum):
STRING = 0
INT = 1
......@@ -30,7 +50,7 @@ DESC_JSON: Dict[str, Dict[str, Union[str, FieldType]]] = {
"%SHA256SUM%": {"name": "sha256sum", "type": FieldType.STRING},
"%PGPSIG%": {"name": "pgpsig", "type": FieldType.STRING},
"%URL%": {"name": "url", "type": FieldType.STRING},
"%LICENSE%": {"name": "licenses", "type": FieldType.STRING_LIST},
"%LICENSE%": {"name": "license", "type": FieldType.STRING_LIST},
"%ARCH%": {"name": "arch", "type": FieldType.STRING},
"%BUILDDATE%": {"name": "builddate", "type": FieldType.INT},
"%PACKAGER%": {"name": "packager", "type": FieldType.STRING},
......
class RepoManagementError(Exception):
    """A class of Errors that is raised on issues with handling a repository database"""


class RepoManagementFileError(RepoManagementError):
    """An Error that is raised on issues with reading or writing files using repo_management"""


class RepoManagementValidationError(RepoManagementError):
    """An Error that is raised on issues with validating files using repo_management"""


class RepoManagementFileNotFoundError(RepoManagementFileError, FileNotFoundError):
    """An Error that is raised when a file can not be found"""
import io
import re
import tarfile
import time
from pathlib import Path
from typing import Iterator
from repo_management import defaults, models
import orjson
from pydantic.error_wrappers import ValidationError
from repo_management import convert, defaults, errors, models
def _read_db_file(db_path: Path, compression: str = "gz") -> tarfile.TarFile:
......@@ -87,3 +91,139 @@ def _db_file_member_as_model(
.decode("utf-8"),
),
)
def _json_files_in_directory(path: Path) -> Iterator[Path]:
"""Yield JSON files found in a directory
Parameters
----------
path: Path
A Path to search in for JSON files
Raises
------
errors.RepoManagementFileNotFoundError
If there are no JSON files found below
Returns
-------
Iterator[Path]
An iterator over the files found in the directory defined by path
"""
file_list = sorted(path.glob("*.json"))
if not file_list:
raise errors.RepoManagementFileNotFoundError(f"There are no JSON files in {path}!")
for json_file in file_list:
yield json_file
def _read_pkgbase_json_file(path: Path) -> models.OutputPackageBase:
    """Read a JSON file that represents a pkgbase and return it as models.OutputPackageBase

    Parameters
    ----------
    path: Path
        A Path to a JSON file

    Raises
    ------
    errors.RepoManagementFileError
        If the JSON file can not be decoded
    errors.RepoManagementValidationError
        If the JSON file can not be validated using models.OutputPackageBase

    Returns
    -------
    models.OutputPackageBase
        A pydantic model representing a pkgbase
    """
    with open(path, "r") as input_file:
        contents = input_file.read()
    try:
        return models.OutputPackageBase(**orjson.loads(contents))
    except orjson.JSONDecodeError as e:
        raise errors.RepoManagementFileError(f"The JSON file '{path}' could not be decoded!\n{e}")
    except ValidationError as e:
        raise errors.RepoManagementValidationError(f"The JSON file '{path}' could not be validated!\n{e}")
def _write_db_file(path: Path, compression: str = "gz") -> tarfile.TarFile:
"""Open a repository database file for writing
Parameters
----------
db_path: Path
A pathlib.Path instance, representing the location of the database file
compression: str
The compression used for the database file (defaults to 'gz')
Raises
------
ValueError
If the file represented by db_path does not exist
tarfile.ReadError
If the file could not be opened
tarfile.CompressionError
If the provided compression does not match the compression of the file or if the compression type is unknown
Returns
-------
tarfile.Tarfile
An instance of Tarfile
"""
return tarfile.open(name=path, mode=f"w:{compression}")
def _make_db_member(name: str, mode: str, size: int = 0, is_dir: bool = False) -> tarfile.TarInfo:
    """Create a tarfile.TarInfo describing one member of a repository database

    Parameters
    ----------
    name: str
        The name of the member inside the tar archive
    mode: str
        The permission mode as an octal string (e.g. defaults.DB_FILE_MODE)
    size: int
        The size of the member in bytes (defaults to 0; only applied to files)
    is_dir: bool
        Whether the member represents a directory (defaults to False)

    Returns
    -------
    tarfile.TarInfo
        A TarInfo with type, size, mtime, ownership and mode set
    """
    member = tarfile.TarInfo(name)
    if is_dir:
        member.type = tarfile.DIRTYPE
    else:
        member.size = size
    member.mtime = int(time.time())
    member.uname = defaults.DB_USER
    member.gname = defaults.DB_GROUP
    member.mode = int(mode, base=8)
    return member


def _stream_package_base_to_db(
    db: tarfile.TarFile,
    model: models.OutputPackageBase,
    repodbfile: convert.RepoDbFile,
    db_type: defaults.RepoDbType,
) -> None:
    """Stream descriptor files for packages of a pkgbase to a repository database

    Allows streaming to a default repository database or a files database

    Parameters
    ----------
    db: tarfile.TarFile
        The repository database to stream to
    model: models.OutputPackageBase
        The model to use for streaming descriptor files to the repository database
    repodbfile: convert.RepoDbFile
        The template handler used to render the 'desc' and 'files' members
    db_type: defaults.RepoDbType
        The type of database to stream to (a 'files' member is only written for
        defaults.RepoDbType.FILES)
    """
    for (desc_model, files_model) in model.get_packages_as_models():
        dirname = f"{desc_model.name}-{model.version}"
        # one directory per package, holding its descriptor files
        db.addfile(_make_db_member(dirname, mode=defaults.DB_DIR_MODE, is_dir=True))

        desc_content = io.StringIO()
        repodbfile.render_desc_template(model=desc_model, output=desc_content)
        desc_bytes = desc_content.getvalue().encode()
        db.addfile(
            _make_db_member(f"{dirname}/desc", mode=defaults.DB_FILE_MODE, size=len(desc_bytes)),
            io.BytesIO(desc_bytes),
        )

        if db_type == defaults.RepoDbType.FILES:
            files_content = io.StringIO()
            repodbfile.render_files_template(model=files_model, output=files_content)
            files_bytes = files_content.getvalue().encode()
            db.addfile(
                _make_db_member(f"{dirname}/files", mode=defaults.DB_FILE_MODE, size=len(files_bytes)),
                io.BytesIO(files_bytes),
            )
import io
from typing import List, Optional
from typing import List, Optional, Tuple
from pydantic import BaseModel
......@@ -108,7 +108,7 @@ class License(BaseModel):
"""A model describing the %LICENSE% header in a 'desc' file, which type it represents and whether it is required or
not"""
licenses: List[str]
license: Optional[List[str]]
class Arch(BaseModel):
......@@ -185,10 +185,9 @@ class PackageFiles(Name, Files):
pass
class PackageDesc(
class OutputPackage(
Arch,
Backup,
Base,
BuildDate,
Conflicts,
CSize,
......@@ -196,46 +195,30 @@ class PackageDesc(
Desc,
CheckDepends,
FileName,
Files,
Groups,
ISize,
License,
MakeDepends,
Md5Sum,
Name,
OptDepends,
Packager,
PgpSig,
Provides,
Replaces,
Sha256Sum,
Url,
Version,
):
"""A model describing all headers in a 'desc' file, which type they represent and whether they are required or
not"""