From 0ceb6c743ee1bb1df5a0937d9797fb7275f7fd66 Mon Sep 17 00:00:00 2001
From: Levente Polyak <anthraxx@archlinux.org>
Date: Thu, 25 Nov 2021 23:31:36 +0100
Subject: [PATCH] fix(keyringctl): avoid simplified uid collisions using a hash

Add a postfix hash of the raw uid data to the filenames to avoid
collisions with the simplified uid.
---
 libkeyringctl/keyring.py | 11 ++++++-----
 libkeyringctl/util.py    | 19 +++++++++++++++++++
 libkeyringctl/verify.py  |  4 ++--
 tests/conftest.py        |  4 ++--
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/libkeyringctl/keyring.py b/libkeyringctl/keyring.py
index 4d5dc218..d62576bf 100644
--- a/libkeyringctl/keyring.py
+++ b/libkeyringctl/keyring.py
@@ -38,7 +38,7 @@ from .util import contains_fingerprint
 from .util import filter_fingerprints_by_trust
 from .util import get_cert_paths
 from .util import get_fingerprint_from_partial
-from .util import simplify_ascii
+from .util import simplify_uid
 from .util import transform_fd_to_tmpfile
 
 PACKET_FILENAME_DATETIME_FORMAT: str = "%Y-%m-%d_%H-%M-%S"
@@ -353,7 +353,7 @@ def convert_certificate(
         elif packet.name.endswith("--UserID"):
             current_packet_mode = "uid"
             current_packet_fingerprint = None
-            current_packet_uid = Uid(simplify_ascii(packet_dump_field(packet, "Value")))
+            current_packet_uid = Uid(packet_dump_field(packet, "Value"))
 
             if current_packet_uid in uids:
                 raise Exception(
@@ -549,7 +549,8 @@ def persist_uids(
     """
 
     for uid, uid_packet in uids.items():
-        output_file = key_dir / "uid" / uid / f"{uid}.asc"
+        simplified_uid = simplify_uid(uid)
+        output_file = key_dir / "uid" / simplified_uid / f"{simplified_uid}.asc"
         output_file.parent.mkdir(parents=True, exist_ok=True)
         debug(f"Writing file {output_file} from {uid_packet}")
         packet_join(packets=[uid_packet], output=output_file, force=True)
@@ -703,7 +704,7 @@ def persist_uid_certifications(
 
     for uid, uid_certifications in certifications.items():
         for issuer, issuer_certifications in uid_certifications.items():
-            certification_dir = key_dir / "uid" / uid / "certification"
+            certification_dir = key_dir / "uid" / simplify_uid(uid) / "certification"
             certification_dir.mkdir(parents=True, exist_ok=True)
             certification = latest_certification(issuer_certifications)
             output_file = certification_dir / f"{issuer}.asc"
@@ -728,7 +729,7 @@ def persist_uid_revocations(
 
     for uid, uid_revocations in revocations.items():
         for issuer, issuer_revocations in uid_revocations.items():
-            revocation_dir = key_dir / "uid" / uid / "revocation"
+            revocation_dir = key_dir / "uid" / simplify_uid(uid) / "revocation"
             revocation_dir.mkdir(parents=True, exist_ok=True)
             revocation = latest_certification(issuer_revocations)
             output_file = revocation_dir / f"{issuer}.asc"
diff --git a/libkeyringctl/util.py b/libkeyringctl/util.py
index af3c6207..41a61524 100644
--- a/libkeyringctl/util.py
+++ b/libkeyringctl/util.py
@@ -2,6 +2,7 @@
 from collections.abc import Iterable
 from collections.abc import Iterator
 from contextlib import contextmanager
+from hashlib import sha256
 from os import chdir
 from os import environ
 from os import getcwd
@@ -28,6 +29,7 @@ from typing import Union
 
 from libkeyringctl.types import Fingerprint
 from libkeyringctl.types import Trust
+from libkeyringctl.types import Uid
 
 
 @contextmanager
@@ -312,3 +314,20 @@ def simplify_ascii(_str: str) -> str:
     _str = "".join([ascii_mapping_lookup.get(char) or char for char in _str])
     _str = sub("[^" + escape(simple_printable) + "]", "_", _str)
     return _str
+
+
+def simplify_uid(uid: Uid, hash_postfix: bool = True) -> str:
+    """Simplify a uid to contain more filesystem and printable friendly characters with an optional
+    collision resistant hash postfix.
+
+    Parameters
+    ----------
+    uid: Uid to simplify (e.g. 'Foobar McFooface <foobar@foo.face>')
+    hash_postfix: Whether to add a hash of the uid as postfix
+
+    Returns
+    -------
+    Simplified str representation of uid
+    """
+    _hash = "" if not hash_postfix else f"_{sha256(uid.encode()).hexdigest()[:8]}"
+    return f"{simplify_ascii(_str=uid)}{_hash}"
diff --git a/libkeyringctl/verify.py b/libkeyringctl/verify.py
index d4ecbdc1..a765e02f 100644
--- a/libkeyringctl/verify.py
+++ b/libkeyringctl/verify.py
@@ -18,7 +18,7 @@ from libkeyringctl.types import Fingerprint
 from libkeyringctl.types import Uid
 from libkeyringctl.util import get_cert_paths
 from libkeyringctl.util import get_fingerprint_from_partial
-from libkeyringctl.util import simplify_ascii
+from libkeyringctl.util import simplify_uid
 from libkeyringctl.util import system
 
 
@@ -123,7 +123,7 @@ def verify_integrity(certificate: Path, all_fingerprints: Set[Fingerprint]) -> N
 
                             assert_packet_kind(path=uid_path, expected="User")
 
-                            uid_value = Uid(simplify_ascii(packet_dump_field(packet=uid_path, field="Value")))
+                            uid_value = simplify_uid(Uid(packet_dump_field(packet=uid_path, field="Value")))
                             if uid_value != uid.name:
                                 raise Exception(f"Unexpected uid in file {str(uid_path)}: {uid_value}")
                         elif not uid_path.is_dir():
diff --git a/tests/conftest.py b/tests/conftest.py
index 51e1d9fb..fc530b5c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -34,7 +34,7 @@ from libkeyringctl.types import Fingerprint
 from libkeyringctl.types import Uid
 from libkeyringctl.types import Username
 from libkeyringctl.util import cwd
-from libkeyringctl.util import simplify_ascii
+from libkeyringctl.util import simplify_uid
 from libkeyringctl.util import system
 
 test_keys: Dict[Username, List[Path]] = defaultdict(list)
@@ -133,7 +133,7 @@ def create_uid_certification(
             certificate: Path = test_certificates[certified][0]
             fingerprint: Fingerprint = Fingerprint(test_keyring_certificates[certified][0].name)
             issuer_fingerprint: Fingerprint = Fingerprint(test_keyring_certificates[issuer][0].name)
-            simplified_uid = Uid(simplify_ascii(uid))
+            simplified_uid = simplify_uid(Uid(uid))
 
             output: Path = (
                 working_dir
-- 
GitLab