Skip to content

Commit 23c348d

Browse files
feat: Extend sanitization strategy pattern to all stores
Apply the sanitization strategy pattern (from Elasticsearch store) to all stores that perform sanitization. This ensures consistent behavior across all stores: sanitization is now opt-in rather than automatic, avoiding surprising transformations of user keys and collections.

Changes:
- MongoDB: Added optional collection_sanitization_strategy parameter
  - Defaults to PassthroughStrategy() (no sanitization)
  - Created MongoDBV1CollectionSanitizationStrategy for backward compatibility
- Keyring: Added optional key/collection sanitization strategy parameters
  - Defaults to PassthroughStrategy() (no sanitization)
  - Created KeyringV1SanitizationStrategy for backward compatibility
- Windows Registry: Added optional key/collection sanitization strategy parameters
  - Defaults to PassthroughStrategy() (no sanitization)
  - Created WindowsRegistryV1SanitizationStrategy for backward compatibility
- Memcached: Added optional key_sanitization_strategy parameter
  - Defaults to PassthroughStrategy() (no sanitization)
  - Created MemcachedV1KeySanitizationStrategy for backward compatibility
  - Removed custom sanitize_key method, now uses base class _sanitize_key

All stores now follow the same pattern:
1. Accept optional sanitization strategy parameters in __init__
2. Default to PassthroughStrategy() to avoid surprising transformations
3. Provide V1 strategy classes that encapsulate previous hardcoded behavior
4. Update docstrings to explain sanitization options and limitations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: William Easton <[email protected]>
1 parent b367cc4 commit 23c348d

File tree

9 files changed

+219
-67
lines changed

9 files changed

+219
-67
lines changed

key-value/key-value-aio/src/key_value/aio/stores/keyring/store.py

Lines changed: 23 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
"""Python keyring-based key-value store."""
22

3+
from typing import Any
4+
35
from key_value.shared.utils.compound import compound_key
46
from key_value.shared.utils.managed_entry import ManagedEntry
5-
from key_value.shared.utils.sanitization import HybridSanitizationStrategy
7+
from key_value.shared.utils.sanitization import HybridSanitizationStrategy, PassthroughStrategy, SanitizationStrategy
68
from key_value.shared.utils.sanitize import ALPHANUMERIC_CHARACTERS
79
from typing_extensions import override
810

@@ -21,12 +23,26 @@
2123
ALLOWED_KEY_COLLECTION_CHARACTERS: str = ALPHANUMERIC_CHARACTERS
2224

2325

26+
class KeyringV1SanitizationStrategy(HybridSanitizationStrategy):
27+
def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: ARG002
28+
super().__init__(
29+
replacement_character="_",
30+
max_length=MAX_KEY_COLLECTION_LENGTH,
31+
allowed_characters=ALLOWED_KEY_COLLECTION_CHARACTERS,
32+
)
33+
34+
2435
class KeyringStore(BaseStore):
2536
"""Python keyring-based key-value store using keyring library.
2637
2738
This store uses the Python keyring to persist key-value pairs. Each entry is stored
2839
as a password in the keychain with the combination of collection and key as the username.
2940
41+
By default, keys and collections are not sanitized. This means that there are character and length restrictions on
42+
keys and collections that may cause errors when trying to get and put entries.
43+
44+
To avoid issues, you may want to consider leveraging the `KeyringV1SanitizationStrategy` strategy.
45+
3046
Note: TTL is not natively supported by Python keyring, so TTL information is stored
3147
within the JSON payload and checked at retrieval time.
3248
"""
@@ -38,23 +54,23 @@ def __init__(
3854
*,
3955
service_name: str = DEFAULT_KEYCHAIN_SERVICE,
4056
default_collection: str | None = None,
57+
key_sanitization_strategy: SanitizationStrategy | None = None,
58+
collection_sanitization_strategy: SanitizationStrategy | None = None,
4159
) -> None:
4260
"""Initialize the Python keyring store.
4361
4462
Args:
4563
service_name: The service name to use in the keychain. Defaults to "py-key-value".
4664
default_collection: The default collection to use if no collection is provided.
65+
key_sanitization_strategy: The sanitization strategy to use for keys.
66+
collection_sanitization_strategy: The sanitization strategy to use for collections.
4767
"""
4868
self._service_name = service_name
4969

50-
sanitization_strategy = HybridSanitizationStrategy(
51-
replacement_character="_", max_length=MAX_KEY_COLLECTION_LENGTH, allowed_characters=ALLOWED_KEY_COLLECTION_CHARACTERS
52-
)
53-
5470
super().__init__(
5571
default_collection=default_collection,
56-
collection_sanitization_strategy=sanitization_strategy,
57-
key_sanitization_strategy=sanitization_strategy,
72+
collection_sanitization_strategy=collection_sanitization_strategy or PassthroughStrategy(),
73+
key_sanitization_strategy=key_sanitization_strategy or PassthroughStrategy(),
5874
)
5975

6076
@override

key-value/key-value-aio/src/key_value/aio/stores/memcached/store.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
import hashlib
21
from collections.abc import Sequence
3-
from typing import overload
2+
from typing import Any, overload
43

54
from key_value.shared.utils.compound import compound_key
65
from key_value.shared.utils.managed_entry import ManagedEntry
7-
from key_value.shared.utils.sanitization import HashExcessLengthStrategy
6+
from key_value.shared.utils.sanitization import HashExcessLengthStrategy, PassthroughStrategy, SanitizationStrategy
87
from typing_extensions import override
98

109
from key_value.aio.stores.base import BaseContextManagerStore, BaseDestroyStore, BaseStore
@@ -18,16 +17,36 @@
1817
MAX_KEY_LENGTH = 240
1918

2019

20+
class MemcachedV1KeySanitizationStrategy(HashExcessLengthStrategy):
21+
def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: ARG002
22+
super().__init__(max_length=MAX_KEY_LENGTH)
23+
24+
2125
class MemcachedStore(BaseDestroyStore, BaseContextManagerStore, BaseStore):
22-
"""Memcached-based key-value store using aiomcache."""
26+
"""Memcached-based key-value store using aiomcache.
27+
28+
By default, keys are not sanitized. This means that there are character and length restrictions on
29+
keys that may cause errors when trying to get and put entries.
30+
31+
To avoid issues, you may want to consider leveraging the `MemcachedV1KeySanitizationStrategy` strategy.
32+
"""
2333

2434
_client: Client
2535

2636
@overload
27-
def __init__(self, *, client: Client, default_collection: str | None = None) -> None: ...
37+
def __init__(
38+
self, *, client: Client, default_collection: str | None = None, key_sanitization_strategy: SanitizationStrategy | None = None
39+
) -> None: ...
2840

2941
@overload
30-
def __init__(self, *, host: str = "127.0.0.1", port: int = 11211, default_collection: str | None = None) -> None: ...
42+
def __init__(
43+
self,
44+
*,
45+
host: str = "127.0.0.1",
46+
port: int = 11211,
47+
default_collection: str | None = None,
48+
key_sanitization_strategy: SanitizationStrategy | None = None,
49+
) -> None: ...
3150

3251
def __init__(
3352
self,
@@ -36,6 +55,7 @@ def __init__(
3655
host: str = "127.0.0.1",
3756
port: int = 11211,
3857
default_collection: str | None = None,
58+
key_sanitization_strategy: SanitizationStrategy | None = None,
3959
) -> None:
4060
"""Initialize the Memcached store.
4161
@@ -44,25 +64,18 @@ def __init__(
4464
host: Memcached host. Defaults to 127.0.0.1.
4565
port: Memcached port. Defaults to 11211.
4666
default_collection: The default collection to use if no collection is provided.
67+
key_sanitization_strategy: The sanitization strategy to use for keys.
4768
"""
4869
self._client = client or Client(host=host, port=port)
4970

50-
sanitization_strategy = HashExcessLengthStrategy(max_length=MAX_KEY_LENGTH)
51-
5271
super().__init__(
5372
default_collection=default_collection,
54-
key_sanitization_strategy=sanitization_strategy,
73+
key_sanitization_strategy=key_sanitization_strategy or PassthroughStrategy(),
5574
)
5675

57-
def sanitize_key(self, key: str) -> str:
58-
if len(key) > MAX_KEY_LENGTH:
59-
sha256_hash: str = hashlib.sha256(key.encode()).hexdigest()
60-
return sha256_hash[:64]
61-
return key
62-
6376
@override
6477
async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None:
65-
combo_key: str = self.sanitize_key(compound_key(collection=collection, key=key))
78+
combo_key: str = self._sanitize_key(compound_key(collection=collection, key=key))
6679

6780
raw_value: bytes | None = await self._client.get(combo_key.encode("utf-8"))
6881

@@ -78,7 +91,7 @@ async def _get_managed_entries(self, *, collection: str, keys: Sequence[str]) ->
7891
if not keys:
7992
return []
8093

81-
combo_keys: list[str] = [self.sanitize_key(compound_key(collection=collection, key=key)) for key in keys]
94+
combo_keys: list[str] = [self._sanitize_key(compound_key(collection=collection, key=key)) for key in keys]
8295

8396
# Use multi_get for efficient batch retrieval
8497
# multi_get returns a tuple in the same order as keys
@@ -102,7 +115,7 @@ async def _put_managed_entry(
102115
collection: str,
103116
managed_entry: ManagedEntry,
104117
) -> None:
105-
combo_key: str = self.sanitize_key(compound_key(collection=collection, key=key))
118+
combo_key: str = self._sanitize_key(compound_key(collection=collection, key=key))
106119

107120
# Memcached treats 0 as no-expiration. Do not pass <= 0 (other than 0) to avoid permanence errors.
108121
exptime: int
@@ -122,7 +135,7 @@ async def _put_managed_entry(
122135

123136
@override
124137
async def _delete_managed_entry(self, *, key: str, collection: str) -> bool:
125-
combo_key: str = self.sanitize_key(compound_key(collection=collection, key=key))
138+
combo_key: str = self._sanitize_key(compound_key(collection=collection, key=key))
126139

127140
return await self._client.delete(key=combo_key.encode(encoding="utf-8"))
128141

key-value/key-value-aio/src/key_value/aio/stores/mongodb/store.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from bson.errors import InvalidDocument
66
from key_value.shared.errors import DeserializationError, SerializationError
77
from key_value.shared.utils.managed_entry import ManagedEntry
8-
from key_value.shared.utils.sanitization import HybridSanitizationStrategy
8+
from key_value.shared.utils.sanitization import HybridSanitizationStrategy, PassthroughStrategy, SanitizationStrategy
99
from key_value.shared.utils.sanitize import ALPHANUMERIC_CHARACTERS
1010
from key_value.shared.utils.serialization import SerializationAdapter
1111
from typing_extensions import Self, override
@@ -89,8 +89,25 @@ def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]:
8989
return data
9090

9191

92+
class MongoDBV1CollectionSanitizationStrategy(HybridSanitizationStrategy):
93+
def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: ARG002
94+
super().__init__(
95+
replacement_character="_",
96+
max_length=MAX_COLLECTION_LENGTH,
97+
allowed_characters=COLLECTION_ALLOWED_CHARACTERS,
98+
)
99+
100+
92101
class MongoDBStore(BaseDestroyCollectionStore, BaseContextManagerStore, BaseStore):
93-
"""MongoDB-based key-value store using pymongo."""
102+
"""MongoDB-based key-value store using pymongo.
103+
104+
Stores collections as MongoDB collections and stores values in document fields.
105+
106+
By default, collections are not sanitized. This means that there are character and length restrictions on
107+
collection names that may cause errors when trying to get and put entries.
108+
109+
To avoid issues, you may want to consider leveraging the `MongoDBV1CollectionSanitizationStrategy` strategy.
110+
"""
94111

95112
_client: AsyncMongoClient[dict[str, Any]]
96113
_db: AsyncDatabase[dict[str, Any]]
@@ -106,6 +123,7 @@ def __init__(
106123
coll_name: str | None = None,
107124
native_storage: bool = True,
108125
default_collection: str | None = None,
126+
collection_sanitization_strategy: SanitizationStrategy | None = None,
109127
) -> None:
110128
"""Initialize the MongoDB store.
111129
@@ -115,6 +133,7 @@ def __init__(
115133
coll_name: The name of the MongoDB collection.
116134
native_storage: Whether to use native BSON storage (True, default) or JSON string storage (False).
117135
default_collection: The default collection to use if no collection is provided.
136+
collection_sanitization_strategy: The sanitization strategy to use for collections.
118137
"""
119138

120139
@overload
@@ -126,6 +145,7 @@ def __init__(
126145
coll_name: str | None = None,
127146
native_storage: bool = True,
128147
default_collection: str | None = None,
148+
collection_sanitization_strategy: SanitizationStrategy | None = None,
129149
) -> None:
130150
"""Initialize the MongoDB store.
131151
@@ -135,6 +155,7 @@ def __init__(
135155
coll_name: The name of the MongoDB collection.
136156
native_storage: Whether to use native BSON storage (True, default) or JSON string storage (False).
137157
default_collection: The default collection to use if no collection is provided.
158+
collection_sanitization_strategy: The sanitization strategy to use for collections.
138159
"""
139160

140161
def __init__(
@@ -146,6 +167,7 @@ def __init__(
146167
coll_name: str | None = None,
147168
native_storage: bool = True,
148169
default_collection: str | None = None,
170+
collection_sanitization_strategy: SanitizationStrategy | None = None,
149171
) -> None:
150172
"""Initialize the MongoDB store.
151173
@@ -158,6 +180,7 @@ def __init__(
158180
Native storage stores values as BSON dicts for better query support.
159181
Legacy mode stores values as JSON strings for backward compatibility.
160182
default_collection: The default collection to use if no collection is provided.
183+
collection_sanitization_strategy: The sanitization strategy to use for collections.
161184
"""
162185

163186
if client:
@@ -177,9 +200,7 @@ def __init__(
177200

178201
super().__init__(
179202
default_collection=default_collection,
180-
collection_sanitization_strategy=HybridSanitizationStrategy(
181-
replacement_character="_", max_length=MAX_COLLECTION_LENGTH, allowed_characters=COLLECTION_ALLOWED_CHARACTERS
182-
),
203+
collection_sanitization_strategy=collection_sanitization_strategy or PassthroughStrategy(),
183204
)
184205

185206
@override

key-value/key-value-aio/src/key_value/aio/stores/windows_registry/store.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
"""Windows Registry-based key-value store."""
22

3-
from typing import Literal
3+
from typing import Any, Literal
44
from winreg import HKEY_CURRENT_USER, HKEY_LOCAL_MACHINE
55

66
from key_value.shared.utils.managed_entry import ManagedEntry
7-
from key_value.shared.utils.sanitization import HybridSanitizationStrategy
7+
from key_value.shared.utils.sanitization import HybridSanitizationStrategy, PassthroughStrategy, SanitizationStrategy
88
from key_value.shared.utils.sanitize import ALPHANUMERIC_CHARACTERS
99
from typing_extensions import override
1010

@@ -25,12 +25,25 @@
2525
ALLOWED_KEY_COLLECTION_CHARACTERS: str = ALPHANUMERIC_CHARACTERS
2626

2727

28+
class WindowsRegistryV1SanitizationStrategy(HybridSanitizationStrategy):
29+
def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: ARG002
30+
super().__init__(
31+
max_length=MAX_KEY_COLLECTION_LENGTH,
32+
allowed_characters=ALLOWED_KEY_COLLECTION_CHARACTERS,
33+
)
34+
35+
2836
class WindowsRegistryStore(BaseStore):
2937
"""Windows Registry-based key-value store.
3038
3139
This store uses the Windows Registry to persist key-value pairs. Each entry is stored
3240
as a string value in the registry under HKEY_CURRENT_USER\\Software\\{root}\\{collection}\\{key}.
3341
42+
By default, keys and collections are not sanitized. This means that there are character and length restrictions on
43+
keys and collections that may cause errors when trying to get and put entries.
44+
45+
To avoid issues, you may want to consider leveraging the `WindowsRegistryV1SanitizationStrategy` strategy.
46+
3447
Note: TTL is not natively supported by Windows Registry, so TTL information is stored
3548
within the JSON payload and checked at retrieval time.
3649
"""
@@ -41,25 +54,25 @@ def __init__(
4154
hive: Literal["HKEY_CURRENT_USER", "HKEY_LOCAL_MACHINE"] | None = None,
4255
registry_path: str | None = None,
4356
default_collection: str | None = None,
57+
key_sanitization_strategy: SanitizationStrategy | None = None,
58+
collection_sanitization_strategy: SanitizationStrategy | None = None,
4459
) -> None:
4560
"""Initialize the Windows Registry store.
4661
4762
Args:
4863
hive: The hive to use. Defaults to "HKEY_CURRENT_USER".
4964
registry_path: The registry path to use. Must be a valid registry path under the hive. Defaults to "Software\\py-key-value".
5065
default_collection: The default collection to use if no collection is provided.
66+
key_sanitization_strategy: The sanitization strategy to use for keys.
67+
collection_sanitization_strategy: The sanitization strategy to use for collections.
5168
"""
5269
self._hive = HKEY_LOCAL_MACHINE if hive == "HKEY_LOCAL_MACHINE" else HKEY_CURRENT_USER
5370
self._registry_path = registry_path or DEFAULT_REGISTRY_PATH
5471

55-
sanitization_strategy = HybridSanitizationStrategy(
56-
max_length=MAX_KEY_COLLECTION_LENGTH, allowed_characters=ALLOWED_KEY_COLLECTION_CHARACTERS
57-
)
58-
5972
super().__init__(
6073
default_collection=default_collection,
61-
key_sanitization_strategy=sanitization_strategy,
62-
collection_sanitization_strategy=sanitization_strategy,
74+
key_sanitization_strategy=key_sanitization_strategy or PassthroughStrategy(),
75+
collection_sanitization_strategy=collection_sanitization_strategy or PassthroughStrategy(),
6376
)
6477

6578
def _get_registry_path(self, *, collection: str) -> str:

key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@
6666
MAX_KEY_LENGTH = 256
6767
ALLOWED_KEY_CHARACTERS: str = ALPHANUMERIC_CHARACTERS
6868

69-
MAX_INDEX_LENGTH = 240
69+
MAX_INDEX_LENGTH = 200
7070
ALLOWED_INDEX_CHARACTERS: str = LOWERCASE_ALPHABET + NUMBERS + "_" + "-" + "."
7171

7272

@@ -97,7 +97,7 @@ def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]:
9797

9898
class ElasticsearchV1KeySanitizationStrategy(AlwaysHashStrategy):
9999
def __init__(self, *args: Any, **kwargs: Any) -> None:
100-
super().__init__(hash_length=MAX_KEY_LENGTH)
100+
super().__init__(hash_length=64)
101101

102102

103103
class ElasticsearchV1CollectionSanitizationStrategy(HybridSanitizationStrategy):
@@ -175,8 +175,6 @@ def __init__(
175175
api_key: The api key to use.
176176
index_prefix: The index prefix to use. Collections will be prefixed with this prefix.
177177
default_collection: The default collection to use if no collection is provided.
178-
key_sanitization_strategy: The sanitization strategy to use for keys.
179-
collection_sanitization_strategy: The sanitization strategy to use for collections.
180178
"""
181179

182180
def __init__(

0 commit comments

Comments
 (0)