From da86ea2dd5a274505823cbb7c3988328e7a365e2 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 01:48:46 +0000 Subject: [PATCH 01/19] Add AWS S3 support for py-key-value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements a new S3Store that uses AWS S3 as a distributed key-value storage backend. The implementation follows the established pattern from other distributed stores (DynamoDB, MongoDB) and provides: - S3Store class for async operations using aioboto3 - Client-side TTL expiration checking (S3 lifecycle policies don't support atomic TTL+retrieval) - Comprehensive test suite with LocalStack integration - Auto-generated sync library support - Documentation updates in README The store uses S3 objects with path format {collection}/{key} and serializes ManagedEntry objects to JSON. TTL metadata is stored in S3 object metadata and checked during retrieval operations. Resolves #161 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: William Easton --- README.md | 5 +- key-value/key-value-aio/pyproject.toml | 3 +- .../src/key_value/aio/stores/s3/__init__.py | 5 + .../src/key_value/aio/stores/s3/store.py | 324 ++++++++++++++++++ .../key-value-aio/tests/stores/s3/__init__.py | 1 + .../key-value-aio/tests/stores/s3/test_s3.py | 110 ++++++ .../key_value/sync/code_gen/stores/base.py | 1 + .../sync/code_gen/stores/s3/__init__.py | 8 + .../sync/code_gen/stores/s3/store.py | 302 ++++++++++++++++ .../src/key_value/sync/stores/s3/__init__.py | 8 + .../tests/code_gen/stores/s3/__init__.py | 4 + .../tests/code_gen/stores/s3/test_s3.py | 102 ++++++ uv.lock | 24 +- 13 files changed, 891 insertions(+), 6 deletions(-) create mode 100644 key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py create mode 100644 key-value/key-value-aio/src/key_value/aio/stores/s3/store.py create mode 100644 key-value/key-value-aio/tests/stores/s3/__init__.py create mode 100644 key-value/key-value-aio/tests/stores/s3/test_s3.py create mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py create mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py create mode 100644 key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py create mode 100644 key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py create mode 100644 key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py diff --git a/README.md b/README.md index c190a090..6f7fd179 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ This monorepo contains two libraries: ## Why use this library? -- **Multiple backends**: DynamoDB, Elasticsearch, Memcached, MongoDB, Redis, +- **Multiple backends**: DynamoDB, S3, Elasticsearch, Memcached, MongoDB, Redis, RocksDB, Valkey, and In-memory, Disk, etc - **TTL support**: Automatic expiration handling across all store types - **Type-safe**: Full type hints with Protocol-based interfaces @@ -131,6 +131,7 @@ pip install py-key-value-aio pip install py-key-value-aio[memory] pip install py-key-value-aio[disk] pip install py-key-value-aio[dynamodb] +pip install py-key-value-aio[s3] pip install py-key-value-aio[elasticsearch] # or: redis, mongodb, memcached, valkey, vault, registry, rocksdb, see below for all options ``` @@ -191,7 +192,7 @@ categories: - **Local stores**: In-memory and disk-based storage (Memory, Disk, RocksDB, etc.) 
- **Secret stores**: Secure OS-level storage for sensitive data (Keyring, Vault) - **Distributed stores**: Network-based storage for multi-node apps (Redis, - DynamoDB, MongoDB, etc.) + DynamoDB, S3, MongoDB, etc.) Each store has a **stability rating** indicating likelihood of backwards-incompatible changes. Stable stores (Redis, Valkey, Disk, Keyring) diff --git a/key-value/key-value-aio/pyproject.toml b/key-value/key-value-aio/pyproject.toml index 43aebd42..d4f4edce 100644 --- a/key-value/key-value-aio/pyproject.toml +++ b/key-value/key-value-aio/pyproject.toml @@ -41,6 +41,7 @@ vault = ["hvac>=2.3.0", "types-hvac>=2.3.0"] memcached = ["aiomcache>=0.8.0"] elasticsearch = ["elasticsearch>=8.0.0", "aiohttp>=3.12"] dynamodb = ["aioboto3>=13.3.0", "types-aiobotocore-dynamodb>=2.16.0"] +s3 = ["aioboto3>=13.3.0", "types-aiobotocore-s3>=2.16.0"] keyring = ["keyring>=25.6.0"] keyring-linux = ["keyring>=25.6.0", "dbus-python>=1.4.0"] pydantic = ["pydantic>=2.11.9"] @@ -67,7 +68,7 @@ env_files = [".env"] [dependency-groups] dev = [ - "py-key-value-aio[memory,disk,redis,elasticsearch,memcached,mongodb,vault,dynamodb,rocksdb]", + "py-key-value-aio[memory,disk,redis,elasticsearch,memcached,mongodb,vault,dynamodb,s3,rocksdb]", "py-key-value-aio[valkey]; platform_system != 'Windows'", "py-key-value-aio[keyring]", "py-key-value-aio[pydantic]", diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py new file mode 100644 index 00000000..67a57ea1 --- /dev/null +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py @@ -0,0 +1,5 @@ +"""AWS S3-based key-value store.""" + +from key_value.aio.stores.s3.store import S3Store + +__all__ = ["S3Store"] diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py new file mode 100644 index 00000000..d6c03de2 --- /dev/null +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -0,0 +1,324 @@ +from datetime import datetime, timezone +from types import TracebackType +from typing import TYPE_CHECKING, Any, overload + +from key_value.shared.utils.managed_entry import ManagedEntry +from typing_extensions import Self, override + +from key_value.aio.stores.base import ( + BaseContextManagerStore, + BaseStore, +) + +try: + import aioboto3 + from aioboto3.session import Session # noqa: TC002 +except ImportError as e: + msg = "S3Store requires py-key-value-aio[s3]" + raise ImportError(msg) from e + +# aioboto3 generates types at runtime, so we use AioBaseClient at runtime but S3Client during static type checking +if TYPE_CHECKING: + from types_aiobotocore_s3.client import S3Client +else: + from aiobotocore.client import AioBaseClient as S3Client + + +class S3Store(BaseContextManagerStore, BaseStore): + """AWS S3-based key-value store. + + This store uses AWS S3 to store key-value pairs as objects. Each entry is stored + as a separate S3 object with the path format: {collection}/{key}. The ManagedEntry + is serialized to JSON and stored as the object body. TTL information is stored in + S3 object metadata and checked client-side during retrieval (S3 lifecycle policies + can be configured separately for background cleanup, but don't provide atomic TTL+retrieval). + + Example: + Basic usage with automatic AWS credentials: + + >>> async with S3Store(bucket_name="my-kv-store") as store: + ... await store.put(key="user:123", value={"name": "Alice"}, ttl=3600) + ... 
user = await store.get(key="user:123") + + With custom AWS credentials: + + >>> async with S3Store( + ... bucket_name="my-kv-store", + ... region_name="us-west-2", + ... aws_access_key_id="...", + ... aws_secret_access_key="...", + ... ) as store: + ... await store.put(key="config", value={"setting": "value"}) + + For local testing with LocalStack: + + >>> async with S3Store( + ... bucket_name="test-bucket", + ... endpoint_url="http://localhost:4566", + ... ) as store: + ... await store.put(key="test", value={"data": "test"}) + """ + + _session: aioboto3.Session # pyright: ignore[reportAny] + _bucket_name: str + _endpoint_url: str | None + _raw_client: Any # S3 client from aioboto3 + _client: S3Client | None + + @overload + def __init__(self, *, client: S3Client, bucket_name: str, default_collection: str | None = None) -> None: + """Initialize the S3 store with a pre-configured client. + + Args: + client: The S3 client to use. You must have entered the context manager before passing this in. + bucket_name: The name of the S3 bucket to use. + default_collection: The default collection to use if no collection is provided. + """ + + @overload + def __init__( + self, + *, + bucket_name: str, + region_name: str | None = None, + endpoint_url: str | None = None, + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + default_collection: str | None = None, + ) -> None: + """Initialize the S3 store with AWS credentials. + + Args: + bucket_name: The name of the S3 bucket to use. + region_name: AWS region name. Defaults to None (uses AWS default). + endpoint_url: Custom endpoint URL (useful for LocalStack/MinIO). Defaults to None. + aws_access_key_id: AWS access key ID. Defaults to None (uses AWS default credentials). + aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). + aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). + default_collection: The default collection to use if no collection is provided. + """ + + def __init__( + self, + *, + client: S3Client | None = None, + bucket_name: str, + region_name: str | None = None, + endpoint_url: str | None = None, + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + default_collection: str | None = None, + ) -> None: + """Initialize the S3 store. + + Args: + client: The S3 client to use. Defaults to None (creates a new client). + bucket_name: The name of the S3 bucket to use. + region_name: AWS region name. Defaults to None (uses AWS default). + endpoint_url: Custom endpoint URL (useful for LocalStack/MinIO). Defaults to None. + aws_access_key_id: AWS access key ID. Defaults to None (uses AWS default credentials). + aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). + aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). + default_collection: The default collection to use if no collection is provided. 
+ """ + self._bucket_name = bucket_name + self._endpoint_url = endpoint_url + + if client: + self._client = client + self._raw_client = None + else: + session: Session = aioboto3.Session( + region_name=region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_session_token=aws_session_token, + ) + + self._raw_client = session.client(service_name="s3", endpoint_url=endpoint_url) # pyright: ignore[reportUnknownMemberType] + self._client = None + + super().__init__(default_collection=default_collection) + + @override + async def __aenter__(self) -> Self: + if self._raw_client: + self._client = await self._raw_client.__aenter__() + await super().__aenter__() + return self + + @override + async def __aexit__( + self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None + ) -> None: + await super().__aexit__(exc_type, exc_value, traceback) + if self._client and self._raw_client: + await self._client.__aexit__(exc_type, exc_value, traceback) + + @property + def _connected_client(self) -> S3Client: + """Get the connected S3 client. + + Raises: + ValueError: If the client is not connected. + + Returns: + The connected S3 client. + """ + if not self._client: + msg = "Client not connected" + raise ValueError(msg) + return self._client + + @override + async def _setup(self) -> None: + """Setup the S3 client and ensure bucket exists. + + This method creates the S3 bucket if it doesn't already exist. It uses the + HeadBucket operation to check for bucket existence and creates it if not found. + """ + if not self._client and self._raw_client: + self._client = await self._raw_client.__aenter__() + + try: + # Check if bucket exists + await self._connected_client.head_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] + except Exception: + # Bucket doesn't exist, create it + import contextlib + + with contextlib.suppress(self._connected_client.exceptions.BucketAlreadyOwnedByYou): # pyright: ignore[reportUnknownMemberType] + await self._connected_client.create_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] + + def _get_s3_key(self, *, collection: str, key: str) -> str: + """Generate the S3 object key for a given collection and key. + + Args: + collection: The collection name. + key: The key within the collection. + + Returns: + The S3 object key in format: {collection}/{key} + """ + return f"{collection}/{key}" + + @override + async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: + """Retrieve a managed entry from S3. + + This method fetches the object from S3, deserializes the JSON body to a ManagedEntry, + and checks for client-side TTL expiration. If the entry has expired, it is deleted + and None is returned. + + Args: + key: The key to retrieve. + collection: The collection to retrieve from. + + Returns: + The ManagedEntry if found and not expired, otherwise None. 
+ """ + s3_key = self._get_s3_key(collection=collection, key=key) + + try: + response = await self._connected_client.get_object( # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + Bucket=self._bucket_name, + Key=s3_key, + ) + + # Read the object body + body_bytes = await response["Body"].read() # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + json_value = body_bytes.decode("utf-8") # pyright: ignore[reportUnknownMemberType] + + # Deserialize to ManagedEntry + managed_entry = ManagedEntry.from_json(json_str=json_value) + + # Check for client-side expiration + if managed_entry.expires_at and managed_entry.expires_at <= datetime.now(tz=timezone.utc): + # Entry expired, delete it and return None + await self._delete_managed_entry(key=key, collection=collection) + return None + return managed_entry # noqa: TRY300 + + except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType] + # Object doesn't exist + return None + + @override + async def _put_managed_entry( + self, + *, + key: str, + collection: str, + managed_entry: ManagedEntry, + ) -> None: + """Store a managed entry in S3. + + This method serializes the ManagedEntry to JSON and stores it as an S3 object. + TTL information is stored in the object metadata for potential use by S3 lifecycle + policies (though lifecycle policies don't support atomic TTL+retrieval, so client-side + checking is still required). + + Args: + key: The key to store. + collection: The collection to store in. + managed_entry: The ManagedEntry to store. + """ + s3_key = self._get_s3_key(collection=collection, key=key) + json_value = managed_entry.to_json() + + # Prepare metadata + metadata: dict[str, str] = {} + if managed_entry.expires_at: + metadata["expires-at"] = managed_entry.expires_at.isoformat() + if managed_entry.created_at: + metadata["created-at"] = managed_entry.created_at.isoformat() + + await self._connected_client.put_object( # pyright: ignore[reportUnknownMemberType] + Bucket=self._bucket_name, + Key=s3_key, + Body=json_value.encode("utf-8"), + ContentType="application/json", + Metadata=metadata, + ) + + @override + async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: + """Delete a managed entry from S3. + + Args: + key: The key to delete. + collection: The collection to delete from. + + Returns: + True if an object was deleted, False if the object didn't exist. 
+ """ + s3_key = self._get_s3_key(collection=collection, key=key) + + try: + # Check if object exists before deletion + await self._connected_client.head_object( # pyright: ignore[reportUnknownMemberType] + Bucket=self._bucket_name, + Key=s3_key, + ) + + except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType, reportUnknownAttributeType] + # Object doesn't exist + return False + except Exception: + # For 404 errors that don't raise NoSuchKey exception + return False + else: + # Object exists, delete it + await self._connected_client.delete_object( # pyright: ignore[reportUnknownMemberType] + Bucket=self._bucket_name, + Key=s3_key, + ) + return True + + @override + async def _close(self) -> None: + """Close the S3 client.""" + if self._client and self._raw_client: + await self._client.__aexit__(None, None, None) # pyright: ignore[reportUnknownMemberType] diff --git a/key-value/key-value-aio/tests/stores/s3/__init__.py b/key-value/key-value-aio/tests/stores/s3/__init__.py new file mode 100644 index 00000000..d1936388 --- /dev/null +++ b/key-value/key-value-aio/tests/stores/s3/__init__.py @@ -0,0 +1 @@ +"""Tests for S3Store.""" diff --git a/key-value/key-value-aio/tests/stores/s3/test_s3.py b/key-value/key-value-aio/tests/stores/s3/test_s3.py new file mode 100644 index 00000000..b1e0bc88 --- /dev/null +++ b/key-value/key-value-aio/tests/stores/s3/test_s3.py @@ -0,0 +1,110 @@ +import contextlib +from collections.abc import AsyncGenerator + +import pytest +from key_value.shared.stores.wait import async_wait_for_true +from typing_extensions import override + +from key_value.aio.stores.base import BaseStore +from key_value.aio.stores.s3 import S3Store +from tests.conftest import docker_container, should_skip_docker_tests +from tests.stores.base import BaseStoreTests, ContextManagerStoreTestMixin + +# S3 test configuration (using LocalStack) +S3_HOST = "localhost" +S3_HOST_PORT = 4566 +S3_ENDPOINT = f"http://{S3_HOST}:{S3_HOST_PORT}" +S3_TEST_BUCKET = "kv-store-test" + +WAIT_FOR_S3_TIMEOUT = 30 + +# LocalStack versions to test +LOCALSTACK_VERSIONS_TO_TEST = [ + "4.0.3", # Latest stable version +] + +LOCALSTACK_CONTAINER_PORT = 4566 + + +async def ping_s3() -> bool: + """Check if LocalStack S3 is running.""" + try: + import aioboto3 + + session = aioboto3.Session( + aws_access_key_id="test", + aws_secret_access_key="test", # noqa: S106 + region_name="us-east-1", + ) + async with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client: # type: ignore + await client.list_buckets() # type: ignore + except Exception: + return False + else: + return True + + +class S3FailedToStartError(Exception): + pass + + +@pytest.mark.skipif(should_skip_docker_tests(), reason="Docker is not available") +class TestS3Store(ContextManagerStoreTestMixin, BaseStoreTests): + @pytest.fixture(autouse=True, scope="session", params=LOCALSTACK_VERSIONS_TO_TEST) + async def setup_s3(self, request: pytest.FixtureRequest) -> AsyncGenerator[None, None]: + version = request.param + + # LocalStack container for S3 + with docker_container( + f"s3-test-{version}", + f"localstack/localstack:{version}", + {str(LOCALSTACK_CONTAINER_PORT): S3_HOST_PORT}, + environment={"SERVICES": "s3"}, + ): + if not await async_wait_for_true(bool_fn=ping_s3, tries=WAIT_FOR_S3_TIMEOUT, wait_time=1): + msg = f"LocalStack S3 {version} failed to start" + raise S3FailedToStartError(msg) + + yield + + @override + @pytest.fixture + async def store(self, setup_s3: None) -> S3Store: + store = S3Store( + 
bucket_name=S3_TEST_BUCKET, + endpoint_url=S3_ENDPOINT, + aws_access_key_id="test", + aws_secret_access_key="test", # noqa: S106 + region_name="us-east-1", + ) + + # Clean up test bucket if it exists + import aioboto3 + + session = aioboto3.Session( + aws_access_key_id="test", + aws_secret_access_key="test", # noqa: S106 + region_name="us-east-1", + ) + async with ( + session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client, # type: ignore + contextlib.suppress(Exception), + ): + # Delete all objects in the bucket + response = await client.list_objects_v2(Bucket=S3_TEST_BUCKET) # type: ignore + if "Contents" in response: + for obj in response["Contents"]: + await client.delete_object(Bucket=S3_TEST_BUCKET, Key=obj["Key"]) # type: ignore + + # Delete the bucket + await client.delete_bucket(Bucket=S3_TEST_BUCKET) # type: ignore + + return store + + @pytest.fixture + async def s3_store(self, store: S3Store) -> S3Store: + return store + + @pytest.mark.skip(reason="Distributed Caches are unbounded") + @override + async def test_not_unbounded(self, store: BaseStore): ... diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py index 1b3ff65e..3dc71480 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py @@ -285,6 +285,7 @@ def _put_managed_entries( created_at: datetime, expires_at: datetime | None, ) -> None: + """Store multiple managed entries by key in the specified collection. Args: diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py new file mode 100644 index 00000000..51f7d45f --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py @@ -0,0 +1,8 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +"""AWS S3-based key-value store.""" + +from key_value.sync.code_gen.stores.s3.store import S3Store + +__all__ = ["S3Store"] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py new file mode 100644 index 00000000..6b09977b --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py @@ -0,0 +1,302 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'store.py' +# DO NOT CHANGE! Change the original file instead. +from datetime import datetime, timezone +from types import TracebackType +from typing import TYPE_CHECKING, Any, overload + +from key_value.shared.utils.managed_entry import ManagedEntry +from typing_extensions import Self, override + +from key_value.sync.code_gen.stores.base import BaseContextManagerStore, BaseStore + +try: + import aioboto3 + from aioboto3.session import Session # noqa: TC002 +except ImportError as e: + msg = "S3Store requires py-key-value-aio[s3]" + raise ImportError(msg) from e + +# aioboto3 generates types at runtime, so we use AioBaseClient at runtime but S3Client during static type checking +if TYPE_CHECKING: + from types_aiobotocore_s3.client import S3Client +else: + from aiobotocore.client import AioBaseClient as S3Client + + +class S3Store(BaseContextManagerStore, BaseStore): + """AWS S3-based key-value store. 
+ + This store uses AWS S3 to store key-value pairs as objects. Each entry is stored + as a separate S3 object with the path format: {collection}/{key}. The ManagedEntry + is serialized to JSON and stored as the object body. TTL information is stored in + S3 object metadata and checked client-side during retrieval (S3 lifecycle policies + can be configured separately for background cleanup, but don't provide atomic TTL+retrieval). + + Example: + Basic usage with automatic AWS credentials: + + >>> async with S3Store(bucket_name="my-kv-store") as store: + ... await store.put(key="user:123", value={"name": "Alice"}, ttl=3600) + ... user = await store.get(key="user:123") + + With custom AWS credentials: + + >>> async with S3Store( + ... bucket_name="my-kv-store", + ... region_name="us-west-2", + ... aws_access_key_id="...", + ... aws_secret_access_key="...", + ... ) as store: + ... await store.put(key="config", value={"setting": "value"}) + + For local testing with LocalStack: + + >>> async with S3Store( + ... bucket_name="test-bucket", + ... endpoint_url="http://localhost:4566", + ... ) as store: + ... await store.put(key="test", value={"data": "test"}) + """ + + _session: aioboto3.Session # pyright: ignore[reportAny] + _bucket_name: str + _endpoint_url: str | None + _raw_client: Any # S3 client from aioboto3 + _client: S3Client | None + + @overload + def __init__(self, *, client: S3Client, bucket_name: str, default_collection: str | None = None) -> None: + """Initialize the S3 store with a pre-configured client. + + Args: + client: The S3 client to use. You must have entered the context manager before passing this in. + bucket_name: The name of the S3 bucket to use. + default_collection: The default collection to use if no collection is provided. + """ + + @overload + def __init__( + self, + *, + bucket_name: str, + region_name: str | None = None, + endpoint_url: str | None = None, + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + default_collection: str | None = None, + ) -> None: + """Initialize the S3 store with AWS credentials. + + Args: + bucket_name: The name of the S3 bucket to use. + region_name: AWS region name. Defaults to None (uses AWS default). + endpoint_url: Custom endpoint URL (useful for LocalStack/MinIO). Defaults to None. + aws_access_key_id: AWS access key ID. Defaults to None (uses AWS default credentials). + aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). + aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). + default_collection: The default collection to use if no collection is provided. + """ + + def __init__( + self, + *, + client: S3Client | None = None, + bucket_name: str, + region_name: str | None = None, + endpoint_url: str | None = None, + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + default_collection: str | None = None, + ) -> None: + """Initialize the S3 store. + + Args: + client: The S3 client to use. Defaults to None (creates a new client). + bucket_name: The name of the S3 bucket to use. + region_name: AWS region name. Defaults to None (uses AWS default). + endpoint_url: Custom endpoint URL (useful for LocalStack/MinIO). Defaults to None. + aws_access_key_id: AWS access key ID. Defaults to None (uses AWS default credentials). + aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). 
+ aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). + default_collection: The default collection to use if no collection is provided. + """ + self._bucket_name = bucket_name + self._endpoint_url = endpoint_url + + if client: + self._client = client + self._raw_client = None + else: + session: Session = aioboto3.Session( + region_name=region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_session_token=aws_session_token, + ) + self._raw_client = session.client(service_name="s3", endpoint_url=endpoint_url) # pyright: ignore[reportUnknownMemberType] + self._client = None + + super().__init__(default_collection=default_collection) + + @override + def __enter__(self) -> Self: + if self._raw_client: + self._client = self._raw_client.__enter__() + super().__enter__() + return self + + @override + def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None) -> None: + super().__exit__(exc_type, exc_value, traceback) + if self._client and self._raw_client: + self._client.__exit__(exc_type, exc_value, traceback) + + @property + def _connected_client(self) -> S3Client: + """Get the connected S3 client. + + Raises: + ValueError: If the client is not connected. + + Returns: + The connected S3 client. + """ + if not self._client: + msg = "Client not connected" + raise ValueError(msg) + return self._client + + @override + def _setup(self) -> None: + """Setup the S3 client and ensure bucket exists. + + This method creates the S3 bucket if it doesn't already exist. It uses the + HeadBucket operation to check for bucket existence and creates it if not found. + """ + if not self._client and self._raw_client: + self._client = self._raw_client.__enter__() + + try: + # Check if bucket exists + self._connected_client.head_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] + except Exception: + # Bucket doesn't exist, create it + import contextlib + + with contextlib.suppress(self._connected_client.exceptions.BucketAlreadyOwnedByYou): # pyright: ignore[reportUnknownMemberType] + self._connected_client.create_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] + + def _get_s3_key(self, *, collection: str, key: str) -> str: + """Generate the S3 object key for a given collection and key. + + Args: + collection: The collection name. + key: The key within the collection. + + Returns: + The S3 object key in format: {collection}/{key} + """ + return f"{collection}/{key}" + + @override + def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: + """Retrieve a managed entry from S3. + + This method fetches the object from S3, deserializes the JSON body to a ManagedEntry, + and checks for client-side TTL expiration. If the entry has expired, it is deleted + and None is returned. + + Args: + key: The key to retrieve. + collection: The collection to retrieve from. + + Returns: + The ManagedEntry if found and not expired, otherwise None. 
+ """ + s3_key = self._get_s3_key(collection=collection, key=key) + + try: # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + response = self._connected_client.get_object(Bucket=self._bucket_name, Key=s3_key) + + # Read the object body + body_bytes = response["Body"].read() # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + json_value = body_bytes.decode("utf-8") # pyright: ignore[reportUnknownMemberType] + + # Deserialize to ManagedEntry + managed_entry = ManagedEntry.from_json(json_str=json_value) + + # Check for client-side expiration + if managed_entry.expires_at and managed_entry.expires_at <= datetime.now(tz=timezone.utc): + # Entry expired, delete it and return None + self._delete_managed_entry(key=key, collection=collection) + return None + return managed_entry # noqa: TRY300 + except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType] + # Object doesn't exist + return None + + @override + def _put_managed_entry(self, *, key: str, collection: str, managed_entry: ManagedEntry) -> None: + """Store a managed entry in S3. + + This method serializes the ManagedEntry to JSON and stores it as an S3 object. + TTL information is stored in the object metadata for potential use by S3 lifecycle + policies (though lifecycle policies don't support atomic TTL+retrieval, so client-side + checking is still required). + + Args: + key: The key to store. + collection: The collection to store in. + managed_entry: The ManagedEntry to store. + """ + s3_key = self._get_s3_key(collection=collection, key=key) + json_value = managed_entry.to_json() + + # Prepare metadata + metadata: dict[str, str] = {} + if managed_entry.expires_at: + metadata["expires-at"] = managed_entry.expires_at.isoformat() + if managed_entry.created_at: + metadata["created-at"] = managed_entry.created_at.isoformat() + # pyright: ignore[reportUnknownMemberType] + self._connected_client.put_object( + Bucket=self._bucket_name, Key=s3_key, Body=json_value.encode("utf-8"), ContentType="application/json", Metadata=metadata + ) + + @override + def _delete_managed_entry(self, *, key: str, collection: str) -> bool: + """Delete a managed entry from S3. + + Args: + key: The key to delete. + collection: The collection to delete from. + + Returns: + True if an object was deleted, False if the object didn't exist. 
+ """ + s3_key = self._get_s3_key(collection=collection, key=key) + + try: + # Check if object exists before deletion + # pyright: ignore[reportUnknownMemberType] + self._connected_client.head_object(Bucket=self._bucket_name, Key=s3_key) + except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType, reportUnknownAttributeType] + # Object doesn't exist + return False + except Exception: + # For 404 errors that don't raise NoSuchKey exception + return False + else: + # Object exists, delete it + # pyright: ignore[reportUnknownMemberType] + self._connected_client.delete_object(Bucket=self._bucket_name, Key=s3_key) + return True + + @override + def _close(self) -> None: + """Close the S3 client.""" + if self._client and self._raw_client: + self._client.__exit__(None, None, None) # pyright: ignore[reportUnknownMemberType] diff --git a/key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py b/key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py new file mode 100644 index 00000000..51f7d45f --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py @@ -0,0 +1,8 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +"""AWS S3-based key-value store.""" + +from key_value.sync.code_gen.stores.s3.store import S3Store + +__all__ = ["S3Store"] diff --git a/key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py b/key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py new file mode 100644 index 00000000..fddddb9b --- /dev/null +++ b/key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py @@ -0,0 +1,4 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +"""Tests for S3Store.""" diff --git a/key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py b/key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py new file mode 100644 index 00000000..b9040b04 --- /dev/null +++ b/key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py @@ -0,0 +1,102 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'test_s3.py' +# DO NOT CHANGE! Change the original file instead. 
+import contextlib +from collections.abc import Generator + +import pytest +from key_value.shared.stores.wait import wait_for_true +from typing_extensions import override + +from key_value.sync.code_gen.stores.base import BaseStore +from key_value.sync.code_gen.stores.s3 import S3Store +from tests.code_gen.conftest import docker_container, should_skip_docker_tests +from tests.code_gen.stores.base import BaseStoreTests, ContextManagerStoreTestMixin + +# S3 test configuration (using LocalStack) +S3_HOST = "localhost" +S3_HOST_PORT = 4566 +S3_ENDPOINT = f"http://{S3_HOST}:{S3_HOST_PORT}" +S3_TEST_BUCKET = "kv-store-test" + +WAIT_FOR_S3_TIMEOUT = 30 + +# LocalStack versions to test + +# Latest stable version +LOCALSTACK_VERSIONS_TO_TEST = ["4.0.3"] + +LOCALSTACK_CONTAINER_PORT = 4566 + + +def ping_s3() -> bool: + """Check if LocalStack S3 is running.""" + try: + import aioboto3 + + session = aioboto3.Session(aws_access_key_id="test", aws_secret_access_key="test", region_name="us-east-1") + with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client: # type: ignore + client.list_buckets() # type: ignore + except Exception: + return False + else: + return True + + +class S3FailedToStartError(Exception): + pass + + +@pytest.mark.skipif(should_skip_docker_tests(), reason="Docker is not available") +class TestS3Store(ContextManagerStoreTestMixin, BaseStoreTests): + @pytest.fixture(autouse=True, scope="session", params=LOCALSTACK_VERSIONS_TO_TEST) + def setup_s3(self, request: pytest.FixtureRequest) -> Generator[None, None, None]: + version = request.param + + # LocalStack container for S3 + with docker_container( + f"s3-test-{version}", + f"localstack/localstack:{version}", + {str(LOCALSTACK_CONTAINER_PORT): S3_HOST_PORT}, + environment={"SERVICES": "s3"}, + ): + if not wait_for_true(bool_fn=ping_s3, tries=WAIT_FOR_S3_TIMEOUT, wait_time=1): + msg = f"LocalStack S3 {version} failed to start" + raise S3FailedToStartError(msg) + + yield + + @override + @pytest.fixture + def store(self, setup_s3: None) -> S3Store: + store = S3Store( + bucket_name=S3_TEST_BUCKET, + endpoint_url=S3_ENDPOINT, + aws_access_key_id="test", + aws_secret_access_key="test", + region_name="us-east-1", + ) + + # Clean up test bucket if it exists + import aioboto3 + + session = aioboto3.Session(aws_access_key_id="test", aws_secret_access_key="test", region_name="us-east-1") + with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client, contextlib.suppress(Exception): # type: ignore + # Delete all objects in the bucket + response = client.list_objects_v2(Bucket=S3_TEST_BUCKET) # type: ignore + if "Contents" in response: + for obj in response["Contents"]: + client.delete_object(Bucket=S3_TEST_BUCKET, Key=obj["Key"]) # type: ignore + + # Delete the bucket + client.delete_bucket(Bucket=S3_TEST_BUCKET) # type: ignore + + return store + + @pytest.fixture + def s3_store(self, store: S3Store) -> S3Store: + return store + + @pytest.mark.skip(reason="Distributed Caches are unbounded") + @override + def test_not_unbounded(self, store: BaseStore): ... 
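The docstrings above note that S3 lifecycle policies can be configured separately for background cleanup but cannot provide atomic TTL-plus-retrieval, which is why expiration is always checked client-side on read. As an illustration only (not part of this patch), a lifecycle rule that expires objects under a collection prefix after a fixed number of days might look like the sketch below; the bucket name "my-kv-store", prefix "users", and seven-day window are placeholders. Lifecycle rules act on object age or prefix, not on the per-object "expires-at" metadata the store writes, so they complement rather than replace the client-side check.

import asyncio

import aioboto3


async def add_expiry_lifecycle_rule(bucket_name: str, prefix: str, days: int) -> None:
    # Illustrative sketch: expire objects under a collection prefix after `days` days.
    # Lifecycle rules key off object age, not the store's `expires-at` metadata,
    # so S3Store's client-side TTL check on read is still required for correctness.
    session = aioboto3.Session()
    async with session.client(service_name="s3") as client:
        await client.put_bucket_lifecycle_configuration(
            Bucket=bucket_name,
            LifecycleConfiguration={
                "Rules": [
                    {
                        "ID": f"expire-{prefix}",
                        "Filter": {"Prefix": f"{prefix}/"},
                        "Status": "Enabled",
                        "Expiration": {"Days": days},
                    }
                ]
            },
        )


if __name__ == "__main__":
    # Requires AWS credentials (or a LocalStack endpoint) to actually run.
    asyncio.run(add_expiry_lifecycle_rule("my-kv-store", "users", 7))
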
diff --git a/uv.lock b/uv.lock index bea6629b..e1355868 100644 --- a/uv.lock +++ b/uv.lock @@ -1743,6 +1743,10 @@ redis = [ rocksdb = [ { name = "rocksdict" }, ] +s3 = [ + { name = "aioboto3" }, + { name = "types-aiobotocore-s3" }, +] valkey = [ { name = "valkey-glide" }, ] @@ -1757,13 +1761,14 @@ wrappers-encryption = [ [package.dev-dependencies] dev = [ { name = "py-key-value", extra = ["dev"] }, - { name = "py-key-value-aio", extra = ["disk", "dynamodb", "elasticsearch", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, + { name = "py-key-value-aio", extra = ["disk", "dynamodb", "elasticsearch", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "s3", "vault", "wrappers-encryption"] }, { name = "py-key-value-aio", extra = ["valkey"], marker = "sys_platform != 'win32'" }, ] [package.metadata] requires-dist = [ { name = "aioboto3", marker = "extra == 'dynamodb'", specifier = ">=13.3.0" }, + { name = "aioboto3", marker = "extra == 's3'", specifier = ">=13.3.0" }, { name = "aiohttp", marker = "extra == 'elasticsearch'", specifier = ">=3.12" }, { name = "aiomcache", marker = "extra == 'memcached'", specifier = ">=0.8.0" }, { name = "beartype", specifier = ">=0.20.0" }, @@ -1783,16 +1788,17 @@ requires-dist = [ { name = "rocksdict", marker = "python_full_version >= '3.12' and extra == 'rocksdb'", specifier = ">=0.3.24" }, { name = "rocksdict", marker = "python_full_version < '3.12' and extra == 'rocksdb'", specifier = ">=0.3.2" }, { name = "types-aiobotocore-dynamodb", marker = "extra == 'dynamodb'", specifier = ">=2.16.0" }, + { name = "types-aiobotocore-s3", marker = "extra == 's3'", specifier = ">=2.16.0" }, { name = "types-hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "valkey-glide", marker = "extra == 'valkey'", specifier = ">=2.1.0" }, ] -provides-extras = ["memory", "disk", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "dynamodb", "keyring", "keyring-linux", "pydantic", "rocksdb", "wrappers-encryption"] +provides-extras = ["memory", "disk", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "dynamodb", "s3", "keyring", "keyring-linux", "pydantic", "rocksdb", "wrappers-encryption"] [package.metadata.requires-dev] dev = [ { name = "py-key-value", extras = ["dev"], editable = "." 
}, { name = "py-key-value-aio", extras = ["keyring"] }, - { name = "py-key-value-aio", extras = ["memory", "disk", "redis", "elasticsearch", "memcached", "mongodb", "vault", "dynamodb", "rocksdb"] }, + { name = "py-key-value-aio", extras = ["memory", "disk", "redis", "elasticsearch", "memcached", "mongodb", "vault", "dynamodb", "s3", "rocksdb"] }, { name = "py-key-value-aio", extras = ["pydantic"] }, { name = "py-key-value-aio", extras = ["valkey"], marker = "sys_platform != 'win32'" }, { name = "py-key-value-aio", extras = ["wrappers-encryption"] }, @@ -2617,6 +2623,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/4f/05d80aa8b5a95b82ddb89547c3a037b9460702286c66ca6e0fbb8fa2ce86/types_aiobotocore_dynamodb-2.25.0-py3-none-any.whl", hash = "sha256:de791dfcef90eb3431c09b63419301f9ff824d82970623e149a427d5fd325430", size = 57971, upload-time = "2025-10-11T01:27:39.639Z" }, ] +[[package]] +name = "types-aiobotocore-s3" +version = "2.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/c7/d69fd0aa19b61ae67357b4d4aa546be7a6e0b38cde5d0c0704208e00791a/types_aiobotocore_s3-2.25.0.tar.gz", hash = "sha256:3885d2d975b02490c4a59ab0d9c607dec2b7d650189f19a2b6a5e0f81a07e56d", size = 76334, upload-time = "2025-10-11T01:32:43.509Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/42/0606f39a94b828f53db6741a5803626766379fc653c43fa98d1a3da28fb8/types_aiobotocore_s3-2.25.0-py3-none-any.whl", hash = "sha256:637b8b3eabd71a8083e835adb696e08b8bab75fb2e3f1ef3020334659abdf698", size = 83849, upload-time = "2025-10-11T01:32:42.437Z" }, +] + [[package]] name = "types-hvac" version = "2.3.0.20250914" From 1b1c3d784d1f18abfb56a1b65382972798475658 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 02:47:27 +0000 Subject: [PATCH 02/19] fix: exclude S3 from sync library and improve test configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add S105 and S106 to ignored linting rules for test files (hardcoded test credentials) - Exclude S3Store from sync library generation (uses aioboto3 which is async-only) - Fix contextlib.suppress usage in S3 test file for proper async context handling - Remove generated sync S3 files S3Store follows the same pattern as DynamoDBStore - it's async-only because aioboto3 doesn't have a direct sync equivalent. The README correctly indicates S3 as async-only. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: William Easton --- .../tests/stores/dynamodb/test_dynamodb.py | 6 +- .../key-value-aio/tests/stores/s3/test_s3.py | 28 +- .../tests/stores/vault/test_vault.py | 2 +- .../key_value/sync/code_gen/stores/base.py | 1 - .../sync/code_gen/stores/s3/__init__.py | 8 - .../sync/code_gen/stores/s3/store.py | 302 ------------------ .../src/key_value/sync/stores/s3/__init__.py | 8 - .../tests/code_gen/stores/s3/__init__.py | 4 - .../tests/code_gen/stores/s3/test_s3.py | 102 ------ .../tests/code_gen/stores/vault/test_vault.py | 2 +- pyproject.toml | 2 + scripts/build_sync_library.py | 2 + 12 files changed, 22 insertions(+), 445 deletions(-) delete mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py delete mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py delete mode 100644 key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py delete mode 100644 key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py delete mode 100644 key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py diff --git a/key-value/key-value-aio/tests/stores/dynamodb/test_dynamodb.py b/key-value/key-value-aio/tests/stores/dynamodb/test_dynamodb.py index d29733e2..d8518674 100644 --- a/key-value/key-value-aio/tests/stores/dynamodb/test_dynamodb.py +++ b/key-value/key-value-aio/tests/stores/dynamodb/test_dynamodb.py @@ -40,7 +40,7 @@ async def ping_dynamodb() -> bool: session = aioboto3.Session( aws_access_key_id="test", - aws_secret_access_key="test", # noqa: S106 + aws_secret_access_key="test", region_name="us-east-1", ) async with session.client(service_name="dynamodb", endpoint_url=DYNAMODB_ENDPOINT) as client: # type: ignore @@ -89,7 +89,7 @@ async def store(self, setup_dynamodb: None) -> DynamoDBStore: table_name=DYNAMODB_TEST_TABLE, endpoint_url=DYNAMODB_ENDPOINT, aws_access_key_id="test", - aws_secret_access_key="test", # noqa: S106 + aws_secret_access_key="test", region_name="us-east-1", ) @@ -98,7 +98,7 @@ async def store(self, setup_dynamodb: None) -> DynamoDBStore: session = aioboto3.Session( aws_access_key_id="test", - aws_secret_access_key="test", # noqa: S106 + aws_secret_access_key="test", region_name="us-east-1", ) async with session.client(service_name="dynamodb", endpoint_url=DYNAMODB_ENDPOINT) as client: # type: ignore diff --git a/key-value/key-value-aio/tests/stores/s3/test_s3.py b/key-value/key-value-aio/tests/stores/s3/test_s3.py index b1e0bc88..f07092b1 100644 --- a/key-value/key-value-aio/tests/stores/s3/test_s3.py +++ b/key-value/key-value-aio/tests/stores/s3/test_s3.py @@ -33,7 +33,7 @@ async def ping_s3() -> bool: session = aioboto3.Session( aws_access_key_id="test", - aws_secret_access_key="test", # noqa: S106 + aws_secret_access_key="test", region_name="us-east-1", ) async with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client: # type: ignore @@ -74,7 +74,7 @@ async def store(self, setup_s3: None) -> S3Store: bucket_name=S3_TEST_BUCKET, endpoint_url=S3_ENDPOINT, aws_access_key_id="test", - aws_secret_access_key="test", # noqa: S106 + aws_secret_access_key="test", region_name="us-east-1", ) @@ -83,21 +83,19 @@ async def store(self, setup_s3: None) -> S3Store: session = aioboto3.Session( aws_access_key_id="test", - aws_secret_access_key="test", # noqa: S106 + aws_secret_access_key="test", region_name="us-east-1", ) - async with ( - session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client, # type: ignore - 
contextlib.suppress(Exception), - ): - # Delete all objects in the bucket - response = await client.list_objects_v2(Bucket=S3_TEST_BUCKET) # type: ignore - if "Contents" in response: - for obj in response["Contents"]: - await client.delete_object(Bucket=S3_TEST_BUCKET, Key=obj["Key"]) # type: ignore - - # Delete the bucket - await client.delete_bucket(Bucket=S3_TEST_BUCKET) # type: ignore + async with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client: # type: ignore + with contextlib.suppress(Exception): + # Delete all objects in the bucket + response = await client.list_objects_v2(Bucket=S3_TEST_BUCKET) # type: ignore + if "Contents" in response: + for obj in response["Contents"]: # type: ignore + await client.delete_object(Bucket=S3_TEST_BUCKET, Key=obj["Key"]) # type: ignore + + # Delete the bucket + await client.delete_bucket(Bucket=S3_TEST_BUCKET) # type: ignore return store diff --git a/key-value/key-value-aio/tests/stores/vault/test_vault.py b/key-value/key-value-aio/tests/stores/vault/test_vault.py index e9704641..b7e25ec0 100644 --- a/key-value/key-value-aio/tests/stores/vault/test_vault.py +++ b/key-value/key-value-aio/tests/stores/vault/test_vault.py @@ -13,7 +13,7 @@ # Vault test configuration VAULT_HOST = "localhost" VAULT_PORT = 8200 -VAULT_TOKEN = "dev-root-token" # noqa: S105 +VAULT_TOKEN = "dev-root-token" VAULT_MOUNT_POINT = "secret" VAULT_CONTAINER_PORT = 8200 diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py index 3dc71480..1b3ff65e 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/base.py @@ -285,7 +285,6 @@ def _put_managed_entries( created_at: datetime, expires_at: datetime | None, ) -> None: - """Store multiple managed entries by key in the specified collection. Args: diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py deleted file mode 100644 index 51f7d45f..00000000 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file '__init__.py' -# DO NOT CHANGE! Change the original file instead. -"""AWS S3-based key-value store.""" - -from key_value.sync.code_gen.stores.s3.store import S3Store - -__all__ = ["S3Store"] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py deleted file mode 100644 index 6b09977b..00000000 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/s3/store.py +++ /dev/null @@ -1,302 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file 'store.py' -# DO NOT CHANGE! Change the original file instead. 
-from datetime import datetime, timezone -from types import TracebackType -from typing import TYPE_CHECKING, Any, overload - -from key_value.shared.utils.managed_entry import ManagedEntry -from typing_extensions import Self, override - -from key_value.sync.code_gen.stores.base import BaseContextManagerStore, BaseStore - -try: - import aioboto3 - from aioboto3.session import Session # noqa: TC002 -except ImportError as e: - msg = "S3Store requires py-key-value-aio[s3]" - raise ImportError(msg) from e - -# aioboto3 generates types at runtime, so we use AioBaseClient at runtime but S3Client during static type checking -if TYPE_CHECKING: - from types_aiobotocore_s3.client import S3Client -else: - from aiobotocore.client import AioBaseClient as S3Client - - -class S3Store(BaseContextManagerStore, BaseStore): - """AWS S3-based key-value store. - - This store uses AWS S3 to store key-value pairs as objects. Each entry is stored - as a separate S3 object with the path format: {collection}/{key}. The ManagedEntry - is serialized to JSON and stored as the object body. TTL information is stored in - S3 object metadata and checked client-side during retrieval (S3 lifecycle policies - can be configured separately for background cleanup, but don't provide atomic TTL+retrieval). - - Example: - Basic usage with automatic AWS credentials: - - >>> async with S3Store(bucket_name="my-kv-store") as store: - ... await store.put(key="user:123", value={"name": "Alice"}, ttl=3600) - ... user = await store.get(key="user:123") - - With custom AWS credentials: - - >>> async with S3Store( - ... bucket_name="my-kv-store", - ... region_name="us-west-2", - ... aws_access_key_id="...", - ... aws_secret_access_key="...", - ... ) as store: - ... await store.put(key="config", value={"setting": "value"}) - - For local testing with LocalStack: - - >>> async with S3Store( - ... bucket_name="test-bucket", - ... endpoint_url="http://localhost:4566", - ... ) as store: - ... await store.put(key="test", value={"data": "test"}) - """ - - _session: aioboto3.Session # pyright: ignore[reportAny] - _bucket_name: str - _endpoint_url: str | None - _raw_client: Any # S3 client from aioboto3 - _client: S3Client | None - - @overload - def __init__(self, *, client: S3Client, bucket_name: str, default_collection: str | None = None) -> None: - """Initialize the S3 store with a pre-configured client. - - Args: - client: The S3 client to use. You must have entered the context manager before passing this in. - bucket_name: The name of the S3 bucket to use. - default_collection: The default collection to use if no collection is provided. - """ - - @overload - def __init__( - self, - *, - bucket_name: str, - region_name: str | None = None, - endpoint_url: str | None = None, - aws_access_key_id: str | None = None, - aws_secret_access_key: str | None = None, - aws_session_token: str | None = None, - default_collection: str | None = None, - ) -> None: - """Initialize the S3 store with AWS credentials. - - Args: - bucket_name: The name of the S3 bucket to use. - region_name: AWS region name. Defaults to None (uses AWS default). - endpoint_url: Custom endpoint URL (useful for LocalStack/MinIO). Defaults to None. - aws_access_key_id: AWS access key ID. Defaults to None (uses AWS default credentials). - aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). - aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). 
- default_collection: The default collection to use if no collection is provided. - """ - - def __init__( - self, - *, - client: S3Client | None = None, - bucket_name: str, - region_name: str | None = None, - endpoint_url: str | None = None, - aws_access_key_id: str | None = None, - aws_secret_access_key: str | None = None, - aws_session_token: str | None = None, - default_collection: str | None = None, - ) -> None: - """Initialize the S3 store. - - Args: - client: The S3 client to use. Defaults to None (creates a new client). - bucket_name: The name of the S3 bucket to use. - region_name: AWS region name. Defaults to None (uses AWS default). - endpoint_url: Custom endpoint URL (useful for LocalStack/MinIO). Defaults to None. - aws_access_key_id: AWS access key ID. Defaults to None (uses AWS default credentials). - aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). - aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). - default_collection: The default collection to use if no collection is provided. - """ - self._bucket_name = bucket_name - self._endpoint_url = endpoint_url - - if client: - self._client = client - self._raw_client = None - else: - session: Session = aioboto3.Session( - region_name=region_name, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_session_token=aws_session_token, - ) - self._raw_client = session.client(service_name="s3", endpoint_url=endpoint_url) # pyright: ignore[reportUnknownMemberType] - self._client = None - - super().__init__(default_collection=default_collection) - - @override - def __enter__(self) -> Self: - if self._raw_client: - self._client = self._raw_client.__enter__() - super().__enter__() - return self - - @override - def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None) -> None: - super().__exit__(exc_type, exc_value, traceback) - if self._client and self._raw_client: - self._client.__exit__(exc_type, exc_value, traceback) - - @property - def _connected_client(self) -> S3Client: - """Get the connected S3 client. - - Raises: - ValueError: If the client is not connected. - - Returns: - The connected S3 client. - """ - if not self._client: - msg = "Client not connected" - raise ValueError(msg) - return self._client - - @override - def _setup(self) -> None: - """Setup the S3 client and ensure bucket exists. - - This method creates the S3 bucket if it doesn't already exist. It uses the - HeadBucket operation to check for bucket existence and creates it if not found. - """ - if not self._client and self._raw_client: - self._client = self._raw_client.__enter__() - - try: - # Check if bucket exists - self._connected_client.head_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] - except Exception: - # Bucket doesn't exist, create it - import contextlib - - with contextlib.suppress(self._connected_client.exceptions.BucketAlreadyOwnedByYou): # pyright: ignore[reportUnknownMemberType] - self._connected_client.create_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] - - def _get_s3_key(self, *, collection: str, key: str) -> str: - """Generate the S3 object key for a given collection and key. - - Args: - collection: The collection name. - key: The key within the collection. 
- - Returns: - The S3 object key in format: {collection}/{key} - """ - return f"{collection}/{key}" - - @override - def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: - """Retrieve a managed entry from S3. - - This method fetches the object from S3, deserializes the JSON body to a ManagedEntry, - and checks for client-side TTL expiration. If the entry has expired, it is deleted - and None is returned. - - Args: - key: The key to retrieve. - collection: The collection to retrieve from. - - Returns: - The ManagedEntry if found and not expired, otherwise None. - """ - s3_key = self._get_s3_key(collection=collection, key=key) - - try: # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] - response = self._connected_client.get_object(Bucket=self._bucket_name, Key=s3_key) - - # Read the object body - body_bytes = response["Body"].read() # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] - json_value = body_bytes.decode("utf-8") # pyright: ignore[reportUnknownMemberType] - - # Deserialize to ManagedEntry - managed_entry = ManagedEntry.from_json(json_str=json_value) - - # Check for client-side expiration - if managed_entry.expires_at and managed_entry.expires_at <= datetime.now(tz=timezone.utc): - # Entry expired, delete it and return None - self._delete_managed_entry(key=key, collection=collection) - return None - return managed_entry # noqa: TRY300 - except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType] - # Object doesn't exist - return None - - @override - def _put_managed_entry(self, *, key: str, collection: str, managed_entry: ManagedEntry) -> None: - """Store a managed entry in S3. - - This method serializes the ManagedEntry to JSON and stores it as an S3 object. - TTL information is stored in the object metadata for potential use by S3 lifecycle - policies (though lifecycle policies don't support atomic TTL+retrieval, so client-side - checking is still required). - - Args: - key: The key to store. - collection: The collection to store in. - managed_entry: The ManagedEntry to store. - """ - s3_key = self._get_s3_key(collection=collection, key=key) - json_value = managed_entry.to_json() - - # Prepare metadata - metadata: dict[str, str] = {} - if managed_entry.expires_at: - metadata["expires-at"] = managed_entry.expires_at.isoformat() - if managed_entry.created_at: - metadata["created-at"] = managed_entry.created_at.isoformat() - # pyright: ignore[reportUnknownMemberType] - self._connected_client.put_object( - Bucket=self._bucket_name, Key=s3_key, Body=json_value.encode("utf-8"), ContentType="application/json", Metadata=metadata - ) - - @override - def _delete_managed_entry(self, *, key: str, collection: str) -> bool: - """Delete a managed entry from S3. - - Args: - key: The key to delete. - collection: The collection to delete from. - - Returns: - True if an object was deleted, False if the object didn't exist. 
- """ - s3_key = self._get_s3_key(collection=collection, key=key) - - try: - # Check if object exists before deletion - # pyright: ignore[reportUnknownMemberType] - self._connected_client.head_object(Bucket=self._bucket_name, Key=s3_key) - except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType, reportUnknownAttributeType] - # Object doesn't exist - return False - except Exception: - # For 404 errors that don't raise NoSuchKey exception - return False - else: - # Object exists, delete it - # pyright: ignore[reportUnknownMemberType] - self._connected_client.delete_object(Bucket=self._bucket_name, Key=s3_key) - return True - - @override - def _close(self) -> None: - """Close the S3 client.""" - if self._client and self._raw_client: - self._client.__exit__(None, None, None) # pyright: ignore[reportUnknownMemberType] diff --git a/key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py b/key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py deleted file mode 100644 index 51f7d45f..00000000 --- a/key-value/key-value-sync/src/key_value/sync/stores/s3/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file '__init__.py' -# DO NOT CHANGE! Change the original file instead. -"""AWS S3-based key-value store.""" - -from key_value.sync.code_gen.stores.s3.store import S3Store - -__all__ = ["S3Store"] diff --git a/key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py b/key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py deleted file mode 100644 index fddddb9b..00000000 --- a/key-value/key-value-sync/tests/code_gen/stores/s3/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file '__init__.py' -# DO NOT CHANGE! Change the original file instead. -"""Tests for S3Store.""" diff --git a/key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py b/key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py deleted file mode 100644 index b9040b04..00000000 --- a/key-value/key-value-sync/tests/code_gen/stores/s3/test_s3.py +++ /dev/null @@ -1,102 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file 'test_s3.py' -# DO NOT CHANGE! Change the original file instead. 
-import contextlib -from collections.abc import Generator - -import pytest -from key_value.shared.stores.wait import wait_for_true -from typing_extensions import override - -from key_value.sync.code_gen.stores.base import BaseStore -from key_value.sync.code_gen.stores.s3 import S3Store -from tests.code_gen.conftest import docker_container, should_skip_docker_tests -from tests.code_gen.stores.base import BaseStoreTests, ContextManagerStoreTestMixin - -# S3 test configuration (using LocalStack) -S3_HOST = "localhost" -S3_HOST_PORT = 4566 -S3_ENDPOINT = f"http://{S3_HOST}:{S3_HOST_PORT}" -S3_TEST_BUCKET = "kv-store-test" - -WAIT_FOR_S3_TIMEOUT = 30 - -# LocalStack versions to test - -# Latest stable version -LOCALSTACK_VERSIONS_TO_TEST = ["4.0.3"] - -LOCALSTACK_CONTAINER_PORT = 4566 - - -def ping_s3() -> bool: - """Check if LocalStack S3 is running.""" - try: - import aioboto3 - - session = aioboto3.Session(aws_access_key_id="test", aws_secret_access_key="test", region_name="us-east-1") - with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client: # type: ignore - client.list_buckets() # type: ignore - except Exception: - return False - else: - return True - - -class S3FailedToStartError(Exception): - pass - - -@pytest.mark.skipif(should_skip_docker_tests(), reason="Docker is not available") -class TestS3Store(ContextManagerStoreTestMixin, BaseStoreTests): - @pytest.fixture(autouse=True, scope="session", params=LOCALSTACK_VERSIONS_TO_TEST) - def setup_s3(self, request: pytest.FixtureRequest) -> Generator[None, None, None]: - version = request.param - - # LocalStack container for S3 - with docker_container( - f"s3-test-{version}", - f"localstack/localstack:{version}", - {str(LOCALSTACK_CONTAINER_PORT): S3_HOST_PORT}, - environment={"SERVICES": "s3"}, - ): - if not wait_for_true(bool_fn=ping_s3, tries=WAIT_FOR_S3_TIMEOUT, wait_time=1): - msg = f"LocalStack S3 {version} failed to start" - raise S3FailedToStartError(msg) - - yield - - @override - @pytest.fixture - def store(self, setup_s3: None) -> S3Store: - store = S3Store( - bucket_name=S3_TEST_BUCKET, - endpoint_url=S3_ENDPOINT, - aws_access_key_id="test", - aws_secret_access_key="test", - region_name="us-east-1", - ) - - # Clean up test bucket if it exists - import aioboto3 - - session = aioboto3.Session(aws_access_key_id="test", aws_secret_access_key="test", region_name="us-east-1") - with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client, contextlib.suppress(Exception): # type: ignore - # Delete all objects in the bucket - response = client.list_objects_v2(Bucket=S3_TEST_BUCKET) # type: ignore - if "Contents" in response: - for obj in response["Contents"]: - client.delete_object(Bucket=S3_TEST_BUCKET, Key=obj["Key"]) # type: ignore - - # Delete the bucket - client.delete_bucket(Bucket=S3_TEST_BUCKET) # type: ignore - - return store - - @pytest.fixture - def s3_store(self, store: S3Store) -> S3Store: - return store - - @pytest.mark.skip(reason="Distributed Caches are unbounded") - @override - def test_not_unbounded(self, store: BaseStore): ... 
diff --git a/key-value/key-value-sync/tests/code_gen/stores/vault/test_vault.py b/key-value/key-value-sync/tests/code_gen/stores/vault/test_vault.py index 6eddf1f1..d0cbc6d3 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/vault/test_vault.py +++ b/key-value/key-value-sync/tests/code_gen/stores/vault/test_vault.py @@ -14,7 +14,7 @@ # Vault test configuration VAULT_HOST = "localhost" VAULT_PORT = 8200 -VAULT_TOKEN = "dev-root-token" # noqa: S105 +VAULT_TOKEN = "dev-root-token" VAULT_MOUNT_POINT = "secret" VAULT_CONTAINER_PORT = 8200 diff --git a/pyproject.toml b/pyproject.toml index ed23a84a..a947d4e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,8 @@ line-length = 140 [tool.ruff.lint.extend-per-file-ignores] "**/tests/*.py" = [ "S101", # Ignore asserts + "S105", # Ignore hardcoded password string (test credentials) + "S106", # Ignore hardcoded password function argument (test credentials) "DTZ005", # Ignore datetime.UTC "PLR2004", # Ignore magic values "E501", # Ignore line length diff --git a/scripts/build_sync_library.py b/scripts/build_sync_library.py index c79a870b..1263fdc5 100644 --- a/scripts/build_sync_library.py +++ b/scripts/build_sync_library.py @@ -56,6 +56,8 @@ "key-value/key-value-aio/tests/stores/dynamodb", "key-value/key-value-aio/src/key_value/aio/stores/memcached", "key-value/key-value-aio/tests/stores/memcached", + "key-value/key-value-aio/src/key_value/aio/stores/s3", + "key-value/key-value-aio/tests/stores/s3", "key-value/key-value-aio/src/key_value/aio/wrappers/timeout", "key-value/key-value-aio/tests/wrappers/timeout", ] From a8a093fe41ac056d7cc3cc3f2c75cc2de4e7f0b4 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 03:00:22 +0000 Subject: [PATCH 03/19] fix: address CodeRabbit review feedback for S3 store - Remove unused _session instance variable - Improve exception handling to use ClientError and check error codes - Use ManagedEntry.is_expired property for consistency - Remove redundant s3_store fixture - Add HTTP_NOT_FOUND constant to avoid magic numbers - Add type ignores for boto response types Co-authored-by: William Easton --- .../src/key_value/aio/stores/s3/store.py | 45 +++++++++++++------ .../key-value-aio/tests/stores/s3/test_s3.py | 4 -- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index d6c03de2..b62b1ca3 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -1,4 +1,3 @@ -from datetime import datetime, timezone from types import TracebackType from typing import TYPE_CHECKING, Any, overload @@ -10,6 +9,9 @@ BaseStore, ) +# HTTP status code for not found errors +HTTP_NOT_FOUND = 404 + try: import aioboto3 from aioboto3.session import Session # noqa: TC002 @@ -59,7 +61,6 @@ class S3Store(BaseContextManagerStore, BaseStore): ... 
await store.put(key="test", value={"data": "test"}) """ - _session: aioboto3.Session # pyright: ignore[reportAny] _bucket_name: str _endpoint_url: str | None _raw_client: Any # S3 client from aioboto3 @@ -182,15 +183,25 @@ async def _setup(self) -> None: if not self._client and self._raw_client: self._client = await self._raw_client.__aenter__() + from botocore.exceptions import ClientError + try: # Check if bucket exists await self._connected_client.head_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] - except Exception: - # Bucket doesn't exist, create it - import contextlib + except ClientError as e: + # Only proceed with bucket creation if it's a 404/NoSuchBucket error + error_code = e.response.get("Error", {}).get("Code", "") # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + http_status = e.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 0) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + + if error_code in ("404", "NoSuchBucket") or http_status == HTTP_NOT_FOUND: + # Bucket doesn't exist, create it + import contextlib - with contextlib.suppress(self._connected_client.exceptions.BucketAlreadyOwnedByYou): # pyright: ignore[reportUnknownMemberType] - await self._connected_client.create_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] + with contextlib.suppress(self._connected_client.exceptions.BucketAlreadyOwnedByYou): # pyright: ignore[reportUnknownMemberType] + await self._connected_client.create_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] + else: + # Re-raise authentication, permission, or other errors + raise def _get_s3_key(self, *, collection: str, key: str) -> str: """Generate the S3 object key for a given collection and key. @@ -235,7 +246,7 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry managed_entry = ManagedEntry.from_json(json_str=json_value) # Check for client-side expiration - if managed_entry.expires_at and managed_entry.expires_at <= datetime.now(tz=timezone.utc): + if managed_entry.is_expired: # Entry expired, delete it and return None await self._delete_managed_entry(key=key, collection=collection) return None @@ -296,6 +307,8 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: """ s3_key = self._get_s3_key(collection=collection, key=key) + from botocore.exceptions import ClientError + try: # Check if object exists before deletion await self._connected_client.head_object( # pyright: ignore[reportUnknownMemberType] @@ -303,12 +316,16 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: Key=s3_key, ) - except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType, reportUnknownAttributeType] - # Object doesn't exist - return False - except Exception: - # For 404 errors that don't raise NoSuchKey exception - return False + except ClientError as e: + # Check if it's a 404/not found error + error_code = e.response.get("Error", {}).get("Code", "") # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + http_status = e.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 0) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + + if error_code in ("404", "NoSuchKey") or http_status == HTTP_NOT_FOUND: + # Object doesn't exist + return False + # Re-raise other errors (auth, network, etc.) 
+ raise else: # Object exists, delete it await self._connected_client.delete_object( # pyright: ignore[reportUnknownMemberType] diff --git a/key-value/key-value-aio/tests/stores/s3/test_s3.py b/key-value/key-value-aio/tests/stores/s3/test_s3.py index f07092b1..66e6673d 100644 --- a/key-value/key-value-aio/tests/stores/s3/test_s3.py +++ b/key-value/key-value-aio/tests/stores/s3/test_s3.py @@ -99,10 +99,6 @@ async def store(self, setup_s3: None) -> S3Store: return store - @pytest.fixture - async def s3_store(self, store: S3Store) -> S3Store: - return store - @pytest.mark.skip(reason="Distributed Caches are unbounded") @override async def test_not_unbounded(self, store: BaseStore): ... From 401d947f0f2e22319394e7e8a8e8ade7f16b69a9 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 30 Oct 2025 20:59:07 +0000 Subject: [PATCH 04/19] fix: handle S3 key length limits by hashing long collection/key names S3 has a maximum key length of 1024 bytes. This fix ensures that long collection or key names are automatically hashed when they exceed safe limits (500 bytes each), keeping the total S3 key under 1024 bytes while maintaining uniqueness through hash suffixes. - Import hash_excess_length utility from key_value.shared.utils.sanitize - Add MAX_COLLECTION_LENGTH and MAX_KEY_LENGTH constants (500 bytes each) - Update _get_s3_key() to hash collection and key names if they exceed limits - Follows the same pattern used by MongoDB store for handling long names Fixes test_long_collection_name test which was failing with KeyTooLongError. Co-authored-by: William Easton --- .../src/key_value/aio/stores/s3/store.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index b62b1ca3..0b257922 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, overload from key_value.shared.utils.managed_entry import ManagedEntry +from key_value.shared.utils.sanitize import hash_excess_length from typing_extensions import Self, override from key_value.aio.stores.base import ( @@ -12,6 +13,12 @@ # HTTP status code for not found errors HTTP_NOT_FOUND = 404 +# S3 key length limit is 1024 bytes +# We allocate space for collection, separator, and key +# Using 500 bytes for each allows for the separator and stays well under 1024 +MAX_COLLECTION_LENGTH = 500 +MAX_KEY_LENGTH = 500 + try: import aioboto3 from aioboto3.session import Session # noqa: TC002 @@ -206,6 +213,9 @@ async def _setup(self) -> None: def _get_s3_key(self, *, collection: str, key: str) -> str: """Generate the S3 object key for a given collection and key. + S3 has a maximum key length of 1024 bytes. To ensure compliance, we hash + long collection or key names to stay within limits while maintaining uniqueness. + Args: collection: The collection name. key: The key within the collection. 
@@ -213,7 +223,11 @@ def _get_s3_key(self, *, collection: str, key: str) -> str: Returns: The S3 object key in format: {collection}/{key} """ - return f"{collection}/{key}" + # Hash collection and key if they exceed their max lengths + # This ensures the combined S3 key stays under 1024 bytes + safe_collection = hash_excess_length(collection, MAX_COLLECTION_LENGTH) + safe_key = hash_excess_length(key, MAX_KEY_LENGTH) + return f"{safe_collection}/{safe_key}" @override async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: From 82d3f6ac28a62e030e34b8208968c54b8654f9ee Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 17:58:30 +0000 Subject: [PATCH 05/19] refactor: update S3Store to use new serialization adapter pattern Adapt S3Store to match the serialization refactoring from PR #184: - Replace ManagedEntry.from_json() with self._serialization_adapter.load_json() - Replace managed_entry.to_json() with self._serialization_adapter.dump_json() - Leverage BasicSerializationAdapter automatically initialized by BaseStore Co-authored-by: William Easton --- key-value/key-value-aio/src/key_value/aio/stores/s3/store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index 0b257922..e31eb38e 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -257,7 +257,7 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry json_value = body_bytes.decode("utf-8") # pyright: ignore[reportUnknownMemberType] # Deserialize to ManagedEntry - managed_entry = ManagedEntry.from_json(json_str=json_value) + managed_entry = self._serialization_adapter.load_json(json_str=json_value) # Check for client-side expiration if managed_entry.is_expired: @@ -291,7 +291,7 @@ async def _put_managed_entry( managed_entry: The ManagedEntry to store. """ s3_key = self._get_s3_key(collection=collection, key=key) - json_value = managed_entry.to_json() + json_value = self._serialization_adapter.dump_json(entry=managed_entry) # Prepare metadata metadata: dict[str, str] = {} From d9c1d7b5de578fc6255f1e3731722c0b52a46376 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 00:23:16 +0000 Subject: [PATCH 06/19] feat: add length_is_bytes parameter to sanitize_string and hash_excess_length - Add _truncate_to_bytes() helper that uses binary search to safely truncate strings at byte boundaries without splitting multi-byte UTF-8 characters - Add length_is_bytes parameter to sanitize_string() and hash_excess_length() to support byte-based length limits - Update S3Store to use length_is_bytes=True for proper S3 key length handling - Fixes KeyTooLongError when using multi-byte characters (emoji, CJK, etc.) This ensures S3 keys stay within the 1024-byte limit even with multi-byte UTF-8 characters. 
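A minimal sketch of the intended behaviour (illustrative only; only byte lengths are asserted, not the exact hash suffix):

    from key_value.shared.utils.sanitize import hash_excess_length

    key = "🔑" * 300  # 300 characters, ~1200 bytes when UTF-8 encoded
    safe = hash_excess_length(key, 500, length_is_bytes=True)
    assert len(safe.encode("utf-8")) <= 500  # fits the byte budget without splitting a character

    # Values already within the limit pass through unchanged
    assert hash_excess_length("user:123", 500, length_is_bytes=True) == "user:123"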
Co-authored-by: William Easton --- .../src/key_value/aio/stores/s3/store.py | 6 +- .../src/key_value/shared/utils/sanitize.py | 57 +++++++++++++++---- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index e31eb38e..5c076cfc 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -223,10 +223,10 @@ def _get_s3_key(self, *, collection: str, key: str) -> str: Returns: The S3 object key in format: {collection}/{key} """ - # Hash collection and key if they exceed their max lengths + # Hash collection and key if they exceed their max byte lengths # This ensures the combined S3 key stays under 1024 bytes - safe_collection = hash_excess_length(collection, MAX_COLLECTION_LENGTH) - safe_key = hash_excess_length(key, MAX_KEY_LENGTH) + safe_collection = hash_excess_length(collection, MAX_COLLECTION_LENGTH, length_is_bytes=True) + safe_key = hash_excess_length(key, MAX_KEY_LENGTH, length_is_bytes=True) return f"{safe_collection}/{safe_key}" @override diff --git a/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py b/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py index ce1e5df3..91a684a4 100644 --- a/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py +++ b/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py @@ -61,6 +61,37 @@ def sanitize_characters_in_string(value: str, allowed_characters: str, replace_w return new_value +def _truncate_to_bytes(value: str, max_bytes: int, encoding: str = "utf-8") -> str: + """Truncate a string to fit within max_bytes when encoded, without splitting multi-byte characters. + + Args: + value: The string to truncate. + max_bytes: The maximum number of bytes. + encoding: The encoding to use (default: utf-8). + + Returns: + The truncated string that fits within max_bytes. + """ + encoded = value.encode(encoding) + if len(encoded) <= max_bytes: + return value + + # Binary search to find the longest substring that fits + left, right = 0, len(value) + result = "" + + while left <= right: + mid = (left + right) // 2 + candidate = value[:mid] + if len(candidate.encode(encoding)) <= max_bytes: + result = candidate + left = mid + 1 + else: + right = mid - 1 + + return result + + @bear_enforce def sanitize_string( value: str, @@ -70,6 +101,7 @@ def sanitize_string( hash_fragment_separator: str = DEFAULT_HASH_FRAGMENT_SEPARATOR, hash_fragment_mode: HashFragmentMode = HashFragmentMode.ONLY_IF_CHANGED, hash_fragment_length: int = DEFAULT_HASH_FRAGMENT_SIZE, + length_is_bytes: bool = False, ) -> str: """Sanitize the value, replacing characters and optionally adding a fragment a hash of the value if requested. @@ -81,9 +113,10 @@ def sanitize_string( Args: value: The value to sanitize. allowed_characters: The allowed characters in the value. - max_length: The maximum length of the value (with the hash fragment added). + max_length: The maximum length of the value (with hash fragment). Interpreted as bytes if length_is_bytes is True. hash_fragment_separator: The separator to add between the value and the hash fragment. hash_fragment_mode: The mode to add the hash fragment. + length_is_bytes: If True, max_length is interpreted as bytes instead of characters. 
""" if max_length < MINIMUM_MAX_LENGTH: msg = f"max_length must be greater than or equal to {MINIMUM_MAX_LENGTH}" @@ -106,8 +139,7 @@ def sanitize_string( if hash_fragment_mode == HashFragmentMode.ALWAYS: actual_max_length = max_length - hash_fragment_size_required - - sanitized_value = sanitized_value[:actual_max_length] + sanitized_value = _truncate_to_bytes(sanitized_value, actual_max_length) if length_is_bytes else sanitized_value[:actual_max_length] if not sanitized_value: return hash_fragment @@ -115,14 +147,13 @@ def sanitize_string( return sanitized_value + hash_fragment_separator + hash_fragment if hash_fragment_mode == HashFragmentMode.ONLY_IF_CHANGED: - sanitized_value = sanitized_value[:max_length] + sanitized_value = _truncate_to_bytes(sanitized_value, max_length) if length_is_bytes else sanitized_value[:max_length] if value == sanitized_value: return value actual_max_length = max_length - hash_fragment_size_required - - sanitized_value = sanitized_value[:actual_max_length] + sanitized_value = _truncate_to_bytes(sanitized_value, actual_max_length) if length_is_bytes else sanitized_value[:actual_max_length] if not sanitized_value: return hash_fragment @@ -133,18 +164,19 @@ def sanitize_string( msg = "Entire value was sanitized and hash_fragment_mode is HashFragmentMode.NEVER" raise ValueError(msg) - return sanitized_value + return _truncate_to_bytes(sanitized_value, max_length) if length_is_bytes else sanitized_value @bear_enforce -def hash_excess_length(value: str, max_length: int) -> str: +def hash_excess_length(value: str, max_length: int, length_is_bytes: bool = False) -> str: """Hash part of the value if it exceeds the maximum length. This operation will truncate the value to the maximum length minus 8 characters and will swap the last 8 characters with the first 8 characters of the generated hash. Args: value: The value to hash. - max_length: The maximum length of the value. Must be greater than 32. + max_length: The maximum length of the value. Must be greater than 16. If length_is_bytes is True, this is interpreted as bytes. + length_is_bytes: If True, max_length is interpreted as bytes instead of characters. Returns: The hashed value if the value exceeds the maximum length, otherwise the original value. 
@@ -153,10 +185,13 @@ def hash_excess_length(value: str, max_length: int) -> str: msg = f"max_length must be greater than {MINIMUM_MAX_LENGTH}" raise ValueError(msg) - if len(value) <= max_length: + # Check if truncation is needed + current_length = len(value.encode("utf-8")) if length_is_bytes else len(value) + if current_length <= max_length: return value - truncated_value = value[: max_length - 8] + # Truncate to max_length - 8 to make room for hash + truncated_value = _truncate_to_bytes(value, max_length - 8) if length_is_bytes else value[: max_length - 8] hash_of_value = hashlib.sha256(value.encode()).hexdigest() first_eight_of_hash = hash_of_value[:8] From 731afa1684db98aafcde90356b1575ba4622d820 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 02:27:57 +0000 Subject: [PATCH 07/19] fix: address review feedback for S3 store - Add explicit client cleanup (set _client = None after __aexit__) - Remove deletion of expired objects during retrieval - Update both __aexit__ and _close for consistency Co-authored-by: William Easton --- key-value/key-value-aio/src/key_value/aio/stores/s3/store.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index 5c076cfc..23d2efc6 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -164,6 +164,7 @@ async def __aexit__( await super().__aexit__(exc_type, exc_value, traceback) if self._client and self._raw_client: await self._client.__aexit__(exc_type, exc_value, traceback) + self._client = None @property def _connected_client(self) -> S3Client: @@ -261,8 +262,7 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry # Check for client-side expiration if managed_entry.is_expired: - # Entry expired, delete it and return None - await self._delete_managed_entry(key=key, collection=collection) + # Entry expired, return None without deleting return None return managed_entry # noqa: TRY300 @@ -353,3 +353,4 @@ async def _close(self) -> None: """Close the S3 client.""" if self._client and self._raw_client: await self._client.__aexit__(None, None, None) # pyright: ignore[reportUnknownMemberType] + self._client = None From d5289332ec2abadf5ba959e6e0417d0791d80b55 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 03:40:22 +0000 Subject: [PATCH 08/19] fix: address CodeRabbit review feedback for S3 store - Close streaming body properly to prevent connection leaks - Add regional bucket creation support for non-us-east-1 regions Co-authored-by: William Easton --- .../src/key_value/aio/stores/s3/store.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index 23d2efc6..4aa19dc1 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -206,7 +206,18 @@ async def _setup(self) -> None: import contextlib with contextlib.suppress(self._connected_client.exceptions.BucketAlreadyOwnedByYou): # pyright: ignore[reportUnknownMemberType] - await self._connected_client.create_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] + # 
Build create_bucket parameters + create_params: dict[str, Any] = {"Bucket": self._bucket_name} + + # Get region from client metadata + region_name = getattr(self._connected_client.meta, "region_name", None) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + + # For regions other than us-east-1, we need to specify LocationConstraint + # Skip this for custom endpoints (LocalStack, MinIO) which may not support it + if region_name and region_name != "us-east-1" and not self._endpoint_url: + create_params["CreateBucketConfiguration"] = {"LocationConstraint": region_name} + + await self._connected_client.create_bucket(**create_params) # pyright: ignore[reportUnknownMemberType] else: # Re-raise authentication, permission, or other errors raise @@ -253,8 +264,9 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry Key=s3_key, ) - # Read the object body - body_bytes = await response["Body"].read() # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + # Read the object body and ensure the streaming body is closed + async with response["Body"] as stream: # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + body_bytes = await stream.read() # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] json_value = body_bytes.decode("utf-8") # pyright: ignore[reportUnknownMemberType] # Deserialize to ManagedEntry From e0f452900df128eed09a6d14f4942da6594ab03f Mon Sep 17 00:00:00 2001 From: William Easton Date: Sun, 2 Nov 2025 08:41:27 -0600 Subject: [PATCH 09/19] simplify setup/teardown in s3 --- .../src/key_value/aio/stores/s3/store.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index 4aa19dc1..249c22ad 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -150,10 +150,18 @@ def __init__( super().__init__(default_collection=default_collection) + async def _connect(self) -> None: + if self._client is None and self._raw_client: + self._client = await self._raw_client.__aenter__() + + async def _disconnect(self) -> None: + if self._client is not None: + await self._client.__aexit__(None, None, None) + self._client = None + @override async def __aenter__(self) -> Self: - if self._raw_client: - self._client = await self._raw_client.__aenter__() + await self._connect() await super().__aenter__() return self @@ -162,9 +170,7 @@ async def __aexit__( self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None ) -> None: await super().__aexit__(exc_type, exc_value, traceback) - if self._client and self._raw_client: - await self._client.__aexit__(exc_type, exc_value, traceback) - self._client = None + await self._disconnect() @property def _connected_client(self) -> S3Client: @@ -188,8 +194,7 @@ async def _setup(self) -> None: This method creates the S3 bucket if it doesn't already exist. It uses the HeadBucket operation to check for bucket existence and creates it if not found. 
""" - if not self._client and self._raw_client: - self._client = await self._raw_client.__aenter__() + await self._connect() from botocore.exceptions import ClientError @@ -363,6 +368,4 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: @override async def _close(self) -> None: """Close the S3 client.""" - if self._client and self._raw_client: - await self._client.__aexit__(None, None, None) # pyright: ignore[reportUnknownMemberType] - self._client = None + await self._disconnect() From 065b551d5f18d683d633f2a6b7cee36803d9e9ca Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:00:39 +0000 Subject: [PATCH 10/19] refactor: align S3 store with sanitization strategy pattern - Add S3KeySanitizationStrategy and S3CollectionSanitizationStrategy classes - Remove automatic sanitization from _get_s3_key() - Add collection_sanitization_strategy and key_sanitization_strategy parameters to S3Store - Update S3Store to use BaseStore's _sanitize_collection_and_key() method - By default, collections/keys are not sanitized (matches new main pattern) - Update tests to use sanitization strategies - Update documentation to explain when/why to use strategies This aligns with the main branch refactor where stores no longer sanitize collections and keys by default, making sanitization opt-in via strategy parameters. Co-authored-by: William Easton --- .../src/key_value/aio/stores/s3/__init__.py | 12 +- .../src/key_value/aio/stores/s3/store.py | 103 ++++++++++++++++-- .../key-value-aio/tests/stores/s3/test_s3.py | 5 + 3 files changed, 109 insertions(+), 11 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py index 67a57ea1..95d93ceb 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/__init__.py @@ -1,5 +1,13 @@ """AWS S3-based key-value store.""" -from key_value.aio.stores.s3.store import S3Store +from key_value.aio.stores.s3.store import ( + S3CollectionSanitizationStrategy, + S3KeySanitizationStrategy, + S3Store, +) -__all__ = ["S3Store"] +__all__ = [ + "S3CollectionSanitizationStrategy", + "S3KeySanitizationStrategy", + "S3Store", +] diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index 249c22ad..3804fe52 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, overload from key_value.shared.utils.managed_entry import ManagedEntry +from key_value.shared.utils.sanitization import SanitizationStrategy from key_value.shared.utils.sanitize import hash_excess_length from typing_extensions import Self, override @@ -33,6 +34,55 @@ from aiobotocore.client import AioBaseClient as S3Client +class S3KeySanitizationStrategy(SanitizationStrategy): + """Sanitization strategy for S3 keys with byte-aware length limits. + + S3 has a maximum key length of 1024 bytes (UTF-8 encoded). This strategy + hashes keys that exceed the specified byte limit to ensure compliance. + + Args: + max_bytes: Maximum key length in bytes. Defaults to 500. + """ + + def __init__(self, max_bytes: int = MAX_KEY_LENGTH) -> None: + """Initialize the S3 key sanitization strategy. + + Args: + max_bytes: Maximum key length in bytes. 
+ """ + self.max_bytes = max_bytes + + def sanitize(self, value: str) -> str: + """Hash the value if it exceeds max_bytes when UTF-8 encoded. + + Args: + value: The key to sanitize. + + Returns: + The original value if within limit, or truncated+hashed if too long. + """ + return hash_excess_length(value, self.max_bytes, length_is_bytes=True) + + def validate(self, value: str) -> None: + """No validation needed for S3 keys.""" + + +class S3CollectionSanitizationStrategy(S3KeySanitizationStrategy): + """Sanitization strategy for S3 collection names with byte-aware length limits. + + This is identical to S3KeySanitizationStrategy but uses a default of 500 bytes + for collection names to match the S3 key format {collection}/{key}. + """ + + def __init__(self, max_bytes: int = MAX_COLLECTION_LENGTH) -> None: + """Initialize the S3 collection sanitization strategy. + + Args: + max_bytes: Maximum collection name length in bytes. + """ + super().__init__(max_bytes=max_bytes) + + class S3Store(BaseContextManagerStore, BaseStore): """AWS S3-based key-value store. @@ -42,6 +92,12 @@ class S3Store(BaseContextManagerStore, BaseStore): S3 object metadata and checked client-side during retrieval (S3 lifecycle policies can be configured separately for background cleanup, but don't provide atomic TTL+retrieval). + By default, collections and keys are not sanitized. This means you must ensure that + the combined "{collection}/{key}" path does not exceed S3's 1024-byte limit when UTF-8 encoded. + + To handle long collection or key names, use the S3CollectionSanitizationStrategy and + S3KeySanitizationStrategy which will hash values exceeding the byte limit. + Example: Basic usage with automatic AWS credentials: @@ -49,6 +105,15 @@ class S3Store(BaseContextManagerStore, BaseStore): ... await store.put(key="user:123", value={"name": "Alice"}, ttl=3600) ... user = await store.get(key="user:123") + With sanitization for long keys/collections: + + >>> async with S3Store( + ... bucket_name="my-kv-store", + ... collection_sanitization_strategy=S3CollectionSanitizationStrategy(), + ... key_sanitization_strategy=S3KeySanitizationStrategy(), + ... ) as store: + ... await store.put(key="very_long_key" * 100, value={"data": "test"}) + With custom AWS credentials: >>> async with S3Store( @@ -74,13 +139,23 @@ class S3Store(BaseContextManagerStore, BaseStore): _client: S3Client | None @overload - def __init__(self, *, client: S3Client, bucket_name: str, default_collection: str | None = None) -> None: + def __init__( + self, + *, + client: S3Client, + bucket_name: str, + default_collection: str | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: """Initialize the S3 store with a pre-configured client. Args: client: The S3 client to use. You must have entered the context manager before passing this in. bucket_name: The name of the S3 bucket to use. default_collection: The default collection to use if no collection is provided. + collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization). + key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization). 
""" @overload @@ -94,6 +169,8 @@ def __init__( aws_secret_access_key: str | None = None, aws_session_token: str | None = None, default_collection: str | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, ) -> None: """Initialize the S3 store with AWS credentials. @@ -105,6 +182,8 @@ def __init__( aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). default_collection: The default collection to use if no collection is provided. + collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization). + key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization). """ def __init__( @@ -118,6 +197,8 @@ def __init__( aws_secret_access_key: str | None = None, aws_session_token: str | None = None, default_collection: str | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, ) -> None: """Initialize the S3 store. @@ -130,6 +211,8 @@ def __init__( aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials). aws_session_token: AWS session token. Defaults to None (uses AWS default credentials). default_collection: The default collection to use if no collection is provided. + collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization). + key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization). """ self._bucket_name = bucket_name self._endpoint_url = endpoint_url @@ -148,7 +231,11 @@ def __init__( self._raw_client = session.client(service_name="s3", endpoint_url=endpoint_url) # pyright: ignore[reportUnknownMemberType] self._client = None - super().__init__(default_collection=default_collection) + super().__init__( + default_collection=default_collection, + collection_sanitization_strategy=collection_sanitization_strategy, + key_sanitization_strategy=key_sanitization_strategy, + ) async def _connect(self) -> None: if self._client is None and self._raw_client: @@ -230,8 +317,8 @@ async def _setup(self) -> None: def _get_s3_key(self, *, collection: str, key: str) -> str: """Generate the S3 object key for a given collection and key. - S3 has a maximum key length of 1024 bytes. To ensure compliance, we hash - long collection or key names to stay within limits while maintaining uniqueness. + The collection and key are sanitized using the configured sanitization strategies + before being combined into the S3 object key format: {collection}/{key}. Args: collection: The collection name. 
@@ -240,11 +327,9 @@ def _get_s3_key(self, *, collection: str, key: str) -> str: Returns: The S3 object key in format: {collection}/{key} """ - # Hash collection and key if they exceed their max byte lengths - # This ensures the combined S3 key stays under 1024 bytes - safe_collection = hash_excess_length(collection, MAX_COLLECTION_LENGTH, length_is_bytes=True) - safe_key = hash_excess_length(key, MAX_KEY_LENGTH, length_is_bytes=True) - return f"{safe_collection}/{safe_key}" + # Use the sanitization strategies from BaseStore + sanitized_collection, sanitized_key = self._sanitize_collection_and_key(collection=collection, key=key) + return f"{sanitized_collection}/{sanitized_key}" @override async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: diff --git a/key-value/key-value-aio/tests/stores/s3/test_s3.py b/key-value/key-value-aio/tests/stores/s3/test_s3.py index 66e6673d..efec4965 100644 --- a/key-value/key-value-aio/tests/stores/s3/test_s3.py +++ b/key-value/key-value-aio/tests/stores/s3/test_s3.py @@ -70,12 +70,17 @@ async def setup_s3(self, request: pytest.FixtureRequest) -> AsyncGenerator[None, @override @pytest.fixture async def store(self, setup_s3: None) -> S3Store: + from key_value.aio.stores.s3 import S3CollectionSanitizationStrategy, S3KeySanitizationStrategy + store = S3Store( bucket_name=S3_TEST_BUCKET, endpoint_url=S3_ENDPOINT, aws_access_key_id="test", aws_secret_access_key="test", region_name="us-east-1", + # Use sanitization strategies for tests to handle long collection/key names + collection_sanitization_strategy=S3CollectionSanitizationStrategy(), + key_sanitization_strategy=S3KeySanitizationStrategy(), ) # Clean up test bucket if it exists From dbab902b6e273ffaa33388d689dea52049ca0c18 Mon Sep 17 00:00:00 2001 From: William Easton Date: Fri, 7 Nov 2025 17:21:31 -0600 Subject: [PATCH 11/19] Longer wait for s3 spinup --- key-value/key-value-aio/tests/stores/s3/test_s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/key-value/key-value-aio/tests/stores/s3/test_s3.py b/key-value/key-value-aio/tests/stores/s3/test_s3.py index efec4965..a280a33a 100644 --- a/key-value/key-value-aio/tests/stores/s3/test_s3.py +++ b/key-value/key-value-aio/tests/stores/s3/test_s3.py @@ -61,7 +61,7 @@ async def setup_s3(self, request: pytest.FixtureRequest) -> AsyncGenerator[None, {str(LOCALSTACK_CONTAINER_PORT): S3_HOST_PORT}, environment={"SERVICES": "s3"}, ): - if not await async_wait_for_true(bool_fn=ping_s3, tries=WAIT_FOR_S3_TIMEOUT, wait_time=1): + if not await async_wait_for_true(bool_fn=ping_s3, tries=WAIT_FOR_S3_TIMEOUT, wait_time=2): msg = f"LocalStack S3 {version} failed to start" raise S3FailedToStartError(msg) From e5125c2fe1f7653fb7ba4d05c868a151fa38a59b Mon Sep 17 00:00:00 2001 From: William Easton Date: Fri, 7 Nov 2025 17:45:47 -0600 Subject: [PATCH 12/19] Small test changes --- key-value/key-value-aio/tests/stores/base.py | 2 +- .../tests/stores/elasticsearch/test_elasticsearch.py | 9 ++++++++- key-value/key-value-aio/tests/stores/s3/test_s3.py | 2 +- key-value/key-value-sync/tests/code_gen/stores/base.py | 2 +- .../code_gen/stores/elasticsearch/test_elasticsearch.py | 9 ++++++++- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/base.py b/key-value/key-value-aio/tests/stores/base.py index 43177ee2..c2e57acf 100644 --- a/key-value/key-value-aio/tests/stores/base.py +++ b/key-value/key-value-aio/tests/stores/base.py @@ -30,7 +30,7 @@ async def eventually_consistent(self) -> 
None: # noqa: B027 @abstractmethod async def store(self) -> BaseStore | AsyncGenerator[BaseStore, None]: ... - @pytest.mark.timeout(60) + @pytest.mark.timeout(90) async def test_store(self, store: BaseStore): """Tests that the store is a valid AsyncKeyValueProtocol.""" assert isinstance(store, AsyncKeyValueProtocol) is True diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index fbd00ddb..7b4dd39a 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -1,3 +1,4 @@ +import logging from collections.abc import AsyncGenerator from datetime import datetime, timedelta, timezone @@ -32,6 +33,8 @@ "9.2.0", # Released Oct 2025 ] +logger = logging.getLogger(__name__) + def get_elasticsearch_client() -> AsyncElasticsearch: return AsyncElasticsearch(hosts=[ES_URL]) @@ -41,7 +44,11 @@ async def ping_elasticsearch() -> bool: es_client: AsyncElasticsearch = get_elasticsearch_client() async with es_client: - return await es_client.ping() + if await es_client.ping(): + logger.info("Elasticsearch pinged, wait for yellow status") + await es_client.cluster.health(wait_for_status="yellow", timeout="10s") + logger.info("Elasticsearch is ready") + return False async def cleanup_elasticsearch_indices(elasticsearch_client: AsyncElasticsearch): diff --git a/key-value/key-value-aio/tests/stores/s3/test_s3.py b/key-value/key-value-aio/tests/stores/s3/test_s3.py index a280a33a..23035e75 100644 --- a/key-value/key-value-aio/tests/stores/s3/test_s3.py +++ b/key-value/key-value-aio/tests/stores/s3/test_s3.py @@ -50,7 +50,7 @@ class S3FailedToStartError(Exception): @pytest.mark.skipif(should_skip_docker_tests(), reason="Docker is not available") class TestS3Store(ContextManagerStoreTestMixin, BaseStoreTests): - @pytest.fixture(autouse=True, scope="session", params=LOCALSTACK_VERSIONS_TO_TEST) + @pytest.fixture(scope="session", params=LOCALSTACK_VERSIONS_TO_TEST) async def setup_s3(self, request: pytest.FixtureRequest) -> AsyncGenerator[None, None]: version = request.param diff --git a/key-value/key-value-sync/tests/code_gen/stores/base.py b/key-value/key-value-sync/tests/code_gen/stores/base.py index b78e279d..9c6303b1 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/base.py +++ b/key-value/key-value-sync/tests/code_gen/stores/base.py @@ -27,7 +27,7 @@ def eventually_consistent(self) -> None: # noqa: B027 @abstractmethod def store(self) -> BaseStore | Generator[BaseStore, None, None]: ... - @pytest.mark.timeout(60) + @pytest.mark.timeout(90) def test_store(self, store: BaseStore): """Tests that the store is a valid KeyValueProtocol.""" assert isinstance(store, KeyValueProtocol) is True diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index 07d83d95..8aeb2348 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -1,6 +1,7 @@ # WARNING: this file is auto-generated by 'build_sync_library.py' # from the original file 'test_elasticsearch.py' # DO NOT CHANGE! Change the original file instead. 
+import logging from collections.abc import Generator from datetime import datetime, timedelta, timezone @@ -33,6 +34,8 @@ # Released Oct 2025 ELASTICSEARCH_VERSIONS_TO_TEST = ["9.0.0", "9.2.0"] +logger = logging.getLogger(__name__) + def get_elasticsearch_client() -> Elasticsearch: return Elasticsearch(hosts=[ES_URL]) @@ -42,7 +45,11 @@ def ping_elasticsearch() -> bool: es_client: Elasticsearch = get_elasticsearch_client() with es_client: - return es_client.ping() + if es_client.ping(): + logger.info("Elasticsearch pinged, wait for yellow status") + es_client.cluster.health(wait_for_status="yellow", timeout="10s") + logger.info("Elasticsearch is ready") + return False def cleanup_elasticsearch_indices(elasticsearch_client: Elasticsearch): From 89ccb199afc23132adb03ddbccec0da8e3ff2314 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sat, 8 Nov 2025 16:43:06 +0000 Subject: [PATCH 13/19] fix: resolve CodeRabbit feedback and failing tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix Elasticsearch ping functions to return True on success - Fix sanitize.py byte-length calculations for hash fragments - Add truncation in NEVER mode for consistency - Delete expired S3 entries on read to prevent stale data - Support delete-only IAM roles for S3 operations - Handle pagination in S3 test cleanup 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: William Easton --- .../src/key_value/aio/stores/s3/store.py | 38 +++++++++++++------ .../elasticsearch/test_elasticsearch.py | 1 + .../key-value-aio/tests/stores/s3/test_s3.py | 19 ++++++++-- .../src/key_value/shared/utils/sanitize.py | 8 +++- .../elasticsearch/test_elasticsearch.py | 1 + 5 files changed, 49 insertions(+), 18 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index 3804fe52..f0851358 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -364,7 +364,11 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry # Check for client-side expiration if managed_entry.is_expired: - # Entry expired, return None without deleting + # Entry expired, delete it and return None + await self._connected_client.delete_object( # type: ignore[reportUnknownMemberType] + Bucket=self._bucket_name, + Key=s3_key, + ) return None return managed_entry # noqa: TRY300 @@ -433,22 +437,32 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: ) except ClientError as e: - # Check if it's a 404/not found error - error_code = e.response.get("Error", {}).get("Code", "") # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] - http_status = e.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 0) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + error = e.response.get("Error", {}) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + metadata = e.response.get("ResponseMetadata", {}) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + error_code = error.get("Code", "") # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + http_status = metadata.get("HTTPStatusCode", 0) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] if error_code in ("404", "NoSuchKey") or http_status == HTTP_NOT_FOUND: # Object 
doesn't exist return False - # Re-raise other errors (auth, network, etc.) + + # If AccessDenied on head_object, try delete anyway (delete-only IAM role) + if error_code in ("403", "AccessDenied"): + await self._connected_client.delete_object( # pyright: ignore[reportUnknownMemberType] + Bucket=self._bucket_name, + Key=s3_key, + ) + return True + + # Re-raise other errors (network, etc.) raise - else: - # Object exists, delete it - await self._connected_client.delete_object( # pyright: ignore[reportUnknownMemberType] - Bucket=self._bucket_name, - Key=s3_key, - ) - return True + + # Object exists, delete it + await self._connected_client.delete_object( # pyright: ignore[reportUnknownMemberType] + Bucket=self._bucket_name, + Key=s3_key, + ) + return True @override async def _close(self) -> None: diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index 02048f91..e228861d 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -48,6 +48,7 @@ async def ping_elasticsearch() -> bool: logger.info("Elasticsearch pinged, wait for yellow status") await es_client.cluster.health(wait_for_status="yellow", timeout="10s") logger.info("Elasticsearch is ready") + return True return False diff --git a/key-value/key-value-aio/tests/stores/s3/test_s3.py b/key-value/key-value-aio/tests/stores/s3/test_s3.py index 23035e75..3fd838a9 100644 --- a/key-value/key-value-aio/tests/stores/s3/test_s3.py +++ b/key-value/key-value-aio/tests/stores/s3/test_s3.py @@ -93,12 +93,23 @@ async def store(self, setup_s3: None) -> S3Store: ) async with session.client(service_name="s3", endpoint_url=S3_ENDPOINT) as client: # type: ignore with contextlib.suppress(Exception): - # Delete all objects in the bucket - response = await client.list_objects_v2(Bucket=S3_TEST_BUCKET) # type: ignore - if "Contents" in response: - for obj in response["Contents"]: # type: ignore + # Delete all objects in the bucket (handle pagination) + continuation_token: str | None = None + while True: + list_kwargs = {"Bucket": S3_TEST_BUCKET} + if continuation_token: + list_kwargs["ContinuationToken"] = continuation_token + response = await client.list_objects_v2(**list_kwargs) # type: ignore + + # Delete objects from this page + for obj in response.get("Contents", []): # type: ignore await client.delete_object(Bucket=S3_TEST_BUCKET, Key=obj["Key"]) # type: ignore + # Check if there are more pages + continuation_token = response.get("NextContinuationToken") # type: ignore + if not continuation_token: + break + # Delete the bucket await client.delete_bucket(Bucket=S3_TEST_BUCKET) # type: ignore diff --git a/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py b/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py index 91a684a4..d6c84ddf 100644 --- a/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py +++ b/key-value/key-value-shared/src/key_value/shared/utils/sanitize.py @@ -127,7 +127,11 @@ def sanitize_string( raise ValueError(msg) hash_fragment: str = generate_hash_fragment(value=value, size=hash_fragment_length) - hash_fragment_size_required: int = len(hash_fragment_separator) + len(hash_fragment) + hash_fragment_size_required: int = ( + len((hash_fragment_separator + hash_fragment).encode("utf-8")) + if length_is_bytes + else len(hash_fragment_separator) + len(hash_fragment) + ) sanitized_value: 
str = ( sanitize_characters_in_string(value=value, allowed_characters=allowed_characters, replace_with=replacement_character) @@ -164,7 +168,7 @@ def sanitize_string( msg = "Entire value was sanitized and hash_fragment_mode is HashFragmentMode.NEVER" raise ValueError(msg) - return _truncate_to_bytes(sanitized_value, max_length) if length_is_bytes else sanitized_value + return _truncate_to_bytes(sanitized_value, max_length) if length_is_bytes else sanitized_value[:max_length] @bear_enforce diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index b634263f..1f917cb2 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -49,6 +49,7 @@ def ping_elasticsearch() -> bool: logger.info("Elasticsearch pinged, wait for yellow status") es_client.cluster.health(wait_for_status="yellow", timeout="10s") logger.info("Elasticsearch is ready") + return True return False From 8618596bf44275777a10168cd78ac2bcdb2962eb Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 9 Nov 2025 17:42:41 +0000 Subject: [PATCH 14/19] Update lockfile and generate sync code for filetree store - Updated uv.lock after merge - Generated sync versions of filetree store (from codegen) Co-authored-by: William Easton --- .../sync/code_gen/stores/filetree/__init__.py | 12 + .../sync/code_gen/stores/filetree/store.py | 405 ++++++++++++++++++ .../sync/stores/filetree/__init__.py | 12 + .../code_gen/stores/filetree/test_filetree.py | 43 ++ uv.lock | 24 +- 5 files changed, 493 insertions(+), 3 deletions(-) create mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py create mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py create mode 100644 key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py create mode 100644 key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py new file mode 100644 index 00000000..ac91fc37 --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py @@ -0,0 +1,12 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +"""File-tree based store for visual inspection and testing.""" + +from key_value.sync.code_gen.stores.filetree.store import ( + FileTreeStore, + FileTreeV1CollectionSanitizationStrategy, + FileTreeV1KeySanitizationStrategy, +) + +__all__ = ["FileTreeStore", "FileTreeV1CollectionSanitizationStrategy", "FileTreeV1KeySanitizationStrategy"] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py new file mode 100644 index 00000000..f6054d1d --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py @@ -0,0 +1,405 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'store.py' +# DO NOT CHANGE! Change the original file instead. 
+"""FileTreeStore implementation using async filesystem operations.""" + +import os +from collections.abc import Generator +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any + +from aiofile import async_open as aopen +from anyio import Path as AsyncPath +from key_value.shared.utils.managed_entry import ManagedEntry, dump_to_json, load_from_json +from key_value.shared.utils.sanitization import HybridSanitizationStrategy, SanitizationStrategy +from key_value.shared.utils.sanitize import ALPHANUMERIC_CHARACTERS +from key_value.shared.utils.serialization import BasicSerializationAdapter, SerializationAdapter +from key_value.shared.utils.time_to_live import now +from typing_extensions import Self, override + +from key_value.sync.code_gen.stores.base import BaseStore + +DIRECTORY_ALLOWED_CHARACTERS = ALPHANUMERIC_CHARACTERS + "_" + +MAX_FILE_NAME_LENGTH = 255 +FILE_NAME_ALLOWED_CHARACTERS = ALPHANUMERIC_CHARACTERS + "_" + +MAX_PATH_LENGTH = 260 + + +def get_max_path_length(root: Path | AsyncPath) -> int: + """Get the maximum path length for the filesystem. + + Returns platform-specific limits: + - Windows: 260 characters (MAX_PATH) + - Unix/Linux: Uses pathconf to get PC_PATH_MAX + """ + if os.name == "nt": # Windows + return MAX_PATH_LENGTH # MAX_PATH on Windows + + reported_max_length = os.pathconf(path=Path(root), name="PC_PATH_MAX") + if reported_max_length > 0: + return reported_max_length + return MAX_PATH_LENGTH + + +def get_max_file_name_length(root: Path | AsyncPath) -> int: + """Get the maximum filename length for the filesystem. + + Returns platform-specific limits: + - Windows: 255 characters + - Unix/Linux: Uses pathconf to get PC_NAME_MAX + """ + if os.name == "nt": # Windows + return MAX_FILE_NAME_LENGTH # Maximum filename length on Windows (NTFS, FAT32, etc.) + + reported_max_length = os.pathconf(path=Path(root), name="PC_NAME_MAX") + + if reported_max_length > 0: + return reported_max_length + + return MAX_FILE_NAME_LENGTH + + +class FileTreeV1CollectionSanitizationStrategy(HybridSanitizationStrategy): + """V1 sanitization strategy for FileTreeStore collections. + + This strategy sanitizes collection names to comply with filesystem directory naming requirements. + It replaces invalid characters with underscores and truncates to fit within directory name length limits. + + Collection names (directories) are subject to the same length limit as file names (typically 255 bytes). + The sanitized name is also used for the collection info file (with `-info.json` suffix), so we need + to leave room for that suffix (10 characters). + """ + + def __init__(self, directory: Path | AsyncPath) -> None: + # Directory names are subject to the same NAME_MAX limit as file names + max_name_length: int = get_max_file_name_length(root=directory) + + # Leave room for `-info.json` suffix (10 chars) that's added to the metadata file name + suffix_length = 10 + + super().__init__( + replacement_character="_", max_length=max_name_length - suffix_length, allowed_characters=DIRECTORY_ALLOWED_CHARACTERS + ) + + +class FileTreeV1KeySanitizationStrategy(HybridSanitizationStrategy): + """V1 sanitization strategy for FileTreeStore keys. + + This strategy sanitizes key names to comply with filesystem file naming requirements. + It replaces invalid characters with underscores and truncates to fit within both path + length limits and filename length limits. 
+ """ + + def __init__(self, directory: Path | AsyncPath) -> None: + # We need to account for our current location in the filesystem to stay under the max path length + max_path_length: int = get_max_path_length(root=directory) + current_path_length: int = len(Path(directory).as_posix()) + remaining_length: int = max_path_length - current_path_length + + # We need to account for limits on file names + max_file_name_length: int = get_max_file_name_length(root=directory) - 5 # 5 for .json extension + + # We need to stay under both limits + max_length = min(remaining_length, max_file_name_length) + + super().__init__(replacement_character="_", max_length=max_length, allowed_characters=FILE_NAME_ALLOWED_CHARACTERS) + + +@dataclass(kw_only=True) +class DiskCollectionInfo: + version: int = 1 + + collection: str + + directory: AsyncPath + + created_at: datetime + + serialization_adapter: SerializationAdapter + key_sanitization_strategy: SanitizationStrategy + + def _list_file_paths(self) -> Generator[AsyncPath]: + for item_path in AsyncPath(self.directory).iterdir(): + if not item_path.is_file() or item_path.suffix != ".json": + continue + if item_path.stem == "info": + continue + yield item_path + + def get_entry(self, *, key: str) -> ManagedEntry | None: + sanitized_key = self.key_sanitization_strategy.sanitize(value=key) + key_path: AsyncPath = AsyncPath(self.directory / f"{sanitized_key}.json") + + if not key_path.exists(): + return None + + data_dict: dict[str, Any] = read_file(file=key_path) + + return self.serialization_adapter.load_dict(data=data_dict) + + def put_entry(self, *, key: str, data: ManagedEntry) -> None: + sanitized_key = self.key_sanitization_strategy.sanitize(value=key) + key_path: AsyncPath = AsyncPath(self.directory / f"{sanitized_key}.json") + write_file(file=key_path, text=self.serialization_adapter.dump_json(entry=data)) + + def delete_entry(self, *, key: str) -> bool: + sanitized_key = self.key_sanitization_strategy.sanitize(value=key) + key_path: AsyncPath = AsyncPath(self.directory / f"{sanitized_key}.json") + + if not key_path.exists(): + return False + + key_path.unlink() + + return True + + def to_dict(self) -> dict[str, Any]: + return { + "version": self.version, + "collection": self.collection, + "directory": str(self.directory), + "created_at": self.created_at.isoformat(), + } + + def to_json(self) -> str: + return dump_to_json(obj=self.to_dict()) + + @classmethod + def from_dict( + cls, *, data: dict[str, Any], serialization_adapter: SerializationAdapter, key_sanitization_strategy: SanitizationStrategy + ) -> Self: + return cls( + version=data["version"], + collection=data["collection"], + directory=AsyncPath(data["directory"]), + created_at=datetime.fromisoformat(data["created_at"]), + serialization_adapter=serialization_adapter, + key_sanitization_strategy=key_sanitization_strategy, + ) + + @classmethod + def from_file( + cls, *, file: AsyncPath, serialization_adapter: SerializationAdapter, key_sanitization_strategy: SanitizationStrategy + ) -> Self: + if data := read_file(file=file): + resolved_directory = AsyncPath(data["directory"]).resolve() + data["directory"] = str(resolved_directory) + return cls.from_dict( + data=data, serialization_adapter=serialization_adapter, key_sanitization_strategy=key_sanitization_strategy + ) + + msg = f"File {file} not found" + + raise FileNotFoundError(msg) + + @classmethod + def create_or_get_info( + cls, + *, + data_directory: AsyncPath, + metadata_directory: AsyncPath, + collection: str, + sanitized_collection: str, + 
serialization_adapter: SerializationAdapter, + key_sanitization_strategy: SanitizationStrategy, + ) -> Self: + info_file: AsyncPath = AsyncPath(metadata_directory / f"{sanitized_collection}-info.json") + + if info_file.exists(): + return cls.from_file( + file=info_file, serialization_adapter=serialization_adapter, key_sanitization_strategy=key_sanitization_strategy + ) + + info = cls( + collection=collection, + directory=data_directory, + created_at=now(), + serialization_adapter=serialization_adapter, + key_sanitization_strategy=key_sanitization_strategy, + ) + + write_file(file=info_file, text=info.to_json()) + return info + + +def read_file(file: AsyncPath) -> dict[str, Any]: + with aopen(file_specifier=Path(file), mode="r", encoding="utf-8") as f: + body: str = f.read() + return load_from_json(json_str=body) + + +def write_file(file: AsyncPath, text: str) -> None: + with aopen(file_specifier=Path(file), mode="w", encoding="utf-8") as f: + f.write(data=text) + + +class FileTreeStore(BaseStore): + """A file-tree based store using directories for collections and files for keys. + + This store uses the native filesystem: + - Each collection is a subdirectory under the base directory + - Each key is stored as a JSON file named "{key}.json" + - File contents contain the ManagedEntry serialized to JSON + + Directory structure: + {base_directory}/ + {collection_1}/ + {key_1}.json + {key_2}.json + {collection_2}/ + {key_3}.json + + By default, collections and keys are not sanitized. This means that filesystem limitations + on path lengths and special characters may cause errors when trying to get and put entries. + + To avoid issues, you may want to consider leveraging the `FileTreeV1CollectionSanitizationStrategy` + and `FileTreeV1KeySanitizationStrategy` strategies. + + Warning: + This store is intended for development and testing purposes only. + It is not suitable for production use due to: + - Poor performance with many keys + - No atomic operations + - No built-in cleanup of expired entries + - Filesystem limitations on file names and directory sizes + + The store does NOT automatically clean up expired entries from disk. Expired entries + are only filtered out when read via get() or similar methods. + """ + + _data_directory: AsyncPath + _metadata_directory: AsyncPath + + _collection_infos: dict[str, DiskCollectionInfo] + + def __init__( + self, + *, + data_directory: Path | str, + metadata_directory: Path | str | None = None, + default_collection: str | None = None, + serialization_adapter: SerializationAdapter | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: + """Initialize the file-tree store. + + Args: + data_directory: The base directory to use for storing collections and keys. + metadata_directory: The directory to use for storing metadata. Defaults to data_directory. + default_collection: The default collection to use if no collection is provided. + serialization_adapter: The serialization adapter to use for the store. + key_sanitization_strategy: The sanitization strategy to use for keys. + collection_sanitization_strategy: The sanitization strategy to use for collections. 
+ """ + data_directory = Path(data_directory).resolve() + data_directory.mkdir(parents=True, exist_ok=True) + + if metadata_directory is None: + metadata_directory = data_directory + + metadata_directory = Path(metadata_directory).resolve() + metadata_directory.mkdir(parents=True, exist_ok=True) + + self._data_directory = AsyncPath(data_directory) + self._metadata_directory = AsyncPath(metadata_directory) + + self._collection_infos = {} + + self._stable_api = False + + super().__init__( + serialization_adapter=serialization_adapter or BasicSerializationAdapter(), + key_sanitization_strategy=key_sanitization_strategy, + collection_sanitization_strategy=collection_sanitization_strategy, + default_collection=default_collection, + ) + + def _get_data_directories(self) -> Generator[AsyncPath]: + for directory in self._data_directory.iterdir(): + if directory.is_dir(): + yield directory + + def _get_metadata_entries(self) -> Generator[AsyncPath]: + for entry in self._metadata_directory.iterdir(): + if entry.is_file() and entry.suffix == ".json": + yield entry.resolve() + + def _load_collection_infos(self) -> None: + for entry in self._get_metadata_entries(): + collection_info: DiskCollectionInfo = DiskCollectionInfo.from_file( + file=entry, serialization_adapter=self._serialization_adapter, key_sanitization_strategy=self._key_sanitization_strategy + ) + self._collection_infos[collection_info.collection] = collection_info + + @override + def _setup_collection(self, *, collection: str) -> None: + """Set up a collection by creating its directory if it doesn't exist. + + Args: + collection: The collection name. + """ + if collection in self._collection_infos: + return + + # Sanitize the collection name using the strategy + sanitized_collection = self._sanitize_collection(collection=collection) + + # Create the collection directory under the data directory + data_directory: AsyncPath = AsyncPath(self._data_directory / sanitized_collection) + data_directory.mkdir(parents=True, exist_ok=True) + + self._collection_infos[collection] = DiskCollectionInfo.create_or_get_info( + data_directory=data_directory, + metadata_directory=self._metadata_directory, + collection=collection, + sanitized_collection=sanitized_collection, + serialization_adapter=self._serialization_adapter, + key_sanitization_strategy=self._key_sanitization_strategy, + ) + + @override + def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: + """Retrieve a managed entry by key from the specified collection. + + Args: + collection: The collection name. + key: The key name. + + Returns: + The managed entry if found and not expired, None otherwise. + """ + collection_info: DiskCollectionInfo = self._collection_infos[collection] + + return collection_info.get_entry(key=key) + + @override + def _put_managed_entry(self, *, key: str, collection: str, managed_entry: ManagedEntry) -> None: + """Store a managed entry at the specified key in the collection. + + Args: + collection: The collection name. + key: The key name. + managed_entry: The managed entry to store. + """ + collection_info: DiskCollectionInfo = self._collection_infos[collection] + collection_info.put_entry(key=key, data=managed_entry) + + @override + def _delete_managed_entry(self, *, key: str, collection: str) -> bool: + """Delete a managed entry from the specified collection. + + Args: + collection: The collection name. + key: The key name. + + Returns: + True if the entry was deleted, False if it didn't exist. 
+ """ + collection_info: DiskCollectionInfo = self._collection_infos[collection] + + return collection_info.delete_entry(key=key) diff --git a/key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py b/key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py new file mode 100644 index 00000000..ac91fc37 --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py @@ -0,0 +1,12 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +"""File-tree based store for visual inspection and testing.""" + +from key_value.sync.code_gen.stores.filetree.store import ( + FileTreeStore, + FileTreeV1CollectionSanitizationStrategy, + FileTreeV1KeySanitizationStrategy, +) + +__all__ = ["FileTreeStore", "FileTreeV1CollectionSanitizationStrategy", "FileTreeV1KeySanitizationStrategy"] diff --git a/key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py b/key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py new file mode 100644 index 00000000..7c112ef4 --- /dev/null +++ b/key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py @@ -0,0 +1,43 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'test_filetree.py' +# DO NOT CHANGE! Change the original file instead. +"""Tests for FileTreeStore.""" + +import tempfile +from collections.abc import Generator +from pathlib import Path + +import pytest +from typing_extensions import override + +from key_value.sync.code_gen.stores.base import BaseStore +from key_value.sync.code_gen.stores.filetree import ( + FileTreeStore, + FileTreeV1CollectionSanitizationStrategy, + FileTreeV1KeySanitizationStrategy, +) +from tests.code_gen.stores.base import BaseStoreTests + + +class TestFileTreeStore(BaseStoreTests): + """Test suite for FileTreeStore.""" + + @pytest.fixture + def store(self) -> Generator[FileTreeStore, None, None]: + """Create a FileTreeStore instance with a temporary directory. + + Uses V1 sanitization strategies to maintain backwards compatibility + and pass tests that rely on sanitization for long/special names. 
+ """ + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + yield FileTreeStore( + data_directory=temp_path, + key_sanitization_strategy=FileTreeV1KeySanitizationStrategy(directory=temp_path), + collection_sanitization_strategy=FileTreeV1CollectionSanitizationStrategy(directory=temp_path), + ) + + @override + def test_not_unbounded(self, store: BaseStore): + """FileTreeStore is unbounded, so skip this test.""" + pytest.skip("FileTreeStore is unbounded and does not evict old entries") diff --git a/uv.lock b/uv.lock index ecc91b5e..fc1c3895 100644 --- a/uv.lock +++ b/uv.lock @@ -1779,6 +1779,10 @@ redis = [ rocksdb = [ { name = "rocksdict" }, ] +s3 = [ + { name = "aioboto3" }, + { name = "types-aiobotocore-s3" }, +] valkey = [ { name = "valkey-glide" }, ] @@ -1793,13 +1797,14 @@ wrappers-encryption = [ [package.dev-dependencies] dev = [ { name = "py-key-value", extra = ["dev"] }, - { name = "py-key-value-aio", extra = ["disk", "dynamodb", "elasticsearch", "filetree", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, + { name = "py-key-value-aio", extra = ["disk", "dynamodb", "elasticsearch", "filetree", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "s3", "vault", "wrappers-encryption"] }, { name = "py-key-value-aio", extra = ["valkey"], marker = "sys_platform != 'win32'" }, ] [package.metadata] requires-dist = [ { name = "aioboto3", marker = "extra == 'dynamodb'", specifier = ">=13.3.0" }, + { name = "aioboto3", marker = "extra == 's3'", specifier = ">=13.3.0" }, { name = "aiofile", marker = "extra == 'filetree'", specifier = ">=3.5.0" }, { name = "aiohttp", marker = "extra == 'elasticsearch'", specifier = ">=3.12" }, { name = "aiomcache", marker = "extra == 'memcached'", specifier = ">=0.8.0" }, @@ -1821,16 +1826,17 @@ requires-dist = [ { name = "rocksdict", marker = "python_full_version >= '3.12' and extra == 'rocksdb'", specifier = ">=0.3.24" }, { name = "rocksdict", marker = "python_full_version < '3.12' and extra == 'rocksdb'", specifier = ">=0.3.2" }, { name = "types-aiobotocore-dynamodb", marker = "extra == 'dynamodb'", specifier = ">=2.16.0" }, + { name = "types-aiobotocore-s3", marker = "extra == 's3'", specifier = ">=2.16.0" }, { name = "types-hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "valkey-glide", marker = "extra == 'valkey'", specifier = ">=2.1.0" }, ] -provides-extras = ["memory", "disk", "filetree", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "dynamodb", "keyring", "keyring-linux", "pydantic", "rocksdb", "wrappers-encryption"] +provides-extras = ["memory", "disk", "filetree", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "dynamodb", "s3", "keyring", "keyring-linux", "pydantic", "rocksdb", "wrappers-encryption"] [package.metadata.requires-dev] dev = [ { name = "py-key-value", extras = ["dev"], editable = "." 
}, { name = "py-key-value-aio", extras = ["keyring"] }, - { name = "py-key-value-aio", extras = ["memory", "disk", "filetree", "redis", "elasticsearch", "memcached", "mongodb", "vault", "dynamodb", "rocksdb"] }, + { name = "py-key-value-aio", extras = ["memory", "disk", "filetree", "redis", "elasticsearch", "memcached", "mongodb", "vault", "dynamodb", "s3", "rocksdb"] }, { name = "py-key-value-aio", extras = ["pydantic"] }, { name = "py-key-value-aio", extras = ["valkey"], marker = "sys_platform != 'win32'" }, { name = "py-key-value-aio", extras = ["wrappers-encryption"] }, @@ -2655,6 +2661,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/4f/05d80aa8b5a95b82ddb89547c3a037b9460702286c66ca6e0fbb8fa2ce86/types_aiobotocore_dynamodb-2.25.0-py3-none-any.whl", hash = "sha256:de791dfcef90eb3431c09b63419301f9ff824d82970623e149a427d5fd325430", size = 57971, upload-time = "2025-10-11T01:27:39.639Z" }, ] +[[package]] +name = "types-aiobotocore-s3" +version = "2.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/c7/5ceb5faf9475470cf01e764def05e96a11db734139acd0a114195ff7e353/types_aiobotocore_s3-2.25.1.tar.gz", hash = "sha256:d7729dea44d144e9a24fdd17f4dc66b98ad71864f9bf6eb6db05958e0e59d01e", size = 76444, upload-time = "2025-10-29T01:52:02.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/5b/fbef89694f035c69cfcfd998fefe2c82ff1975ab9cd963b5a9627298afae/types_aiobotocore_s3-2.25.1-py3-none-any.whl", hash = "sha256:c6f1f4f60c253248fbf463ed179b9302e5258e07073e3786eafefff2891332e3", size = 83906, upload-time = "2025-10-29T01:52:01.029Z" }, +] + [[package]] name = "types-hvac" version = "2.3.0.20250914" From f2721ae41945963d8cd0cf3f0ea9034b25673db2 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 9 Nov 2025 18:38:00 +0000 Subject: [PATCH 15/19] Exclude FileTreeStore from sync code generation FileTreeStore is async-only and uses aiofile/anyio libraries that cannot be converted to sync. Added to EXCLUDE_DIRECTORIES and removed incorrectly generated sync files. Co-authored-by: William Easton --- .../sync/code_gen/stores/filetree/__init__.py | 12 - .../sync/code_gen/stores/filetree/store.py | 405 ------------------ .../sync/stores/filetree/__init__.py | 12 - .../code_gen/stores/filetree/test_filetree.py | 43 -- scripts/build_sync_library.py | 2 + 5 files changed, 2 insertions(+), 472 deletions(-) delete mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py delete mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py delete mode 100644 key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py delete mode 100644 key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py deleted file mode 100644 index ac91fc37..00000000 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file '__init__.py' -# DO NOT CHANGE! Change the original file instead. 
-"""File-tree based store for visual inspection and testing.""" - -from key_value.sync.code_gen.stores.filetree.store import ( - FileTreeStore, - FileTreeV1CollectionSanitizationStrategy, - FileTreeV1KeySanitizationStrategy, -) - -__all__ = ["FileTreeStore", "FileTreeV1CollectionSanitizationStrategy", "FileTreeV1KeySanitizationStrategy"] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py deleted file mode 100644 index f6054d1d..00000000 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/filetree/store.py +++ /dev/null @@ -1,405 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file 'store.py' -# DO NOT CHANGE! Change the original file instead. -"""FileTreeStore implementation using async filesystem operations.""" - -import os -from collections.abc import Generator -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path -from typing import Any - -from aiofile import async_open as aopen -from anyio import Path as AsyncPath -from key_value.shared.utils.managed_entry import ManagedEntry, dump_to_json, load_from_json -from key_value.shared.utils.sanitization import HybridSanitizationStrategy, SanitizationStrategy -from key_value.shared.utils.sanitize import ALPHANUMERIC_CHARACTERS -from key_value.shared.utils.serialization import BasicSerializationAdapter, SerializationAdapter -from key_value.shared.utils.time_to_live import now -from typing_extensions import Self, override - -from key_value.sync.code_gen.stores.base import BaseStore - -DIRECTORY_ALLOWED_CHARACTERS = ALPHANUMERIC_CHARACTERS + "_" - -MAX_FILE_NAME_LENGTH = 255 -FILE_NAME_ALLOWED_CHARACTERS = ALPHANUMERIC_CHARACTERS + "_" - -MAX_PATH_LENGTH = 260 - - -def get_max_path_length(root: Path | AsyncPath) -> int: - """Get the maximum path length for the filesystem. - - Returns platform-specific limits: - - Windows: 260 characters (MAX_PATH) - - Unix/Linux: Uses pathconf to get PC_PATH_MAX - """ - if os.name == "nt": # Windows - return MAX_PATH_LENGTH # MAX_PATH on Windows - - reported_max_length = os.pathconf(path=Path(root), name="PC_PATH_MAX") - if reported_max_length > 0: - return reported_max_length - return MAX_PATH_LENGTH - - -def get_max_file_name_length(root: Path | AsyncPath) -> int: - """Get the maximum filename length for the filesystem. - - Returns platform-specific limits: - - Windows: 255 characters - - Unix/Linux: Uses pathconf to get PC_NAME_MAX - """ - if os.name == "nt": # Windows - return MAX_FILE_NAME_LENGTH # Maximum filename length on Windows (NTFS, FAT32, etc.) - - reported_max_length = os.pathconf(path=Path(root), name="PC_NAME_MAX") - - if reported_max_length > 0: - return reported_max_length - - return MAX_FILE_NAME_LENGTH - - -class FileTreeV1CollectionSanitizationStrategy(HybridSanitizationStrategy): - """V1 sanitization strategy for FileTreeStore collections. - - This strategy sanitizes collection names to comply with filesystem directory naming requirements. - It replaces invalid characters with underscores and truncates to fit within directory name length limits. - - Collection names (directories) are subject to the same length limit as file names (typically 255 bytes). - The sanitized name is also used for the collection info file (with `-info.json` suffix), so we need - to leave room for that suffix (10 characters). 
- """ - - def __init__(self, directory: Path | AsyncPath) -> None: - # Directory names are subject to the same NAME_MAX limit as file names - max_name_length: int = get_max_file_name_length(root=directory) - - # Leave room for `-info.json` suffix (10 chars) that's added to the metadata file name - suffix_length = 10 - - super().__init__( - replacement_character="_", max_length=max_name_length - suffix_length, allowed_characters=DIRECTORY_ALLOWED_CHARACTERS - ) - - -class FileTreeV1KeySanitizationStrategy(HybridSanitizationStrategy): - """V1 sanitization strategy for FileTreeStore keys. - - This strategy sanitizes key names to comply with filesystem file naming requirements. - It replaces invalid characters with underscores and truncates to fit within both path - length limits and filename length limits. - """ - - def __init__(self, directory: Path | AsyncPath) -> None: - # We need to account for our current location in the filesystem to stay under the max path length - max_path_length: int = get_max_path_length(root=directory) - current_path_length: int = len(Path(directory).as_posix()) - remaining_length: int = max_path_length - current_path_length - - # We need to account for limits on file names - max_file_name_length: int = get_max_file_name_length(root=directory) - 5 # 5 for .json extension - - # We need to stay under both limits - max_length = min(remaining_length, max_file_name_length) - - super().__init__(replacement_character="_", max_length=max_length, allowed_characters=FILE_NAME_ALLOWED_CHARACTERS) - - -@dataclass(kw_only=True) -class DiskCollectionInfo: - version: int = 1 - - collection: str - - directory: AsyncPath - - created_at: datetime - - serialization_adapter: SerializationAdapter - key_sanitization_strategy: SanitizationStrategy - - def _list_file_paths(self) -> Generator[AsyncPath]: - for item_path in AsyncPath(self.directory).iterdir(): - if not item_path.is_file() or item_path.suffix != ".json": - continue - if item_path.stem == "info": - continue - yield item_path - - def get_entry(self, *, key: str) -> ManagedEntry | None: - sanitized_key = self.key_sanitization_strategy.sanitize(value=key) - key_path: AsyncPath = AsyncPath(self.directory / f"{sanitized_key}.json") - - if not key_path.exists(): - return None - - data_dict: dict[str, Any] = read_file(file=key_path) - - return self.serialization_adapter.load_dict(data=data_dict) - - def put_entry(self, *, key: str, data: ManagedEntry) -> None: - sanitized_key = self.key_sanitization_strategy.sanitize(value=key) - key_path: AsyncPath = AsyncPath(self.directory / f"{sanitized_key}.json") - write_file(file=key_path, text=self.serialization_adapter.dump_json(entry=data)) - - def delete_entry(self, *, key: str) -> bool: - sanitized_key = self.key_sanitization_strategy.sanitize(value=key) - key_path: AsyncPath = AsyncPath(self.directory / f"{sanitized_key}.json") - - if not key_path.exists(): - return False - - key_path.unlink() - - return True - - def to_dict(self) -> dict[str, Any]: - return { - "version": self.version, - "collection": self.collection, - "directory": str(self.directory), - "created_at": self.created_at.isoformat(), - } - - def to_json(self) -> str: - return dump_to_json(obj=self.to_dict()) - - @classmethod - def from_dict( - cls, *, data: dict[str, Any], serialization_adapter: SerializationAdapter, key_sanitization_strategy: SanitizationStrategy - ) -> Self: - return cls( - version=data["version"], - collection=data["collection"], - directory=AsyncPath(data["directory"]), - 
created_at=datetime.fromisoformat(data["created_at"]), - serialization_adapter=serialization_adapter, - key_sanitization_strategy=key_sanitization_strategy, - ) - - @classmethod - def from_file( - cls, *, file: AsyncPath, serialization_adapter: SerializationAdapter, key_sanitization_strategy: SanitizationStrategy - ) -> Self: - if data := read_file(file=file): - resolved_directory = AsyncPath(data["directory"]).resolve() - data["directory"] = str(resolved_directory) - return cls.from_dict( - data=data, serialization_adapter=serialization_adapter, key_sanitization_strategy=key_sanitization_strategy - ) - - msg = f"File {file} not found" - - raise FileNotFoundError(msg) - - @classmethod - def create_or_get_info( - cls, - *, - data_directory: AsyncPath, - metadata_directory: AsyncPath, - collection: str, - sanitized_collection: str, - serialization_adapter: SerializationAdapter, - key_sanitization_strategy: SanitizationStrategy, - ) -> Self: - info_file: AsyncPath = AsyncPath(metadata_directory / f"{sanitized_collection}-info.json") - - if info_file.exists(): - return cls.from_file( - file=info_file, serialization_adapter=serialization_adapter, key_sanitization_strategy=key_sanitization_strategy - ) - - info = cls( - collection=collection, - directory=data_directory, - created_at=now(), - serialization_adapter=serialization_adapter, - key_sanitization_strategy=key_sanitization_strategy, - ) - - write_file(file=info_file, text=info.to_json()) - return info - - -def read_file(file: AsyncPath) -> dict[str, Any]: - with aopen(file_specifier=Path(file), mode="r", encoding="utf-8") as f: - body: str = f.read() - return load_from_json(json_str=body) - - -def write_file(file: AsyncPath, text: str) -> None: - with aopen(file_specifier=Path(file), mode="w", encoding="utf-8") as f: - f.write(data=text) - - -class FileTreeStore(BaseStore): - """A file-tree based store using directories for collections and files for keys. - - This store uses the native filesystem: - - Each collection is a subdirectory under the base directory - - Each key is stored as a JSON file named "{key}.json" - - File contents contain the ManagedEntry serialized to JSON - - Directory structure: - {base_directory}/ - {collection_1}/ - {key_1}.json - {key_2}.json - {collection_2}/ - {key_3}.json - - By default, collections and keys are not sanitized. This means that filesystem limitations - on path lengths and special characters may cause errors when trying to get and put entries. - - To avoid issues, you may want to consider leveraging the `FileTreeV1CollectionSanitizationStrategy` - and `FileTreeV1KeySanitizationStrategy` strategies. - - Warning: - This store is intended for development and testing purposes only. - It is not suitable for production use due to: - - Poor performance with many keys - - No atomic operations - - No built-in cleanup of expired entries - - Filesystem limitations on file names and directory sizes - - The store does NOT automatically clean up expired entries from disk. Expired entries - are only filtered out when read via get() or similar methods. 
- """ - - _data_directory: AsyncPath - _metadata_directory: AsyncPath - - _collection_infos: dict[str, DiskCollectionInfo] - - def __init__( - self, - *, - data_directory: Path | str, - metadata_directory: Path | str | None = None, - default_collection: str | None = None, - serialization_adapter: SerializationAdapter | None = None, - key_sanitization_strategy: SanitizationStrategy | None = None, - collection_sanitization_strategy: SanitizationStrategy | None = None, - ) -> None: - """Initialize the file-tree store. - - Args: - data_directory: The base directory to use for storing collections and keys. - metadata_directory: The directory to use for storing metadata. Defaults to data_directory. - default_collection: The default collection to use if no collection is provided. - serialization_adapter: The serialization adapter to use for the store. - key_sanitization_strategy: The sanitization strategy to use for keys. - collection_sanitization_strategy: The sanitization strategy to use for collections. - """ - data_directory = Path(data_directory).resolve() - data_directory.mkdir(parents=True, exist_ok=True) - - if metadata_directory is None: - metadata_directory = data_directory - - metadata_directory = Path(metadata_directory).resolve() - metadata_directory.mkdir(parents=True, exist_ok=True) - - self._data_directory = AsyncPath(data_directory) - self._metadata_directory = AsyncPath(metadata_directory) - - self._collection_infos = {} - - self._stable_api = False - - super().__init__( - serialization_adapter=serialization_adapter or BasicSerializationAdapter(), - key_sanitization_strategy=key_sanitization_strategy, - collection_sanitization_strategy=collection_sanitization_strategy, - default_collection=default_collection, - ) - - def _get_data_directories(self) -> Generator[AsyncPath]: - for directory in self._data_directory.iterdir(): - if directory.is_dir(): - yield directory - - def _get_metadata_entries(self) -> Generator[AsyncPath]: - for entry in self._metadata_directory.iterdir(): - if entry.is_file() and entry.suffix == ".json": - yield entry.resolve() - - def _load_collection_infos(self) -> None: - for entry in self._get_metadata_entries(): - collection_info: DiskCollectionInfo = DiskCollectionInfo.from_file( - file=entry, serialization_adapter=self._serialization_adapter, key_sanitization_strategy=self._key_sanitization_strategy - ) - self._collection_infos[collection_info.collection] = collection_info - - @override - def _setup_collection(self, *, collection: str) -> None: - """Set up a collection by creating its directory if it doesn't exist. - - Args: - collection: The collection name. - """ - if collection in self._collection_infos: - return - - # Sanitize the collection name using the strategy - sanitized_collection = self._sanitize_collection(collection=collection) - - # Create the collection directory under the data directory - data_directory: AsyncPath = AsyncPath(self._data_directory / sanitized_collection) - data_directory.mkdir(parents=True, exist_ok=True) - - self._collection_infos[collection] = DiskCollectionInfo.create_or_get_info( - data_directory=data_directory, - metadata_directory=self._metadata_directory, - collection=collection, - sanitized_collection=sanitized_collection, - serialization_adapter=self._serialization_adapter, - key_sanitization_strategy=self._key_sanitization_strategy, - ) - - @override - def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: - """Retrieve a managed entry by key from the specified collection. 
- - Args: - collection: The collection name. - key: The key name. - - Returns: - The managed entry if found and not expired, None otherwise. - """ - collection_info: DiskCollectionInfo = self._collection_infos[collection] - - return collection_info.get_entry(key=key) - - @override - def _put_managed_entry(self, *, key: str, collection: str, managed_entry: ManagedEntry) -> None: - """Store a managed entry at the specified key in the collection. - - Args: - collection: The collection name. - key: The key name. - managed_entry: The managed entry to store. - """ - collection_info: DiskCollectionInfo = self._collection_infos[collection] - collection_info.put_entry(key=key, data=managed_entry) - - @override - def _delete_managed_entry(self, *, key: str, collection: str) -> bool: - """Delete a managed entry from the specified collection. - - Args: - collection: The collection name. - key: The key name. - - Returns: - True if the entry was deleted, False if it didn't exist. - """ - collection_info: DiskCollectionInfo = self._collection_infos[collection] - - return collection_info.delete_entry(key=key) diff --git a/key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py b/key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py deleted file mode 100644 index ac91fc37..00000000 --- a/key-value/key-value-sync/src/key_value/sync/stores/filetree/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file '__init__.py' -# DO NOT CHANGE! Change the original file instead. -"""File-tree based store for visual inspection and testing.""" - -from key_value.sync.code_gen.stores.filetree.store import ( - FileTreeStore, - FileTreeV1CollectionSanitizationStrategy, - FileTreeV1KeySanitizationStrategy, -) - -__all__ = ["FileTreeStore", "FileTreeV1CollectionSanitizationStrategy", "FileTreeV1KeySanitizationStrategy"] diff --git a/key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py b/key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py deleted file mode 100644 index 7c112ef4..00000000 --- a/key-value/key-value-sync/tests/code_gen/stores/filetree/test_filetree.py +++ /dev/null @@ -1,43 +0,0 @@ -# WARNING: this file is auto-generated by 'build_sync_library.py' -# from the original file 'test_filetree.py' -# DO NOT CHANGE! Change the original file instead. -"""Tests for FileTreeStore.""" - -import tempfile -from collections.abc import Generator -from pathlib import Path - -import pytest -from typing_extensions import override - -from key_value.sync.code_gen.stores.base import BaseStore -from key_value.sync.code_gen.stores.filetree import ( - FileTreeStore, - FileTreeV1CollectionSanitizationStrategy, - FileTreeV1KeySanitizationStrategy, -) -from tests.code_gen.stores.base import BaseStoreTests - - -class TestFileTreeStore(BaseStoreTests): - """Test suite for FileTreeStore.""" - - @pytest.fixture - def store(self) -> Generator[FileTreeStore, None, None]: - """Create a FileTreeStore instance with a temporary directory. - - Uses V1 sanitization strategies to maintain backwards compatibility - and pass tests that rely on sanitization for long/special names. 
- """ - with tempfile.TemporaryDirectory() as temp_dir: - temp_path = Path(temp_dir) - yield FileTreeStore( - data_directory=temp_path, - key_sanitization_strategy=FileTreeV1KeySanitizationStrategy(directory=temp_path), - collection_sanitization_strategy=FileTreeV1CollectionSanitizationStrategy(directory=temp_path), - ) - - @override - def test_not_unbounded(self, store: BaseStore): - """FileTreeStore is unbounded, so skip this test.""" - pytest.skip("FileTreeStore is unbounded and does not evict old entries") diff --git a/scripts/build_sync_library.py b/scripts/build_sync_library.py index 1263fdc5..05bc58bb 100644 --- a/scripts/build_sync_library.py +++ b/scripts/build_sync_library.py @@ -54,6 +54,8 @@ EXCLUDE_DIRECTORIES = [ "key-value/key-value-aio/src/key_value/aio/stores/dynamodb", "key-value/key-value-aio/tests/stores/dynamodb", + "key-value/key-value-aio/src/key_value/aio/stores/filetree", + "key-value/key-value-aio/tests/stores/filetree", "key-value/key-value-aio/src/key_value/aio/stores/memcached", "key-value/key-value-aio/tests/stores/memcached", "key-value/key-value-aio/src/key_value/aio/stores/s3", From f32a8ce430f858da01aff2be2da9eb67a77e4d00 Mon Sep 17 00:00:00 2001 From: William Easton Date: Sun, 9 Nov 2025 12:47:50 -0600 Subject: [PATCH 16/19] Apply suggestion from @coderabbitai[bot] Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .../tests/stores/elasticsearch/test_elasticsearch.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index e228861d..741ad7ab 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -46,9 +46,13 @@ async def ping_elasticsearch() -> bool: async with es_client: if await es_client.ping(): logger.info("Elasticsearch pinged, wait for yellow status") - await es_client.cluster.health(wait_for_status="yellow", timeout="10s") - logger.info("Elasticsearch is ready") - return True + try: + await es_client.cluster.health(wait_for_status="yellow", timeout="10s") + logger.info("Elasticsearch is ready") + return True + except Exception as e: + logger.warning(f"Cluster health check failed: {e}") + return False return False From b230647558493640c3fd4cc53a05dfc481671940 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 9 Nov 2025 19:06:34 +0000 Subject: [PATCH 17/19] Fix Elasticsearch index already exists exception check - Changed exception name check from 'index_already_exists_exception' to 'already_exists_exception' - This handles both old (index_already_exists_exception) and new (resource_already_exists_exception) Elasticsearch versions - Fixed linting issue in ping_elasticsearch by moving return True to else block Co-authored-by: William Easton --- .../src/key_value/aio/stores/elasticsearch/store.py | 2 +- .../tests/stores/elasticsearch/test_elasticsearch.py | 3 ++- .../sync/code_gen/stores/elasticsearch/store.py | 2 +- .../stores/elasticsearch/test_elasticsearch.py | 11 ++++++++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py index 9b120157..2e888296 100644 --- 
a/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py @@ -268,7 +268,7 @@ async def _setup_collection(self, *, collection: str) -> None: try: _ = await self._client.options(ignore_status=404).indices.create(index=index_name, mappings=DEFAULT_MAPPING, settings={}) except BadRequestError as e: - if "index_already_exists_exception" in str(e).lower(): + if "already_exists_exception" in str(e).lower(): return raise diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index 741ad7ab..b28e95c3 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -49,10 +49,11 @@ async def ping_elasticsearch() -> bool: try: await es_client.cluster.health(wait_for_status="yellow", timeout="10s") logger.info("Elasticsearch is ready") - return True except Exception as e: logger.warning(f"Cluster health check failed: {e}") return False + else: + return True return False diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py index cbd016cb..3694e831 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py @@ -240,7 +240,7 @@ def _setup_collection(self, *, collection: str) -> None: try: _ = self._client.options(ignore_status=404).indices.create(index=index_name, mappings=DEFAULT_MAPPING, settings={}) except BadRequestError as e: - if "index_already_exists_exception" in str(e).lower(): + if "already_exists_exception" in str(e).lower(): return raise diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index 1f917cb2..b8d08a44 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -47,9 +47,14 @@ def ping_elasticsearch() -> bool: with es_client: if es_client.ping(): logger.info("Elasticsearch pinged, wait for yellow status") - es_client.cluster.health(wait_for_status="yellow", timeout="10s") - logger.info("Elasticsearch is ready") - return True + try: + es_client.cluster.health(wait_for_status="yellow", timeout="10s") + logger.info("Elasticsearch is ready") + except Exception as e: + logger.warning(f"Cluster health check failed: {e}") + return False + else: + return True return False From 5df62e33b104dd7b8e76e7d48dd5433358f0c0c5 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 9 Nov 2025 19:21:53 +0000 Subject: [PATCH 18/19] Extend Elasticsearch cluster health check timeout from 10s to 15s Co-authored-by: William Easton --- .../tests/stores/elasticsearch/test_elasticsearch.py | 2 +- .../tests/code_gen/stores/elasticsearch/test_elasticsearch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index b28e95c3..4e2fe1b9 100644 --- 
a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -47,7 +47,7 @@ async def ping_elasticsearch() -> bool: if await es_client.ping(): logger.info("Elasticsearch pinged, wait for yellow status") try: - await es_client.cluster.health(wait_for_status="yellow", timeout="10s") + await es_client.cluster.health(wait_for_status="yellow", timeout="15s") logger.info("Elasticsearch is ready") except Exception as e: logger.warning(f"Cluster health check failed: {e}") diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index b8d08a44..91c987b4 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -48,7 +48,7 @@ def ping_elasticsearch() -> bool: if es_client.ping(): logger.info("Elasticsearch pinged, wait for yellow status") try: - es_client.cluster.health(wait_for_status="yellow", timeout="10s") + es_client.cluster.health(wait_for_status="yellow", timeout="15s") logger.info("Elasticsearch is ready") except Exception as e: logger.warning(f"Cluster health check failed: {e}") From f14500aa846ea7439aa89ecef81b17c05f13349d Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:09:52 +0000 Subject: [PATCH 19/19] Remove excessive code comments from S3 store - Remove obvious comments that state what the code already shows - Keep only non-obvious comments (e.g., region-specific AWS logic) - Simplify constant comments to be more concise - Remove type-hinting comments where type is already declared Co-authored-by: William Easton --- .../src/key_value/aio/stores/s3/store.py | 29 +++---------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py index f0851358..2e66d205 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/s3/store.py @@ -11,12 +11,10 @@ BaseStore, ) -# HTTP status code for not found errors HTTP_NOT_FOUND = 404 # S3 key length limit is 1024 bytes -# We allocate space for collection, separator, and key -# Using 500 bytes for each allows for the separator and stays well under 1024 +# Allocating 500 bytes each for collection and key stays well under the limit MAX_COLLECTION_LENGTH = 500 MAX_KEY_LENGTH = 500 @@ -135,7 +133,7 @@ class S3Store(BaseContextManagerStore, BaseStore): _bucket_name: str _endpoint_url: str | None - _raw_client: Any # S3 client from aioboto3 + _raw_client: Any _client: S3Client | None @overload @@ -286,32 +284,25 @@ async def _setup(self) -> None: from botocore.exceptions import ClientError try: - # Check if bucket exists await self._connected_client.head_bucket(Bucket=self._bucket_name) # pyright: ignore[reportUnknownMemberType] except ClientError as e: - # Only proceed with bucket creation if it's a 404/NoSuchBucket error error_code = e.response.get("Error", {}).get("Code", "") # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] http_status = e.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 0) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] if error_code in ("404", "NoSuchBucket") or 
http_status == HTTP_NOT_FOUND: - # Bucket doesn't exist, create it import contextlib with contextlib.suppress(self._connected_client.exceptions.BucketAlreadyOwnedByYou): # pyright: ignore[reportUnknownMemberType] - # Build create_bucket parameters create_params: dict[str, Any] = {"Bucket": self._bucket_name} - - # Get region from client metadata region_name = getattr(self._connected_client.meta, "region_name", None) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] - # For regions other than us-east-1, we need to specify LocationConstraint - # Skip this for custom endpoints (LocalStack, MinIO) which may not support it + # For regions other than us-east-1, specify LocationConstraint + # Skip for custom endpoints (LocalStack, MinIO) which may not support it if region_name and region_name != "us-east-1" and not self._endpoint_url: create_params["CreateBucketConfiguration"] = {"LocationConstraint": region_name} await self._connected_client.create_bucket(**create_params) # pyright: ignore[reportUnknownMemberType] else: - # Re-raise authentication, permission, or other errors raise def _get_s3_key(self, *, collection: str, key: str) -> str: @@ -327,7 +318,6 @@ def _get_s3_key(self, *, collection: str, key: str) -> str: Returns: The S3 object key in format: {collection}/{key} """ - # Use the sanitization strategies from BaseStore sanitized_collection, sanitized_key = self._sanitize_collection_and_key(collection=collection, key=key) return f"{sanitized_collection}/{sanitized_key}" @@ -354,17 +344,13 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry Key=s3_key, ) - # Read the object body and ensure the streaming body is closed async with response["Body"] as stream: # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] body_bytes = await stream.read() # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] json_value = body_bytes.decode("utf-8") # pyright: ignore[reportUnknownMemberType] - # Deserialize to ManagedEntry managed_entry = self._serialization_adapter.load_json(json_str=json_value) - # Check for client-side expiration if managed_entry.is_expired: - # Entry expired, delete it and return None await self._connected_client.delete_object( # type: ignore[reportUnknownMemberType] Bucket=self._bucket_name, Key=s3_key, @@ -373,7 +359,6 @@ async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry return managed_entry # noqa: TRY300 except self._connected_client.exceptions.NoSuchKey: # pyright: ignore[reportUnknownMemberType] - # Object doesn't exist return None @override @@ -399,7 +384,6 @@ async def _put_managed_entry( s3_key = self._get_s3_key(collection=collection, key=key) json_value = self._serialization_adapter.dump_json(entry=managed_entry) - # Prepare metadata metadata: dict[str, str] = {} if managed_entry.expires_at: metadata["expires-at"] = managed_entry.expires_at.isoformat() @@ -430,7 +414,6 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: from botocore.exceptions import ClientError try: - # Check if object exists before deletion await self._connected_client.head_object( # pyright: ignore[reportUnknownMemberType] Bucket=self._bucket_name, Key=s3_key, @@ -443,10 +426,8 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: http_status = metadata.get("HTTPStatusCode", 0) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] if error_code in ("404", "NoSuchKey") or http_status == HTTP_NOT_FOUND: - # Object doesn't 
exist return False - # If AccessDenied on head_object, try delete anyway (delete-only IAM role) if error_code in ("403", "AccessDenied"): await self._connected_client.delete_object( # pyright: ignore[reportUnknownMemberType] Bucket=self._bucket_name, @@ -454,10 +435,8 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: ) return True - # Re-raise other errors (network, etc.) raise - # Object exists, delete it await self._connected_client.delete_object( # pyright: ignore[reportUnknownMemberType] Bucket=self._bucket_name, Key=s3_key,
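
# Illustrative usage sketch (not part of the patch series above): a hedged example of the
# delete semantics that the final S3Store code in this series implements. It assumes the
# public async API shown in the earlier patches (S3Store(bucket_name=...),
# put(key=..., value=..., ttl=...)) and that the store exposes a delete(key=...) coroutine
# returning bool; the delete() name is an assumption drawn from the _delete_managed_entry
# signature, not verified against the released package.
import asyncio

from key_value.aio.stores.s3 import S3Store


async def main() -> None:
    async with S3Store(bucket_name="my-kv-store") as store:
        await store.put(key="user:123", value={"name": "Alice"}, ttl=3600)
        # First delete: head_object finds the object (or, with a delete-only IAM role,
        # returns AccessDenied), so the store deletes it and reports True.
        assert await store.delete(key="user:123") is True
        # Second delete: head_object reports 404/NoSuchKey, so the store reports False.
        assert await store.delete(key="user:123") is False


asyncio.run(main())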