Skip to content

Commit 0eff950

Browse files
feat: add DuckDB store support
- Add DuckDBStore implementation with BaseStore and BaseContextManagerStore - Support both in-memory (:memory:) and persistent (file-based) storage - Include seed support following current store patterns - Add comprehensive test suite (294 tests passing) - Update pyproject.toml with duckdb>=1.0.0 optional dependency - Mark as unstable API (_stable_api = False) Implements #11 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: William Easton <[email protected]>
1 parent 6505d3d commit 0eff950

File tree

6 files changed

+376
-5
lines changed

6 files changed

+376
-5
lines changed

key-value/key-value-aio/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ rocksdb = [
4848
"rocksdict>=0.3.24 ; python_version >= '3.12'", # RocksDB 0.3.24 is the first version to support Python 3.13
4949
"rocksdict>=0.3.2 ; python_version < '3.12'"
5050
]
51+
duckdb = ["duckdb>=1.0.0"]
5152
wrappers-encryption = ["cryptography>=45.0.0"]
5253

5354
[tool.pytest.ini_options]
@@ -67,7 +68,7 @@ env_files = [".env"]
6768

6869
[dependency-groups]
6970
dev = [
70-
"py-key-value-aio[memory,disk,redis,elasticsearch,memcached,mongodb,vault,dynamodb,rocksdb]",
71+
"py-key-value-aio[memory,disk,redis,elasticsearch,memcached,mongodb,vault,dynamodb,rocksdb,duckdb]",
7172
"py-key-value-aio[valkey]; platform_system != 'Windows'",
7273
"py-key-value-aio[keyring]",
7374
"py-key-value-aio[pydantic]",
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from key_value.aio.stores.duckdb.store import DuckDBStore
2+
3+
__all__ = ["DuckDBStore"]
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
from pathlib import Path
2+
from typing import overload
3+
4+
from key_value.shared.utils.managed_entry import ManagedEntry
5+
from typing_extensions import override
6+
7+
from key_value.aio.stores.base import SEED_DATA_TYPE, BaseContextManagerStore, BaseStore
8+
9+
try:
10+
import duckdb
11+
except ImportError as e:
12+
msg = "DuckDBStore requires py-key-value-aio[duckdb]"
13+
raise ImportError(msg) from e
14+
15+
16+
class DuckDBStore(BaseContextManagerStore, BaseStore):
17+
"""A DuckDB-based key-value store supporting both in-memory and persistent storage.
18+
19+
DuckDB is an in-process SQL OLAP database that provides excellent performance
20+
for analytical workloads while supporting standard SQL operations. This store
21+
can operate in memory-only mode or persist data to disk.
22+
"""
23+
24+
_connection: duckdb.DuckDBPyConnection
25+
_is_closed: bool
26+
27+
@overload
28+
def __init__(
29+
self,
30+
*,
31+
connection: duckdb.DuckDBPyConnection,
32+
default_collection: str | None = None,
33+
seed: SEED_DATA_TYPE | None = None,
34+
) -> None:
35+
"""Initialize the DuckDB store with an existing connection.
36+
37+
Args:
38+
connection: An existing DuckDB connection to use.
39+
default_collection: The default collection to use if no collection is provided.
40+
seed: Optional seed data to pre-populate the store.
41+
"""
42+
43+
@overload
44+
def __init__(
45+
self,
46+
*,
47+
database_path: Path | str | None = None,
48+
default_collection: str | None = None,
49+
seed: SEED_DATA_TYPE | None = None,
50+
) -> None:
51+
"""Initialize the DuckDB store with a database path.
52+
53+
Args:
54+
database_path: Path to the database file. If None or ':memory:', uses in-memory database.
55+
default_collection: The default collection to use if no collection is provided.
56+
seed: Optional seed data to pre-populate the store.
57+
"""
58+
59+
def __init__(
60+
self,
61+
*,
62+
connection: duckdb.DuckDBPyConnection | None = None,
63+
database_path: Path | str | None = None,
64+
default_collection: str | None = None,
65+
seed: SEED_DATA_TYPE | None = None,
66+
) -> None:
67+
"""Initialize the DuckDB store.
68+
69+
Args:
70+
connection: An existing DuckDB connection to use.
71+
database_path: Path to the database file. If None or ':memory:', uses in-memory database.
72+
default_collection: The default collection to use if no collection is provided.
73+
seed: Optional seed data to pre-populate the store.
74+
"""
75+
if connection is not None and database_path is not None:
76+
msg = "Provide only one of connection or database_path"
77+
raise ValueError(msg)
78+
79+
if connection is not None:
80+
self._connection = connection
81+
else:
82+
# Convert Path to string if needed
83+
if isinstance(database_path, Path):
84+
database_path = str(database_path)
85+
86+
# Use in-memory database if no path specified
87+
if database_path is None or database_path == ":memory:":
88+
self._connection = duckdb.connect(":memory:")
89+
else:
90+
self._connection = duckdb.connect(database=database_path)
91+
92+
self._is_closed = False
93+
self._stable_api = False
94+
95+
super().__init__(default_collection=default_collection, seed=seed)
96+
97+
@override
98+
async def _setup(self) -> None:
99+
"""Initialize the database schema for key-value storage."""
100+
# Create the main table for storing key-value entries
101+
self._connection.execute("""
102+
CREATE TABLE IF NOT EXISTS kv_entries (
103+
collection VARCHAR NOT NULL,
104+
key VARCHAR NOT NULL,
105+
value_json TEXT NOT NULL,
106+
created_at DOUBLE,
107+
ttl DOUBLE,
108+
expires_at DOUBLE,
109+
PRIMARY KEY (collection, key)
110+
)
111+
""")
112+
113+
# Create index for efficient collection queries
114+
self._connection.execute("""
115+
CREATE INDEX IF NOT EXISTS idx_kv_collection
116+
ON kv_entries(collection)
117+
""")
118+
119+
# Create index for expiration-based queries
120+
self._connection.execute("""
121+
CREATE INDEX IF NOT EXISTS idx_kv_expires_at
122+
ON kv_entries(expires_at)
123+
""")
124+
125+
@override
126+
async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None:
127+
"""Retrieve a managed entry by key from the specified collection."""
128+
result = self._connection.execute(
129+
"SELECT value_json FROM kv_entries WHERE collection = ? AND key = ?",
130+
[collection, key],
131+
).fetchone()
132+
133+
if result is None:
134+
return None
135+
136+
value_json = result[0]
137+
return ManagedEntry.from_json(json_str=value_json)
138+
139+
@override
140+
async def _put_managed_entry(
141+
self,
142+
*,
143+
key: str,
144+
collection: str,
145+
managed_entry: ManagedEntry,
146+
) -> None:
147+
"""Store a managed entry by key in the specified collection."""
148+
# Insert or replace the entry
149+
self._connection.execute(
150+
"""
151+
INSERT OR REPLACE INTO kv_entries
152+
(collection, key, value_json, created_at, ttl, expires_at)
153+
VALUES (?, ?, ?, ?, ?, ?)
154+
""",
155+
[
156+
collection,
157+
key,
158+
managed_entry.to_json(),
159+
managed_entry.created_at.timestamp() if managed_entry.created_at else None,
160+
managed_entry.ttl,
161+
managed_entry.expires_at.timestamp() if managed_entry.expires_at else None,
162+
],
163+
)
164+
165+
@override
166+
async def _delete_managed_entry(self, *, key: str, collection: str) -> bool:
167+
"""Delete a managed entry by key from the specified collection."""
168+
result = self._connection.execute(
169+
"DELETE FROM kv_entries WHERE collection = ? AND key = ? RETURNING key",
170+
[collection, key],
171+
)
172+
173+
# Check if any rows were deleted by counting returned rows
174+
deleted_rows = result.fetchall()
175+
return len(deleted_rows) > 0
176+
177+
@override
178+
async def _close(self) -> None:
179+
"""Close the DuckDB connection."""
180+
if not self._is_closed:
181+
self._connection.close()
182+
self._is_closed = True
183+
184+
def __del__(self) -> None:
185+
"""Clean up the DuckDB connection on deletion."""
186+
if not self._is_closed:
187+
self._connection.close()
188+
self._is_closed = True
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# DuckDB store tests
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
from collections.abc import AsyncGenerator
2+
from pathlib import Path
3+
from tempfile import TemporaryDirectory
4+
5+
import pytest
6+
from typing_extensions import override
7+
8+
from key_value.aio.stores.base import BaseStore
9+
from key_value.aio.stores.duckdb import DuckDBStore
10+
from tests.stores.base import BaseStoreTests, ContextManagerStoreTestMixin
11+
12+
13+
class TestDuckDBStore(ContextManagerStoreTestMixin, BaseStoreTests):
14+
@override
15+
@pytest.fixture
16+
async def store(self) -> AsyncGenerator[DuckDBStore, None]:
17+
"""Test with in-memory DuckDB database."""
18+
duckdb_store = DuckDBStore()
19+
yield duckdb_store
20+
await duckdb_store.close()
21+
22+
@pytest.mark.skip(reason="Local disk stores are unbounded")
23+
@override
24+
async def test_not_unbounded(self, store: BaseStore): ...
25+
26+
27+
class TestDuckDBStorePersistent(ContextManagerStoreTestMixin, BaseStoreTests):
28+
@override
29+
@pytest.fixture
30+
async def store(self) -> AsyncGenerator[DuckDBStore, None]:
31+
"""Test with persistent DuckDB database file."""
32+
with TemporaryDirectory() as temp_dir:
33+
db_path = Path(temp_dir) / "test.db"
34+
duckdb_store = DuckDBStore(database_path=db_path)
35+
yield duckdb_store
36+
await duckdb_store.close()
37+
38+
@pytest.mark.skip(reason="Local disk stores are unbounded")
39+
@override
40+
async def test_not_unbounded(self, store: BaseStore): ...
41+
42+
43+
class TestDuckDBStoreSpecific:
44+
"""Test DuckDB-specific functionality."""
45+
46+
@pytest.fixture
47+
async def store(self) -> AsyncGenerator[DuckDBStore, None]:
48+
"""Provide DuckDB store instance."""
49+
duckdb_store = DuckDBStore()
50+
yield duckdb_store
51+
await duckdb_store.close()
52+
53+
async def test_database_path_initialization(self):
54+
"""Test that store can be initialized with different database path options."""
55+
# In-memory (default)
56+
store1 = DuckDBStore()
57+
await store1.put(collection="test", key="key1", value={"test": "value1"})
58+
result1 = await store1.get(collection="test", key="key1")
59+
assert result1 == {"test": "value1"}
60+
await store1.close()
61+
62+
# Explicit in-memory
63+
store2 = DuckDBStore(database_path=":memory:")
64+
await store2.put(collection="test", key="key2", value={"test": "value2"})
65+
result2 = await store2.get(collection="test", key="key2")
66+
assert result2 == {"test": "value2"}
67+
await store2.close()
68+
69+
async def test_persistent_database(self):
70+
"""Test that data persists across store instances when using file database."""
71+
with TemporaryDirectory() as temp_dir:
72+
db_path = Path(temp_dir) / "persist_test.db"
73+
74+
# Store data in first instance
75+
store1 = DuckDBStore(database_path=db_path)
76+
await store1.put(collection="test", key="persist_key", value={"data": "persistent"})
77+
await store1.close()
78+
79+
# Create second instance with same database file
80+
store2 = DuckDBStore(database_path=db_path)
81+
result = await store2.get(collection="test", key="persist_key")
82+
await store2.close()
83+
84+
assert result == {"data": "persistent"}
85+
86+
async def test_sql_injection_protection(self, store: DuckDBStore):
87+
"""Test that the store is protected against SQL injection attacks."""
88+
malicious_collection = "test'; DROP TABLE kv_entries; --"
89+
malicious_key = "key'; DELETE FROM kv_entries; --"
90+
91+
# These operations should not cause SQL injection
92+
await store.put(collection=malicious_collection, key=malicious_key, value={"safe": "data"})
93+
result = await store.get(collection=malicious_collection, key=malicious_key)
94+
assert result == {"safe": "data"}
95+
96+
# Verify the table still exists and other data is safe
97+
await store.put(collection="normal", key="normal_key", value={"normal": "data"})
98+
normal_result = await store.get(collection="normal", key="normal_key")
99+
assert normal_result == {"normal": "data"}
100+
101+
async def test_large_data_storage(self, store: DuckDBStore):
102+
"""Test storing and retrieving large data values."""
103+
# Create a large value (1MB of data)
104+
large_value = {"large_data": "x" * (1024 * 1024)}
105+
106+
await store.put(collection="test", key="large_key", value=large_value)
107+
result = await store.get(collection="test", key="large_key")
108+
109+
assert result == large_value
110+
111+
async def test_unicode_support(self, store: DuckDBStore):
112+
"""Test that the store properly handles Unicode characters."""
113+
unicode_data = {
114+
"english": "Hello World",
115+
"chinese": "你好世界",
116+
"japanese": "こんにちは世界",
117+
"arabic": "مرحبا بالعالم",
118+
"emoji": "🌍🚀💻",
119+
"special": "Special chars: !@#$%^&*()_+-={}[]|\\:;\"'<>?,./",
120+
}
121+
122+
await store.put(collection="unicode_test", key="unicode_key", value=unicode_data)
123+
result = await store.get(collection="unicode_test", key="unicode_key")
124+
125+
assert result == unicode_data
126+
127+
async def test_connection_initialization(self):
128+
"""Test that store can be initialized with existing DuckDB connection."""
129+
import duckdb
130+
131+
conn = duckdb.connect(":memory:")
132+
store = DuckDBStore(connection=conn)
133+
134+
await store.put(collection="test", key="conn_test", value={"test": "value"})
135+
result = await store.get(collection="test", key="conn_test")
136+
assert result == {"test": "value"}
137+
138+
await store.close()
139+
140+
@pytest.mark.skip(reason="Local disk stores are unbounded")
141+
@override
142+
async def test_not_unbounded(self, store: BaseStore): ...

0 commit comments

Comments
 (0)