Skip to content

Commit a6ba8f4

Browse files
tylerhutcherson authored and abrookins committed
Support client-side schema validation using Pydantic (#304)
This PR implements a layered architecture for managing and validating searchable data in Redis, with clear separation of concerns between schema definition, data validation, and storage operations.

## Key Components

### 1. Schema Definition Layer
- `IndexSchema` provides the blueprint for data structure and constraints
- Defines fields with specific types (TEXT, TAG, NUMERIC, GEO, VECTOR)
- Supports different storage types (HASH, JSON) with appropriate configuration

### 2. Validation Layer
- `SchemaModelGenerator` dynamically creates Pydantic models from schema definitions
- Implements a caching mechanism to avoid redundant model generation
- Maps Redis field types to appropriate Python/Pydantic types
- Provides type-specific validators:
  - VECTOR: validates dimensions and value ranges (e.g., INT8 range checks)
  - GEO: validates geographic coordinate format
  - NUMERIC: prevents boolean values

### 3. Storage Layer
- `BaseStorage` is the abstract class that provides the foundation for Redis operations
- Specialized implementations (HashStorage, JsonStorage) for different Redis data types
- Enforces schema validation during write operations when validation is enabled
- Implements optimized batch operations using Redis pipelines
- Supports both synchronous and asynchronous interfaces
- Handles key generation, preprocessing, and error handling

### 4. Index Layer
The `SearchIndex` contains the setting `validate_on_load`, which defaults to `False`.

## Data Flow

### Write Flow:
- Objects are preprocessed and validated against the schema.
- Objects are prepared with appropriate keys.
- Batch writing occurs using Redis pipelines for efficiency.
- TTL (expiration) can be applied if specified.

### Read Flow:
- Keys are fetched in batches using pipelines.
- Data is converted from Redis format to Python objects.
- Bytes are automatically converted to appropriate types.
1 parent e474e97 commit a6ba8f4

25 files changed

+2196
-439
lines changed

.github/workflows/test.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ jobs:
133133
with:
134134
credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
135135

136+
- name: Set HuggingFace token
137+
run: |
138+
mkdir -p ~/.huggingface
139+
echo '{"token":"${{ secrets.HF_TOKEN }}"}' > ~/.huggingface/token
140+
136141
- name: Run tests
137142
if: matrix.connection == 'plain' && matrix.redis-version == 'latest'
138143
env:
@@ -149,6 +154,7 @@ jobs:
149154
OPENAI_API_VERSION: ${{ secrets.OPENAI_API_VERSION }}
150155
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
151156
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
157+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
152158
run: |
153159
make test-all
154160
@@ -173,6 +179,7 @@ jobs:
173179
OPENAI_API_VERSION: ${{ secrets.OPENAI_API_VERSION }}
174180
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
175181
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
182+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
176183
run: |
177184
docker run -d --name redis -p 6379:6379 redis/redis-stack-server:latest
178185
make test-notebooks

docs/user_guide/01_getting_started.ipynb

Lines changed: 138 additions & 100 deletions
Large diffs are not rendered by default.

poetry.lock

Lines changed: 547 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ tenacity = ">=8.2.2"
3333
tabulate = "^0.9.0"
3434
ml-dtypes = "^0.4.0"
3535
python-ulid = "^3.0.0"
36+
jsonpath-ng = "^1.5.0"
37+
3638
openai = { version = "^1.13.0", optional = true }
3739
sentence-transformers = { version = "^3.4.0", optional = true }
3840
scipy = [

redisvl/exceptions.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,32 @@
1-
class RedisVLException(Exception):
2-
"""Base RedisVL exception"""
1+
"""
2+
RedisVL Exception Classes
33
4+
This module defines all custom exceptions used throughout the RedisVL library.
5+
"""
46

5-
class RedisModuleVersionError(RedisVLException):
6-
"""Invalid module versions installed"""
77

8+
class RedisVLError(Exception):
9+
"""Base exception for all RedisVL errors."""
810

9-
class RedisSearchError(RedisVLException):
10-
"""Error while performing a search or aggregate request"""
11+
pass
12+
13+
14+
class RedisModuleVersionError(RedisVLError):
15+
"""Error raised when required Redis modules are missing or have incompatible versions."""
16+
17+
pass
18+
19+
20+
class RedisSearchError(RedisVLError):
21+
"""Error raised for Redis Search specific operations."""
22+
23+
pass
24+
25+
26+
class SchemaValidationError(RedisVLError):
27+
"""Error when validating data against a schema."""
28+
29+
def __init__(self, message, index=None):
30+
if index is not None:
31+
message = f"Validation failed for object at index {index}: {message}"
32+
super().__init__(message)

redisvl/extensions/llmcache/semantic.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,8 @@ def __init__(
9595
}
9696

9797
# Use the index name as the key prefix by default
98-
if "prefix" in kwargs:
99-
prefix = kwargs["prefix"]
100-
else:
101-
prefix = name
102-
103-
dtype = kwargs.get("dtype")
98+
prefix = kwargs.pop("prefix", name)
99+
dtype = kwargs.pop("dtype", None)
104100

105101
# Validate a provided vectorizer or set the default
106102
if vectorizer:
@@ -111,7 +107,10 @@ def __init__(
111107
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
112108
)
113109
else:
114-
vectorizer_kwargs = {"dtype": dtype} if dtype else {}
110+
vectorizer_kwargs = kwargs
111+
112+
if dtype:
113+
vectorizer_kwargs.update(**{"dtype": dtype})
115114

116115
vectorizer = HFTextVectorizer(
117116
model="sentence-transformers/all-mpnet-base-v2",

redisvl/extensions/router/semantic.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def __init__(
7272
connection_kwargs (Dict[str, Any]): The connection arguments
7373
for the redis client. Defaults to empty {}.
7474
"""
75-
dtype = kwargs.get("dtype")
75+
dtype = kwargs.pop("dtype", None)
7676

7777
# Validate a provided vectorizer or set the default
7878
if vectorizer:
@@ -83,8 +83,15 @@ def __init__(
8383
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
8484
)
8585
else:
86-
vectorizer_kwargs = {"dtype": dtype} if dtype else {}
87-
vectorizer = HFTextVectorizer(**vectorizer_kwargs)
86+
vectorizer_kwargs = kwargs
87+
88+
if dtype:
89+
vectorizer_kwargs.update(**{"dtype": dtype})
90+
91+
vectorizer = HFTextVectorizer(
92+
model="sentence-transformers/all-mpnet-base-v2",
93+
**vectorizer_kwargs,
94+
)
8895

8996
if routing_config is None:
9097
routing_config = RoutingConfig()

redisvl/extensions/session_manager/semantic_session.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def __init__(
7171
super().__init__(name, session_tag)
7272

7373
prefix = prefix or name
74-
dtype = kwargs.get("dtype")
74+
dtype = kwargs.pop("dtype", None)
7575

7676
# Validate a provided vectorizer or set the default
7777
if vectorizer:
@@ -82,10 +82,13 @@ def __init__(
8282
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
8383
)
8484
else:
85-
vectorizer_kwargs = {"dtype": dtype} if dtype else {}
85+
vectorizer_kwargs = kwargs
86+
87+
if dtype:
88+
vectorizer_kwargs.update(**{"dtype": dtype})
8689

8790
vectorizer = HFTextVectorizer(
88-
model="sentence-transformers/msmarco-distilbert-cos-v5",
91+
model="sentence-transformers/all-mpnet-base-v2",
8992
**vectorizer_kwargs,
9093
)
9194

0 commit comments

Comments
 (0)