|
| 1 | +from typing import Any, Dict, List, Optional |
| 2 | + |
| 3 | +from pydantic.v1 import BaseModel, Field, root_validator, validator |
| 4 | + |
| 5 | +from redisvl.redis.utils import array_to_buffer, hashify |
| 6 | +from redisvl.schema import IndexSchema |
| 7 | +from redisvl.utils.utils import current_timestamp, deserialize, serialize |
| 8 | + |
| 9 | + |
class CacheEntry(BaseModel):
    """A single cache entry in Redis"""

    entry_id: Optional[str] = Field(default=None)
    """Cache entry identifier"""
    prompt: str
    """Input prompt or question cached in Redis"""
    response: str
    """Response or answer to the question, cached in Redis"""
    prompt_vector: List[float]
    """Text embedding representation of the prompt"""
    inserted_at: float = Field(default_factory=current_timestamp)
    """Timestamp of when the entry was added to the cache"""
    updated_at: float = Field(default_factory=current_timestamp)
    """Timestamp of when the entry was updated in the cache"""
    metadata: Optional[Dict[str, Any]] = Field(default=None)
    """Optional metadata stored on the cache entry"""
    filters: Optional[Dict[str, Any]] = Field(default=None)
    """Optional filter data stored on the cache entry for customizing retrieval"""

    @root_validator(pre=True)
    @classmethod
    def generate_id(cls, values):
        """Default the entry_id to a hash of the prompt when not provided.

        Raises:
            ValueError: If neither ``entry_id`` nor ``prompt`` is supplied.
                (Previously an indexing into ``values["prompt"]`` raised a raw
                KeyError here, which pydantic v1 does not convert into a
                ValidationError; ValueError gives callers a clean validation
                failure instead.)
        """
        if not values.get("entry_id"):
            prompt = values.get("prompt")
            if prompt is None:
                raise ValueError(
                    "Either entry_id or prompt must be provided to create a CacheEntry"
                )
            values["entry_id"] = hashify(prompt)
        return values

    @validator("metadata")
    def non_empty_metadata(cls, v):
        """Ensure metadata, when present, is a dictionary."""
        if v is not None and not isinstance(v, dict):
            raise TypeError("Metadata must be a dictionary.")
        return v

    def to_dict(self) -> Dict:
        """Serialize the entry to a flat dict for storage in Redis.

        - ``prompt_vector`` is packed into a raw byte buffer.
        - ``metadata`` (if set) is serialized to a string.
        - ``filters`` (if set) are flattened into top-level key/value pairs
          so they can be indexed/queried individually.
        """
        data = self.dict(exclude_none=True)
        data["prompt_vector"] = array_to_buffer(self.prompt_vector)
        if self.metadata:
            data["metadata"] = serialize(self.metadata)
        if self.filters:
            data.update(self.filters)
            del data["filters"]
        return data
| 53 | + |
| 54 | + |
class CacheHit(BaseModel):
    """A cache hit based on some input query"""

    entry_id: str
    """Cache entry identifier"""
    prompt: str
    """Input prompt or question cached in Redis"""
    response: str
    """Response or answer to the question, cached in Redis"""
    vector_distance: float
    """The semantic distance between the query vector and the stored prompt vector"""
    inserted_at: float
    """Timestamp of when the entry was added to the cache"""
    updated_at: float
    """Timestamp of when the entry was updated in the cache"""
    metadata: Optional[Dict[str, Any]] = Field(default=None)
    """Optional metadata stored on the cache entry"""
    filters: Optional[Dict[str, Any]] = Field(default=None)
    """Optional filter data stored on the cache entry for customizing retrieval"""

    @root_validator(pre=True)
    @classmethod
    def validate_cache_hit(cls, values):
        """Normalize raw hit data before field validation.

        Metadata that arrives as a serialized string is decoded, and any
        key not declared on the model is treated as a user-defined filter
        field and collected under ``filters``.
        """
        # Metadata may come back from storage as a serialized string.
        if isinstance(values.get("metadata"), str):
            values["metadata"] = deserialize(values["metadata"])

        # Unknown keys are filter fields; move them under "filters".
        extra_keys = [key for key in values if key not in cls.__fields__]
        if extra_keys:
            values["filters"] = {key: values.pop(key) for key in extra_keys}

        return values

    def to_dict(self) -> Dict:
        """Serialize the hit to a flat dict, flattening filters to the top level."""
        data = self.dict(exclude_none=True)
        if self.filters:
            data.update(data.pop("filters"))
        return data
| 103 | + |
| 104 | + |
class SemanticCacheIndexSchema(IndexSchema):
    """Default search index schema for the semantic cache."""

    @classmethod
    def from_params(cls, name: str, prefix: str, vector_dims: int, dtype: str = "float32"):
        """Build a cache index schema from basic parameters.

        Args:
            name: Name of the search index.
            prefix: Key prefix for entries stored under this index.
            vector_dims: Dimensionality of the prompt embedding vectors.
            dtype: Vector datatype stored in the index. Defaults to
                ``"float32"`` (the previous hard-coded value), so existing
                callers are unaffected.

        Returns:
            A configured ``SemanticCacheIndexSchema`` instance.
        """
        return cls(
            index={"name": name, "prefix": prefix},  # type: ignore
            fields=[  # type: ignore
                {"name": "prompt", "type": "text"},
                {"name": "response", "type": "text"},
                {"name": "inserted_at", "type": "numeric"},
                {"name": "updated_at", "type": "numeric"},
                {
                    "name": "prompt_vector",
                    "type": "vector",
                    "attrs": {
                        "dims": vector_dims,
                        "datatype": dtype,
                        "distance_metric": "cosine",
                        "algorithm": "flat",
                    },
                },
            ],
        )
0 commit comments