Skip to content

Commit eb1a907

Browse files
Enable creating an index "from_existing" (#174)
Since version 2.8.10 of the search module in Redis, `FT.INFO` now reports the contents of the index and associated fields and all low level attributes. Raw response from Redis: ``` 1) index_name 2) user_simple 3) index_options 4) (empty array) 5) index_definition 6) 1) key_type 2) HASH 3) prefixes 4) 1) user_simple_docs 5) default_score 6) "1" 7) attributes 8) 1) 1) identifier 2) user 3) attribute 4) user 5) type 6) TAG 7) SEPARATOR 8) , 2) 1) identifier 2) credit_score 3) attribute 4) credit_score 5) type 6) TAG 7) SEPARATOR 8) , 3) 1) identifier 2) job 3) attribute 4) job 5) type 6) TEXT 7) WEIGHT 8) "1" 4) 1) identifier 2) age 3) attribute 4) age 5) type 6) NUMERIC 5) 1) identifier 2) user_embedding 3) attribute 4) user_embedding 5) type 6) VECTOR 7) algorithm 8) FLAT 9) data_type 10) FLOAT32 11) dim 12) (integer) 3 13) distance_metric 14) COSINE 9) num_docs 10) "0" 11) max_doc_id 12) "0" 13) num_terms 14) "0" 15) num_records 16) "0" 17) inverted_sz_mb 18) "0" 19) vector_index_sz_mb 20) "0.00818634033203125" 21) total_inverted_index_blocks 22) "1813" 23) offset_vectors_sz_mb 24) "0" 25) doc_table_size_mb 26) "0" 27) sortable_values_size_mb 28) "0" 29) key_table_size_mb 30) "0" 31) geoshapes_sz_mb 32) "0" 33) records_per_doc_avg 34) "nan" 35) bytes_per_record_avg 36) "nan" 37) offsets_per_term_avg 38) "nan" 39) offset_bits_per_record_avg 40) "nan" 41) hash_indexing_failures 42) "0" 43) total_indexing_time 44) "0" 45) indexing 46) "0" 47) percent_indexed 48) "1" 49) number_of_uses 50) (integer) 2 51) cleaning 52) (integer) 0 53) gc_stats 54) 1) bytes_collected 2) "0" 3) total_ms_run 4) "0" 5) total_cycles 6) "0" 7) average_cycle_time_ms 8) "nan" 9) last_run_time_ms 10) "0" 11) gc_numeric_trees_missed 12) "0" 13) gc_blocks_denied 14) "0" 55) cursor_stats 56) 1) global_idle 2) (integer) 0 3) global_total 4) (integer) 0 5) index_capacity 6) (integer) 128 7) index_total 8) (integer) 0 57) dialect_stats 58) 1) dialect_1 2) (integer) 0 3) dialect_2 4) (integer) 0 5) dialect_3 6) (integer) 0 7) dialect_4 8) (integer) 0 59) Index Errors 60) 1) indexing failures 2) (integer) 0 3) last indexing error 4) N/A 5) last indexing error key 6) "N/A" 61) field statistics 62) 1) 1) identifier 2) user 3) attribute 4) user 5) Index Errors 6) 1) indexing failures 2) (integer) 0 3) last indexing error 4) N/A 5) last indexing error key 6) "N/A" 2) 1) identifier 2) credit_score 3) attribute 4) credit_score 5) Index Errors 6) 1) indexing failures 2) (integer) 0 3) last indexing error 4) N/A 5) last indexing error key 6) "N/A" 3) 1) identifier 2) job 3) attribute 4) job 5) Index Errors 6) 1) indexing failures 2) (integer) 0 3) last indexing error 4) N/A 5) last indexing error key 6) "N/A" 4) 1) identifier 2) age 3) attribute 4) age 5) Index Errors 6) 1) indexing failures 2) (integer) 0 3) last indexing error 4) N/A 5) last indexing error key 6) "N/A" 5) 1) identifier 2) user_embedding 3) attribute 4) user_embedding 5) Index Errors 6) 1) indexing failures 2) (integer) 0 3) last indexing error 4) N/A 5) last indexing error key 6) "N/A" ``` This enables the ability to "hydrate" a RedisVL `IndexSchema` class from this output. This makes state management for index information MUCH simpler. The caveat is that this capability is only available in newer versions of redis/search. So we have to hide it behind a "Feature flag" of sorts. This is directly needed in our integration clients like LangChain and LlamaIndex too.
1 parent 2e5ba37 commit eb1a907

File tree

6 files changed

+461
-73
lines changed

6 files changed

+461
-73
lines changed

redisvl/index/index.py

+95-23
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@
2424

2525
from redisvl.index.storage import HashStorage, JsonStorage
2626
from redisvl.query.query import BaseQuery, CountQuery, FilterQuery
27-
from redisvl.redis.connection import RedisConnectionFactory
27+
from redisvl.redis.connection import (
28+
RedisConnectionFactory,
29+
convert_index_info_to_schema,
30+
validate_modules,
31+
)
2832
from redisvl.redis.utils import convert_bytes
2933
from redisvl.schema import IndexSchema, StorageType
3034
from redisvl.utils.log import get_logger
@@ -102,7 +106,7 @@ def decorator(func):
102106
@wraps(func)
103107
def wrapper(self, *args, **kwargs):
104108
if not self.exists():
105-
raise ValueError(
109+
raise RuntimeError(
106110
f"Index has not been created. Must be created before calling {func.__name__}"
107111
)
108112
return func(self, *args, **kwargs)
@@ -162,7 +166,6 @@ def __init__(
162166

163167
self.schema = schema
164168

165-
# set custom lib name
166169
self._lib_name: Optional[str] = kwargs.pop("lib_name", None)
167170

168171
# set up redis connection
@@ -317,6 +320,34 @@ class SearchIndex(BaseSearchIndex):
317320
318321
"""
319322

323+
@classmethod
324+
def from_existing(
325+
cls,
326+
name: str,
327+
redis_client: Optional[redis.Redis] = None,
328+
redis_url: Optional[str] = None,
329+
**kwargs,
330+
):
331+
# Handle redis instance
332+
if redis_url:
333+
redis_client = RedisConnectionFactory.connect(
334+
redis_url=redis_url, use_async=False, **kwargs
335+
)
336+
if not redis_client:
337+
raise ValueError(
338+
"Must provide either a redis_url or redis_client to fetch Redis index info."
339+
)
340+
341+
# Validate modules
342+
installed_modules = RedisConnectionFactory._get_modules(redis_client)
343+
validate_modules(installed_modules, [{"name": "search", "ver": 20810}])
344+
345+
# Fetch index info and convert to schema
346+
index_info = cls._info(name, redis_client)
347+
schema_dict = convert_index_info_to_schema(index_info)
348+
schema = IndexSchema.from_dict(schema_dict)
349+
return cls(schema, redis_client, **kwargs)
350+
320351
def connect(self, redis_url: Optional[str] = None, **kwargs):
321352
"""Connect to a Redis instance using the provided `redis_url`, falling
322353
back to the `REDIS_URL` environment variable (if available).
@@ -653,22 +684,28 @@ def exists(self) -> bool:
653684
"""
654685
return self.schema.index.name in self.listall()
655686

687+
@staticmethod
688+
def _info(name: str, redis_client: redis.Redis) -> Dict[str, Any]:
689+
"""Run FT.INFO to fetch information about the index."""
690+
try:
691+
return convert_bytes(redis_client.ft(name).info()) # type: ignore
692+
except:
693+
logger.exception(f"Error while fetching {name} index info")
694+
raise
695+
656696
@check_index_exists()
657-
def info(self) -> Dict[str, Any]:
697+
def info(self, name: Optional[str] = None) -> Dict[str, Any]:
658698
"""Get information about the index.
659699
700+
Args:
701+
name (str, optional): Index name to fetch info about.
702+
Defaults to None.
703+
660704
Returns:
661705
dict: A dictionary containing the information about the index.
662706
"""
663-
try:
664-
return convert_bytes(
665-
self._redis_client.ft(self.schema.index.name).info() # type: ignore
666-
)
667-
except:
668-
logger.exception(
669-
f"Error while fetching {self.schema.index.name} index info"
670-
)
671-
raise
707+
index_name = name or self.schema.index.name
708+
return self._info(index_name, self._redis_client) # type: ignore
672709

673710

674711
class AsyncSearchIndex(BaseSearchIndex):
@@ -698,6 +735,36 @@ class AsyncSearchIndex(BaseSearchIndex):
698735
699736
"""
700737

738+
@classmethod
739+
async def from_existing(
740+
cls,
741+
name: str,
742+
redis_client: Optional[aredis.Redis] = None,
743+
redis_url: Optional[str] = None,
744+
**kwargs,
745+
):
746+
if redis_url:
747+
redis_client = RedisConnectionFactory.connect(
748+
redis_url=redis_url, use_async=True, **kwargs
749+
)
750+
751+
if not redis_client:
752+
raise ValueError(
753+
"Must provide either a redis_url or redis_client to fetch Redis index info."
754+
)
755+
756+
# Validate modules
757+
installed_modules = await RedisConnectionFactory._get_modules_async(
758+
redis_client
759+
)
760+
validate_modules(installed_modules, [{"name": "search", "ver": 20810}])
761+
762+
# Fetch index info and convert to schema
763+
index_info = await cls._info(name, redis_client)
764+
schema_dict = convert_index_info_to_schema(index_info)
765+
schema = IndexSchema.from_dict(schema_dict)
766+
return cls(schema, redis_client, **kwargs)
767+
701768
def connect(self, redis_url: Optional[str] = None, **kwargs):
702769
"""Connect to a Redis instance using the provided `redis_url`, falling
703770
back to the `REDIS_URL` environment variable (if available).
@@ -1035,19 +1102,24 @@ async def exists(self) -> bool:
10351102
"""
10361103
return self.schema.index.name in await self.listall()
10371104

1105+
@staticmethod
1106+
async def _info(name: str, redis_client: aredis.Redis) -> Dict[str, Any]:
1107+
try:
1108+
return convert_bytes(await redis_client.ft(name).info()) # type: ignore
1109+
except:
1110+
logger.exception(f"Error while fetching {name} index info")
1111+
raise
1112+
10381113
@check_async_index_exists()
1039-
async def info(self) -> Dict[str, Any]:
1114+
async def info(self, name: Optional[str] = None) -> Dict[str, Any]:
10401115
"""Get information about the index.
10411116
1117+
Args:
1118+
name (str, optional): Index name to fetch info about.
1119+
Defaults to None.
1120+
10421121
Returns:
10431122
dict: A dictionary containing the information about the index.
10441123
"""
1045-
try:
1046-
return convert_bytes(
1047-
await self._redis_client.ft(self.schema.index.name).info() # type: ignore
1048-
)
1049-
except:
1050-
logger.exception(
1051-
f"Error while fetching {self.schema.index.name} index info"
1052-
)
1053-
raise
1124+
index_name = name or self.schema.index.name
1125+
return await self._info(index_name, self._redis_client) # type: ignore

redisvl/redis/connection.py

+100-39
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,16 @@
1313
)
1414
from redis.exceptions import ResponseError
1515

16-
from redisvl.redis.constants import REDIS_REQUIRED_MODULES
16+
from redisvl.redis.constants import DEFAULT_REQUIRED_MODULES
1717
from redisvl.redis.utils import convert_bytes
1818
from redisvl.version import __version__
1919

2020

21+
def unpack_redis_modules(module_list: List[Dict[str, Any]]) -> Dict[str, Any]:
22+
"""Unpack a list of Redis modules pulled from the MODULES LIST command."""
23+
return {module["name"]: module["ver"] for module in module_list}
24+
25+
2126
def get_address_from_env() -> str:
2227
"""Get a redis connection from environment variables.
2328
@@ -43,6 +48,82 @@ def make_lib_name(*args) -> str:
4348
return f"redis-py({custom_libs})"
4449

4550

51+
def convert_index_info_to_schema(index_info: Dict[str, Any]) -> Dict[str, Any]:
52+
"""Convert the output of FT.INFO into a schema-ready dictionary.
53+
54+
Args:
55+
index_info (Dict[str, Any]): Output of the Redis FT.INFO command.
56+
57+
Returns:
58+
Dict[str, Any]: Schema dictionary.
59+
"""
60+
index_name = index_info["index_name"]
61+
prefixes = index_info["index_definition"][3][0]
62+
storage_type = index_info["index_definition"][1].lower()
63+
64+
index_fields = index_info["attributes"]
65+
66+
def parse_vector_attrs(attrs):
67+
vector_attrs = {attrs[i].lower(): attrs[i + 1] for i in range(6, len(attrs), 2)}
68+
vector_attrs["dims"] = int(vector_attrs.pop("dim"))
69+
vector_attrs["distance_metric"] = vector_attrs.pop("distance_metric").lower()
70+
vector_attrs["algorithm"] = vector_attrs.pop("algorithm").lower()
71+
vector_attrs["datatype"] = vector_attrs.pop("data_type").lower()
72+
return vector_attrs
73+
74+
def parse_attrs(attrs):
75+
return {attrs[i].lower(): attrs[i + 1] for i in range(6, len(attrs), 2)}
76+
77+
schema_fields = []
78+
79+
for field_attrs in index_fields:
80+
# parse field info
81+
name = field_attrs[1] if storage_type == "hash" else field_attrs[3]
82+
field = {"name": name, "type": field_attrs[5].lower()}
83+
if storage_type == "json":
84+
field["path"] = field_attrs[1]
85+
# parse field attrs
86+
if field_attrs[5] == "VECTOR":
87+
field["attrs"] = parse_vector_attrs(field_attrs)
88+
else:
89+
field["attrs"] = parse_attrs(field_attrs)
90+
# append field
91+
schema_fields.append(field)
92+
93+
return {
94+
"index": {"name": index_name, "prefix": prefixes, "storage_type": storage_type},
95+
"fields": schema_fields,
96+
}
97+
98+
99+
def validate_modules(
100+
installed_modules: Dict[str, Any],
101+
required_modules: Optional[List[Dict[str, Any]]] = None,
102+
) -> None:
103+
"""
104+
Validates if required Redis modules are installed.
105+
106+
Args:
107+
installed_modules: List of installed modules.
108+
required_modules: List of required modules.
109+
110+
Raises:
111+
ValueError: If required Redis modules are not installed.
112+
"""
113+
required_modules = required_modules or DEFAULT_REQUIRED_MODULES
114+
115+
for required_module in required_modules:
116+
if required_module["name"] in installed_modules:
117+
installed_version = installed_modules[required_module["name"]] # type: ignore
118+
if int(installed_version) >= int(required_module["ver"]): # type: ignore
119+
return
120+
121+
raise ValueError(
122+
f"Required Redis database module {required_module['name']} with version >= {required_module['ver']} not installed. "
123+
"See Redis Stack documentation: https://redis.io/docs/stack/"
124+
)
125+
126+
46127
class RedisConnectionFactory:
47128
"""Builds connections to a Redis database, supporting both synchronous and
48129
asynchronous clients.
@@ -128,14 +209,14 @@ def get_async_redis_connection(url: Optional[str] = None, **kwargs) -> AsyncRedi
128209
def validate_redis(
129210
client: Union[Redis, AsyncRedis],
130211
lib_name: Optional[str] = None,
131-
redis_required_modules: Optional[List[Dict[str, Any]]] = None,
212+
required_modules: Optional[List[Dict[str, Any]]] = None,
132213
) -> None:
133214
"""Validates the Redis connection.
134215
135216
Args:
136217
client (Redis or AsyncRedis): Redis client.
137218
lib_name (str): Library name to set on the Redis client.
138-
redis_required_modules (List[Dict[str, Any]]): List of required modules and their versions.
219+
required_modules (List[Dict[str, Any]]): List of required modules and their versions.
139220
140221
Raises:
141222
ValueError: If required Redis modules are not installed.
@@ -145,18 +226,26 @@ def validate_redis(
145226
RedisConnectionFactory._validate_async_redis,
146227
client,
147228
lib_name,
148-
redis_required_modules,
229+
required_modules,
149230
)
150231
else:
151232
RedisConnectionFactory._validate_sync_redis(
152-
client, lib_name, redis_required_modules
233+
client, lib_name, required_modules
153234
)
154235

236+
@staticmethod
237+
def _get_modules(client: Redis) -> Dict[str, Any]:
238+
return unpack_redis_modules(convert_bytes(client.module_list()))
239+
240+
@staticmethod
241+
async def _get_modules_async(client: AsyncRedis) -> Dict[str, Any]:
242+
return unpack_redis_modules(convert_bytes(await client.module_list()))
243+
155244
@staticmethod
156245
def _validate_sync_redis(
157246
client: Redis,
158247
lib_name: Optional[str],
159-
redis_required_modules: Optional[List[Dict[str, Any]]],
248+
required_modules: Optional[List[Dict[str, Any]]],
160249
) -> None:
161250
"""Validates the sync client."""
162251
# Set client library name
@@ -168,16 +257,16 @@ def _validate_sync_redis(
168257
client.echo(_lib_name)
169258

170259
# Get list of modules
171-
modules_list = convert_bytes(client.module_list())
260+
installed_modules = RedisConnectionFactory._get_modules(client)
172261

173262
# Validate available modules
174-
RedisConnectionFactory._validate_modules(modules_list, redis_required_modules)
263+
validate_modules(installed_modules, required_modules)
175264

176265
@staticmethod
177266
async def _validate_async_redis(
178267
client: AsyncRedis,
179268
lib_name: Optional[str],
180-
redis_required_modules: Optional[List[Dict[str, Any]]],
269+
required_modules: Optional[List[Dict[str, Any]]],
181270
) -> None:
182271
"""Validates the async client."""
183272
# Set client library name
@@ -189,10 +278,10 @@ async def _validate_async_redis(
189278
await client.echo(_lib_name)
190279

191280
# Get list of modules
192-
modules_list = convert_bytes(await client.module_list())
281+
installed_modules = await RedisConnectionFactory._get_modules_async(client)
193282

194283
# Validate available modules
195-
RedisConnectionFactory._validate_modules(modules_list, redis_required_modules)
284+
validate_modules(installed_modules, required_modules)
196285

197286
@staticmethod
198287
def _run_async(coro, *args, **kwargs):
@@ -232,31 +321,3 @@ def _run_async(coro, *args, **kwargs):
232321
finally:
233322
# Close the event loop to release resources
234323
loop.close()
235-
236-
@staticmethod
237-
def _validate_modules(
238-
installed_modules, redis_required_modules: Optional[List[Dict[str, Any]]] = None
239-
) -> None:
240-
"""
241-
Validates if required Redis modules are installed.
242-
243-
Args:
244-
installed_modules: List of installed modules.
245-
redis_required_modules: List of required modules.
246-
247-
Raises:
248-
ValueError: If required Redis modules are not installed.
249-
"""
250-
installed_modules = {module["name"]: module for module in installed_modules}
251-
redis_required_modules = redis_required_modules or REDIS_REQUIRED_MODULES
252-
253-
for required_module in redis_required_modules:
254-
if required_module["name"] in installed_modules:
255-
installed_version = installed_modules[required_module["name"]]["ver"]
256-
if int(installed_version) >= int(required_module["ver"]): # type: ignore
257-
return
258-
259-
raise ValueError(
260-
f"Required Redis database module {required_module['name']} with version >= {required_module['ver']} not installed. "
261-
"Refer to Redis Stack documentation: https://redis.io/docs/stack/"
262-
)

redisvl/redis/constants.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# required modules
2-
REDIS_REQUIRED_MODULES = [
2+
DEFAULT_REQUIRED_MODULES = [
33
{"name": "search", "ver": 20600},
44
{"name": "searchlight", "ver": 20600},
55
]

0 commit comments

Comments
 (0)