
Commit 9ca93ef

Add ability to clear ALL data associated with an index (#179)
This PR introduces a `clear()` method on the core `SearchIndex` classes in RedisVL. It removes all data associated with an index while leaving the index definition in place, which is useful for manual cache eviction, manual session clearing, and more. The extension classes are also updated to use the new `clear()` method instead of the previous SCAN ITER approach.
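
A minimal usage sketch of the new method from the caller's side. The index setup shown here (schema path, Redis URL) is illustrative; `clear()`, `exists()`, and `delete()` behave as documented in the notebook and tests in this diff:

    from redisvl.index import SearchIndex

    # illustrative setup: a schema file and a locally running Redis instance
    index = SearchIndex.from_yaml("schemas/schema.yaml")
    index.connect("redis://localhost:6379")

    # ... load data, run queries ...

    num_deleted = index.clear()   # removes all records associated with the index, returns the count
    assert index.exists()         # the secondary index itself is still in place

    index.delete()                # removes the index AND, by default, the underlying data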
1 parent 61e7338 commit 9ca93ef

File tree: 12 files changed, +135 -22 lines


docs/user_guide/getting_started_01.ipynb (+33 -2)

@@ -653,13 +653,44 @@
     "## Cleanup"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Below we will clean up after our work. First, you can optionally flush all data from Redis associated with the index by\n",
+    "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n",
+    "\n",
+    "But if you want to clean up everything, including the index, just use `.delete()`\n",
+    "which will by default remove the index AND the underlying data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (optionally) clear all data from Redis associated with the index\n",
+    "await index.clear()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# but the index is still in place\n",
+    "await index.exists()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# clean up the index\n",
+    "# remove / delete the index in its entirety\n",
     "await index.delete()"
    ]
   }
@@ -680,7 +711,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.10.14"
   },
   "orig_nbformat": 4,
   "vscode": {

docs/user_guide/vectorizers_04.ipynb (+6 -6)

@@ -531,14 +531,14 @@
    }
   ],
   "source": [
-   "from redisvl.utils.vectorize import MistralAITextVectorizer\n",
+   "# from redisvl.utils.vectorize import MistralAITextVectorizer\n",
    "\n",
-   "mistral = MistralAITextVectorizer()\n",
+   "# mistral = MistralAITextVectorizer()\n",
    "\n",
-   "# mebed a sentence using their asyncronous method\n",
-   "test = await mistral.aembed(\"This is a test sentence.\")\n",
-   "print(\"Vector dimensions: \", len(test))\n",
-   "print(test[:10])"
+   "# # embed a sentence using their asyncronous method\n",
+   "# test = await mistral.aembed(\"This is a test sentence.\")\n",
+   "# print(\"Vector dimensions: \", len(test))\n",
+   "# print(test[:10])"
   ]
  },
  {

redisvl/extensions/llmcache/semantic.py (+1 -4)

@@ -176,10 +176,7 @@ def set_vectorizer(self, vectorizer: BaseVectorizer) -> None:
 
     def clear(self) -> None:
         """Clear the cache of all keys while preserving the index."""
-        with self._index.client.pipeline(transaction=False) as pipe:  # type: ignore
-            for key in self._index.client.scan_iter(match=f"{self._index.prefix}:*"):  # type: ignore
-                pipe.delete(key)
-            pipe.execute()
+        self._index.clear()
 
     def delete(self) -> None:
         """Clear the semantic cache of all keys and remove the underlying search

redisvl/extensions/session_manager/semantic_session.py (+1 -4)

@@ -130,10 +130,7 @@ def set_scope(
 
     def clear(self) -> None:
         """Clears the chat session history."""
-        with self._index.client.pipeline(transaction=False) as pipe:  # type: ignore
-            for key in self._index.client.scan_iter(match=f"{self._index.prefix}:*"):  # type: ignore
-                pipe.delete(key)
-            pipe.execute()
+        self._index.clear()
 
     def delete(self) -> None:
         """Clear all conversation keys and remove the search index."""

redisvl/index/index.py (+42 -1)

@@ -23,7 +23,8 @@
 from redis.commands.search.indexDefinition import IndexDefinition
 
 from redisvl.index.storage import HashStorage, JsonStorage
-from redisvl.query.query import BaseQuery, CountQuery, FilterQuery
+from redisvl.query import BaseQuery, CountQuery, FilterQuery
+from redisvl.query.filter import FilterExpression
 from redisvl.redis.connection import (
     RedisConnectionFactory,
     convert_index_info_to_schema,
@@ -476,6 +477,26 @@ def delete(self, drop: bool = True):
         except:
             logger.exception("Error while deleting index")
 
+    def clear(self) -> int:
+        """Clear all keys in Redis associated with the index, leaving the index
+        available and in-place for future insertions or updates.
+
+        Returns:
+            int: Count of records deleted from Redis.
+        """
+        # Track deleted records
+        total_records_deleted: int = 0
+
+        # Paginate using queries and delete in batches
+        for batch in self.paginate(
+            FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500
+        ):
+            batch_keys = [record["id"] for record in batch]
+            record_deleted = self._redis_client.delete(*batch_keys)  # type: ignore
+            total_records_deleted += record_deleted  # type: ignore
+
+        return total_records_deleted
+
     def load(
         self,
         data: Iterable[Any],
@@ -894,6 +915,26 @@ async def delete(self, drop: bool = True):
             logger.exception("Error while deleting index")
             raise
 
+    async def clear(self) -> int:
+        """Clear all keys in Redis associated with the index, leaving the index
+        available and in-place for future insertions or updates.
+
+        Returns:
+            int: Count of records deleted from Redis.
+        """
+        # Track deleted records
+        total_records_deleted: int = 0
+
+        # Paginate using queries and delete in batches
+        async for batch in self.paginate(
+            FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500
+        ):
+            batch_keys = [record["id"] for record in batch]
+            records_deleted = await self._redis_client.delete(*batch_keys)  # type: ignore
+            total_records_deleted += records_deleted  # type: ignore
+
+        return total_records_deleted
+
     async def load(
         self,
         data: Iterable[Any],

redisvl/query/__init__.py (+8 -2)

@@ -1,3 +1,9 @@
-from redisvl.query.query import CountQuery, FilterQuery, RangeQuery, VectorQuery
+from redisvl.query.query import (
+    BaseQuery,
+    CountQuery,
+    FilterQuery,
+    RangeQuery,
+    VectorQuery,
+)
 
-__all__ = ["VectorQuery", "FilterQuery", "RangeQuery", "CountQuery"]
+__all__ = ["BaseQuery", "VectorQuery", "FilterQuery", "RangeQuery", "CountQuery"]

schemas/schema.yaml (+1 -1)

@@ -9,7 +9,7 @@ index:
 fields:
   - name: user
     type: tag
-    path: '.user'
+    path: '$.user'
   - name: credit_score
     type: tag
     path: '$.credit_score'

tests/integration/test_async_search_index.py (+12)

@@ -172,6 +172,18 @@ async def test_search_index_delete(async_client, async_index):
     )
 
 
+@pytest.mark.asyncio
+async def test_search_index_clear(async_client, async_index):
+    async_index.set_client(async_client)
+    await async_index.create(overwrite=True, drop=True)
+    data = [{"id": "1", "test": "foo"}]
+    await async_index.load(data, id_field="id")
+
+    count = await async_index.clear()
+    assert count == len(data)
+    assert await async_index.exists()
+
+
 @pytest.mark.asyncio
 async def test_search_index_load_and_fetch(async_client, async_index):
     async_index.set_client(async_client)

tests/integration/test_flow.py (+7)

@@ -90,4 +90,11 @@ def hash_preprocess(item: dict) -> dict:
     for field in return_fields:
         assert getattr(doc1, field) == doc2[field]
 
+    count_deleted_keys = index.clear()
+    assert count_deleted_keys == len(sample_data)
+
+    assert index.exists() == True
+
     index.delete()
+
+    assert index.exists() == False

tests/integration/test_flow_async.py (+7)

@@ -93,4 +93,11 @@ async def hash_preprocess(item: dict) -> dict:
     for field in return_fields:
         assert getattr(doc1, field) == doc2[field]
 
+    count_deleted_keys = await index.clear()
+    assert count_deleted_keys == len(sample_data)
+
+    assert await index.exists() == True
+
     await index.delete()
+
+    assert await index.exists() == False

tests/integration/test_search_index.py (+11)

@@ -159,6 +159,17 @@ def test_search_index_delete(client, index):
     assert index.name not in convert_bytes(index.client.execute_command("FT._LIST"))
 
 
+def test_search_index_clear(client, index):
+    index.set_client(client)
+    index.create(overwrite=True, drop=True)
+    data = [{"id": "1", "test": "foo"}]
+    index.load(data, id_field="id")
+
+    count = index.clear()
+    assert count == len(data)
+    assert index.exists()
+
+
 def test_search_index_load_and_fetch(client, index):
     index.set_client(client)
     index.create(overwrite=True, drop=True)

tests/integration/test_vectorizers.py (+6 -2)

@@ -27,7 +27,7 @@ def skip_vectorizer() -> bool:
         VertexAITextVectorizer,
         CohereTextVectorizer,
         AzureOpenAITextVectorizer,
-        MistralAITextVectorizer,
+        # MistralAITextVectorizer,
         CustomTextVectorizer,
     ]
 )
@@ -218,7 +218,11 @@ def bad_return_type(text: str) -> str:
 
 
 @pytest.fixture(
-    params=[OpenAITextVectorizer, MistralAITextVectorizer, CustomTextVectorizer]
+    params=[
+        OpenAITextVectorizer,
+        # MistralAITextVectorizer,
+        CustomTextVectorizer,
+    ]
 )
 def avectorizer(request, skip_vectorizer):
     if skip_vectorizer:
