Skip to content

Commit 46f8672

Browse files
committed
docstrings and hybrid updates
1 parent e71a35e commit 46f8672

File tree

4 files changed

+176
-26
lines changed

4 files changed

+176
-26
lines changed

docs/user_guide/release_guide/0_5_0_release.ipynb

Lines changed: 96 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,26 +23,25 @@
2323
},
2424
{
2525
"cell_type": "code",
26-
"execution_count": 7,
26+
"execution_count": 9,
2727
"metadata": {},
2828
"outputs": [
2929
{
30-
"name": "stderr",
30+
"name": "stdout",
3131
"output_type": "stream",
3232
"text": [
33-
"/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
34-
" warnings.warn(\n"
33+
"\u001b[32m12:44:52\u001b[0m \u001b[34mredisvl.index.index\u001b[0m \u001b[1;30mINFO\u001b[0m Index already exists, overwriting.\n"
3534
]
3635
},
3736
{
3837
"data": {
3938
"text/plain": [
40-
"['jobs:01JQYMYZBA6NM6DX9YW35MCHJZ',\n",
41-
" 'jobs:01JQYMYZBABXYR96H96SQ99ZPS',\n",
42-
" 'jobs:01JQYMYZBAGEBDS270EZADQ1TM']"
39+
"['jobs:01JR0V1SA29RVD9AAVSTBV9P5H',\n",
40+
" 'jobs:01JR0V1SA209KMVHMD7G54P3H5',\n",
41+
" 'jobs:01JR0V1SA23ZE7BRERXTZWC33Z']"
4342
]
4443
},
45-
"execution_count": 7,
44+
"execution_count": 9,
4645
"metadata": {},
4746
"output_type": "execute_result"
4847
}
@@ -115,19 +114,101 @@
115114
"cell_type": "markdown",
116115
"metadata": {},
117116
"source": [
118-
"# Hybrid query and text query classes\n",
117+
"# HybridQuery class\n",
118+
"\n",
119+
"Perform hybrid lexical (BM25) and vector search where results are ranked by: `hybrid_score = (1-alpha)*text_score + alpha*vector_similarity`."
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": 14,
125+
"metadata": {},
126+
"outputs": [
127+
{
128+
"data": {
129+
"text/plain": [
130+
"[{'vector_distance': '0.655903100967',\n",
131+
" 'job_title': 'Software Engineer',\n",
132+
" 'vector_similarity': '0.672048449516',\n",
133+
" 'text_score': '0',\n",
134+
" 'hybrid_score': '0.470433914661'},\n",
135+
" {'vector_distance': '0.892600417137',\n",
136+
" 'job_title': 'Data Analyst',\n",
137+
" 'vector_similarity': '0.553699791431',\n",
138+
" 'text_score': '0',\n",
139+
" 'hybrid_score': '0.387589854002'},\n",
140+
" {'vector_distance': '0.958741784096',\n",
141+
" 'job_title': 'Marketing Manager',\n",
142+
" 'vector_similarity': '0.520629107952',\n",
143+
" 'text_score': '0',\n",
144+
" 'hybrid_score': '0.364440375566'}]"
145+
]
146+
},
147+
"execution_count": 14,
148+
"metadata": {},
149+
"output_type": "execute_result"
150+
}
151+
],
152+
"source": [
153+
"from redisvl.query import HybridQuery\n",
154+
"\n",
155+
"text = \"Find a job as a software engineer\"\n",
156+
"vec = emb_model.embed(text, as_buffer=True)\n",
157+
"\n",
158+
"query = HybridQuery(\n",
159+
" text=text,\n",
160+
" text_field_name=\"job_description\",\n",
161+
" vector=vec,\n",
162+
" vector_field_name=\"job_embedding\",\n",
163+
" alpha=0.7,\n",
164+
" num_results=10,\n",
165+
" return_fields=[\"job_title\"],\n",
166+
")\n",
119167
"\n",
120-
"In 0.5.0 we introduced classes to make it easier to perform hybrid lexical (BM25) and vector searches.\n",
168+
"results = index.query(query)\n",
169+
"results"
170+
]
171+
},
172+
{
173+
"cell_type": "markdown",
174+
"metadata": {},
175+
"source": [
176+
"# TextQueries\n",
121177
"\n",
122-
"> TODO: update hybrid search notebook to use the class and make sure it works the same"
178+
"TextQueries make it easy to perform pure lexical search with redisvl."
123179
]
124180
},
125181
{
126182
"cell_type": "code",
127-
"execution_count": null,
183+
"execution_count": 13,
128184
"metadata": {},
129-
"outputs": [],
130-
"source": []
185+
"outputs": [
186+
{
187+
"data": {
188+
"text/plain": [
189+
"[]"
190+
]
191+
},
192+
"execution_count": 13,
193+
"metadata": {},
194+
"output_type": "execute_result"
195+
}
196+
],
197+
"source": [
198+
"from redisvl.query import TextQuery\n",
199+
"\n",
200+
"text = \"Find a job as a software engineer\"\n",
201+
"\n",
202+
"query = TextQuery(\n",
203+
" text=text,\n",
204+
" text_field_name=\"job_description\",\n",
205+
" return_fields=[\"job_title\"],\n",
206+
" num_results=10,\n",
207+
")\n",
208+
"\n",
209+
"results = index.query(query)\n",
210+
"results"
211+
]
131212
},
132213
{
133214
"cell_type": "markdown",
@@ -607,7 +688,7 @@
607688
],
608689
"metadata": {
609690
"kernelspec": {
610-
"display_name": "Python 3",
691+
"display_name": "redisvl-56gG2io_-py3.11",
611692
"language": "python",
612693
"name": "python3"
613694
},

redisvl/query/aggregate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def __init__(
7777
from redisvl.query import HybridQuery
7878
from redisvl.index import SearchIndex
7979
80-
index = SearchIndex.from_yaml(index.yaml)
80+
index = SearchIndex.from_yaml("path/to/index.yaml")
8181
8282
query = HybridQuery(
8383
text="example text",

redisvl/utils/optimize/cache.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def __init__(
8080
opt_fn: Callable = _grid_search_opt_cache,
8181
eval_metric: str = "f1",
8282
):
83-
"""Initialize the optimizer.
83+
"""Initialize the cache optimizer.
8484
8585
Args:
8686
cache (SemanticCache): The RedisVL SemanticCache instance to optimize.
@@ -91,18 +91,45 @@ def __init__(
9191
Defaults to "f1" score.
9292
9393
.. code-block:: python
94-
95-
# TODO
96-
94+
from redisvl.extensions.llmcache import SemanticCache
95+
from redisvl.utils.optimize import CacheThresholdOptimizer
96+
97+
sem_cache = SemanticCache(
98+
name="sem_cache", # underlying search index name
99+
redis_url="redis://localhost:6379", # redis connection url string
100+
distance_threshold=0.5 # semantic cache distance threshold
101+
)
102+
103+
paris_key = sem_cache.store(prompt="what is the capital of france?", response="paris")
104+
rabat_key = sem_cache.store(prompt="what is the capital of morocco?", response="rabat")
105+
106+
test_data = [
107+
{
108+
"query": "What's the capital of Britain?",
109+
"query_match": ""
110+
},
111+
{
112+
"query": "What's the capital of France??",
113+
"query_match": paris_key
114+
},
115+
{
116+
"query": "What's the capital city of Morocco?",
117+
"query_match": rabat_key
118+
},
119+
]
120+
121+
optimizer = CacheThresholdOptimizer(sem_cache, test_data)
122+
optimizer.optimize()
97123
"""
98124
super().__init__(cache, test_dict, opt_fn, eval_metric)
99125

100126
def optimize(self, **kwargs: Any):
101127
"""Optimize thresholds using the provided optimization function for cache case.
102128
103129
.. code-block:: python
130+
from redisvl.utils.optimize import CacheThresholdOptimizer
104131
105-
# TODO
106-
132+
optimizer = CacheThresholdOptimizer(semantic_cache, test_data)
133+
optimizer.optimize(**kwargs)
107134
"""
108135
self.opt_fn(self.optimizable, self.test_data, self.eval_metric, **kwargs)

redisvl/utils/optimize/router.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,21 +99,63 @@ def __init__(
9999
opt_fn: Callable = _random_search_opt_router,
100100
eval_metric: str = "f1",
101101
):
102-
"""
103-
# TODO
102+
"""Initialize the router optimizer.
103+
104+
Args:
105+
router (SemanticRouter): The RedisVL SemanticRouter instance to optimize.
106+
test_dict (List[Dict[str, Any]]): List of test cases.
107+
opt_fn (Callable): Function to perform optimization. Defaults to
108+
random search.
109+
eval_metric (str): Evaluation metric for threshold optimization.
110+
Defaults to "f1" score.
104111
105112
.. code-block:: python
113+
from redisvl.extensions.router import Route, SemanticRouter
114+
from redisvl.utils.vectorize import HFTextVectorizer
115+
from redisvl.utils.optimize import RouterThresholdOptimizer
116+
117+
routes = [
118+
Route(
119+
name="greeting",
120+
references=["hello", "hi"],
121+
metadata={"type": "greeting"},
122+
distance_threshold=0.5,
123+
),
124+
Route(
125+
name="farewell",
126+
references=["bye", "goodbye"],
127+
metadata={"type": "farewell"},
128+
distance_threshold=0.5,
129+
),
130+
]
131+
132+
router = SemanticRouter(
133+
name="greeting-router",
134+
vectorizer=HFTextVectorizer(),
135+
routes=routes,
136+
redis_url="redis://localhost:6379",
137+
overwrite=True # Blow away any other routing index with this name
138+
)
139+
140+
test_data = [
141+
{"query": "hello", "query_match": "greeting"},
142+
{"query": "goodbye", "query_match": "farewell"},
143+
...
144+
]
106145
107-
# TODO
146+
optimizer = RouterThresholdOptimizer(router, test_data)
147+
optimizer.optimize()
108148
"""
109149
super().__init__(router, test_dict, opt_fn, eval_metric)
110150

111151
def optimize(self, **kwargs: Any):
112152
"""Optimize thresholds using the provided optimization function for router case.
113153
114154
.. code-block:: python
155+
from redisvl.utils.optimize import RouterThresholdOptimizer
115156
116-
# TODO
157+
optimizer = RouterThresholdOptimizer(router, test_data)
158+
optimizer.optimize(search_step=0.05, max_iterations=50)
117159
"""
118160
qrels = _format_qrels(self.test_data)
119161
self.opt_fn(self.optimizable, self.test_data, qrels, self.eval_metric, **kwargs)

0 commit comments

Comments
 (0)