1010import weaviate .classes as wvc
1111from numpy .typing import NDArray
1212from weaviate .classes .init import Auth
13- from weaviate .classes .query import Filter
13+ from weaviate .classes .query import Filter , HybridFusion
1414
1515from llama_stack .apis .common .content_types import InterleavedContent
1616from llama_stack .apis .common .errors import VectorStoreNotFoundError
2626 OpenAIVectorStoreMixin ,
2727)
2828from llama_stack .providers .utils .memory .vector_store import (
29+ RERANKER_TYPE_RRF ,
2930 ChunkForDeletion ,
3031 EmbeddingIndex ,
3132 VectorDBWithIndex ,
@@ -88,6 +89,9 @@ async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> No
8889 collection .data .delete_many (where = Filter .by_property ("chunk_id" ).contains_any (chunk_ids ))
8990
9091 async def query_vector (self , embedding : NDArray , k : int , score_threshold : float ) -> QueryChunksResponse :
92+ log .info (
93+ f"WEAVIATE VECTOR SEARCH CALLED: embedding_shape={ embedding .shape } , k={ k } , threshold={ score_threshold } "
94+ )
9195 sanitized_collection_name = sanitize_collection_name (self .collection_name , weaviate_format = True )
9296 collection = self .client .collections .get (sanitized_collection_name )
9397
@@ -109,12 +113,16 @@ async def query_vector(self, embedding: NDArray, k: int, score_threshold: float)
109113 continue
110114
111115 score = 1.0 / doc .metadata .distance if doc .metadata .distance != 0 else float ("inf" )
116+ log .info (f"📈 Document distance: { doc .metadata .distance } , calculated score: { score } " )
117+
112118 if score < score_threshold :
113119 continue
114120
121+ log .info (f"Document { chunk .metadata .get ('document_id' )} has score { score } " )
115122 chunks .append (chunk )
116123 scores .append (score )
117124
125+ log .info (f"WEAVIATE VECTOR SEARCH RESULTS: Found { len (chunks )} chunks with scores { scores } " )
118126 return QueryChunksResponse (chunks = chunks , scores = scores )
119127
120128 async def delete (self , chunk_ids : list [str ] | None = None ) -> None :
@@ -136,7 +144,46 @@ async def query_keyword(
136144 k : int ,
137145 score_threshold : float ,
138146 ) -> QueryChunksResponse :
139- raise NotImplementedError ("Keyword search is not supported in Weaviate" )
147+ """
148+ Performs BM25-based keyword search using Weaviate's built-in full-text search.
149+ Args:
150+ query_string: The text query for keyword search
151+ k: Limit of number of results to return
152+ score_threshold: Minimum BM25 relevance score a result must reach to be included
153+ Returns:
154+ QueryChunksResponse with the matching chunks and their BM25 scores
155+ """
156+ log .info (f"WEAVIATE KEYWORD SEARCH CALLED: query='{ query_string } ', k={ k } , threshold={ score_threshold } " )
157+ sanitized_collection_name = sanitize_collection_name (self .collection_name , weaviate_format = True )
158+ collection = self .client .collections .get (sanitized_collection_name )
159+
160+ # Perform BM25 keyword search; no query_properties are passed, so the search is not restricted to chunk_content
161+ results = collection .query .bm25 (
162+ query = query_string ,
163+ limit = k ,
164+ return_metadata = wvc .query .MetadataQuery (score = True ),
165+ )
166+
167+ chunks = []
168+ scores = []
169+ for doc in results .objects :
170+ chunk_json = doc .properties ["chunk_content" ]
171+ try :
172+ chunk_dict = json .loads (chunk_json )
173+ chunk = Chunk (** chunk_dict )
174+ except Exception :
175+ log .exception (f"Failed to parse document: { chunk_json } " )
176+ continue
177+
178+ score = doc .metadata .score if doc .metadata .score is not None else 0.0
179+ if score < score_threshold :
180+ continue
181+
182+ chunks .append (chunk )
183+ scores .append (score )
184+
185+ log .info (f"WEAVIATE KEYWORD SEARCH RESULTS: Found { len (chunks )} chunks with scores { scores } ." )
186+ return QueryChunksResponse (chunks = chunks , scores = scores )
140187
141188 async def query_hybrid (
142189 self ,
@@ -147,7 +194,62 @@ async def query_hybrid(
147194 reranker_type : str ,
148195 reranker_params : dict [str , Any ] | None = None ,
149196 ) -> QueryChunksResponse :
150- raise NotImplementedError ("Hybrid search is not supported in Weaviate" )
197+ """
198+ Hybrid search combining vector similarity and keyword search using Weaviate's native hybrid search.
199+ Args:
200+ embedding: The query embedding vector
201+ query_string: The text query for keyword search
202+ k: Limit of number of results to return
203+ score_threshold: Minimum fused hybrid score a result must reach to be included
204+ reranker_type: Type of reranker to use ("rrf" selects ranked fusion; any other value, e.g. "normalized", selects relative-score fusion)
205+ reranker_params: Parameters for the reranker (currently not read by this method; alpha is fixed at 0.5)
206+ Returns:
207+ QueryChunksResponse with combined results
208+ """
209+ log .info (
210+ f"WEAVIATE HYBRID SEARCH CALLED: query='{ query_string } ', embedding_shape={ embedding .shape } , k={ k } , threshold={ score_threshold } , reranker={ reranker_type } "
211+ )
212+ sanitized_collection_name = sanitize_collection_name (self .collection_name , weaviate_format = True )
213+ collection = self .client .collections .get (sanitized_collection_name )
214+
215+ # Ranked (RRF) reranker fusion type
216+ if reranker_type == RERANKER_TYPE_RRF :
217+ rerank = HybridFusion .RANKED
218+ # Relative score (Normalized) reranker fusion type
219+ else :
220+ rerank = HybridFusion .RELATIVE_SCORE
221+
222+ # Perform hybrid search using Weaviate's native hybrid search
223+ results = collection .query .hybrid (
224+ query = query_string ,
225+ alpha = 0.5 , # Range <0, 1>, where 0.5 will equally favor vector and keyword search
226+ vector = embedding .tolist (),
227+ limit = k ,
228+ fusion_type = rerank ,
229+ return_metadata = wvc .query .MetadataQuery (score = True ),
230+ )
231+
232+ chunks = []
233+ scores = []
234+ for doc in results .objects :
235+ chunk_json = doc .properties ["chunk_content" ]
236+ try :
237+ chunk_dict = json .loads (chunk_json )
238+ chunk = Chunk (** chunk_dict )
239+ except Exception :
240+ log .exception (f"Failed to parse document: { chunk_json } " )
241+ continue
242+
243+ score = doc .metadata .score if doc .metadata .score is not None else 0.0
244+ if score < score_threshold :
245+ continue
246+
247+ log .info (f"Document { chunk .metadata .get ('document_id' )} has score { score } " )
248+ chunks .append (chunk )
249+ scores .append (score )
250+
251+ log .info (f"WEAVIATE HYBRID SEARCH RESULTS: Found { len (chunks )} chunks with scores { scores } " )
252+ return QueryChunksResponse (chunks = chunks , scores = scores )
151253
152254
153255class WeaviateVectorIOAdapter (
0 commit comments