
Commit 83cece1

updated prompt:
Signed-off-by: Francisco Javier Arceo <[email protected]>
1 parent c0447dd commit 83cece1

File tree: 1 file changed (+28, −5 lines)


src/llama_stack/providers/utils/memory/vector_store.py

Lines changed: 28 additions & 5 deletions
```diff
@@ -358,20 +358,43 @@ async def _rewrite_query_for_search(self, query: str) -> str:
 
         chat_model = None
         # Look for an LLM model (for chat completion)
-        for model in models_response.data:
-            if model.model_type == ModelType.llm:
-                chat_model = model.identifier
+        # Prefer local or non-cloud providers to avoid credential issues
+        llm_models = [m for m in models_response.data if m.model_type == ModelType.llm]
+
+        # Filter out models that are known to be embedding models (misclassified as LLM)
+        embedding_model_patterns = ["minilm", "embed", "embedding", "nomic-embed"]
+        llm_models = [
+            m for m in llm_models if not any(pattern in m.identifier.lower() for pattern in embedding_model_patterns)
+        ]
+
+        # Priority order: ollama (local), then OpenAI, then others
+        provider_priority = ["ollama", "openai", "gemini", "bedrock"]
+
+        for provider in provider_priority:
+            for model in llm_models:
+                model_id = model.identifier.lower()
+                if provider == "ollama" and "ollama/" in model_id:
+                    chat_model = model.identifier
+                    break
+                elif provider in model_id:
+                    chat_model = model.identifier
+                    break
+            if chat_model:
                 break
 
+        # Fallback: use first available LLM model if no preferred provider found
+        if not chat_model and llm_models:
+            chat_model = llm_models[0].identifier
+
         # If no suitable model found, raise an error
         if not chat_model:
             raise ValueError("No LLM model available for query rewriting")
 
-        rewrite_prompt = f"""Rewrite this search query to improve vector search results by expanding it with relevant synonyms and related terms while maintaining the original intent:
+        rewrite_prompt = f"""Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:
 
 {query}
 
-Rewritten query:"""
+Improved query:"""
 
         chat_request = OpenAIChatCompletionRequestWithExtraBody(
             model=chat_model,
```
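For readers skimming the diff, the sketch below restates the new model-selection logic as a self-contained function. The `Model` dataclass and the plain-string `model_type` field are hypothetical stand-ins for llama_stack's models-API entries and its `ModelType` enum, so this is only an illustration of the behavior the commit introduces, not the provider code itself.

```python
from dataclasses import dataclass


@dataclass
class Model:
    # Hypothetical stand-in for an entry in models_response.data
    identifier: str
    model_type: str  # "llm" or "embedding" (the real code uses ModelType)


def pick_chat_model(models: list[Model]) -> str:
    """Pick an LLM for query rewriting, preferring local providers."""
    llm_models = [m for m in models if m.model_type == "llm"]

    # Drop models that look like embedding models even if tagged as LLMs.
    embedding_patterns = ["minilm", "embed", "embedding", "nomic-embed"]
    llm_models = [
        m for m in llm_models
        if not any(p in m.identifier.lower() for p in embedding_patterns)
    ]

    # Walk providers in priority order: local ollama first, then cloud providers.
    for provider in ["ollama", "openai", "gemini", "bedrock"]:
        for m in llm_models:
            model_id = m.identifier.lower()
            if provider == "ollama" and "ollama/" in model_id:
                return m.identifier
            elif provider in model_id:
                return m.identifier

    # Fallback: any remaining LLM, otherwise fail loudly.
    if llm_models:
        return llm_models[0].identifier
    raise ValueError("No LLM model available for query rewriting")


if __name__ == "__main__":
    models = [
        Model("ollama/all-minilm:l6-v2", "llm"),  # misclassified embedding model, filtered out
        Model("openai/gpt-4o-mini", "llm"),
        Model("ollama/llama3.2:3b", "llm"),
    ]
    print(pick_chat_model(models))  # -> "ollama/llama3.2:3b" (local provider wins)
```

The prompt change in the same hunk is independent of the selection logic: the rewrite prompt now asks the model to return only the improved query with no explanations, so the completion can be used directly as the search string without post-processing.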
