
Commit 2b54b57

feat: implement graceful model discovery for vLLM provider
- Attempt model discovery first for backward compatibility
- If discovery fails and refresh_models=false, continue without error
- If discovery fails and refresh_models=true, fail hard with ValueError
- Supports dynamic token authentication scenarios

Fixes OAuth authentication issues when the vLLM service requires dynamic tokens.
1 parent 188a56a commit 2b54b57
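
To make the described fallback concrete, here is a minimal sketch of the decision logic this commit introduces, written as a standalone helper. The helper name and parameters are illustrative only; the actual change lives inside register_model in vllm.py (see the diff below).

```python
# Minimal sketch of the fallback described above; the helper name and
# parameters are illustrative, not the provider's actual code.
async def verify_model_is_served(client, refresh_models: bool, provider_resource_id: str, log) -> None:
    try:
        available = [m.id async for m in client.models.list()]
        if provider_resource_id not in available:
            raise ValueError(
                f"Model {provider_resource_id} is not being served by vLLM. "
                f"Available models: {', '.join(available)}"
            )
    except Exception as e:
        if refresh_models:
            # refresh_models=true: live discovery is required, so fail hard.
            raise ValueError(f"Model verification failed: {e}") from e
        # refresh_models=false: warn and continue, e.g. when the endpoint
        # needs a dynamically issued (OAuth) token that is not yet available
        # at registration time.
        log.warning(f"Model verification failed: {e}; continuing without live check.")
```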

File tree

  • llama_stack/providers/remote/inference/vllm

1 file changed: +13 −11 lines changed

llama_stack/providers/remote/inference/vllm/vllm.py

Lines changed: 13 additions & 11 deletions
@@ -9,7 +9,6 @@
 from urllib.parse import urljoin
 
 import httpx
-from openai import APIConnectionError
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@@ -339,16 +338,19 @@ async def register_model(self, model: Model) -> Model:
             pass  # Ignore statically unknown model, will check live listing
         try:
             res = self.client.models.list()
-        except APIConnectionError as e:
-            raise ValueError(
-                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
-            ) from e
-        available_models = [m.id async for m in res]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model {model.provider_resource_id} is not being served by vLLM. "
-                f"Available models: {', '.join(available_models)}"
-            )
+            available_models = [m.id async for m in res]
+            if model.provider_resource_id not in available_models:
+                raise ValueError(
+                    f"Model {model.provider_resource_id} is not being served by vLLM. "
+                    f"Available models: {', '.join(available_models)}"
+                )
+        except Exception as e:
+            if self.config.refresh_models:
+                raise ValueError(f"Model verification failed: {e}") from e
+            # if refresh_models is false, gracefully continue without verification
+            log.warning(f"Model verification failed for model {model.model_id} with error {e}")
+            log.warning("Continuing without live check (refresh_models=false).")
+
         return model
 
     async def _get_params(self, request: ChatCompletionRequest) -> dict:
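
As a rough, self-contained illustration of the two modes after this change, the following demo simulates a failed model listing with stub objects; all names here (stub client, config object) are hypothetical and do not come from the repository.

```python
import asyncio
import logging
from types import SimpleNamespace

logging.basicConfig(level=logging.WARNING)
log = logging.getLogger("vllm-demo")


class StubModels:
    def list(self):
        # Simulate the listing call failing, e.g. because a dynamic OAuth
        # token is not available at registration time.
        raise ConnectionError("401 Unauthorized: token not yet issued")


async def register(config) -> str:
    client = SimpleNamespace(models=StubModels())
    try:
        res = client.models.list()
        _ = [m.id async for m in res]  # never reached in this demo
    except Exception as e:
        if config.refresh_models:
            # refresh_models=true: escalate to a hard registration error.
            raise ValueError(f"Model verification failed: {e}") from e
        # refresh_models=false: log and carry on without verification.
        log.warning("Model verification failed: %s; continuing (refresh_models=false).", e)
    return "registered"


async def main() -> None:
    print(await register(SimpleNamespace(refresh_models=False)))   # prints "registered"
    try:
        await register(SimpleNamespace(refresh_models=True))
    except ValueError as e:
        print(f"hard failure: {e}")


asyncio.run(main())
```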
