llama_stack/providers/remote/inference/vllm/vllm.py: 24 changes (13 additions, 11 deletions)
@@ -9,7 +9,6 @@
 from urllib.parse import urljoin
 
 import httpx
-from openai import APIConnectionError
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@@ -339,16 +338,19 @@ async def register_model(self, model: Model) -> Model:
             pass  # Ignore statically unknown model, will check live listing
         try:
             res = self.client.models.list()
-        except APIConnectionError as e:
-            raise ValueError(
-                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
-            ) from e
-        available_models = [m.id async for m in res]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model {model.provider_resource_id} is not being served by vLLM. "
-                f"Available models: {', '.join(available_models)}"
-            )
+            available_models = [m.id async for m in res]
+            if model.provider_resource_id not in available_models:
+                raise ValueError(
+                    f"Model {model.provider_resource_id} is not being served by vLLM. "
+                    f"Available models: {', '.join(available_models)}"
+                )
+        except Exception as e:
+            if self.config.refresh_models:
+                raise ValueError(f"Model verification failed: {e}") from e
+            # if refresh_models is false, gracefully continue without verification
+            log.warning(f"Model verification failed for model {model.model_id} with error {e}")
+            log.warning("Continuing without live check (refresh_models=false).")
 
         return model
 
     async def _get_params(self, request: ChatCompletionRequest) -> dict:
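Not part of the PR, but for context: the sketch below reproduces the control flow this hunk introduces, in isolation. Config, Model, ServedModel, OkModels, and FailingModels are hypothetical stand-ins for the adapter's self.config, the llama_stack Model resource, and the client's models API; only the try/except structure mirrors the diff.

# Sketch only: the stand-ins below are hypothetical and exist just to
# exercise the error-handling pattern added in this diff.
import asyncio
import logging
from dataclasses import dataclass

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("vllm-register-sketch")


@dataclass
class Config:
    url: str = "http://localhost:8000/v1"
    refresh_models: bool = False


@dataclass
class Model:
    model_id: str = "my-model"
    provider_resource_id: str = "meta-llama/Llama-3.1-8B-Instruct"


@dataclass
class ServedModel:
    id: str


class OkModels:
    """Stand-in for client.models when vLLM is reachable and serves one model."""

    def list(self):
        async def _gen():
            yield ServedModel(id="meta-llama/Llama-3.1-8B-Instruct")

        return _gen()


class FailingModels:
    """Stand-in for client.models when vLLM is unreachable."""

    def list(self):
        raise ConnectionError("connection refused")


async def register_model(config: Config, models, model: Model) -> Model:
    # Mirrors the new control flow: any failure during the live check
    # (connection error or "model not served") is fatal only when
    # refresh_models is enabled; otherwise it is logged and ignored.
    try:
        res = models.list()
        available_models = [m.id async for m in res]
        if model.provider_resource_id not in available_models:
            raise ValueError(
                f"Model {model.provider_resource_id} is not being served by vLLM. "
                f"Available models: {', '.join(available_models)}"
            )
    except Exception as e:
        if config.refresh_models:
            raise ValueError(f"Model verification failed: {e}") from e
        log.warning(f"Model verification failed for model {model.model_id} with error {e}")
        log.warning("Continuing without live check (refresh_models=false).")
    return model


if __name__ == "__main__":
    model = Model()
    # Reachable server, model present: verification passes silently.
    asyncio.run(register_model(Config(), OkModels(), model))
    # Unreachable server, refresh_models=False: warnings only, model still registered.
    asyncio.run(register_model(Config(refresh_models=False), FailingModels(), model))
    # Unreachable server, refresh_models=True: the failure propagates.
    try:
        asyncio.run(register_model(Config(refresh_models=True), FailingModels(), model))
    except ValueError as e:
        log.info(f"Raised as expected: {e}")

The behavioural consequence visible in the second call: with refresh_models disabled, even a model that the server does not list is registered after a warning rather than rejected, since the "not being served" error is raised inside the same try block that now catches all verification failures.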