Skip to content

Commit 1d4d263

Browse files
committed
fix: use extra_body for passing input_type params for asymmetric embedding models for NVIDIA Inference Provider
1 parent 007efa6 commit 1d4d263

File tree

3 files changed

+243
-128
lines changed

3 files changed

+243
-128
lines changed

llama_stack/providers/remote/inference/nvidia/NVIDIA.md

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,13 @@ print(f"Structured Response: {structured_response.choices[0].message.content}")
139139

140140
The following example shows how to create embeddings for an NVIDIA NIM.
141141

142-
> [!NOTE]
143-
> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.
144-
145142
```python
146-
response = client.inference.embeddings(
147-
model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
148-
contents=["What is the capital of France?"],
149-
task_type="query",
143+
response = client.embeddings.create(
144+
model="nvidia/llama-3.2-nv-embedqa-1b-v2",
145+
input=["What is the capital of France?"],
146+
extra_body={"input_type": "query"},
150147
)
151-
print(f"Embeddings: {response.embeddings}")
148+
print(f"Embeddings: {response.data}")
152149
```
153150

154151
### Vision Language Models Example

llama_stack/providers/remote/inference/nvidia/nvidia.py

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@
55
# the root directory of this source tree.
66

77

8-
from openai import NOT_GIVEN
9-
10-
from llama_stack.apis.inference import (
11-
OpenAIEmbeddingData,
12-
OpenAIEmbeddingsRequestWithExtraBody,
13-
OpenAIEmbeddingsResponse,
14-
OpenAIEmbeddingUsage,
15-
)
168
from llama_stack.log import get_logger
179
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
1810

@@ -76,50 +68,3 @@ def get_base_url(self) -> str:
7668
:return: The NVIDIA API base URL
7769
"""
7870
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
79-
80-
async def openai_embeddings(
81-
self,
82-
params: OpenAIEmbeddingsRequestWithExtraBody,
83-
) -> OpenAIEmbeddingsResponse:
84-
"""
85-
OpenAI-compatible embeddings for NVIDIA NIM.
86-
87-
Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API.
88-
We default this to "query" to ensure requests succeed when using the
89-
OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with
90-
`task_type='document'`.
91-
"""
92-
extra_body: dict[str, object] = {"input_type": "query"}
93-
logger.warning(
94-
"NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. "
95-
"For passage embeddings, use the embeddings API with task_type='document'."
96-
)
97-
98-
response = await self.client.embeddings.create(
99-
model=await self._get_provider_model_id(params.model),
100-
input=params.input,
101-
encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
102-
dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN,
103-
user=params.user if params.user is not None else NOT_GIVEN,
104-
extra_body=extra_body,
105-
)
106-
107-
data = []
108-
for i, embedding_data in enumerate(response.data):
109-
data.append(
110-
OpenAIEmbeddingData(
111-
embedding=embedding_data.embedding,
112-
index=i,
113-
)
114-
)
115-
116-
usage = OpenAIEmbeddingUsage(
117-
prompt_tokens=response.usage.prompt_tokens,
118-
total_tokens=response.usage.total_tokens,
119-
)
120-
121-
return OpenAIEmbeddingsResponse(
122-
data=data,
123-
model=response.model,
124-
usage=usage,
125-
)

0 commit comments

Comments (0)