|
5 | 5 | # the root directory of this source tree. |
6 | 6 |
|
7 | 7 |
|
8 | | -from openai import NOT_GIVEN |
9 | | - |
10 | | -from llama_stack.apis.inference import ( |
11 | | - OpenAIEmbeddingData, |
12 | | - OpenAIEmbeddingsRequestWithExtraBody, |
13 | | - OpenAIEmbeddingsResponse, |
14 | | - OpenAIEmbeddingUsage, |
15 | | -) |
16 | 8 | from llama_stack.log import get_logger |
17 | 9 | from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin |
18 | 10 |
|
@@ -76,50 +68,3 @@ def get_base_url(self) -> str: |
76 | 68 | :return: The NVIDIA API base URL |
77 | 69 | """ |
78 | 70 | return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url |
79 | | - |
80 | | - async def openai_embeddings( |
81 | | - self, |
82 | | - params: OpenAIEmbeddingsRequestWithExtraBody, |
83 | | - ) -> OpenAIEmbeddingsResponse: |
84 | | - """ |
85 | | - OpenAI-compatible embeddings for NVIDIA NIM. |
86 | | -
|
87 | | - Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API. |
88 | | - We default this to "query" to ensure requests succeed when using the |
89 | | - OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with |
90 | | - `task_type='document'`. |
91 | | - """ |
92 | | - extra_body: dict[str, object] = {"input_type": "query"} |
93 | | - logger.warning( |
94 | | - "NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. " |
95 | | - "For passage embeddings, use the embeddings API with task_type='document'." |
96 | | - ) |
97 | | - |
98 | | - response = await self.client.embeddings.create( |
99 | | - model=await self._get_provider_model_id(params.model), |
100 | | - input=params.input, |
101 | | - encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN, |
102 | | - dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN, |
103 | | - user=params.user if params.user is not None else NOT_GIVEN, |
104 | | - extra_body=extra_body, |
105 | | - ) |
106 | | - |
107 | | - data = [] |
108 | | - for i, embedding_data in enumerate(response.data): |
109 | | - data.append( |
110 | | - OpenAIEmbeddingData( |
111 | | - embedding=embedding_data.embedding, |
112 | | - index=i, |
113 | | - ) |
114 | | - ) |
115 | | - |
116 | | - usage = OpenAIEmbeddingUsage( |
117 | | - prompt_tokens=response.usage.prompt_tokens, |
118 | | - total_tokens=response.usage.total_tokens, |
119 | | - ) |
120 | | - |
121 | | - return OpenAIEmbeddingsResponse( |
122 | | - data=data, |
123 | | - model=response.model, |
124 | | - usage=usage, |
125 | | - ) |
0 commit comments