Skip to content

Commit 0b76999

Browse files
authored
[Bugfix]: Use float32 for base64 embedding (#7855)
Signed-off-by: Hollow Man <[email protected]>
1 parent 1856aff commit 0b76999

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

examples/openai_embedding_client.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
"The best thing about vLLM is that it supports many different models"
2020
],
2121
model=model,
22-
encoding_format="float",
2322
)
2423

2524
for data in responses.data:

tests/entrypoints/openai/test_embedding.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,18 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI,
128128
for data in responses_base64.data:
129129
decoded_responses_base64_data.append(
130130
np.frombuffer(base64.b64decode(data.embedding),
131-
dtype="float").tolist())
131+
dtype="float32").tolist())
132132

133133
assert responses_float.data[0].embedding == decoded_responses_base64_data[
134134
0]
135135
assert responses_float.data[1].embedding == decoded_responses_base64_data[
136136
1]
137+
138+
# Default response is float32 decoded from base64 by OpenAI Client
139+
responses_default = await embedding_client.embeddings.create(
140+
input=input_texts, model=model_name)
141+
142+
assert responses_float.data[0].embedding == responses_default.data[
143+
0].embedding
144+
assert responses_float.data[1].embedding == responses_default.data[
145+
1].embedding

vllm/entrypoints/openai/serving_embedding.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ def _get_embedding(
3131
if encoding_format == "float":
3232
return output.embedding
3333
elif encoding_format == "base64":
34-
embedding_bytes = np.array(output.embedding).tobytes()
34+
# Force float32 for base64 encoding
35+
# to match the OpenAI Python client behavior
36+
embedding_bytes = np.array(output.embedding, dtype="float32").tobytes()
3537
return base64.b64encode(embedding_bytes).decode("utf-8")
3638

3739
assert_never(encoding_format)

0 commit comments

Comments
 (0)