Skip to content

Commit 5945822

Browse files
authored
Using the correct datatypes for streaming non-chat completions (#134)
1 parent e7c3a5c commit 5945822

File tree

1 file changed

+15
-14
lines changed

1 file changed

+15
-14
lines changed

vllm/entrypoints/sync_openai/api_server.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@
2525
ChatCompletionRequest, ChatCompletionResponse,
2626
ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
2727
ChatCompletionStreamResponse, ChatMessage, CompletionRequest,
28-
CompletionResponse, CompletionResponseChoice, DeltaMessage, ErrorResponse,
29-
ModelCard, ModelList, ModelPermission, UsageInfo)
28+
CompletionResponse, CompletionResponseChoice,
29+
CompletionResponseStreamChoice, CompletionStreamResponse, DeltaMessage,
30+
ErrorResponse, ModelCard, ModelList, ModelPermission, UsageInfo)
3031
from vllm.entrypoints.openai.serving_chat import (ChatMessageParseResult,
3132
ConversationMessage)
3233
from vllm.logger import init_logger
@@ -174,18 +175,18 @@ async def completion_generator(model, result_queue, choices, created_time,
174175
request_id, token, stats = await result_queue.get()
175176

176177
choice_idx = choices[request_id]
177-
res = CompletionResponse(id=request_id,
178-
created=created_time,
179-
model=model,
180-
choices=[
181-
CompletionResponseChoice(
182-
index=choice_idx,
183-
text=token,
184-
logprobs=None,
185-
finish_reason=None,
186-
stop_reason=None)
187-
],
188-
usage=None)
178+
res = CompletionStreamResponse(id=request_id,
179+
created=created_time,
180+
model=model,
181+
choices=[
182+
CompletionResponseStreamChoice(
183+
index=choice_idx,
184+
text=token,
185+
logprobs=None,
186+
finish_reason=None,
187+
stop_reason=None)
188+
],
189+
usage=None)
189190
if stats is not None:
190191
res.usage = UsageInfo()
191192
res.usage.completion_tokens = stats.get("tokens", 0)

0 commit comments

Comments (0)