We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 565c1ef · commit b80a1b6 · Copy full SHA for b80a1b6
vllm/v1/engine/output_processor.py
@@ -179,11 +179,14 @@ def process_outputs(
179
# in the EngineCore.
180
req_state.is_prefilling = not new_token_ids
181
182
+ stop_reason = engine_core_output.stop_reason
183
+
184
# 2) Detokenize the token ids into text and check for stop
185
# strings.
- stop_reason = req_state.detokenizer.update(new_token_ids)
- if stop_reason:
186
+ stop_string = req_state.detokenizer.update(new_token_ids)
187
+ if stop_string and finish_reason != FinishReason.STOP:
188
finish_reason = FinishReason.STOP
189
+ stop_reason = stop_string
190
191
# 3) Compute sample and prompt logprobs for request,
192
# if required.
0 commit comments