Skip to content

Commit 79f61e1

Browse files
committed
Follow detokenize sampling param
Signed-off-by: Himanshu Jaju <[email protected]>
1 parent f89978a commit 79f61e1

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

vllm/v1/engine/output_processor.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def __init__(
3636
prompt_token_ids: list[int],
3737
logprobs_processor: LogprobsProcessor,
3838
detokenizer: IncrementalDetokenizer,
39+
detokenize: bool,
3940
max_tokens_param: Optional[int],
4041
arrival_time: float,
4142
queue: Optional[asyncio.Queue[RequestOutput]],
@@ -51,6 +52,7 @@ def __init__(
5152
self.prompt_len = len(prompt_token_ids)
5253
self.logprobs_processor = logprobs_processor
5354
self.detokenizer = detokenizer
55+
self.detokenize = detokenize
5456
self.max_tokens_param = max_tokens_param
5557
self.is_prefilling = True
5658
self.queue = queue
@@ -85,6 +87,7 @@ def from_new_request(
8587
tokenizer=tokenizer,
8688
request=request,
8789
),
90+
detokenize=request.sampling_params.detokenize,
8891
max_tokens_param=(request.sampling_params.max_tokens if
8992
request.sampling_params is not None else None),
9093
arrival_time=request.arrival_time,
@@ -156,7 +159,7 @@ def _new_completion_output(
156159
delta = self.output_kind == RequestOutputKind.DELTA
157160

158161
# Prepare text and token_ids, based on delta mode
159-
text = self.detokenizer.get_next_output_text(finished, delta)
162+
text = self.detokenizer.get_next_output_text(finished, delta) if self.detokenize else ""
160163
if not delta:
161164
token_ids = self.detokenizer.output_token_ids
162165

@@ -290,10 +293,11 @@ def process_outputs(
290293

291294
# 2) Detokenize the token ids into text and check for stop
292295
# strings.
293-
stop_string = req_state.detokenizer.update(new_token_ids)
294-
if stop_string and finish_reason != FinishReason.STOP:
295-
finish_reason = FinishReason.STOP
296-
stop_reason = stop_string
296+
if req_state.detokenize:
297+
stop_string = req_state.detokenizer.update(new_token_ids)
298+
if stop_string and finish_reason != FinishReason.STOP:
299+
finish_reason = FinishReason.STOP
300+
stop_reason = stop_string
297301

298302
# 3) Compute sample and prompt logprobs for request,
299303
# if required.

0 commit comments

Comments (0)