@@ -36,6 +36,7 @@ def __init__(
36
36
prompt_token_ids : list [int ],
37
37
logprobs_processor : LogprobsProcessor ,
38
38
detokenizer : IncrementalDetokenizer ,
39
+ detokenize : bool ,
39
40
max_tokens_param : Optional [int ],
40
41
arrival_time : float ,
41
42
queue : Optional [asyncio .Queue [RequestOutput ]],
@@ -51,6 +52,7 @@ def __init__(
51
52
self .prompt_len = len (prompt_token_ids )
52
53
self .logprobs_processor = logprobs_processor
53
54
self .detokenizer = detokenizer
55
+ self .detokenize = detokenize
54
56
self .max_tokens_param = max_tokens_param
55
57
self .is_prefilling = True
56
58
self .queue = queue
@@ -85,6 +87,7 @@ def from_new_request(
85
87
tokenizer = tokenizer ,
86
88
request = request ,
87
89
),
90
+ detokenize = request .sampling_params .detokenize ,
88
91
max_tokens_param = (request .sampling_params .max_tokens if
89
92
request .sampling_params is not None else None ),
90
93
arrival_time = request .arrival_time ,
@@ -156,7 +159,7 @@ def _new_completion_output(
156
159
delta = self .output_kind == RequestOutputKind .DELTA
157
160
158
161
# Prepare text and token_ids, based on delta mode
159
- text = self .detokenizer .get_next_output_text (finished , delta )
162
+ text = self .detokenizer .get_next_output_text (finished , delta ) if self . detokenize else ""
160
163
if not delta :
161
164
token_ids = self .detokenizer .output_token_ids
162
165
@@ -290,10 +293,11 @@ def process_outputs(
290
293
291
294
# 2) Detokenize the token ids into text and check for stop
292
295
# strings.
293
- stop_string = req_state .detokenizer .update (new_token_ids )
294
- if stop_string and finish_reason != FinishReason .STOP :
295
- finish_reason = FinishReason .STOP
296
- stop_reason = stop_string
296
+ if req_state .detokenize :
297
+ stop_string = req_state .detokenizer .update (new_token_ids )
298
+ if stop_string and finish_reason != FinishReason .STOP :
299
+ finish_reason = FinishReason .STOP
300
+ stop_reason = stop_string
297
301
298
302
# 3) Compute sample and prompt logprobs for request,
299
303
# if required.
0 commit comments