@@ -3,9 +3,9 @@
 import pytest
 from transformers import AutoTokenizer
 
-from vllm.sampling_params import RequestOutputKind
-from vllm.v1.engine import EngineCoreOutput
-from vllm.v1.engine.detokenizer import Detokenizer, DetokenizerRequest
+from vllm.sampling_params import RequestOutputKind, SamplingParams
+from vllm.v1.engine import EngineCoreOutput, EngineCoreRequest
+from vllm.v1.engine.detokenizer import Detokenizer
 
 TOKENIZER_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
 tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
@@ -71,16 +71,22 @@ def test_incremental_detokenization(request_output_kind: RequestOutputKind):
 
     # Make N requests.
     requests = [
-        DetokenizerRequest(
-            request_id=f"request-{idx}",
-            prompt=prompt,
-            prompt_token_ids=prompt_tokens,
-            skip_special_tokens=False,
-            spaces_between_special_tokens=False,
-            output_kind=request_output_kind,
-            stop=[],
-            include_stop_str_in_output=False,
-        ) for idx, (
+        EngineCoreRequest(request_id=f"request-{idx}",
+                          prompt=prompt,
+                          prompt_token_ids=prompt_tokens,
+                          arrival_time=0,
+                          mm_inputs=None,
+                          mm_hashes=None,
+                          mm_placeholders=None,
+                          eos_token_id=None,
+                          lora_request=None,
+                          sampling_params=SamplingParams(
+                              skip_special_tokens=False,
+                              spaces_between_special_tokens=False,
+                              output_kind=request_output_kind,
+                              stop=[],
+                              include_stop_str_in_output=False))
+        for idx, (
             prompt,
             prompt_tokens) in enumerate(zip(PROMPT_STRINGS, PROMPT_TOKENS))
     ]
@@ -133,18 +139,25 @@ def test_stop_string(include_stop_str_in_output: bool):
 
     # Make N requests.
     requests = [
-        DetokenizerRequest(
+        EngineCoreRequest(
            request_id=f"request-{idx}",
            prompt=prompt,
            prompt_token_ids=prompt_tokens,
-            skip_special_tokens=False,
-            spaces_between_special_tokens=False,
-            output_kind=RequestOutputKind.DELTA,
-            stop=STOP_STRINGS,
-            include_stop_str_in_output=include_stop_str_in_output,
-        ) for idx, (
-            prompt,
-            prompt_tokens) in enumerate(zip(PROMPT_STRINGS, PROMPT_TOKENS))
+            arrival_time=0,
+            mm_inputs=None,
+            mm_hashes=None,
+            mm_placeholders=None,
+            eos_token_id=None,
+            lora_request=None,
+            sampling_params=SamplingParams(
+                skip_special_tokens=False,
+                spaces_between_special_tokens=False,
+                output_kind=RequestOutputKind.DELTA,
+                stop=STOP_STRINGS,
+                include_stop_str_in_output=include_stop_str_in_output,
+            )) for idx, (
+                prompt,
+                prompt_tokens) in enumerate(zip(PROMPT_STRINGS, PROMPT_TOKENS))
     ]
 
     # Add requests to the detokenizer.
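
The net effect of the change: the detokenizer-specific DetokenizerRequest is removed, and the tests now construct the engine-wide EngineCoreRequest, with the detokenization options (skip_special_tokens, spaces_between_special_tokens, output_kind, stop, include_stop_str_in_output) moved into its sampling_params field. Below is a minimal standalone sketch of the new construction; the constructor fields mirror the diff, while the Detokenizer(TOKENIZER_NAME) constructor and add_request() call are assumptions inferred from the surrounding test code ("# Add requests to the detokenizer.") and may differ between vLLM versions:

from vllm.sampling_params import RequestOutputKind, SamplingParams
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.detokenizer import Detokenizer

# Build one request; the fields mirror the diff above.
request = EngineCoreRequest(
    request_id="request-0",
    prompt="Hello, my name is",
    prompt_token_ids=[1, 22557, 28725, 586, 1141, 349],  # hypothetical token ids
    arrival_time=0,
    mm_inputs=None,
    mm_hashes=None,
    mm_placeholders=None,
    eos_token_id=None,
    lora_request=None,
    sampling_params=SamplingParams(
        skip_special_tokens=False,
        spaces_between_special_tokens=False,
        output_kind=RequestOutputKind.DELTA,
        stop=[],
        include_stop_str_in_output=False,
    ),
)

# Assumed API: the tests register requests before stepping the detokenizer
# with EngineCoreOutput batches.
detokenizer = Detokenizer("mistralai/Mistral-7B-Instruct-v0.3")
detokenizer.add_request(request)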