Skip to content

Commit cffd20c

Browse files
committed
fix
Signed-off-by: wang.yuqi <[email protected]>
1 parent 995ebbd commit cffd20c

File tree

4 files changed

+15
-13
lines changed

4 files changed

+15
-13
lines changed

tests/models/language/pooling/test_extract_hidden_states.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def test_embed_models(hf_runner, vllm_runner, model: str):
3030

3131
for n, output in zip(n_prompt_tokens, pooling_outputs):
3232
assert len(output.prompt_token_ids) == n
33+
assert len(output.outputs.data) == n
3334
assert output.num_cached_tokens == 0
3435

3536
# test enable_prefix_caching plus all pooling
@@ -40,4 +41,5 @@ def test_embed_models(hf_runner, vllm_runner, model: str):
4041

4142
for n, output in zip(n_prompt_tokens, pooling_outputs):
4243
assert len(output.prompt_token_ids) == n
44+
assert len(output.outputs.data) == n
4345
assert output.num_cached_tokens == 0

vllm/pooling_params.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class PoolingParams(
5757
## Internal use only
5858
task: PoolingTask | None = None
5959
requires_token_ids: bool = False
60-
not_using_prefix_caching: bool = None
60+
disable_prefix_caching: bool = None
6161
extra_kwargs: dict[str, Any] | None = None
6262
output_kind: RequestOutputKind = RequestOutputKind.FINAL_ONLY
6363

@@ -123,14 +123,14 @@ def _merge_default_parameters(
123123
if getattr(self, k, None) is None:
124124
setattr(self, k, getattr(pooler_config, k))
125125

126-
if self.not_using_prefix_caching is None:
127-
# If enable_prefix_caching is enabled,
126+
if self.disable_prefix_caching is None:
127+
# If prefix caching is enabled,
128128
# the output of all pooling may be less than n_prompt_tokens,
129-
# we need to not using prefix_caching at this request.
129+
# so we need to disable prefix caching for this request.
130130
if self.task in ["token_embed", "token_classify"]:
131-
self.not_using_prefix_caching = True
131+
self.disable_prefix_caching = True
132132
else:
133-
self.not_using_prefix_caching = False
133+
self.disable_prefix_caching = False
134134

135135
self._verify_step_pooling(pooler_config, valid_parameters)
136136

vllm/sampling_params.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ class SamplingParams(
252252
generated token can complete the sequence."""
253253
_bad_words_token_ids: list[list[int]] | None = None
254254

255-
not_using_prefix_caching: bool = None
255+
disable_prefix_caching: bool = None
256256

257257
@staticmethod
258258
def from_optional(
@@ -414,11 +414,11 @@ def __post_init__(self) -> None:
414414
self.structured_outputs = self.guided_decoding
415415
self.guided_decoding = None
416416

417-
if self.not_using_prefix_caching is None:
418-
# If enable_prefix_caching is enabled,
417+
if self.disable_prefix_caching is None:
418+
# If prefix caching is enabled,
419419
# the output of prompt logprobs may be less than n_prompt_tokens,
420-
# we need to not using prefix_caching at this request.
421-
self.not_using_prefix_caching = not self.prompt_logprobs
420+
# so we need to disable prefix caching for this request.
421+
self.disable_prefix_caching = not self.prompt_logprobs
422422

423423
def _verify_args(self) -> None:
424424
if not isinstance(self.n, int):

vllm/v1/core/kv_cache_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,11 @@ def get_computed_blocks(self, request: Request) -> tuple[KVCacheBlocks, int]:
193193
not self.enable_caching
194194
or (
195195
request.sampling_params is not None
196-
and request.sampling_params.not_using_prefix_caching
196+
and request.sampling_params.disable_prefix_caching
197197
)
198198
or (
199199
request.pooling_params is not None
200-
and request.pooling_params.not_using_prefix_caching
200+
and request.pooling_params.disable_prefix_caching
201201
)
202202
):
203203
return self.empty_kv_cache_blocks, 0

0 commit comments

Comments
 (0)