Skip to content

Commit 07a5977

Browse files
Sugar-zsg authored and xuebwang-amd committed
Directly get max encoder len from VLLM config in V1 (vllm-project#24866)
Signed-off-by: Sugar-zsg <[email protected]>
Signed-off-by: xuebwang-amd <[email protected]>
1 parent 3665dd2 commit 07a5977

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

vllm/attention/layers/cross_attention.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
from vllm.attention.selector import get_attn_backend
1515
from vllm.config import CacheConfig, VllmConfig
1616
from vllm.logger import init_logger
17-
from vllm.multimodal import MULTIMODAL_REGISTRY
1817
from vllm.utils import cdiv
1918
from vllm.v1.attention.backends.utils import (CommonAttentionMetadata,
2019
subclass_attention_backend)
@@ -23,9 +22,13 @@
2322
logger = init_logger(__name__)
2423

2524

26-
def _get_max_encoder_len(vllm_config: VllmConfig) -> int:
27-
return MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(
28-
vllm_config.model_config)
25+
def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
26+
"""Gets the max number of encoder input tokens from the config.
27+
"""
28+
sc = vllm_config.scheduler_config
29+
assert sc and isinstance(sc.max_num_encoder_input_tokens, int), \
30+
"max_num_encoder_input_tokens must be int for enc-dec models"
31+
return sc.max_num_encoder_input_tokens
2932

3033

3134
def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray,

0 commit comments

Comments
 (0)