1 parent 85a8854 commit d992f22
vllm/attention/layers/cross_attention.py
@@ -25,12 +25,10 @@
 def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
     """Gets the max number of encoder input tokens from the config.
     """
-    max_len = vllm_config.scheduler_config.max_num_encoder_input_tokens
-
-    if isinstance(max_len, int) and max_len > 0:
-        return max_len
-
-    return 0
+    sc = vllm_config.scheduler_config
+    assert sc and isinstance(sc.max_num_encoder_input_tokens, int), \
+        "max_num_encoder_input_tokens must be int for enc-dec models"
+    return sc.max_num_encoder_input_tokens


 def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray,
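For context, the change replaces the old fall-back-to-0 behavior with a hard assertion: if the scheduler config is missing or max_num_encoder_input_tokens is not an int, the helper now fails loudly instead of silently reporting a max encoder length of 0. The sketch below mirrors that behavior with minimal stand-in config classes; SchedulerConfig and VllmConfig here are illustrative assumptions, not the real vllm.config objects.

from dataclasses import dataclass
from typing import Optional


@dataclass
class SchedulerConfig:  # illustrative stand-in for vLLM's scheduler config
    max_num_encoder_input_tokens: Optional[int] = None


@dataclass
class VllmConfig:  # illustrative stand-in for vllm.config.VllmConfig
    scheduler_config: Optional[SchedulerConfig] = None


def _get_max_encoder_len(vllm_config: VllmConfig) -> int:
    """Gets the max number of encoder input tokens from the config."""
    sc = vllm_config.scheduler_config
    # Fail loudly if the value is unset or not an int, rather than returning 0.
    assert sc and isinstance(sc.max_num_encoder_input_tokens, int), \
        "max_num_encoder_input_tokens must be int for enc-dec models"
    return sc.max_num_encoder_input_tokens


# A configured value is returned as-is ...
print(_get_max_encoder_len(VllmConfig(SchedulerConfig(max_num_encoder_input_tokens=4096))))  # 4096
# ... while a missing value now raises AssertionError instead of returning 0 as before.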