We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 5a9da1d, commit 7b8fb4a (Copy full SHA for 7b8fb4a)
vllm/utils/flashinfer.py
@@ -228,7 +228,7 @@ def use_trtllm_attention(
228
229
if force_use_trtllm is None:
230
# Environment variable not set - use auto-detection
231
- use_trtllm = (num_tokens <= 256 and max_seq_len < 131072
+ use_trtllm = (num_tokens <= 256 and max_seq_len <= 131072
232
and kv_cache_dtype == "auto")
233
if use_trtllm:
234
logger.warning_once("Using TRTLLM attention (auto-detected).")
0 commit comments