We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0578e5a · commit 1137a05 — Copy full SHA for 1137a05
vllm/v1/attention/backends/pallas.py
@@ -12,8 +12,8 @@
12
from vllm.attention.backends.utils import CommonAttentionState
13
14
# These are the 2 tunable parameters of the paged attention Pallas kernel.
15
-NUM_QUERIES_PER_BLOCK = 32
16
-NUM_KV_PAGES_PER_BLOCK = 128
+NUM_QUERIES_PER_BLOCK = 16
+NUM_KV_PAGES_PER_BLOCK = 256
17
18
19
class PallasAttentionBackend(AttentionBackend):
0 commit comments