1 parent 30eef1d · commit f50aeba
tests/v1/tpu/test_sampler.py
@@ -26,7 +26,7 @@ def test_sampler_different(model_name: str):
         enforce_eager=False,
         max_num_seqs=1,
         max_model_len=512,
-        max_num_batched_tokens=512)
+        max_num_batched_tokens=256)
     prompts = [
         "Write a short story about a robot that dreams for the first time."
     ]
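
For reference, a minimal sketch of the test setup implied by the hunk above, assuming the keyword arguments shown are passed straight to vllm.LLM and that the model comes from the test's model_name parameter (only the arguments visible in the diff are known; everything else is an assumption):

```python
from vllm import LLM

def test_sampler_different(model_name: str):
    # Arguments below mirror the diff context; anything not shown there is assumed.
    llm = LLM(model=model_name,
              enforce_eager=False,
              max_num_seqs=1,
              max_model_len=512,
              max_num_batched_tokens=256)  # was 512 before this commit
    prompts = [
        "Write a short story about a robot that dreams for the first time."
    ]
```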
vllm/v1/attention/backends/pallas.py
@@ -95,7 +95,7 @@ class PallasMetadata:
     block_tables: torch.Tensor
     context_lens: torch.Tensor
     query_start_loc: torch.Tensor
-    num_seqs: int
+    num_seqs: torch.Tensor


 class PallasAttentionBackendImpl(AttentionImpl):
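
For reference, a minimal sketch of PallasMetadata after this change, using only the fields visible in the hunk (the decorator and any fields outside the hunk are assumptions):

```python
from dataclasses import dataclass
import torch

@dataclass
class PallasMetadata:
    block_tables: torch.Tensor
    context_lens: torch.Tensor
    query_start_loc: torch.Tensor
    num_seqs: torch.Tensor  # previously a plain Python int
```

One plausible reading of the change is that keeping num_seqs as a tensor makes it uniform with the other per-batch metadata fields, though the commit itself does not state the motivation.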