1 parent 30eef1d · commit f50aeba
tests/v1/tpu/test_sampler.py
@@ -26,7 +26,7 @@ def test_sampler_different(model_name: str):
         enforce_eager=False,
         max_num_seqs=1,
         max_model_len=512,
-        max_num_batched_tokens=512)
+        max_num_batched_tokens=256)
     prompts = [
         "Write a short story about a robot that dreams for the first time."
     ]
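
For reference, a minimal sketch of the test setup implied by the hunk above, assuming the keyword arguments shown are passed straight to vllm.LLM and that the model comes from the test's model_name parameter (only the arguments visible in the diff are known; everything else is an assumption):

```python
from vllm import LLM

def test_sampler_different(model_name: str):
    # Arguments below mirror the diff context; anything not shown there is assumed.
    llm = LLM(model=model_name,
              enforce_eager=False,
              max_num_seqs=1,
              max_model_len=512,
              max_num_batched_tokens=256)  # was 512 before this commit
    prompts = [
        "Write a short story about a robot that dreams for the first time."
    ]
```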
vllm/v1/attention/backends/pallas.py
@@ -95,7 +95,7 @@ class PallasMetadata:
     block_tables: torch.Tensor
     context_lens: torch.Tensor
     query_start_loc: torch.Tensor
-    num_seqs: int
+    num_seqs: torch.Tensor


 class PallasAttentionBackendImpl(AttentionImpl):
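
For reference, a minimal sketch of PallasMetadata after this change, using only the fields visible in the hunk (the decorator and any fields outside the hunk are assumptions):

```python
from dataclasses import dataclass
import torch

@dataclass
class PallasMetadata:
    block_tables: torch.Tensor
    context_lens: torch.Tensor
    query_start_loc: torch.Tensor
    num_seqs: torch.Tensor  # previously a plain Python int
```

One plausible reading of the change is that keeping num_seqs as a tensor makes it uniform with the other per-batch metadata fields, though the commit itself does not state the motivation.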