We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2c3ba73, commit d0a1364. Copy full SHA for d0a1364.
vllm/v1/attention/backends/flashinfer.py
@@ -585,9 +585,10 @@ def build(self,
585
kv_data_type=self.kv_cache_dtype,
586
)
587
else:
588
- attn_metadata.qo_indptr_gpu = qo_indptr_cpu.to(self.device)
+ attn_metadata.qo_indptr_gpu = qo_indptr_cpu.to(
589
+ self.device, non_blocking=True)
590
attn_metadata.paged_kv_indptr_gpu = paged_kv_indptr_cpu.to(
- self.device)
591
592
593
if num_decodes > 0:
594
pure_decode = num_prefills == 0
0 commit comments