diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index dda6dd4fbea7..cb092aa74e7f 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -585,9 +585,10 @@ def build(self, kv_data_type=self.kv_cache_dtype, ) else: - attn_metadata.qo_indptr_gpu = qo_indptr_cpu.to(self.device) + attn_metadata.qo_indptr_gpu = qo_indptr_cpu.to( + self.device, non_blocking=True) attn_metadata.paged_kv_indptr_gpu = paged_kv_indptr_cpu.to( - self.device) + self.device, non_blocking=True) if num_decodes > 0: pure_decode = num_prefills == 0