Suppress benign cuBLAS warning when capturing cudagraphs with DBO (#25596)

SageMoore · yewentao256 · commit 0e0d51c9c6c2 · 2025-10-03T13:35:55.000-07:00
Signed-off-by: Sage Moore <sage@neuralmagic.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py
@@ -104,6 +104,7 @@ def __init__(self, runnable: Callable, vllm_config: VllmConfig,
             self.graph_pool = current_platform.get_global_graph_pool()
 
         self.sm_control = self._create_sm_control_context(vllm_config)
+        self.device = device
 
     @staticmethod
     def _create_sm_control_context(vllm_config: VllmConfig):
@@ -168,6 +169,7 @@ def _capture_ubatches(self, ubatch_metadata, model) -> torch.Tensor:
 
         @torch.inference_mode()
         def _capture_ubatch_thread(results, ubatch_metadata):
+            torch.cuda.set_device(self.device)
             ubatch_context = ubatch_metadata.context
             with torch.cuda.stream(ubatch_context.compute_stream):
                 _ = torch.cuda.current_blas_handle()