Skip to content

Commit 9fccd04

Browse files
authored
[Bugfix] Fix Stream usage in CPU model runner and OneDNN kernel check (#25046)
Signed-off-by: jiang1.li <[email protected]>
1 parent 252ada5 commit 9fccd04

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

csrc/cpu/dnnl_kernels.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -523,7 +523,7 @@ void onednn_mm(torch::Tensor& c, // [M, OC], row-major
523523
CPU_KERNEL_GUARD_IN(onednn_mm)
524524
TORCH_CHECK(a.dim() == 2);
525525
TORCH_CHECK(a.stride(-1) == 1);
526-
TORCH_CHECK(c.is_contiguous());
526+
TORCH_CHECK(c.stride(-1) == 1);
527527
MatMulPrimitiveHandler* ptr =
528528
reinterpret_cast<MatMulPrimitiveHandler*>(handler);
529529

vllm/platforms/cpu.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,6 +185,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
185185
parallel_config.distributed_executor_backend = "mp"
186186
if parallel_config.worker_cls == "auto":
187187
parallel_config.worker_cls = "vllm.v1.worker.cpu_worker.CPUWorker"
188+
# Disable DBO
189+
if parallel_config.enable_dbo:
190+
logger.warning(
191+
"Dual-Batch Overlap is not supported on CPU, disabled.")
192+
parallel_config.enable_dbo = False
188193

189194
# Note: workaround for v1 gpu_model_runner
190195
from vllm.config import CompilationLevel

vllm/v1/worker/cpu_model_runner.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -145,12 +145,20 @@ def __init__(self, *args, **kwargs) -> None:
145145
self.record = lambda: None
146146
self.synchronize = lambda: None
147147

148+
class _StreamPlaceholder:
149+
150+
def __init__(self, *args, **kwargs) -> None:
151+
pass
152+
148153
cuda_event = torch.cuda.Event
154+
cuda_stream = torch.cuda.Stream
149155
try:
150156
torch.cuda.Event = _EventPlaceholder
157+
torch.cuda.Stream = _StreamPlaceholder
151158
yield
152159
finally:
153160
torch.cuda.Event = cuda_event
161+
torch.cuda.Stream = cuda_stream
154162

155163

156164
@contextmanager

0 commit comments

Comments
 (0)