[torch.compile] add warning for unsupported models (#10622)

youkaichao · web-flow · commit 65813781a2e2 · 2024-11-24T23:27:51.000-08:00
Signed-off-by: youkaichao &lt;youkaichao@gmail.com&gt;
diff --git a/vllm/compilation/counter.py b/vllm/compilation/counter.py
@@ -5,6 +5,7 @@
 
 @dataclasses.dataclass
 class CompilationCounter:
+    num_models_seen: int = 0
     num_graphs_seen: int = 0
     # including the splitting ops
     num_piecewise_graphs_seen: int = 0
diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
@@ -3,6 +3,7 @@
 
 import torch
 
+from vllm.compilation.counter import compilation_counter
 from vllm.compilation.wrapper import TorchCompileWrapperWithCustomDispatcher
 from vllm.config import CompilationLevel, VllmConfig
 from vllm.logger import init_logger
@@ -130,6 +131,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = '', **kwargs):
         ] or not supports_dynamo()
         if self.do_not_compile:
             return
+        compilation_counter.num_models_seen += 1
         TorchCompileWrapperWithCustomDispatcher.__init__(
             self, compilation_level=vllm_config.compilation_config.level)
 
diff --git a/vllm/plugins/__init__.py b/vllm/plugins/__init__.py
@@ -80,6 +80,9 @@ def set_current_vllm_config(vllm_config: "VllmConfig"):
     """
     global _current_vllm_config
     old_vllm_config = _current_vllm_config
+    from vllm.compilation.counter import compilation_counter
+    from vllm.config import CompilationLevel
+    num_models_seen = compilation_counter.num_models_seen
     try:
         _current_vllm_config = vllm_config
         yield
@@ -88,6 +91,18 @@ def set_current_vllm_config(vllm_config: "VllmConfig"):
                      vllm_config.compilation_config.enabled_custom_ops)
         logger.debug("disabled custom ops: %s",
                      vllm_config.compilation_config.disabled_custom_ops)
+        if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \
+            and compilation_counter.num_models_seen == num_models_seen:
+            # If the model supports compilation,
+            # compilation_counter.num_models_seen should be increased
+            # by at least 1.
+            # If it is not increased, it means the model does not support
+            # compilation (does not have @support_torch_compile decorator).
+            logger.warning(
+                "`torch.compile` is turned on, but the model %s"
+                " does not support it. Please open an issue on GitHub"
+                "if you want it to be supported.",
+                vllm_config.model_config.model)
         _current_vllm_config = old_vllm_config