From 838b01e17d8a075f99cda3b839989d2a49992b18 Mon Sep 17 00:00:00 2001
From: Joe Runde <Joseph.Runde@ibm.com>
Date: Mon, 24 Mar 2025 15:08:41 -0600
Subject: [PATCH 1/6] :bug: re-allow out-of-tree (OOT) platforms on V1

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
---
 vllm/engine/arg_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 38a47a846df7..8d96c836fbdc 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1670,8 +1670,10 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             return False
 
         # No support for device type other than CUDA, AMD (experiemntal) or
-        # TPU (experimental) so far.
-        if not (current_platform.is_cuda_alike() or current_platform.is_tpu()):
+        # TPU (experimental) so far. Out-of-tree device support plugins can
+        # maintain their own v1 compatibility checks.
+        if not (current_platform.is_cuda_alike() or current_platform.is_tpu()
+                or current_platform.is_out_of_tree()):
             _raise_or_fallback(
                 feature_name=f"device type={current_platform.device_type}",
                 recommend_to_remove=False)

From 3a03b67e2ac7ec90ae02c6261800b8803ab29979 Mon Sep 17 00:00:00 2001
From: Joe Runde <Joseph.Runde@ibm.com>
Date: Tue, 25 Mar 2025 07:33:18 -0600
Subject: [PATCH 2/6] :sparkles: add supports_v1 interface

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
---
 vllm/engine/arg_utils.py    | 7 ++-----
 vllm/platforms/cuda.py      | 4 ++++
 vllm/platforms/interface.py | 9 ++++++++-
 vllm/platforms/rocm.py      | 4 ++++
 vllm/platforms/tpu.py       | 4 ++++
 5 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 8d96c836fbdc..69a164bbc6a8 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1669,11 +1669,8 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             _raise_or_fallback(feature_name=name, recommend_to_remove=True)
             return False
 
-        # No support for device type other than CUDA, AMD (experiemntal) or
-        # TPU (experimental) so far. Out-of-tree device support plugins can
-        # maintain their own v1 compatibility checks.
-        if not (current_platform.is_cuda_alike() or current_platform.is_tpu()
-                or current_platform.is_out_of_tree()):
+        # Platforms must decide if they can support v1 for this model
+        if not current_platform.supports_v1(model_config=model_config):
             _raise_or_fallback(
                 feature_name=f"device type={current_platform.device_type}",
                 recommend_to_remove=False)
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index bb77318092fc..019b1635e688 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -303,6 +303,10 @@ def get_device_communicator_cls(cls) -> str:
     def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
+    @classmethod
+    def supports_v1(cls, model_config):
+        return True
+
 
 # NVML utils
 # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 9981deee39b7..5dc7cea504dd 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -12,7 +12,7 @@
 from vllm.logger import init_logger
 
 if TYPE_CHECKING:
-    from vllm.config import VllmConfig
+    from vllm.config import ModelConfig, VllmConfig
     from vllm.utils import FlexibleArgumentParser
 else:
     VllmConfig = None
@@ -371,6 +371,13 @@ def use_all_gather(cls) -> bool:
                 or parallel_config.distributed_executor_backend
                 == "external_launcher")
 
+    @classmethod
+    def supports_v1(cls, model_config: ModelConfig) -> None:
+        """Returns whether the current platform can support v1 for the supplied
+        model configuration.
+        """
+        return False
+
 
 class UnspecifiedPlatform(Platform):
     _enum = PlatformEnum.UNSPECIFIED
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index ee708f5961df..ea37f2fc3a25 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -249,3 +249,7 @@ def fp8_dtype(cls) -> torch.dtype:
             return torch.float8_e4m3fnuz
         else:
             return torch.float8_e4m3fn
+
+    @classmethod
+    def supports_v1(cls, model_config):
+        return True
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index 073d46c25d57..eb42b8b134e2 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -127,3 +127,7 @@ def get_device_communicator_cls(cls) -> str:
     @classmethod
     def use_all_gather(cls) -> bool:
         return True
+
+    @classmethod
+    def supports_v1(cls, model_config):
+        return True
\ No newline at end of file

From 3cbef7315ba349848a9e29e44c9888923fe9a58d Mon Sep 17 00:00:00 2001
From: Joe Runde <Joseph.Runde@ibm.com>
Date: Tue, 25 Mar 2025 07:34:57 -0600
Subject: [PATCH 3/6] :art: add missing trailing newline to tpu.py

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
---
 vllm/platforms/tpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index eb42b8b134e2..cf33f735190b 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -130,4 +130,4 @@ def use_all_gather(cls) -> bool:
 
     @classmethod
     def supports_v1(cls, model_config):
-        return True
\ No newline at end of file
+        return True

From d39a178dcda1060b5a93dc62b9cb8f5695af1a5f Mon Sep 17 00:00:00 2001
From: Joe Runde <Joseph.Runde@ibm.com>
Date: Tue, 25 Mar 2025 07:36:01 -0600
Subject: [PATCH 4/6] :memo: add comment re: experimental support

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
---
 vllm/platforms/rocm.py | 1 +
 vllm/platforms/tpu.py  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index ea37f2fc3a25..e5ec6fa48cd3 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -252,4 +252,5 @@ def fp8_dtype(cls) -> torch.dtype:
 
     @classmethod
     def supports_v1(cls, model_config):
+        # V1 support on AMD gpus is experimental
         return True
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index cf33f735190b..5470daf41a43 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -130,4 +130,5 @@ def use_all_gather(cls) -> bool:
 
     @classmethod
     def supports_v1(cls, model_config):
+        # V1 support on TPU is experimental
         return True

From 96a2c0a891e0f31ea8e98f1883f45f656eadf385 Mon Sep 17 00:00:00 2001
From: Joe Runde <Joseph.Runde@ibm.com>
Date: Tue, 25 Mar 2025 07:56:55 -0600
Subject: [PATCH 5/6] :bug: add bool return annotation

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
---
 vllm/platforms/cuda.py      | 2 +-
 vllm/platforms/interface.py | 2 +-
 vllm/platforms/rocm.py      | 2 +-
 vllm/platforms/tpu.py       | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 019b1635e688..f1d5ef026b9d 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -304,7 +304,7 @@ def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
     @classmethod
-    def supports_v1(cls, model_config):
+    def supports_v1(cls, model_config) -> bool:
         return True
 
 
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 5dc7cea504dd..0e62927d23f7 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -372,7 +372,7 @@ def use_all_gather(cls) -> bool:
                 == "external_launcher")
 
     @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> None:
+    def supports_v1(cls, model_config: ModelConfig) -> bool:
         """Returns whether the current platform can support v1 for the supplied
         model configuration.
         """
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index e5ec6fa48cd3..1e9eaffdca74 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -251,6 +251,6 @@ def fp8_dtype(cls) -> torch.dtype:
             return torch.float8_e4m3fn
 
     @classmethod
-    def supports_v1(cls, model_config):
+    def supports_v1(cls, model_config) -> bool:
         # V1 support on AMD gpus is experimental
         return True
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index 5470daf41a43..e92c63881bcf 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -129,6 +129,6 @@ def use_all_gather(cls) -> bool:
         return True
 
     @classmethod
-    def supports_v1(cls, model_config):
+    def supports_v1(cls, model_config) -> bool:
         # V1 support on TPU is experimental
         return True

From aaf21cd9d07d32ec682c8104bbd31a41d7239493 Mon Sep 17 00:00:00 2001
From: Joe Runde <Joseph.Runde@ibm.com>
Date: Tue, 25 Mar 2025 09:15:34 -0600
Subject: [PATCH 6/6] :bug: fixup ModelConfig import, type hint all platforms

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
---
 vllm/platforms/cuda.py      | 5 +++--
 vllm/platforms/interface.py | 1 +
 vllm/platforms/rocm.py      | 5 +++--
 vllm/platforms/tpu.py       | 5 +++--
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index f1d5ef026b9d..ca8a2d2640ec 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -20,8 +20,9 @@
 from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
 
 if TYPE_CHECKING:
-    from vllm.config import VllmConfig
+    from vllm.config import ModelConfig, VllmConfig
 else:
+    ModelConfig = None
     VllmConfig = None
 
 logger = init_logger(__name__)
@@ -304,7 +305,7 @@ def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
     @classmethod
-    def supports_v1(cls, model_config) -> bool:
+    def supports_v1(cls, model_config: ModelConfig) -> bool:
         return True
 
 
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 0e62927d23f7..36db70681a19 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -15,6 +15,7 @@
     from vllm.config import ModelConfig, VllmConfig
     from vllm.utils import FlexibleArgumentParser
 else:
+    ModelConfig = None
     VllmConfig = None
     FlexibleArgumentParser = None
 
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 1e9eaffdca74..d196e24ac7ac 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -12,8 +12,9 @@
 from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
 
 if TYPE_CHECKING:
-    from vllm.config import VllmConfig
+    from vllm.config import ModelConfig, VllmConfig
 else:
+    ModelConfig = None
     VllmConfig = None
 
 logger = init_logger(__name__)
@@ -251,6 +252,6 @@ def fp8_dtype(cls) -> torch.dtype:
             return torch.float8_e4m3fn
 
     @classmethod
-    def supports_v1(cls, model_config) -> bool:
+    def supports_v1(cls, model_config: ModelConfig) -> bool:
         # V1 support on AMD gpus is experimental
         return True
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index e92c63881bcf..43d3044cb93e 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -10,8 +10,9 @@
 from .interface import Platform, PlatformEnum, _Backend
 
 if TYPE_CHECKING:
-    from vllm.config import VllmConfig
+    from vllm.config import ModelConfig, VllmConfig
 else:
+    ModelConfig = None
     VllmConfig = None
 
 logger = init_logger(__name__)
@@ -129,6 +130,6 @@ def use_all_gather(cls) -> bool:
         return True
 
     @classmethod
-    def supports_v1(cls, model_config) -> bool:
+    def supports_v1(cls, model_config: ModelConfig) -> bool:
         # V1 support on TPU is experimental
         return True