From 838b01e17d8a075f99cda3b839989d2a49992b18 Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Mon, 24 Mar 2025 15:08:41 -0600 Subject: [PATCH 1/6] :bug: re-allow OOT platforms on V1 Signed-off-by: Joe Runde --- vllm/engine/arg_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 38a47a846df7..8d96c836fbdc 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1670,8 +1670,10 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: return False # No support for device type other than CUDA, AMD (experiemntal) or - # TPU (experimental) so far. - if not (current_platform.is_cuda_alike() or current_platform.is_tpu()): + # TPU (experimental) so far. Out-of-tree device support plugins can + # maintain their own v1 compatibility checks. + if not (current_platform.is_cuda_alike() or current_platform.is_tpu() + or current_platform.is_out_of_tree()): _raise_or_fallback( feature_name=f"device type={current_platform.device_type}", recommend_to_remove=False) From 3a03b67e2ac7ec90ae02c6261800b8803ab29979 Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Tue, 25 Mar 2025 07:33:18 -0600 Subject: [PATCH 2/6] :sparkles: add supports_v1 interface Signed-off-by: Joe Runde --- vllm/engine/arg_utils.py | 7 ++----- vllm/platforms/cuda.py | 4 ++++ vllm/platforms/interface.py | 9 ++++++++- vllm/platforms/rocm.py | 4 ++++ vllm/platforms/tpu.py | 4 ++++ 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 8d96c836fbdc..69a164bbc6a8 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1669,11 +1669,8 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: _raise_or_fallback(feature_name=name, recommend_to_remove=True) return False - # No support for device type other than CUDA, AMD (experiemntal) or - # TPU (experimental) so far. Out-of-tree device support plugins can - # maintain their own v1 compatibility checks. - if not (current_platform.is_cuda_alike() or current_platform.is_tpu() - or current_platform.is_out_of_tree()): + # Platforms must decide if they can support v1 for this model + if not current_platform.supports_v1(model_config=model_config): _raise_or_fallback( feature_name=f"device type={current_platform.device_type}", recommend_to_remove=False) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index bb77318092fc..019b1635e688 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -303,6 +303,10 @@ def get_device_communicator_cls(cls) -> str: def supports_fp8(cls) -> bool: return cls.has_device_capability(89) + @classmethod + def supports_v1(cls, model_config): + return True + # NVML utils # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`, diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 9981deee39b7..5dc7cea504dd 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -12,7 +12,7 @@ from vllm.logger import init_logger if TYPE_CHECKING: - from vllm.config import VllmConfig + from vllm.config import ModelConfig, VllmConfig from vllm.utils import FlexibleArgumentParser else: VllmConfig = None @@ -371,6 +371,13 @@ def use_all_gather(cls) -> bool: or parallel_config.distributed_executor_backend == "external_launcher") + @classmethod + def supports_v1(cls, model_config: ModelConfig) -> None: + """Returns whether the current platform can support v1 for the supplied + model configuration. + """ + return False + class UnspecifiedPlatform(Platform): _enum = PlatformEnum.UNSPECIFIED diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index ee708f5961df..ea37f2fc3a25 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -249,3 +249,7 @@ def fp8_dtype(cls) -> torch.dtype: return torch.float8_e4m3fnuz else: return torch.float8_e4m3fn + + @classmethod + def supports_v1(cls, model_config): + return True diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 073d46c25d57..eb42b8b134e2 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -127,3 +127,7 @@ def get_device_communicator_cls(cls) -> str: @classmethod def use_all_gather(cls) -> bool: return True + + @classmethod + def supports_v1(cls, model_config): + return True \ No newline at end of file From 3cbef7315ba349848a9e29e44c9888923fe9a58d Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Tue, 25 Mar 2025 07:34:57 -0600 Subject: [PATCH 3/6] :art: newline Signed-off-by: Joe Runde --- vllm/platforms/tpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index eb42b8b134e2..cf33f735190b 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -130,4 +130,4 @@ def use_all_gather(cls) -> bool: @classmethod def supports_v1(cls, model_config): - return True \ No newline at end of file + return True From d39a178dcda1060b5a93dc62b9cb8f5695af1a5f Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Tue, 25 Mar 2025 07:36:01 -0600 Subject: [PATCH 4/6] :memo: add comment re: experimental support Signed-off-by: Joe Runde --- vllm/platforms/rocm.py | 1 + vllm/platforms/tpu.py | 1 + 2 files changed, 2 insertions(+) diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index ea37f2fc3a25..e5ec6fa48cd3 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -252,4 +252,5 @@ def fp8_dtype(cls) -> torch.dtype: @classmethod def supports_v1(cls, model_config): + # V1 support on AMD gpus is experimental return True diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index cf33f735190b..5470daf41a43 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -130,4 +130,5 @@ def use_all_gather(cls) -> bool: @classmethod def supports_v1(cls, model_config): + # V1 support on TPU is experimental return True From 96a2c0a891e0f31ea8e98f1883f45f656eadf385 Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Tue, 25 Mar 2025 07:56:55 -0600 Subject: [PATCH 5/6] :bug: add bool return annotation Signed-off-by: Joe Runde --- vllm/platforms/cuda.py | 2 +- vllm/platforms/interface.py | 2 +- vllm/platforms/rocm.py | 2 +- vllm/platforms/tpu.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 019b1635e688..f1d5ef026b9d 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -304,7 +304,7 @@ def supports_fp8(cls) -> bool: return cls.has_device_capability(89) @classmethod - def supports_v1(cls, model_config): + def supports_v1(cls, model_config) -> bool: return True diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 5dc7cea504dd..0e62927d23f7 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -372,7 +372,7 @@ def use_all_gather(cls) -> bool: == "external_launcher") @classmethod - def supports_v1(cls, model_config: ModelConfig) -> None: + def supports_v1(cls, model_config: ModelConfig) -> bool: """Returns whether the current platform can support v1 for the supplied model configuration. """ diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index e5ec6fa48cd3..1e9eaffdca74 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -251,6 +251,6 @@ def fp8_dtype(cls) -> torch.dtype: return torch.float8_e4m3fn @classmethod - def supports_v1(cls, model_config): + def supports_v1(cls, model_config) -> bool: # V1 support on AMD gpus is experimental return True diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 5470daf41a43..e92c63881bcf 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -129,6 +129,6 @@ def use_all_gather(cls) -> bool: return True @classmethod - def supports_v1(cls, model_config): + def supports_v1(cls, model_config) -> bool: # V1 support on TPU is experimental return True From aaf21cd9d07d32ec682c8104bbd31a41d7239493 Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Tue, 25 Mar 2025 09:15:34 -0600 Subject: [PATCH 6/6] :bug: fixup ModelConfig import, type hint all platforms Signed-off-by: Joe Runde --- vllm/platforms/cuda.py | 5 +++-- vllm/platforms/interface.py | 1 + vllm/platforms/rocm.py | 5 +++-- vllm/platforms/tpu.py | 5 +++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index f1d5ef026b9d..ca8a2d2640ec 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -20,8 +20,9 @@ from .interface import DeviceCapability, Platform, PlatformEnum, _Backend if TYPE_CHECKING: - from vllm.config import VllmConfig + from vllm.config import ModelConfig, VllmConfig else: + ModelConfig = None VllmConfig = None logger = init_logger(__name__) @@ -304,7 +305,7 @@ def supports_fp8(cls) -> bool: return cls.has_device_capability(89) @classmethod - def supports_v1(cls, model_config) -> bool: + def supports_v1(cls, model_config: ModelConfig) -> bool: return True diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 0e62927d23f7..36db70681a19 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -15,6 +15,7 @@ from vllm.config import ModelConfig, VllmConfig from vllm.utils import FlexibleArgumentParser else: + ModelConfig = None VllmConfig = None FlexibleArgumentParser = None diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 1e9eaffdca74..d196e24ac7ac 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -12,8 +12,9 @@ from .interface import DeviceCapability, Platform, PlatformEnum, _Backend if TYPE_CHECKING: - from vllm.config import VllmConfig + from vllm.config import ModelConfig, VllmConfig else: + ModelConfig = None VllmConfig = None logger = init_logger(__name__) @@ -251,6 +252,6 @@ def fp8_dtype(cls) -> torch.dtype: return torch.float8_e4m3fn @classmethod - def supports_v1(cls, model_config) -> bool: + def supports_v1(cls, model_config: ModelConfig) -> bool: # V1 support on AMD gpus is experimental return True diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index e92c63881bcf..43d3044cb93e 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -10,8 +10,9 @@ from .interface import Platform, PlatformEnum, _Backend if TYPE_CHECKING: - from vllm.config import VllmConfig + from vllm.config import ModelConfig, VllmConfig else: + ModelConfig = None VllmConfig = None logger = init_logger(__name__) @@ -129,6 +130,6 @@ def use_all_gather(cls) -> bool: return True @classmethod - def supports_v1(cls, model_config) -> bool: + def supports_v1(cls, model_config: ModelConfig) -> bool: # V1 support on TPU is experimental return True