diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 452c13827761..f82af426b5a8 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -21,9 +21,6 @@
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
-else:
-    ModelConfig = None
-    VllmConfig = None
 
 logger = init_logger(__name__)
 
@@ -109,7 +106,7 @@ def log_warnings(cls):
         pass
 
     @classmethod
-    def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
+    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         parallel_config = vllm_config.parallel_config
         scheduler_config = vllm_config.scheduler_config
         compilation_config = vllm_config.compilation_config
@@ -308,7 +305,7 @@ def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
     @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
+    def supports_v1(cls, model_config: "ModelConfig") -> bool:
         return True
 
     @classmethod
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 3d5e90dc32a8..ba8f49ca9150 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -13,9 +13,6 @@
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
-else:
-    ModelConfig = None
-    VllmConfig = None
 
 logger = init_logger(__name__)
 
@@ -243,7 +240,7 @@ def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
         return True
 
     @classmethod
-    def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
+    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:
             cache_config.block_size = 16
@@ -332,7 +329,7 @@ def fp8_dtype(cls) -> torch.dtype:
         return torch.float8_e4m3fn
 
     @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
+    def supports_v1(cls, model_config: "ModelConfig") -> bool:
         # V1 support on AMD gpus is experimental
         return True
 
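
Both files previously carried a runtime fallback (else: ModelConfig = None; VllmConfig = None) so that unquoted annotations such as vllm_config: VllmConfig would not raise a NameError when the module was imported. The patch drops that fallback and quotes the annotations instead: a string annotation is a forward reference that only static type checkers resolve, so the name imported under the if TYPE_CHECKING: guard never needs to exist at runtime. A minimal standalone sketch of the pattern follows; the heavy_config module and HeavyConfig class are hypothetical stand-ins, not vLLM names:

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen only by static type checkers (mypy, pyright); this branch
        # never runs, so the import adds no startup cost and cannot
        # participate in a circular-import cycle.
        from heavy_config import HeavyConfig  # hypothetical module


    class Platform:
        @classmethod
        def check_and_update_config(cls, config: "HeavyConfig") -> None:
            # The quoted annotation is a forward reference: it is stored
            # as a string and resolved lazily, so HeavyConfig need not be
            # defined when this module is imported.
            ...

An equivalent approach is from __future__ import annotations (PEP 563), which makes every annotation in the module lazy and removes the need for quoting; quoting the individual names, as this diff does, keeps the change local to the touched signatures.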