Merged
7 changes: 2 additions & 5 deletions vllm/platforms/cuda.py
@@ -21,9 +21,6 @@
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
-else:
-    ModelConfig = None
-    VllmConfig = None
 
 logger = init_logger(__name__)

@@ -109,7 +106,7 @@ def log_warnings(cls):
         pass
 
     @classmethod
-    def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
+    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
Collaborator:

You can do this with `from __future__ import annotations`; it will automatically convert all annotations to strings.

Member:

I think either way is fine.

Collaborator:

wfm

         parallel_config = vllm_config.parallel_config
         scheduler_config = vllm_config.scheduler_config
         compilation_config = vllm_config.compilation_config
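As a side note on the suggestion in the thread above, here is a minimal standalone sketch (simplified names, not vllm's actual module layout) of the `from __future__ import annotations` alternative: under PEP 563 every annotation is stored as a string and never evaluated at runtime, so the TYPE_CHECKING-only import works without quoting each annotation by hand.

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from vllm.config import VllmConfig  # imported only while type checking


def check_and_update_config(vllm_config: VllmConfig) -> None:
    # No quotes needed: the annotation is stored as the string
    # 'VllmConfig' and never looked up at runtime.
    ...


# The annotation survives only as text:
print(check_and_update_config.__annotations__)
# -> {'vllm_config': 'VllmConfig', 'return': 'None'}
```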
@@ -308,7 +305,7 @@ def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
     @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
+    def supports_v1(cls, model_config: "ModelConfig") -> bool:
         return True
 
     @classmethod
7 changes: 2 additions & 5 deletions vllm/platforms/rocm.py
@@ -13,9 +13,6 @@
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
-else:
-    ModelConfig = None
-    VllmConfig = None
 
 logger = init_logger(__name__)

@@ -243,7 +240,7 @@ def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
         return True
 
     @classmethod
-    def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
+    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:
             cache_config.block_size = 16
@@ -332,7 +329,7 @@ def fp8_dtype(cls) -> torch.dtype:
         return torch.float8_e4m3fn
 
     @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
+    def supports_v1(cls, model_config: "ModelConfig") -> bool:
         # V1 support on AMD gpus is experimental
         return True

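For completeness, a minimal sketch (a simplified stand-in class, not the actual vllm platform code) of the pattern both files now use: the import lives under TYPE_CHECKING, so the name does not exist at runtime, and the quoted (string) annotation is what avoids a NameError when the method is defined.

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from vllm.config import VllmConfig  # resolved only by type checkers


class Platform:  # stand-in for CudaPlatform / RocmPlatform
    @classmethod
    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
        # The string annotation is never evaluated at runtime, so it is
        # safe even though VllmConfig is not bound here.
        ...


# Unquoted, the def line itself would raise at import time:
#     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
# NameError: name 'VllmConfig' is not defined
# That is why the old code kept the `else: VllmConfig = None` fallback.
```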