3 changes: 2 additions & 1 deletion vllm_flash_attn/flash_attn_interface.py
@@ -41,11 +41,12 @@ def _is_fa3_supported(device = None) -> Tuple[bool, Optional[str]]:
     if not FA3_AVAILABLE:
         return False, f"FA3 is unavaible due to: {FA3_UNAVAILABLE_REASON}"
     if torch.cuda.get_device_capability(device)[0] < 8 \
+        or torch.cuda.get_device_capability(device)[0] >= 10 \
         or torch.cuda.get_device_capability(device) == (8, 6) \
         or torch.cuda.get_device_capability(device) == (8, 9):
         return False, \
             "FA3 is only supported on devices with compute capability >= 8" \
-            " excluding 8.6 and 8.9"
+            " excluding 8.6 and 8.9 and Blackwell archs (>=10)"
     return True, None
 
 def is_fa_version_supported(fa_version: int, device = None) -> bool:
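
For context, a minimal usage sketch of the check this hunk tightens. It assumes only what the diff shows: the module path vllm_flash_attn.flash_attn_interface and the is_fa_version_supported(fa_version, device) helper; the pick_fa_version wrapper (and the assumption that passing 2 queries FA2 support) is hypothetical.

import torch
from vllm_flash_attn.flash_attn_interface import is_fa_version_supported

def pick_fa_version(device=None) -> int:
    # Hypothetical helper: prefer FA3 where the capability check above allows it
    # (>= 8.0, excluding 8.6, 8.9, and Blackwell >= 10), otherwise fall back to FA2.
    for version in (3, 2):
        if is_fa_version_supported(version, device):
            return version
    raise RuntimeError("No supported FlashAttention version for this device")

if torch.cuda.is_available():
    print("Using FlashAttention version:", pick_fa_version())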