File tree Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Original file line number Diff line number Diff line change 38
38
FetchContent_Declare(
39
39
vllm-flash-attn
40
40
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
41
- GIT_TAG 720c94869cf2e0ff5a706e9c7f1dce0939686ade
41
+ GIT_TAG 9bfa9869829d8c593527eb34c5271d0090f7ccc9
42
42
GIT_PROGRESS TRUE
43
43
# Don't share the vllm-flash-attn build between build types
44
44
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
@@ -64,4 +64,4 @@ install(
64
64
DESTINATION vllm_flash_attn
65
65
COMPONENT _vllm_fa3_C
66
66
FILES_MATCHING PATTERN "*.py"
67
- )
67
+ )
Original file line number Diff line number Diff line change @@ -595,14 +595,19 @@ def get_flash_attn_version():
595
595
# if hopper default to FA3, otherwise stick to FA2 for now
596
596
# TODO(lucas): profile FA3 on ampere to see if it makes sense to
597
597
# use FA3 as default for both
598
- if current_platform.get_device_capability()[0] >= 9:
598
+ if current_platform.get_device_capability()[0] == 9:
599
599
fa_version = 3 if is_fa_version_supported (3 ) else 2
600
600
else :
601
601
fa_version = 2
602
602
603
603
if envs .VLLM_FLASH_ATTN_VERSION is not None :
604
604
assert envs .VLLM_FLASH_ATTN_VERSION in [2 , 3 ]
605
605
fa_version = envs .VLLM_FLASH_ATTN_VERSION
606
+ if (current_platform .get_device_capability ()[0 ] == 10
607
+ and envs .VLLM_FLASH_ATTN_VERSION == 3 ):
608
+ logger .warning ("Cannot use FA version 3 on Blackwell platform" ,
609
+ "defaulting to FA version 2." )
610
+ fa_version = 2
606
611
607
612
if not is_fa_version_supported (fa_version ):
608
613
logger .error ("Cannot use FA version %d is not supported due to %s" ,
You can’t perform that action at this time.
0 commit comments