We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c574bdd, commit 1e64d51 (Copy full SHA for 1e64d51)
CMakeLists.txt
@@ -280,8 +280,8 @@ if (LLAMA_CUBLAS)
280
# 52 == lowest CUDA 12 standard
281
# 60 == f16 CUDA intrinsics
282
# 61 == integer CUDA intrinsics
283
- # 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
284
- if (LLAMA_CUDA_DMMV_F16)
+ # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
+ if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
285
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
286
else()
287
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
0 commit comments