Skip to content

Commit 071cc1a

Browse files
committed
Update CMakeLists.txt
1 parent e18e678 commit 071cc1a

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

ggml/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -117,7 +117,7 @@ option(GGML_MUSA "ggml: use MUSA"
117 117
option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
118 118
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
119 119
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
120-
set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
120+
set (GGML_CUDA_DMMV_X "64" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
121 121
set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
122 122
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
123 123
set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING
@@ -127,7 +127,7 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
127 127
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
128 128
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
129 129
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
130-
option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)
130+
option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ON)
131 131

132 132
option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)
133 133
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)

0 commit comments

Comments (0)