File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -117,7 +117,7 @@ option(GGML_MUSA "ggml: use MUSA"
117
117
option (GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF )
118
118
option (GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF )
119
119
option (GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF )
120
- set (GGML_CUDA_DMMV_X "32 " CACHE STRING "ggml: x stride for dmmv CUDA kernels" )
120
+ set (GGML_CUDA_DMMV_X "64 " CACHE STRING "ggml: x stride for dmmv CUDA kernels" )
121
121
set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels" )
122
122
option (GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF )
123
123
set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING
@@ -127,7 +127,7 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
127
127
option (GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF )
128
128
option (GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF )
129
129
option (GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF )
130
- option (GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF )
130
+ option (GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ON )
131
131
132
132
option (GGML_CURL "ggml: use libcurl to download model from an URL" OFF )
133
133
option (GGML_HIPBLAS "ggml: use hipBLAS" OFF )
You can’t perform that action at this time.
0 commit comments