3 files changed, +7 -1 lines changed

@@ -80,6 +80,7 @@ set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
 set(GGML_ALL_WARNINGS    ${LLAMA_ALL_WARNINGS})
 set(GGML_FATAL_WARNINGS  ${LLAMA_FATAL_WARNINGS})
 set(GGML_LLAMAFILE       ON)
+set(GGML_CUDA_USE_GRAPHS ON)

 # transition helpers
 function(llama_option_depr TYPE OLD NEW)
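Not part of the change itself: a minimal sketch, assuming CMake >= 3.14 (so policy CMP0077 is NEW), of why this single plain set() in the top-level CMakeLists.txt is enough to pre-seed the GGML_CUDA_USE_GRAPHS option that ggml declares later. The directory layout and project name are illustrative only.

    # hypothetical top-level CMakeLists.txt standing in for llama.cpp's
    cmake_minimum_required(VERSION 3.14)    # CMP0077 (option() honors normal variables) is NEW
    project(parent LANGUAGES C CXX)

    set(GGML_CUDA_USE_GRAPHS ON)            # plain (non-cache) variable, as in the hunk above
    add_subdirectory(ggml)                  # ggml's option(GGML_CUDA_USE_GRAPHS ... OFF) sees
                                            # the existing variable and leaves it at ON

Because the value is a normal variable rather than a cache entry, it also shadows any -DGGML_CUDA_USE_GRAPHS=... passed on the command line in this sketch.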
@@ -109,6 +109,7 @@ option(GGML_LLAMAFILE "ggml: use ggml SGEMM"
 option(GGML_CUDA                "ggml: use CUDA"                                  OFF)
 option(GGML_CUDA_FORCE_DMMV     "ggml: use dmmv instead of mmvq CUDA kernels"     OFF)
 option(GGML_CUDA_FORCE_MMQ      "ggml: use mmq kernels instead of cuBLAS"         OFF)
+option(GGML_CUDA_FORCE_CUBLAS   "ggml: always use cuBLAS instead of mmq kernels"  OFF)
 set   (GGML_CUDA_DMMV_X    "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
 set   (GGML_CUDA_MMV_Y      "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
 option(GGML_CUDA_F16            "ggml: use 16 bit floats for some calculations"   OFF)
@@ -119,6 +120,7 @@ set  (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
 option(GGML_CUDA_NO_PEER_COPY   "ggml: do not use peer to peer copies"            OFF)
 option(GGML_CUDA_NO_VMM         "ggml: do not try to use CUDA VMM"                OFF)
 option(GGML_CUDA_FA_ALL_QUANTS  "ggml: compile all quants for FlashAttention"     OFF)
+option(GGML_CUDA_USE_GRAPHS     "ggml: use CUDA graphs (llama.cpp only)"          OFF)

 option(GGML_CURL                "ggml: use libcurl to download model from an URL" OFF)
 option(GGML_HIPBLAS             "ggml: use hipBLAS"                               OFF)
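Not part of the diff: a small initial-cache sketch (loaded with "cmake -C cuda-options.cmake") showing how a standalone ggml build might enable the two new options; the file name is made up for the example, and passing the same values as -D flags would be equivalent.

    # cuda-options.cmake: hypothetical initial-cache file
    set(GGML_CUDA              ON CACHE BOOL "ggml: use CUDA")
    set(GGML_CUDA_FORCE_CUBLAS ON CACHE BOOL "ggml: always use cuBLAS instead of mmq kernels")
    set(GGML_CUDA_USE_GRAPHS   ON CACHE BOOL "ggml: use CUDA graphs (llama.cpp only)")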
@@ -295,12 +295,15 @@ if (GGML_CUDA)

     list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA)

-    add_compile_definitions(GGML_CUDA_USE_GRAPHS)
     add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
     add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
     add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
     add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})

+    if (GGML_CUDA_USE_GRAPHS)
+        add_compile_definitions(GGML_CUDA_USE_GRAPHS)
+    endif()
+
     if (GGML_CUDA_FORCE_DMMV)
         add_compile_definitions(GGML_CUDA_FORCE_DMMV)
     endif()
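A side note on the pattern rather than the change: add_compile_definitions() is directory-scoped, so the define reaches every target created in this directory and below. A narrower, purely illustrative alternative would attach it to the library target, assuming the target is the add_library(ggml ...) created later in the same file and that the call is placed after that target exists.

    # illustrative alternative only; the change keeps the directory-wide add_compile_definitions()
    if (GGML_CUDA_USE_GRAPHS)
        target_compile_definitions(ggml PRIVATE GGML_CUDA_USE_GRAPHS)
    endif()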