Skip to content

Commit 31ec399

Browse files
authored
ggml : add GGML_CUDA_USE_GRAPHS option, restore GGML_CUDA_FORCE_CUBLAS (cmake) (#8140)
1 parent c7ab7b6 commit 31ec399

File tree

3 files changed: 7 additions & 1 deletion

3 files changed: 7 additions & 1 deletion

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
8080
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
8181
set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
8282
set(GGML_LLAMAFILE ON)
83+
set(GGML_CUDA_USE_GRAPHS ON)
8384

8485
# transition helpers
8586
function (llama_option_depr TYPE OLD NEW)

ggml/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ option(GGML_LLAMAFILE "ggml: use ggml SGEMM"
109109
option(GGML_CUDA "ggml: use CUDA" OFF)
110110
option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
111111
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
112+
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
112113
set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
113114
set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
114115
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
@@ -119,6 +120,7 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
119120
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
120121
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
121122
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
123+
option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)
122124

123125
option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)
124126
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)

ggml/src/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -295,12 +295,15 @@ if (GGML_CUDA)
295295

296296
list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA)
297297

298-
add_compile_definitions(GGML_CUDA_USE_GRAPHS)
299298
add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
300299
add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
301300
add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
302301
add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
303302

303+
if (GGML_CUDA_USE_GRAPHS)
304+
add_compile_definitions(GGML_CUDA_USE_GRAPHS)
305+
endif()
306+
304307
if (GGML_CUDA_FORCE_DMMV)
305308
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
306309
endif()

0 commit comments

Comments (0)