Skip to content

Commit 6374743

Browse files
authored
ggml : add backend registry / device interfaces to BLAS backend (ggml-org#9752)
* ggml : add backend registry / device interfaces to BLAS backend
* fix mmap usage when using host buffers
1 parent f1af42f commit 6374743

File tree

8 files changed

+292
-98
lines changed

8 files changed

+292
-98
lines changed

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ extern "C" {
170170

171171
// Functions that may be obtained using ggml_backend_reg_get_proc_address
172172
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
173+
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t, int);
173174

174175
//
175176
// Backend registry

ggml/include/ggml-blas.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
1717
// for openblas and blis, this will also set the number of threads used for blas operations
1818
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
1919

20+
GGML_API ggml_backend_reg_t ggml_backend_blas_reg(void);
21+
2022

2123
#ifdef __cplusplus
2224
}

ggml/src/CMakeLists.txt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,22 +190,24 @@ if (GGML_BLAS)
190190
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
191191
find_package(PkgConfig REQUIRED)
192192
if (${GGML_BLAS_VENDOR} MATCHES "Generic")
193-
pkg_check_modules(DepBLAS REQUIRED blas)
193+
pkg_check_modules(DepBLAS blas)
194194
elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
195195
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
196196
pkg_check_modules(DepBLAS openblas64)
197197
if (NOT DepBLAS_FOUND)
198-
pkg_check_modules(DepBLAS REQUIRED openblas)
198+
pkg_check_modules(DepBLAS openblas)
199199
endif()
200200
elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
201-
pkg_check_modules(DepBLAS REQUIRED blis)
201+
add_compile_definitions(GGML_BLAS_USE_BLIS)
202+
pkg_check_modules(DepBLAS blis)
202203
elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
203-
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
204+
pkg_check_modules(DepBLAS blas-atlas)
204205
elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
205-
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
206+
pkg_check_modules(DepBLAS flexiblas_api)
206207
elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
208+
add_compile_definitions(GGML_BLAS_USE_MKL)
207209
# all Intel* libraries share the same include path
208-
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
210+
pkg_check_modules(DepBLAS mkl-sdl)
209211
elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
210212
# this doesn't provide pkg-config
211213
# suggest to assign BLAS_INCLUDE_DIRS on your own

ggml/src/ggml-backend-impl.h

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ extern "C" {
8888

8989
void (*free)(ggml_backend_t backend);
9090

91+
// Will be moved to the device interface
9192
// buffer allocation
9293
ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
9394

@@ -112,17 +113,9 @@ extern "C" {
112113

113114
// IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
114115
// new backends should implement the device interface instead
115-
116116
// These functions are being moved to the device interface
117-
// check if the backend can compute an operation
118117
bool (*supports_op) (ggml_backend_t backend, const struct ggml_tensor * op);
119-
120-
// check if the backend can use tensors allocated in a buffer type
121118
bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
122-
123-
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
124-
// these should be expensive operations with large batch sizes that may benefit from running on this backend
125-
// even if the weight has to be copied from the CPU temporarily
126119
bool (*offload_op) (ggml_backend_t backend, const struct ggml_tensor * op);
127120

128121
// (optional) event synchronization
@@ -184,9 +177,8 @@ extern "C" {
184177
// check if the backend can use tensors allocated in a buffer type
185178
bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
186179

187-
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
188-
// these should be expensive operations with large batch sizes that may benefit from running on this backend
189-
// even if the weight has to be copied from the CPU temporarily
180+
// (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
181+
// these should be expensive operations that may benefit from running on this backend instead of the CPU backend
190182
bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
191183

192184
// (optional) event synchronization

ggml/src/ggml-backend.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,11 @@ bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buff
500500
}
501501

502502
bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
503-
return device->iface.offload_op(device, op);
503+
if (device->iface.offload_op != NULL) {
504+
return device->iface.offload_op(device, op);
505+
}
506+
507+
return false;
504508
}
505509

506510
// Backend (reg)
@@ -534,6 +538,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
534538
#include "ggml-metal.h"
535539
#endif
536540

541+
#ifdef GGML_USE_BLAS
542+
#include "ggml-blas.h"
543+
#endif
544+
537545
struct ggml_backend_registry {
538546
std::vector<ggml_backend_reg_t> backends;
539547
std::vector<ggml_backend_dev_t> devices;
@@ -545,10 +553,13 @@ struct ggml_backend_registry {
545553
#ifdef GGML_USE_METAL
546554
register_backend(ggml_backend_metal_reg());
547555
#endif
548-
549-
register_backend(ggml_backend_cpu_reg());
556+
#ifdef GGML_USE_BLAS
557+
register_backend(ggml_backend_blas_reg());
558+
#endif
550559

551560
// TODO: sycl, vulkan, kompute, cann
561+
562+
register_backend(ggml_backend_cpu_reg());
552563
}
553564

554565
void register_backend(ggml_backend_reg_t reg) {
@@ -1229,16 +1240,22 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
12291240
};
12301241

12311242
return &ggml_backend_cpu_device;
1243+
}
1244+
1245+
static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
1246+
if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
1247+
return (void *)ggml_backend_cpu_set_n_threads;
1248+
}
1249+
return NULL;
12321250

12331251
GGML_UNUSED(reg);
1234-
GGML_UNUSED(index);
12351252
}
12361253

12371254
static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
12381255
/* .get_name = */ ggml_backend_cpu_reg_get_name,
12391256
/* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
12401257
/* .get_device = */ ggml_backend_cpu_reg_get_device,
1241-
/* .get_proc_address = */ NULL,
1258+
/* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
12421259
};
12431260

12441261
ggml_backend_reg_t ggml_backend_cpu_reg(void) {

0 commit comments

Comments
 (0)