Skip to content

ggml: offload the entire cgraph to a specified backend #12342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ggml/src/ggml-backend-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ extern "C" {
void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
// wait for an event on a different stream
void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);

enum ggml_status (*graph_compute_entire) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
};

struct ggml_backend {
Expand Down
13 changes: 13 additions & 0 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,19 @@ enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, st
}

enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
ggml_backend_t prefer_backend = nullptr;
for (size_t idx = 0; idx < GGML_SCHED_MAX_BACKENDS; idx++) {
prefer_backend = sched->backends[idx];
if (nullptr != prefer_backend) {
if (ggml_backend_dev_type(prefer_backend->device) == GGML_BACKEND_DEVICE_TYPE_CPU) {
continue;
} else {
if (nullptr != prefer_backend->iface.graph_compute_entire) {
return prefer_backend->iface.graph_compute_entire(prefer_backend, graph);
}
}
}
}
if (!sched->is_reset && !sched->is_alloc) {
ggml_backend_sched_reset(sched);
}
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-blas/ggml-blas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ static struct ggml_backend_i blas_backend_i = {
/* .graph_compute = */ ggml_backend_blas_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_compute_entire = */ NULL,
};

static ggml_guid_t ggml_backend_blas_guid(void) {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-cann/ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1897,6 +1897,7 @@ static const ggml_backend_i ggml_backend_cann_interface = {
/* .graph_compute = */ ggml_backend_cann_graph_compute,
/* .event_record = */ ggml_backend_cann_event_record,
/* .event_wait = */ ggml_backend_cann_event_wait,
/* .graph_compute_entire = */ NULL,
};

/**
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-cpu/ggml-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ static const struct ggml_backend_i ggml_backend_cpu_i = {
/* .graph_compute = */ ggml_backend_cpu_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_compute_entire = */ NULL,
};

static ggml_guid_t ggml_backend_cpu_guid(void) {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-cuda/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2816,6 +2816,7 @@ static const ggml_backend_i ggml_backend_cuda_interface = {
/* .graph_compute = */ ggml_backend_cuda_graph_compute,
/* .event_record = */ ggml_backend_cuda_event_record,
/* .event_wait = */ ggml_backend_cuda_event_wait,
/* .graph_compute_entire = */ NULL,
};

static ggml_guid_t ggml_backend_cuda_guid() {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-kompute/ggml-kompute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2058,6 +2058,7 @@ static struct ggml_backend_i kompute_backend_i = {
/* .graph_compute = */ ggml_backend_kompute_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_compute_entire = */ NULL,
};

static ggml_guid_t ggml_backend_kompute_guid() {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-metal/ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -4806,6 +4806,7 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
/* .graph_compute = */ ggml_backend_metal_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_compute_entire = */ NULL,
};

static ggml_guid_t ggml_backend_metal_guid(void) {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-opencl/ggml-opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,7 @@ static ggml_backend_i ggml_backend_opencl_i = {
/* .graph_compute = */ ggml_backend_opencl_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_compute_entire = */ NULL,
};

ggml_backend_t ggml_backend_opencl_init(void) {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-rpc/ggml-rpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ static ggml_backend_i ggml_backend_rpc_interface = {
/* .graph_compute = */ ggml_backend_rpc_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_compute_entire = */ NULL,
};

ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-sycl/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3749,6 +3749,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
/* .event_record = */ ggml_backend_sycl_event_record,
/* .event_wait = */ ggml_backend_sycl_event_wait,
/* .graph_compute_entire = */ NULL,
};

static ggml_guid_t ggml_backend_sycl_guid() {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8332,6 +8332,7 @@ static ggml_backend_i ggml_backend_vk_interface = {
/* .graph_compute = */ ggml_backend_vk_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_compute_entire = */ NULL,
};

static ggml_guid_t ggml_backend_vk_guid() {
Expand Down
Loading