@@ -22,7 +22,7 @@ extern "C" {
         size_t          (*get_max_size)  (ggml_backend_buffer_type_t buft);
         // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
         size_t          (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
-        // (optional) check if tensor data is in host memory (defaults to false)
+        // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
         bool            (*is_host)       (ggml_backend_buffer_type_t buft);
     };
 
@@ -37,7 +37,6 @@ extern "C" {
     //
 
     struct ggml_backend_buffer_i {
-        const char * (*get_name)   (ggml_backend_buffer_t buffer);
         // (optional) free the buffer
         void         (*free_buffer)(ggml_backend_buffer_t buffer);
         // base address of the buffer
@@ -88,19 +87,16 @@ extern "C" {
 
         void (*free)(ggml_backend_t backend);
 
-        // Will be moved to the device interface
-        // buffer allocation
-        ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
-
         // (optional) asynchronous tensor data access
         void (*set_tensor_async)(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
         void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
         bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
 
-        // (optional) complete all pending operations
+        // (optional) complete all pending operations (required if the backend supports async operations)
         void (*synchronize)(ggml_backend_t backend);
 
-        // (optional) compute graph with a plan (not used currently)
+        // (optional) graph plans
+        // compute graph with a plan (not used currently)
         ggml_backend_graph_plan_t (*graph_plan_create)(ggml_backend_t backend, const struct ggml_cgraph * cgraph);
         void                      (*graph_plan_free)  (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
         // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
@@ -111,13 +107,6 @@ extern "C" {
         // compute graph (always async if supported by the backend)
         enum ggml_status (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
-        // IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
-        // new backends should implement the device interface instead
-        // These functions are being moved to the device interface
-        bool (*supports_op)  (ggml_backend_t backend, const struct ggml_tensor * op);
-        bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
-        bool (*offload_op)   (ggml_backend_t backend, const struct ggml_tensor * op);
-
         // (optional) event synchronization
         // record an event on this stream
         void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
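
For illustration only (not part of the commit): a minimal sketch of how a backend might satisfy the two clarified contracts above. The my_backend_* names are hypothetical.

// Hypothetical stubs; assumes the ggml headers are available.
#include "ggml-backend-impl.h"

// is_host: under the tightened contract, return true only if the buffer lives
// in host memory *and* stores tensors in the standard ggml layout, so callers
// may read the data directly without a device copy or layout conversion.
static bool my_backend_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return true; // plain host allocation using the default ggml tensor layout
}

// synchronize: per the updated comment, this is required once a backend
// implements the *_tensor_async hooks - callers rely on it to wait for all
// previously submitted asynchronous operations to complete.
static void my_backend_synchronize(ggml_backend_t backend) {
    (void) backend;
    // e.g. block on the backend's device queue/stream here
}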