Skip to content

Commit 72bf352

Browse files
ggerganov and codesoda
authored and committed
sync : ggml (backend v2, k-quants, CUDA opts, Metal opts, etc.) (ggml-org#1422)
* sync : ggml (backend v2, k-quants, CUDA opts, Metal opts, etc.) * metal : allow env metal variable to override resource path (ggml-org#1415) * Allow env variable to override resource path * Update ggml-metal.m --------- Co-authored-by: Georgi Gerganov <[email protected]> * sync : restore common / main from `master` * sync : restore whisper from `master` * talk-llama : update to latest llama.cpp * ruby : fix build * ggml : fix 32-bit ARM build * ggml : fix MIN / MAX macro collisions + update ios bindings * ggml : fix ifdefs and MIN / MAX again * examples : fix Obj-C and Swift examples * ggml : fix 32-bit ARM compatibility * ggml : one more attempt to fix 32-bit ARM compat * whisper : fix support for larger graphs --------- Co-authored-by: Chris Raethke <[email protected]>
1 parent d3a09b0 commit 72bf352

38 files changed

+30777
-7745
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,10 @@ add_library(${TARGET}
464464
ggml.c
465465
ggml-alloc.h
466466
ggml-alloc.c
467+
ggml-backend.h
468+
ggml-backend.c
469+
ggml-quants.h
470+
ggml-quants.c
467471
${GGML_SOURCES_METAL}
468472
${GGML_SOURCES_CUDA}
469473
${GGML_SOURCES_OPENCL}

Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,13 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
301301
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
302302
$(CC) $(CFLAGS) -c $< -o $@
303303

304-
WHISPER_OBJ += ggml-alloc.o
304+
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
305+
$(CC) $(CFLAGS) -c $< -o $@
306+
307+
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
308+
$(CC) $(CFLAGS) -c $< -o $@
309+
310+
WHISPER_OBJ += ggml-alloc.o ggml-backend.o ggml-quants.o
305311

306312
whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
307313
$(CXX) $(CXXFLAGS) -c $< -o $@

bindings/ruby/ext/extconf.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@
33
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
44
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
55
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
6+
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
67
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
78
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
9+
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
10+
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .")
11+
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.c')} .")
12+
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .")
13+
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .")
814
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
915

1016

bindings/ruby/ext/ggml-backend-impl.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#pragma once
2+
3+
// ggml-backend internal header
4+
5+
#include "ggml-backend.h"
6+
7+
#ifdef __cplusplus
8+
extern "C" {
9+
#endif
10+
11+
//
12+
// Backend buffer
13+
//
14+
15+
typedef void * ggml_backend_buffer_context_t;
16+
17+
// Callback table for a backend buffer. Every entry is a function pointer whose
// first argument is the buffer being operated on; the three tensor callbacks
// additionally receive the tensor concerned.
struct ggml_backend_buffer_i {
18+
void (*free_buffer) (ggml_backend_buffer_t buffer);
19+
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
20+
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
21+
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
22+
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
23+
};
24+
25+
// A backend buffer: its callback table plus the backend handle, an opaque
// context pointer and a size.
struct ggml_backend_buffer {
26+
struct ggml_backend_buffer_i iface; // callback table, stored by value
27+
28+
ggml_backend_t backend; // backend handle kept with the buffer
29+
ggml_backend_buffer_context_t context; // opaque pointer (ggml_backend_buffer_context_t is a typedef of void *)
30+
31+
size_t size; // buffer size -- presumably in bytes; TODO confirm against ggml-backend.c
32+
};
33+
34+
// Prototype only; the definition is not in this header.
// Takes a backend, a buffer callback table (passed by value), an opaque
// context pointer and a size, and returns a ggml_backend_buffer_t.
// NOTE(review): presumably builds a struct ggml_backend_buffer from these
// arguments -- confirm against the definition in ggml-backend.c.
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
35+
struct ggml_backend * backend,
36+
struct ggml_backend_buffer_i iface,
37+
ggml_backend_buffer_context_t context,
38+
size_t size);
39+
40+
//
41+
// Backend
42+
//
43+
44+
typedef void * ggml_backend_context_t;
45+
46+
// Callback table for a backend. Every entry is a function pointer whose first
// argument is the backend handle. The entries are grouped below by purpose:
// identification/teardown, buffer allocation, tensor data access, cross-backend
// copies, graph computation (with or without a plan), and operation support.
struct ggml_backend_i {
47+
const char * (*get_name)(ggml_backend_t backend);
48+
49+
void (*free)(ggml_backend_t backend);
50+
51+
// buffer allocation
52+
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
53+
54+
// get buffer alignment
55+
size_t (*get_alignment)(ggml_backend_t backend);
56+
57+
// tensor data access
58+
// these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
59+
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
60+
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
61+
void (*synchronize) (ggml_backend_t backend);
62+
63+
// (optional) copy tensor between different backends, allow for single-copy transfers
64+
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
65+
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
66+
67+
// compute graph with a plan
68+
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
69+
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
70+
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
71+
72+
// compute graph without a plan
73+
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
74+
75+
// check if the backend supports an operation
76+
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
77+
};
78+
79+
// A backend instance: its callback table plus an opaque context pointer.
struct ggml_backend {
80+
struct ggml_backend_i iface; // callback table, stored by value
81+
82+
ggml_backend_context_t context; // opaque pointer (ggml_backend_context_t is a typedef of void *)
83+
};
84+
85+
#ifdef __cplusplus
86+
}
87+
#endif

0 commit comments

Comments
 (0)