
Commit 3b9e496

ggerganov authored and iThalay committed
metal : sync ggml-metal (ref ggml-org#1047)
1 parent 4999b56 commit 3b9e496

4 files changed: +2627 −0 lines changed


extra/sync-ggml.sh

Lines changed: 3 additions & 0 deletions
@@ -5,6 +5,9 @@ cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
 cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
 cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
 cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
+cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
+cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
+cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
 cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
 cp -rpv ../ggml/examples/common.h ./examples/common.h
 cp -rpv ../ggml/examples/common.cpp ./examples/common.cpp

ggml-metal.h

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+// An interface allowing to compute ggml_cgraph with Metal
+//
+// This is a fully functional interface that extends ggml with GPU support for Apple devices.
+// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, OpenCL, etc.)
+//
+// How it works?
+//
+// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
+// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
+// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
+//
+// You only need to make sure that all memory buffers that you used during the graph creation
+// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
+// used during the graph evaluation to determine the arguments of the compute kernels.
+//
+// Synchronization between device and host memory (for example for input and output tensors)
+// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
+//
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+
+// max memory buffers that can be mapped to the device
+#define GGML_METAL_MAX_BUFFERS 16
+
+struct ggml_tensor;
+struct ggml_cgraph;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ggml_metal_context;
+
+struct ggml_metal_context * ggml_metal_init(void);
+void ggml_metal_free(struct ggml_metal_context * ctx);
+
+// creates a mapping between a host memory buffer and a device memory buffer
+// - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
+// - the mapping is used during computation to determine the arguments of the compute kernels
+// - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
+// - max_size specifies the maximum size of a tensor and is used to create shared views such
+//   that it is guaranteed that the tensor will fit in at least one of the views
+//
+bool ggml_metal_add_buffer(
+        struct ggml_metal_context * ctx,
+                       const char * name,
+                             void * data,
+                             size_t size,
+                             size_t max_size);
+
+// set data from host memory into the device
+void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
+
+// get data from the device into host memory
+void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
+
+// same as ggml_graph_compute but uses Metal
+// creates gf->n_threads command buffers in parallel
+void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+
+#ifdef __cplusplus
+}
+#endif
+
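The header above outlines the intended call sequence: map every host buffer used during graph creation with ggml_metal_add_buffer(), move data with ggml_metal_set_tensor() / ggml_metal_get_tensor(), and evaluate with ggml_metal_graph_compute(). The C sketch below illustrates that flow; it is not part of this commit. The tiny y = x*x graph, the 16 MiB context size, and max_size = 0 are illustrative assumptions, and the ggml calls (ggml_init, ggml_build_forward, ggml_get_mem_buffer, ggml_get_mem_size) reference the ggml API as it stood around the time of this sync.

// minimal sketch (assumptions noted above): evaluate y = x*x on the GPU
#include <stdio.h>

#include "ggml.h"
#include "ggml-metal.h"

int main(void) {
    // host-side context that backs all tensors of the graph (size is an assumption)
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // tiny illustrative graph: y = x*x over an 8-element vector
    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * y = ggml_mul(ctx, x, x);
    struct ggml_cgraph  gf = ggml_build_forward(y);

    struct ggml_metal_context * ctx_metal = ggml_metal_init();

    // map the single host buffer used during graph creation;
    // max_size = 0 assumes every tensor fits in one shared view
    if (!ggml_metal_add_buffer(ctx_metal, "data",
            ggml_get_mem_buffer(ctx), ggml_get_mem_size(ctx), 0)) {
        fprintf(stderr, "ggml_metal_add_buffer failed\n");
        return 1;
    }

    // upload input, compute on the device, download the result
    for (int i = 0; i < 8; ++i) {
        ((float *) x->data)[i] = (float) i;
    }
    ggml_metal_set_tensor   (ctx_metal, x);
    ggml_metal_graph_compute(ctx_metal, &gf);
    ggml_metal_get_tensor   (ctx_metal, y);

    printf("y[3] = %.1f\n", ((float *) y->data)[3]); // expect 9.0

    ggml_metal_free(ctx_metal);
    ggml_free(ctx);
    return 0;
}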

Comments (0)