Skip to content

Commit 2674f02

Browse files
committed
metal : use residency sets
ggml-ci
1 parent 2cc9b8c commit 2674f02

File tree

1 file changed

+99
-5
lines changed

1 file changed

+99
-5
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
// max number of MTLCommandBuffer used to submit a graph for processing
2020
#define GGML_METAL_MAX_COMMAND_BUFFERS 8
2121

22+
// create residency sets only on macOS >= 15.0
23+
#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000
24+
#define GGML_METAL_HAS_RESIDENCY_SETS 1
25+
#endif
26+
2227
#define UNUSED(x) (void)(x)
2328

2429
// globals
@@ -39,6 +44,7 @@
3944

4045
bool has_simdgroup_reduction;
4146
bool has_simdgroup_mm;
47+
bool has_residency_sets;
4248
bool has_bfloat;
4349
bool use_bfloat;
4450

@@ -48,6 +54,7 @@
4854
/*.mtl_device_ref_count =*/ 0,
4955
/*.has_simdgroup_reduction =*/ false,
5056
/*.has_simdgroup_mm =*/ false,
57+
/*.has_residency_sets =*/ false,
5158
/*.has_bfloat =*/ false,
5259
/*.use_bfloat =*/ false,
5360
/*.name =*/ "",
@@ -65,6 +72,10 @@
6572

6673
ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
6774

75+
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
76+
ctx->has_residency_sets = true;
77+
#endif
78+
6879
ctx->has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
6980
ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
7081

@@ -483,6 +494,11 @@ @implementation GGMLMetalClass
483494
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
484495

485496
ctx->queue = [device newCommandQueue];
497+
if (ctx->queue == nil) {
498+
GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
499+
return NULL;
500+
}
501+
486502
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
487503

488504
id<MTLLibrary> metal_library;
@@ -649,6 +665,7 @@ @implementation GGMLMetalClass
649665

650666
GGML_LOG_INFO("%s: simdgroup reduction = %s\n", __func__, ctx_dev->has_simdgroup_reduction ? "true" : "false");
651667
GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx_dev->has_simdgroup_mm ? "true" : "false");
668+
GGML_LOG_INFO("%s: has residency sets = %s\n", __func__, ctx_dev->has_residency_sets ? "true" : "false");
652669
GGML_LOG_INFO("%s: has bfloat = %s\n", __func__, ctx_dev->has_bfloat ? "true" : "false");
653670
GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx_dev->use_bfloat ? "true" : "false");
654671
GGML_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false");
@@ -1035,8 +1052,60 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
10351052
// multiple buffers are used only to avoid the maximum buffer size limitation when using mmap
10361053
int n_buffers;
10371054
struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
1055+
1056+
// optional MTLResidencySet
1057+
id rset;
10381058
};
10391059

1060+
// rset init
1061+
// Creates a residency set that covers every Metal buffer of `ctx`, commits it and
// requests residency for it.
//
//   ctx    - buffer context; n_buffers and buffers[i].metal are read, rset is written
//   device - Metal device used to create the residency set
//
// Returns true on success. Also returns true (with ctx->rset == nil) when residency sets
// are compiled out or the OS is older than macOS 15.0, so callers can treat the set as
// optional. Returns false only when creating the set fails.
static bool ggml_backend_metal_buffer_rset_init(struct ggml_backend_metal_buffer_context * ctx, id<MTLDevice> device) {
    // start from a well-defined value so that every exit path leaves rset either nil or valid
    ctx->rset = nil;

#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    if (@available(macOS 15.0, *)) {
        MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init];
        desc.label           = @"ggml_backend_metal";
        desc.initialCapacity = ctx->n_buffers;

        // must be initialized: the out-parameter is only guaranteed to be written on failure
        NSError * error = nil;
        ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];

        // this file uses manual retain/release, so the descriptor we alloc'ed must be
        // released on both the success and the failure path
        [desc release];

        // check the returned object, not the error pointer, to detect failure
        if (ctx->rset == nil) {
            GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown error");
            return false;
        }

        for (int i = 0; i < ctx->n_buffers; i++) {
            [ctx->rset addAllocation:ctx->buffers[i].metal];
        }

        // allocations added above only take effect after commit
        [ctx->rset commit];
        [ctx->rset requestResidency];

        return true;
    }
#else
    GGML_UNUSED(device);
#endif

    // residency sets not available - not an error, ctx->rset stays nil
    return true;
}
1093+
1094+
// rset free
1095+
// Tears down the residency set of `ctx`, if one was created.
//
//   ctx - buffer context whose rset is ended, emptied and released
//
// Does nothing when residency sets are compiled out, when the OS is older than
// macOS 15.0, or when ctx->rset is nil. After the call ctx->rset is nil, so calling
// this function again on the same context is harmless.
static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) {
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    if (@available(macOS 15.0, *)) {
        if (ctx->rset) {
            [ctx->rset endResidency];
            [ctx->rset removeAllAllocations];
            [ctx->rset release];

            // do not leave a dangling pointer behind after the manual release
            ctx->rset = nil;
        }
    }
#else
    GGML_UNUSED(ctx);
#endif
}
1108+
10401109
// finds the Metal buffer that contains the tensor data on the GPU device
10411110
// the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
10421111
// Metal buffer based on the host memory pointer
@@ -4086,7 +4155,7 @@ static enum ggml_status ggml_metal_graph_compute(
40864155
// the main thread commits the first few commands immediately
40874156
// command_buffer[n_cb]
40884157
{
4089-
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBufferWithUnretainedReferences];
4158+
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBuffer];
40904159
ctx->command_buffers[n_cb] = command_buffer;
40914160

40924161
[command_buffer enqueue];
@@ -4096,7 +4165,7 @@ static enum ggml_status ggml_metal_graph_compute(
40964165
// prepare the rest of the command buffers asynchronously
40974166
// command_buffer[0.. n_cb)
40984167
for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) {
4099-
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBufferWithUnretainedReferences];
4168+
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBuffer];
41004169
ctx->command_buffers[cb_idx] = command_buffer;
41014170

41024171
// always enqueue the first two command buffers
@@ -4176,6 +4245,8 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
41764245
for (int i = 0; i < ctx->n_buffers; i++) {
41774246
[ctx->buffers[i].metal release];
41784247
}
4248+
4249+
ggml_backend_metal_buffer_rset_free(ctx);
41794250
ggml_backend_metal_device_rel(buffer->buft->device->context);
41804251

41814252
if (ctx->owned) {
@@ -4284,7 +4355,8 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
42844355
size_aligned += (size_page - (size_aligned % size_page));
42854356
}
42864357

4287-
id<MTLDevice> device = ggml_backend_metal_device_acq(buft->device->context);
4358+
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context;
4359+
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
42884360

42894361
ctx->all_data = ggml_metal_host_malloc(size_aligned);
42904362
ctx->all_size = size_aligned;
@@ -4307,7 +4379,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
43074379
if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
43084380
GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
43094381
free(ctx);
4310-
ggml_backend_metal_device_rel(buft->device->context);
4382+
ggml_backend_metal_device_rel(ctx_dev);
4383+
return NULL;
4384+
}
4385+
4386+
if (!ggml_backend_metal_buffer_rset_init(ctx, device)) {
4387+
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
4388+
free(ctx);
4389+
ggml_backend_metal_device_rel(ctx_dev);
43114390
return NULL;
43124391
}
43134392

@@ -4400,7 +4479,8 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
44004479
size_aligned += (size_page - (size_aligned % size_page));
44014480
}
44024481

4403-
id<MTLDevice> device = ggml_backend_metal_device_acq(&g_ggml_ctx_dev_main);
4482+
struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
4483+
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
44044484

44054485
// the buffer fits into the max buffer size allowed by the device
44064486
if (size_aligned <= device.maxBufferLength) {
@@ -4453,6 +4533,13 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
44534533
}
44544534
}
44554535

4536+
if (!ggml_backend_metal_buffer_rset_init(ctx, device)) {
4537+
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
4538+
free(ctx);
4539+
ggml_backend_metal_device_rel(ctx_dev);
4540+
return NULL;
4541+
}
4542+
44564543
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
44574544
}
44584545

@@ -4766,6 +4853,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
47664853
}
47674854
}
47684855

4856+
if (!ggml_backend_metal_buffer_rset_init(ctx, device)) {
4857+
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
4858+
free(ctx);
4859+
ggml_backend_metal_device_rel(ctx_dev);
4860+
return NULL;
4861+
}
4862+
47694863
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
47704864
}
47714865

0 commit comments

Comments
 (0)