 // max number of MTLCommandBuffer used to submit a graph for processing
 #define GGML_METAL_MAX_COMMAND_BUFFERS 8
 
-// max number of buffers that can be allocated on the heap per command buffer
-#define GGML_METAL_MAX_HEAP_BUFFERS 64
-
 #ifndef TARGET_OS_VISION
 #define TARGET_OS_VISION 0
 #endif
@@ -472,14 +469,15 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
 };
 
 struct ggml_metal_heap {
-    int n;
     int fail;
 
+    size_t offs;
     size_t need;
 
     id<MTLDevice> device;
     id<MTLHeap>   obj;
-    id<MTLBuffer> bufs[GGML_METAL_MAX_HEAP_BUFFERS];
+
+    NSMutableArray * bufs;
 };
 
 static struct ggml_metal_heap * ggml_metal_heap_init(id<MTLDevice> device, size_t size) {
@@ -488,7 +486,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
     MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc] init];
     desc.storageMode  = MTLStorageModePrivate;
     desc.cpuCacheMode = MTLCPUCacheModeDefaultCache;
-    desc.type         = MTLHeapTypeAutomatic; // TODO: use MTLHeapTypePlacement
+    desc.type         = MTLHeapTypePlacement;
     desc.size         = size;
 
     heap->device = device;
@@ -501,39 +499,35 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
         return false;
     }
 
-    for (int i = 0; i < GGML_METAL_MAX_HEAP_BUFFERS; ++i) {
-        heap->bufs[i] = nil;
-    }
-
     [desc release];
 
+    heap->bufs = [[NSMutableArray alloc] init];
+
     return heap;
 }
 
+static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
+    heap->fail = 0;
+    heap->offs = 0;
+    heap->need = 0;
+
+    for (id<MTLBuffer> buf in heap->bufs) {
+        [buf release];
+    }
+    [heap->bufs removeAllObjects];
+}
+
 static void ggml_metal_heap_free(struct ggml_metal_heap * heap) {
     if (heap == nil) {
        return;
    }
 
-    [heap->obj release];
-
-    free(heap);
-}
-
-static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
-    heap->n    = 0;
-    heap->fail = 0;
-    heap->need = 0;
+    ggml_metal_heap_reset(heap);
 
-    for (int i = 0; i < GGML_METAL_MAX_HEAP_BUFFERS; i++) {
-        if (heap->bufs[i]) {
-            [heap->bufs[i] release];
-            heap->bufs[i] = nil;
-            continue;
-        }
+    [heap->obj  release];
+    [heap->bufs release];
 
-        break;
-    }
+    free(heap);
 }
 
 static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
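
For orientation, here is a minimal sketch of the lifecycle these helpers imply: create the heap once, reset it between uses (releasing the sub-allocated buffers and rewinding the offset), and free it when the backend context goes away. The driver function below is illustrative only and not part of the patch.

```objc
// Illustrative only: exercises ggml_metal_heap_init/reset/free as defined above.
static void example_heap_lifecycle(id<MTLDevice> device) {
    // one heap per command buffer, created up front
    struct ggml_metal_heap * heap = ggml_metal_heap_init(device, 1u << 20);
    if (heap == NULL) {
        return;
    }

    // ... sub-allocate temporary buffers from the heap while encoding ...

    // before reusing the heap: drop the sub-buffers and rewind the offset
    ggml_metal_heap_reset(heap);

    // when the backend context is destroyed
    ggml_metal_heap_free(heap);
}
```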
@@ -546,7 +540,7 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
     MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc] init];
     desc.storageMode  = MTLStorageModePrivate;
     desc.cpuCacheMode = MTLCPUCacheModeDefaultCache;
-    desc.type         = MTLHeapTypeAutomatic; // TODO: use MTLHeapTypePlacement
+    desc.type         = MTLHeapTypePlacement;
     desc.size         = size;
 
     heap->obj = [heap->device newHeapWithDescriptor:desc];
@@ -571,33 +565,32 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
 
     const size_t size_aligned = GGML_PAD(size, alignment);
 
-    //GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, need = %zu, fail = %d\n", __func__, size, size_aligned, heap->need, heap->fail);
+    heap->offs += size_aligned;
+    heap->need  = MAX(heap->need, heap->offs + size_aligned);
 
-    heap->need += size_aligned;
+    //GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, offs = %zu, need = %zu\n", __func__, size, size_aligned, heap->offs, heap->need);
 
     if (no_alloc) {
         return nil;
     }
 
-    if (!heap->fail && size_aligned > [heap->obj maxAvailableSizeWithAlignment:alignment]) {
+    if (!heap->fail && heap->offs + size_aligned > [heap->obj size]) {
         heap->fail = 1;
     }
 
-    if (!heap->fail && heap->n >= GGML_METAL_MAX_HEAP_BUFFERS) {
-        heap->fail = 2;
-    }
-
     if (heap->fail) {
         return nil;
     }
 
-    id<MTLBuffer> buf = [heap->obj newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate];
+    id<MTLBuffer> buf = [heap->obj newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate offset:heap->offs];
     if (!buf) {
         heap->fail = 3;
         return nil;
     }
 
-    heap->bufs[heap->n++] = buf;
+    [heap->bufs addObject:buf];
+
+    //GGML_LOG_INFO("%s: allocated buffer, size = %zu, offs = %zu, heap size = %zu, heap used = %zu\n", __func__, size_aligned, heap->offs, [heap->obj size], [heap->obj usedSize]);
 
     return buf;
 }
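
The switch from MTLHeapTypeAutomatic to MTLHeapTypePlacement means the heap no longer tracks sub-allocations itself: the caller passes an explicit byte offset to newBufferWithLength:options:offset: and is responsible for alignment and for keeping live resources from overlapping, which is what the offs bump pointer above does. A standalone sketch (sizes and alignment are illustrative, not the values ggml uses):

```objc
// Illustrative placement-heap sub-allocation, independent of the patch.
#import <Metal/Metal.h>

static void example_placement_heap(id<MTLDevice> device) {
    MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc] init];
    desc.storageMode  = MTLStorageModePrivate;
    desc.cpuCacheMode = MTLCPUCacheModeDefaultCache;
    desc.type         = MTLHeapTypePlacement; // offsets are managed by the caller
    desc.size         = 1u << 20;

    id<MTLHeap> heap = [device newHeapWithDescriptor:desc];
    [desc release];

    // query how much heap space a 4 KiB buffer occupies and its required alignment
    MTLSizeAndAlign sa = [device heapBufferSizeAndAlignWithLength:4096 options:MTLResourceStorageModePrivate];

    // place two buffers back-to-back at explicit, aligned offsets
    const NSUInteger offs1 = (sa.size + sa.align - 1) / sa.align * sa.align;
    id<MTLBuffer> b0 = [heap newBufferWithLength:4096 options:MTLResourceStorageModePrivate offset:0];
    id<MTLBuffer> b1 = [heap newBufferWithLength:4096 options:MTLResourceStorageModePrivate offset:offs1];

    // ... encode GPU work that uses b0 and b1 ...

    [b0 release];
    [b1 release];
    [heap release];
}
```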
@@ -634,7 +627,6 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
     void (^encode_async)(size_t ith);
 
     // n_cb command buffers + 1 used by the main thread
-    //id<MTLCommandBuffer> command_buffers[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
     struct ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
 
     // abort ggml_metal_graph_compute if callback returns true
@@ -1638,13 +1630,16 @@ static bool ggml_metal_encode_node(
     // heap buffers for temporary data
     id<MTLBuffer> h_src0 = nil;
 
+    // always allocate buffers from the start of the heap for the current node
+    heap->offs = 0;
+
     switch (dst->op) {
         case GGML_OP_SOFT_MAX:
             {
                 h_src0 = ggml_metal_heap_alloc(heap, ggml_nbytes(src0), no_alloc);
                 if (!no_alloc && !h_src0) {
-                    GGML_LOG_ERROR("%s: failed to allocate buffer, idx = %4d, size = %8zu, need = %8zu, max available = %9zu, heap size = %9zu, heap used = %zu, fail = %d\n",
-                            __func__, idx, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:0], [heap->obj size], [heap->obj usedSize], heap->fail);
+                    GGML_LOG_ERROR("%s: failed to allocate buffer, idx = %4d, size = %8zu, offs = %8zu, max available = %9zu, heap size = %9zu, heap used = %zu, fail = %d\n",
+                            __func__, idx, ggml_nbytes(src0), heap->offs, [heap->obj maxAvailableSizeWithAlignment:0], [heap->obj size], [heap->obj usedSize], heap->fail);
                     return false;
                 }
             } break;
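
The no_alloc flag together with the need counter suggests a two-pass scheme: a dry run that only measures how much heap space a node's temporaries require, then a resize, then the real encoding pass. The driver below is a hedged sketch of that pattern; the actual call sites live elsewhere in ggml-metal.m and are not shown in this diff.

```objc
// Hedged sketch of the measure-then-allocate pattern implied by `no_alloc` and
// `heap->need`; the encode calls are stand-ins for the real ggml-metal code.
static bool example_encode_with_heap(struct ggml_metal_heap * heap) {
    // pass 1: dry run -- ggml_metal_heap_alloc() only accumulates heap->need
    ggml_metal_heap_reset(heap);
    // ggml_metal_encode_node(..., /*no_alloc =*/ true);

    // grow the heap if the dry run found it too small
    if (heap->need > [heap->obj size]) {
        if (!ggml_metal_heap_resize(heap, heap->need)) {
            return false;
        }
    }

    // pass 2: encode for real -- temporaries are now placed inside the heap
    ggml_metal_heap_reset(heap);
    // ggml_metal_encode_node(..., /*no_alloc =*/ false);

    return true;
}
```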
@@ -2250,8 +2245,6 @@ static bool ggml_metal_encode_node(
             {
                 GGML_ASSERT(!src1 || src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32);
 
-                GGML_ASSERT(ggml_is_contiguous(src0));
-
                 int nth = 32; // SIMD width
 
                 id<MTLComputePipelineState> pipeline = nil;
@@ -4836,6 +4829,12 @@ static enum ggml_status ggml_metal_graph_compute(
         [next_buffer commit];
     }
 
+    for (int i = 0; i <= n_cb; ++i) {
+        struct ggml_metal_heap * heap = ctx->cmd_bufs[i].heap;
+
+        [heap->obj setPurgeableState:MTLPurgeableStateEmpty];
+    }
+
     if (!should_capture && ctx->capture_started) {
         [ctx->capture_scope endScope];
         [[MTLCaptureManager sharedCaptureManager] stopCapture];
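
Marking each heap MTLPurgeableStateEmpty after the command buffers complete tells Metal the backing memory may be reclaimed; the heap's contents must then be treated as undefined. A heap that gets reused would typically be pinned again (or simply resized/recreated) before the next graph, roughly like this illustrative round trip, which is not part of the patch:

```objc
// Illustrative purgeable-state round trip for a heap.
static void example_purgeable(id<MTLHeap> heap) {
    // GPU work using the heap's buffers has completed: let the OS reclaim the pages
    [heap setPurgeableState:MTLPurgeableStateEmpty];

    // before the heap is used again: pin it; its previous contents are undefined
    [heap setPurgeableState:MTLPurgeableStateNonVolatile];
}
```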
@@ -5233,6 +5232,8 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
            }
        }
 
+        // GGML_LOG_INFO("XXXXXXXXXXXXXXXXXXXXXXXXX\n");
+
        if (can_compute) {
            for (int idx = node_start; idx < node_end; ++idx) {
                if (should_capture) {