@@ -180,7 +180,15 @@ @interface GGMLMetalClass : NSObject
180
180
@implementation GGMLMetalClass
181
181
@end
182
182
183
- ggml_log_callback ggml_metal_log_callback = NULL ;
183
+
184
+ static void ggml_metal_default_log_callback (enum ggml_log_level level, const char * msg, void * user_data) {
185
+ fprintf (stderr, " %s " , msg);
186
+
187
+ UNUSED (level);
188
+ UNUSED (user_data);
189
+ }
190
+
191
+ ggml_log_callback ggml_metal_log_callback = ggml_metal_default_log_callback;
184
192
void * ggml_metal_log_user_data = NULL ;
185
193
186
194
void ggml_metal_log_set_callback (ggml_log_callback log_callback, void * user_data) {
@@ -622,7 +630,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
622
630
623
631
// multiple buffers are used only to avoid the maximum buffer size limitation when using mmap
624
632
int n_buffers;
625
- struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
633
+ struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
626
634
};
627
635
628
636
// finds the Metal buffer that contains the tensor data on the GPU device
@@ -2499,13 +2507,29 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
2499
2507
deallocator: nil ];
2500
2508
2501
2509
if (ctx->buffers [0 ].metal == nil ) {
2502
- GGML_METAL_LOG_ERROR (" %s : error: failed to allocate ' %-16s ' buffer, size = %8.2f MiB\n " , __func__, " default " , size_aligned / 1024.0 / 1024.0 );
2510
+ GGML_METAL_LOG_ERROR (" %s : error: failed to allocate buffer, size = %8.2f MiB\n " , __func__, size_aligned / 1024.0 / 1024.0 );
2503
2511
free (ctx);
2504
2512
ggml_backend_metal_free_device ();
2505
2513
return NULL ;
2506
2514
}
2507
2515
2508
- GGML_METAL_LOG_INFO (" %s : allocated '%-16s ' buffer, size = %8.2f MiB\n " , __func__, " default" , size_aligned / 1024.0 / 1024.0 );
2516
+ GGML_METAL_LOG_INFO (" %s : allocated buffer, size = %8.2f MiB" , __func__, size_aligned / 1024.0 / 1024.0 );
2517
+
2518
+
2519
+ #if TARGET_OS_OSX
2520
+ GGML_METAL_LOG_INFO (" , (%8.2f / %8.2f )" ,
2521
+ device.currentAllocatedSize / 1024.0 / 1024.0 ,
2522
+ device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
2523
+
2524
+ if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize ) {
2525
+ GGML_METAL_LOG_WARN (" %s : warning: current allocated size is greater than the recommended max working set size\n " , __func__);
2526
+ } else {
2527
+ GGML_METAL_LOG_INFO (" \n " );
2528
+ }
2529
+ #else
2530
+ GGML_METAL_LOG_INFO (" , (%8.2f )\n " , device.currentAllocatedSize / 1024.0 / 1024.0 );
2531
+ #endif
2532
+
2509
2533
2510
2534
return ggml_backend_buffer_init (buft, ggml_backend_metal_buffer_i, ctx, size);
2511
2535
}
@@ -2560,22 +2584,19 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
2560
2584
2561
2585
id <MTLDevice > device = ggml_backend_metal_get_device ();
2562
2586
2563
- const char * name = " from_ptr" ;
2564
-
2565
2587
// the buffer fits into the max buffer size allowed by the device
2566
2588
if (size_aligned <= device.maxBufferLength ) {
2567
- ctx->buffers [ctx->n_buffers].name = name;
2568
2589
ctx->buffers [ctx->n_buffers].data = data;
2569
2590
ctx->buffers [ctx->n_buffers].size = size;
2570
2591
2571
2592
ctx->buffers [ctx->n_buffers].metal = [device newBufferWithBytesNoCopy: data length: size_aligned options: MTLResourceStorageModeShared deallocator: nil ];
2572
2593
2573
2594
if (ctx->buffers [ctx->n_buffers].metal == nil ) {
2574
- GGML_METAL_LOG_ERROR (" %s : error: failed to allocate ' %-16s ' buffer, size = %8.2f MiB\n " , __func__, name , size_aligned / 1024.0 / 1024.0 );
2595
+ GGML_METAL_LOG_ERROR (" %s : error: failed to allocate buffer, size = %8.2f MiB\n " , __func__, size_aligned / 1024.0 / 1024.0 );
2575
2596
return false ;
2576
2597
}
2577
2598
2578
- GGML_METAL_LOG_INFO (" %s : allocated ' %-16s ' buffer, size = %8.2f MiB\n " , __func__, name , size_aligned / 1024.0 / 1024.0 );
2599
+ GGML_METAL_LOG_INFO (" %s : allocated buffer, size = %8.2f MiB" , __func__, size_aligned / 1024.0 / 1024.0 );
2579
2600
2580
2601
++ctx->n_buffers ;
2581
2602
} else {
@@ -2588,18 +2609,17 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
2588
2609
for (size_t i = 0 ; i < size; i += size_step) {
2589
2610
const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);
2590
2611
2591
- ctx->buffers [ctx->n_buffers].name = name;
2592
2612
ctx->buffers [ctx->n_buffers].data = (void *) ((uint8_t *) data + i);
2593
2613
ctx->buffers [ctx->n_buffers].size = size_step_aligned;
2594
2614
2595
2615
ctx->buffers [ctx->n_buffers].metal = [device newBufferWithBytesNoCopy: (void *) ((uint8_t *) data + i) length: size_step_aligned options: MTLResourceStorageModeShared deallocator: nil ];
2596
2616
2597
2617
if (ctx->buffers [ctx->n_buffers].metal == nil ) {
2598
- GGML_METAL_LOG_ERROR (" %s : error: failed to allocate ' %-16s ' buffer, size = %8.2f MiB\n " , __func__, name , size_step_aligned / 1024.0 / 1024.0 );
2618
+ GGML_METAL_LOG_ERROR (" %s : error: failed to allocate buffer, size = %8.2f MiB\n " , __func__, size_step_aligned / 1024.0 / 1024.0 );
2599
2619
return false ;
2600
2620
}
2601
2621
2602
- GGML_METAL_LOG_INFO (" %s : allocated ' %-16s ' buffer, size = %8.2f MiB, offs = %12ld " , __func__, name , size_step_aligned / 1024.0 / 1024.0 , i);
2622
+ GGML_METAL_LOG_INFO (" %s : allocated buffer, size = %8.2f MiB, offs = %12ld " , __func__, size_step_aligned / 1024.0 / 1024.0 , i);
2603
2623
if (i + size_step < size) {
2604
2624
GGML_METAL_LOG_INFO (" \n " );
2605
2625
}
@@ -2673,17 +2693,7 @@ static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct
2673
2693
/* .supports_op = */ ggml_backend_metal_supports_op,
2674
2694
};
2675
2695
2676
- // TODO: make a common log callback for all backends in ggml-backend
2677
- static void ggml_backend_log_callback (enum ggml_log_level level, const char * msg, void * user_data) {
2678
- fprintf (stderr, " %s " , msg);
2679
-
2680
- UNUSED (level);
2681
- UNUSED (user_data);
2682
- }
2683
-
2684
2696
ggml_backend_t ggml_backend_metal_init (void ) {
2685
- ggml_metal_log_set_callback (ggml_backend_log_callback, NULL );
2686
-
2687
2697
struct ggml_metal_context * ctx = ggml_metal_init (GGML_DEFAULT_N_THREADS);
2688
2698
2689
2699
if (ctx == NULL ) {
0 commit comments