11
11
// Smaller of a and b. The '(' must directly follow the macro name, otherwise the
// preprocessor defines an object-like macro. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
12
12
// Larger of a and b. The '(' must directly follow the macro name, otherwise the
// preprocessor defines an object-like macro. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define MAX(a, b) ((a) > (b) ? (a) : (b))
13
13
14
+ // TODO: temporary - reuse llama.cpp logging
14
15
#ifdef GGML_METAL_NDEBUG
15
16
// Logging disabled (GGML_METAL_NDEBUG): metal_printf(...) expands to nothing, so its
// arguments are not evaluated. The '(' must directly follow the macro name for this to
// be a variadic function-like macro.
#define metal_printf(...)
16
17
#else
@@ -113,7 +114,7 @@ @implementation GGMLMetalClass
113
114
@end
114
115
115
116
struct ggml_metal_context * ggml_metal_init (int n_cb) {
116
- fprintf (stderr, " %s : allocating\n " , __func__);
117
+ metal_printf ( " %s : allocating\n " , __func__);
117
118
118
119
struct ggml_metal_context * ctx = malloc (sizeof (struct ggml_metal_context));
119
120
@@ -132,7 +133,7 @@ @implementation GGMLMetalClass
132
133
133
134
ctx->library = [ctx->device newLibraryWithSource:msl_library_source options:nil error:&error];
134
135
if (error) {
135
- fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
136
+ metal_printf( "%s: error: %s\n", __func__, [[error description] UTF8String]);
136
137
return NULL;
137
138
}
138
139
}
@@ -146,11 +147,11 @@ @implementation GGMLMetalClass
146
147
// NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
147
148
NSBundle * bundle = [NSBundle bundleForClass: [GGMLMetalClass class ]];
148
149
NSString * path = [bundle pathForResource: @" ggml-metal" ofType: @" metal" ];
149
- fprintf (stderr, " %s : loading '%s '\n " , __func__, [path UTF8String ]);
150
+ metal_printf ( " %s : loading '%s '\n " , __func__, [path UTF8String ]);
150
151
151
152
NSString * src = [NSString stringWithContentsOfFile: path encoding: NSUTF8StringEncoding error: &error];
152
153
if (error) {
153
- fprintf (stderr, " %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
154
+ metal_printf ( " %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
154
155
return NULL ;
155
156
}
156
157
@@ -162,7 +163,7 @@ @implementation GGMLMetalClass
162
163
ctx->library = [ctx->device newLibraryWithSource: src options: nil error: &error];
163
164
#endif
164
165
if (error) {
165
- fprintf (stderr, " %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
166
+ metal_printf ( " %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
166
167
return NULL ;
167
168
}
168
169
}
@@ -174,11 +175,11 @@ @implementation GGMLMetalClass
174
175
// Load the Metal function "kernel_<name>" from ctx->library and build its compute
// pipeline, storing the results in ctx->function_<name> and ctx->pipeline_<name>.
// Expands to statements that use `ctx` and `error` from the enclosing scope and that
// `return NULL` from the enclosing function when pipeline creation fails.
// Failure is detected from the nil return value (Cocoa convention) rather than from
// `error`, and the "loaded" line is only printed for a pipeline that actually exists.
#define GGML_METAL_ADD_KERNEL(name) \
        ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
        ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
        if (ctx->pipeline_##name == nil) { \
            metal_printf("%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
            return NULL; \
        } \
        metal_printf("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
                (int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
                (int) ctx->pipeline_##name.threadExecutionWidth);
184
185
@@ -230,19 +231,19 @@ @implementation GGMLMetalClass
230
231
#undef GGML_METAL_ADD_KERNEL
231
232
}
232
233
233
- fprintf (stderr, " %s : recommendedMaxWorkingSetSize = %8.2f MB\n " , __func__, ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
234
- fprintf (stderr, " %s : hasUnifiedMemory = %s \n " , __func__, ctx->device .hasUnifiedMemory ? " true" : " false" );
234
+ metal_printf ( " %s : recommendedMaxWorkingSetSize = %8.2f MB\n " , __func__, ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
235
+ metal_printf ( " %s : hasUnifiedMemory = %s \n " , __func__, ctx->device .hasUnifiedMemory ? " true" : " false" );
235
236
if (ctx->device .maxTransferRate != 0 ) {
236
- fprintf (stderr, " %s : maxTransferRate = %8.2f MB/s\n " , __func__, ctx->device .maxTransferRate / 1024.0 / 1024.0 );
237
+ metal_printf ( " %s : maxTransferRate = %8.2f MB/s\n " , __func__, ctx->device .maxTransferRate / 1024.0 / 1024.0 );
237
238
} else {
238
- fprintf (stderr, " %s : maxTransferRate = built-in GPU\n " , __func__);
239
+ metal_printf ( " %s : maxTransferRate = built-in GPU\n " , __func__);
239
240
}
240
241
241
242
return ctx;
242
243
}
243
244
244
245
void ggml_metal_free (struct ggml_metal_context * ctx) {
245
- fprintf (stderr, " %s : deallocating\n " , __func__);
246
+ metal_printf ( " %s : deallocating\n " , __func__);
246
247
// Release the function and pipeline objects stored in ctx->function_<name> and
// ctx->pipeline_<name>. Uses `ctx` from the enclosing scope. The '(' must directly
// follow the macro name so that `name` is a macro parameter.
#define GGML_METAL_DEL_KERNEL(name) \
    [ctx->function_##name release]; \
    [ctx->pipeline_##name release];
@@ -311,7 +312,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
311
312
void * data = NULL ;
312
313
const int result = posix_memalign ((void **) &data, getpagesize (), n);
313
314
if (result != 0 ) {
314
- fprintf (stderr, " %s : error: posix_memalign failed\n " , __func__);
315
+ metal_printf ( " %s : error: posix_memalign failed\n " , __func__);
315
316
return NULL ;
316
317
}
317
318
@@ -339,7 +340,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
339
340
// Metal buffer based on the host memory pointer
340
341
//
341
342
static id <MTLBuffer > ggml_metal_get_buffer (struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
342
- // fprintf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
343
+ // metal_printf( "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
343
344
344
345
const int64_t tsize = ggml_nbytes (t);
345
346
@@ -350,13 +351,13 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
350
351
if (ioffs >= 0 && ioffs + tsize <= (int64_t ) ctx->buffers [i].size ) {
351
352
*offs = (size_t ) ioffs;
352
353
353
- // fprintf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
354
+ // metal_printf( "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
354
355
355
356
return ctx->buffers [i].metal ;
356
357
}
357
358
}
358
359
359
- fprintf (stderr, " %s : error: buffer is nil\n " , __func__);
360
+ metal_printf ( " %s : error: buffer is nil\n " , __func__);
360
361
361
362
return nil ;
362
363
}
@@ -368,7 +369,7 @@ bool ggml_metal_add_buffer(
368
369
size_t size,
369
370
size_t max_size) {
370
371
if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
371
- fprintf (stderr, " %s : too many buffers\n " , __func__);
372
+ metal_printf ( " %s : too many buffers\n " , __func__);
372
373
return false ;
373
374
}
374
375
@@ -378,7 +379,7 @@ bool ggml_metal_add_buffer(
378
379
const int64_t ioffs = (int64_t ) data - (int64_t ) ctx->buffers [i].data ;
379
380
380
381
if (ioffs >= 0 && ioffs < (int64_t ) ctx->buffers [i].size ) {
381
- fprintf (stderr, " %s : error: buffer '%s ' overlaps with '%s '\n " , __func__, name, ctx->buffers [i].name );
382
+ metal_printf ( " %s : error: buffer '%s ' overlaps with '%s '\n " , __func__, name, ctx->buffers [i].name );
382
383
return false ;
383
384
}
384
385
}
@@ -399,11 +400,11 @@ bool ggml_metal_add_buffer(
399
400
ctx->buffers [ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy: data length: size_aligned options: MTLResourceStorageModeShared deallocator: nil ];
400
401
401
402
if (ctx->buffers [ctx->n_buffers].metal == nil ) {
402
- fprintf (stderr, " %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_aligned / 1024.0 / 1024.0 );
403
+ metal_printf ( " %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_aligned / 1024.0 / 1024.0 );
403
404
return false ;
404
405
}
405
406
406
- fprintf (stderr, " %s : allocated '%-16s ' buffer, size = %8.2f MB" , __func__, name, size_aligned / 1024.0 / 1024.0 );
407
+ metal_printf ( " %s : allocated '%-16s ' buffer, size = %8.2f MB" , __func__, name, size_aligned / 1024.0 / 1024.0 );
407
408
408
409
++ctx->n_buffers ;
409
410
} else {
@@ -423,27 +424,27 @@ bool ggml_metal_add_buffer(
423
424
ctx->buffers [ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy: (void *) ((uint8_t *) data + i) length: size_step_aligned options: MTLResourceStorageModeShared deallocator: nil ];
424
425
425
426
if (ctx->buffers [ctx->n_buffers].metal == nil ) {
426
- fprintf (stderr, " %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_step_aligned / 1024.0 / 1024.0 );
427
+ metal_printf ( " %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_step_aligned / 1024.0 / 1024.0 );
427
428
return false ;
428
429
}
429
430
430
- fprintf (stderr, " %s : allocated '%-16s ' buffer, size = %8.2f MB, offs = %12ld " , __func__, name, size_step_aligned / 1024.0 / 1024.0 , i);
431
+ metal_printf ( " %s : allocated '%-16s ' buffer, size = %8.2f MB, offs = %12ld " , __func__, name, size_step_aligned / 1024.0 / 1024.0 , i);
431
432
if (i + size_step < size) {
432
- fprintf (stderr, " \n " );
433
+ metal_printf ( " \n " );
433
434
}
434
435
435
436
++ctx->n_buffers ;
436
437
}
437
438
}
438
439
439
- fprintf (stderr, " , (%8.2f / %8.2f )" ,
440
+ metal_printf ( " , (%8.2f / %8.2f )" ,
440
441
ctx->device .currentAllocatedSize / 1024.0 / 1024.0 ,
441
442
ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
442
443
443
444
if (ctx->device .currentAllocatedSize > ctx->device .recommendedMaxWorkingSetSize ) {
444
- fprintf (stderr, " , warning: current allocated size is greater than the recommended max working set size\n " );
445
+ metal_printf ( " , warning: current allocated size is greater than the recommended max working set size\n " );
445
446
} else {
446
- fprintf (stderr, " \n " );
447
+ metal_printf ( " \n " );
447
448
}
448
449
}
449
450
@@ -453,8 +454,6 @@ bool ggml_metal_add_buffer(
453
454
void ggml_metal_set_tensor (
454
455
struct ggml_metal_context * ctx,
455
456
struct ggml_tensor * t) {
456
- metal_printf (" %s : set input for tensor '%s '\n " , __func__, t->name );
457
-
458
457
size_t offs;
459
458
id <MTLBuffer > id_dst = ggml_metal_get_buffer (ctx, t, &offs);
460
459
@@ -464,8 +463,6 @@ void ggml_metal_set_tensor(
464
463
void ggml_metal_get_tensor (
465
464
struct ggml_metal_context * ctx,
466
465
struct ggml_tensor * t) {
467
- metal_printf (" %s : extract results for tensor '%s '\n " , __func__, t->name );
468
-
469
466
size_t offs;
470
467
id <MTLBuffer > id_src = ggml_metal_get_buffer (ctx, t, &offs);
471
468
@@ -560,15 +557,13 @@ void ggml_metal_graph_find_concurrency(
560
557
}
561
558
562
559
if (ctx->concur_list_len > GGML_MAX_CONCUR) {
563
- fprintf (stderr, " %s : too many elements for metal ctx->concur_list!\n " , __func__);
560
+ metal_printf ( " %s : too many elements for metal ctx->concur_list!\n " , __func__);
564
561
}
565
562
}
566
563
567
564
void ggml_metal_graph_compute (
568
565
struct ggml_metal_context * ctx,
569
566
struct ggml_cgraph * gf) {
570
- metal_printf (" %s : evaluating graph\n " , __func__);
571
-
572
567
@autoreleasepool {
573
568
574
569
// if there is ctx->concur_list, dispatch concurrently
@@ -616,7 +611,7 @@ void ggml_metal_graph_compute(
616
611
continue ;
617
612
}
618
613
619
- metal_printf (" %s : encoding node %3d , op = %8s \n " , __func__, i, ggml_op_name (gf->nodes [i]->op ));
614
+ // metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
620
615
621
616
struct ggml_tensor * src0 = gf->nodes [i]->src [0 ];
622
617
struct ggml_tensor * src1 = gf->nodes [i]->src [1 ];
@@ -764,7 +759,7 @@ void ggml_metal_graph_compute(
764
759
} break ;
765
760
default :
766
761
{
767
- fprintf (stderr, " %s : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
762
+ metal_printf ( " %s : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
768
763
GGML_ASSERT (false );
769
764
}
770
765
} break ;
@@ -923,7 +918,7 @@ void ggml_metal_graph_compute(
923
918
} break ;
924
919
default :
925
920
{
926
- fprintf (stderr, " Asserting on type %d \n " ,(int )src0t);
921
+ metal_printf ( " Asserting on type %d \n " ,(int )src0t);
927
922
GGML_ASSERT (false && " not implemented" );
928
923
}
929
924
};
@@ -1161,7 +1156,7 @@ void ggml_metal_graph_compute(
1161
1156
} break ;
1162
1157
default :
1163
1158
{
1164
- fprintf (stderr, " %s : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
1159
+ metal_printf ( " %s : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
1165
1160
GGML_ASSERT (false );
1166
1161
}
1167
1162
}
@@ -1186,7 +1181,7 @@ void ggml_metal_graph_compute(
1186
1181
1187
1182
MTLCommandBufferStatus status = (MTLCommandBufferStatus ) [ctx->command_buffers[i] status ];
1188
1183
if (status != MTLCommandBufferStatusCompleted ) {
1189
- fprintf (stderr, " %s : command buffer %d failed with status %lu \n " , __func__, i, status);
1184
+ metal_printf ( " %s : command buffer %d failed with status %lu \n " , __func__, i, status);
1190
1185
GGML_ASSERT (false );
1191
1186
}
1192
1187
}
0 commit comments