Skip to content

Commit 3a00764

Browse files
Commit message: "metal : add option to disable debug logs (close #2764)"
1 parent 611363a · commit 3a00764

File tree

3 files changed

+35
-40
lines changed

3 files changed

+35
-40
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ if (LLAMA_METAL)
301301
set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
302302

303303
add_compile_definitions(GGML_USE_METAL)
304-
add_compile_definitions(GGML_METAL_NDEBUG)
304+
#add_compile_definitions(GGML_METAL_NDEBUG)
305305

306306
# get full path to the file
307307
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
305305
endif # LLAMA_HIPBLAS
306306

307307
ifdef LLAMA_METAL
308-
CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
308+
CFLAGS += -DGGML_USE_METAL #-DGGML_METAL_NDEBUG
309309
CXXFLAGS += -DGGML_USE_METAL
310310
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
311311
OBJS += ggml-metal.o

ggml-metal.m

Lines changed: 33 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define MIN(a, b) ((a) < (b) ? (a) : (b))
1212
#define MAX(a, b) ((a) > (b) ? (a) : (b))
1313

14+
// TODO: temporary - reuse llama.cpp logging
1415
#ifdef GGML_METAL_NDEBUG
1516
#define metal_printf(...)
1617
#else
@@ -113,7 +114,7 @@ @implementation GGMLMetalClass
113114
@end
114115

115116
struct ggml_metal_context * ggml_metal_init(int n_cb) {
116-
fprintf(stderr, "%s: allocating\n", __func__);
117+
metal_printf("%s: allocating\n", __func__);
117118

118119
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
119120

@@ -132,7 +133,7 @@ @implementation GGMLMetalClass
132133

133134
ctx->library = [ctx->device newLibraryWithSource:msl_library_source options:nil error:&error];
134135
if (error) {
135-
fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
136+
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
136137
return NULL;
137138
}
138139
}
@@ -146,11 +147,11 @@ @implementation GGMLMetalClass
146147
//NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
147148
NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
148149
NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
149-
fprintf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]);
150+
metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
150151

151152
NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
152153
if (error) {
153-
fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
154+
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
154155
return NULL;
155156
}
156157

@@ -162,7 +163,7 @@ @implementation GGMLMetalClass
162163
ctx->library = [ctx->device newLibraryWithSource:src options:nil error:&error];
163164
#endif
164165
if (error) {
165-
fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
166+
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
166167
return NULL;
167168
}
168169
}
@@ -174,11 +175,11 @@ @implementation GGMLMetalClass
174175
#define GGML_METAL_ADD_KERNEL(name) \
175176
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
176177
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
177-
fprintf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
178+
metal_printf("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
178179
(int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
179180
(int) ctx->pipeline_##name.threadExecutionWidth); \
180181
if (error) { \
181-
fprintf(stderr, "%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
182+
metal_printf("%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
182183
return NULL; \
183184
}
184185

@@ -230,19 +231,19 @@ @implementation GGMLMetalClass
230231
#undef GGML_METAL_ADD_KERNEL
231232
}
232233

233-
fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
234-
fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
234+
metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
235+
metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
235236
if (ctx->device.maxTransferRate != 0) {
236-
fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
237+
metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
237238
} else {
238-
fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
239+
metal_printf("%s: maxTransferRate = built-in GPU\n", __func__);
239240
}
240241

241242
return ctx;
242243
}
243244

244245
void ggml_metal_free(struct ggml_metal_context * ctx) {
245-
fprintf(stderr, "%s: deallocating\n", __func__);
246+
metal_printf("%s: deallocating\n", __func__);
246247
#define GGML_METAL_DEL_KERNEL(name) \
247248
[ctx->function_##name release]; \
248249
[ctx->pipeline_##name release];
@@ -311,7 +312,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
311312
void * data = NULL;
312313
const int result = posix_memalign((void **) &data, getpagesize(), n);
313314
if (result != 0) {
314-
fprintf(stderr, "%s: error: posix_memalign failed\n", __func__);
315+
metal_printf("%s: error: posix_memalign failed\n", __func__);
315316
return NULL;
316317
}
317318

@@ -339,7 +340,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
339340
// Metal buffer based on the host memory pointer
340341
//
341342
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
342-
//fprintf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
343+
//metal_printf("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
343344

344345
const int64_t tsize = ggml_nbytes(t);
345346

@@ -350,13 +351,13 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
350351
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
351352
*offs = (size_t) ioffs;
352353

353-
//fprintf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
354+
//metal_printf("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
354355

355356
return ctx->buffers[i].metal;
356357
}
357358
}
358359

359-
fprintf(stderr, "%s: error: buffer is nil\n", __func__);
360+
metal_printf("%s: error: buffer is nil\n", __func__);
360361

361362
return nil;
362363
}
@@ -368,7 +369,7 @@ bool ggml_metal_add_buffer(
368369
size_t size,
369370
size_t max_size) {
370371
if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
371-
fprintf(stderr, "%s: too many buffers\n", __func__);
372+
metal_printf("%s: too many buffers\n", __func__);
372373
return false;
373374
}
374375

@@ -378,7 +379,7 @@ bool ggml_metal_add_buffer(
378379
const int64_t ioffs = (int64_t) data - (int64_t) ctx->buffers[i].data;
379380

380381
if (ioffs >= 0 && ioffs < (int64_t) ctx->buffers[i].size) {
381-
fprintf(stderr, "%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
382+
metal_printf("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
382383
return false;
383384
}
384385
}
@@ -399,11 +400,11 @@ bool ggml_metal_add_buffer(
399400
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
400401

401402
if (ctx->buffers[ctx->n_buffers].metal == nil) {
402-
fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
403+
metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
403404
return false;
404405
}
405406

406-
fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
407+
metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
407408

408409
++ctx->n_buffers;
409410
} else {
@@ -423,27 +424,27 @@ bool ggml_metal_add_buffer(
423424
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
424425

425426
if (ctx->buffers[ctx->n_buffers].metal == nil) {
426-
fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
427+
metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
427428
return false;
428429
}
429430

430-
fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
431+
metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
431432
if (i + size_step < size) {
432-
fprintf(stderr, "\n");
433+
metal_printf("\n");
433434
}
434435

435436
++ctx->n_buffers;
436437
}
437438
}
438439

439-
fprintf(stderr, ", (%8.2f / %8.2f)",
440+
metal_printf(", (%8.2f / %8.2f)",
440441
ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
441442
ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
442443

443444
if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
444-
fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
445+
metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
445446
} else {
446-
fprintf(stderr, "\n");
447+
metal_printf("\n");
447448
}
448449
}
449450

@@ -453,8 +454,6 @@ bool ggml_metal_add_buffer(
453454
void ggml_metal_set_tensor(
454455
struct ggml_metal_context * ctx,
455456
struct ggml_tensor * t) {
456-
metal_printf("%s: set input for tensor '%s'\n", __func__, t->name);
457-
458457
size_t offs;
459458
id<MTLBuffer> id_dst = ggml_metal_get_buffer(ctx, t, &offs);
460459

@@ -464,8 +463,6 @@ void ggml_metal_set_tensor(
464463
void ggml_metal_get_tensor(
465464
struct ggml_metal_context * ctx,
466465
struct ggml_tensor * t) {
467-
metal_printf("%s: extract results for tensor '%s'\n", __func__, t->name);
468-
469466
size_t offs;
470467
id<MTLBuffer> id_src = ggml_metal_get_buffer(ctx, t, &offs);
471468

@@ -560,15 +557,13 @@ void ggml_metal_graph_find_concurrency(
560557
}
561558

562559
if (ctx->concur_list_len > GGML_MAX_CONCUR) {
563-
fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
560+
metal_printf("%s: too many elements for metal ctx->concur_list!\n", __func__);
564561
}
565562
}
566563

567564
void ggml_metal_graph_compute(
568565
struct ggml_metal_context * ctx,
569566
struct ggml_cgraph * gf) {
570-
metal_printf("%s: evaluating graph\n", __func__);
571-
572567
@autoreleasepool {
573568

574569
// if there is ctx->concur_list, dispatch concurrently
@@ -616,7 +611,7 @@ void ggml_metal_graph_compute(
616611
continue;
617612
}
618613

619-
metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
614+
//metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
620615

621616
struct ggml_tensor * src0 = gf->nodes[i]->src[0];
622617
struct ggml_tensor * src1 = gf->nodes[i]->src[1];
@@ -764,7 +759,7 @@ void ggml_metal_graph_compute(
764759
} break;
765760
default:
766761
{
767-
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
762+
metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
768763
GGML_ASSERT(false);
769764
}
770765
} break;
@@ -923,7 +918,7 @@ void ggml_metal_graph_compute(
923918
} break;
924919
default:
925920
{
926-
fprintf(stderr, "Asserting on type %d\n",(int)src0t);
921+
metal_printf("Asserting on type %d\n",(int)src0t);
927922
GGML_ASSERT(false && "not implemented");
928923
}
929924
};
@@ -1161,7 +1156,7 @@ void ggml_metal_graph_compute(
11611156
} break;
11621157
default:
11631158
{
1164-
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
1159+
metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
11651160
GGML_ASSERT(false);
11661161
}
11671162
}
@@ -1186,7 +1181,7 @@ void ggml_metal_graph_compute(
11861181

11871182
MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
11881183
if (status != MTLCommandBufferStatusCompleted) {
1189-
fprintf(stderr, "%s: command buffer %d failed with status %lu\n", __func__, i, status);
1184+
metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
11901185
GGML_ASSERT(false);
11911186
}
11921187
}

0 commit comments

Comments (0)