Skip to content

Commit f64e4f0

Browse files
committed
ggml : testing GPU FP precision via quantized CPY
1 parent 24a447e commit f64e4f0

File tree

3 files changed

+23
-7
lines changed

3 files changed

+23
-7
lines changed

ggml-metal.m

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,10 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
291291
options = [MTLCompileOptions new];
292292
options.preprocessorMacros = @{ @"QK_K" : @(64) };
293293
#endif
294+
// disable fast math
295+
// NOTE: this seems to have no effect whatsoever
296+
//[options setFastMathEnabled:false];
297+
294298
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
295299
}
296300

ggml-quants.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,13 +1103,18 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
11031103
}
11041104
}
11051105

1106+
#include <stdio.h>
1107+
11061108
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
11071109
static const int qk = QK4_1;
11081110

11091111
assert(k % qk == 0);
11101112

11111113
const int nb = k / qk;
11121114

1115+
printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
1116+
printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
1117+
11131118
for (int i = 0; i < nb; i++) {
11141119
const float d = GGML_FP16_TO_FP32(x[i].d);
11151120
const float m = GGML_FP16_TO_FP32(x[i].m);

tests/test-backend-ops.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -437,11 +437,12 @@ struct test_case {
437437
double err = nmse(f1.data(), f2.data(), f1.size());
438438
if (err > ud->max_err) {
439439
printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
440-
//for (int i = 0; i < f1.size(); i++) {
441-
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
442-
//}
443-
//printf("\n");
444-
//exit(1);
440+
printf("\n");
441+
for (int i = 0; i < f1.size(); i++) {
442+
printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
443+
}
444+
printf("\n");
445+
exit(1);
445446
ud->ok = false;
446447
}
447448
return true;
@@ -1459,8 +1460,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
14591460

14601461
test_cases.emplace_back(new test_dup());
14611462

1462-
for (ggml_type type : all_types) {
1463-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
1463+
//for (ggml_type type : all_types) {
1464+
// test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
1465+
//}
1466+
1467+
for (ggml_type type : { GGML_TYPE_Q4_1} ) {
1468+
for (int i = 0; i < 2048; ++i) {
1469+
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {32, 1, 1, 1}));
1470+
}
14641471
}
14651472

14661473
test_cases.emplace_back(new test_cont());

0 commit comments

Comments
 (0)