ggml : testing GPU FP precision via quantized CPY

ggerganov · ggerganov · commit f64e4f04e760 · 2023-12-30T19:11:40.000+02:00
diff --git a/ggml-metal.m b/ggml-metal.m
@@ -291,6 +291,10 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
             options = [MTLCompileOptions new];
             options.preprocessorMacros = @{ @"QK_K" : @(64) };
 #endif
+            // disable fast math (experiment - the call below is left commented out)
+            // NOTE: disabling fast math seems to have no effect whatsoever
+            //[options setFastMathEnabled:false];
+
             ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
         }
 
diff --git a/ggml-quants.c b/ggml-quants.c
@@ -1103,13 +1103,18 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
     }
 }
 
+#include <stdio.h>
+
 void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
     static const int qk = QK4_1;
 
     assert(k % qk == 0);
 
     const int nb = k / qk;
 
+    printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
+    printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
+
     for (int i = 0; i < nb; i++) {
         const float d = GGML_FP16_TO_FP32(x[i].d);
         const float m = GGML_FP16_TO_FP32(x[i].m);
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
@@ -437,11 +437,12 @@ struct test_case {
             double err = nmse(f1.data(), f2.data(), f1.size());
             if (err > ud->max_err) {
                 printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
-                //for (int i = 0; i < f1.size(); i++) {
-                //    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
-                //}
-                //printf("\n");
-                //exit(1);
+                printf("\n");
+                for (int i = 0; i < f1.size(); i++) {
+                    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
+                }
+                printf("\n");
+                exit(1);
                 ud->ok = false;
             }
             return true;
@@ -1459,8 +1460,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
 
     test_cases.emplace_back(new test_dup());
 
-    for (ggml_type type : all_types) {
-       test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
+    //for (ggml_type type : all_types) {
+    //   test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
+    //}
+
+    for (ggml_type type : { GGML_TYPE_Q4_1} ) {
+        for (int i = 0; i < 2048; ++i) {
+            test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {32, 1, 1, 1}));
+        }
     }
 
     test_cases.emplace_back(new test_cont());