RMSE-optimized quants for all quantization types

Kawrakow · Kawrakow · commit 284227284a7d · 2023-04-21T18:06:11.000+02:00
RMSE optimization is enabled by default. It can be turned off
by setting LLAMA_NO_RMSE.

With this option enabled, Q4_3 quantization results
in a perplexity of 6.0344, which is 0.0273 lower than with
simple Q4_3 quantization.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -68,6 +68,9 @@ option(LLAMA_ACCELERATE             "llama: enable Accelerate framework"
 option(LLAMA_OPENBLAS               "llama: use OpenBLAS"                                   OFF)
 option(LLAMA_CUBLAS                 "llama: use cuBLAS"                                     OFF)
 
+# RMSE minimization when quantizing
+option(LLAMA_NO_RMSE                "llama: disable RMSE minimization"                      OFF)
+
 option(LLAMA_BUILD_TESTS            "llama: build tests"    ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES         "llama: build examples" ${LLAMA_STANDALONE})
 
@@ -99,6 +102,10 @@ if (NOT MSVC)
     endif()
 endif()
 
+if (LLAMA_NO_RMSE)
+    add_compile_definitions(GGML_NO_RMSE)
+endif()
+
 if (APPLE AND LLAMA_ACCELERATE)
     find_library(ACCELERATE_FRAMEWORK Accelerate)
     if (ACCELERATE_FRAMEWORK)
diff --git a/Makefile b/Makefile
@@ -128,6 +128,10 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif
 
+ifdef LLAMA_NO_RMSE
+	CFLAGS += -DGGML_NO_RMSE
+endif
+
 #
 # Print build information
 #
diff --git a/ggml.c b/ggml.c

Original file line number	Diff line number	Diff line change
`@@ -128,6 +128,10 @@ ifneq ($(filter armv8%,$(UNAME_M)),)`
`128`	`128`	`CFLAGS += -mfp16-format=ieee -mno-unaligned-access`
`129`	`129`	`endif`
`130`	`130`
	`131`	`+ifdef LLAMA_NO_RMSE`
	`132`	`+ CFLAGS += -DGGML_NO_RMSE`
	`133`	`+endif`
	`134`	`+`
`131`	`135`	`#`
`132`	`136`	`# Print build information`
`133`	`137`	`#`