Skip to content

Commit 2842272

Browse files
committed
RMSE-optimized quants for all quantization types
RMSE minimization is enabled by default; it can be turned off by setting the LLAMA_NO_RMSE build option (which itself defaults to OFF). With RMSE minimization enabled, Q4_3 quantization yields a perplexity of 6.0344, which is 0.0273 lower than with simple Q4_3 quantization.
1 parent 8687c1f commit 2842272

File tree

3 files changed

+287
-81
lines changed

3 files changed

+287
-81
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ option(LLAMA_ACCELERATE "llama: enable Accelerate framework"
6868
option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
6969
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
7070

71+
# RMSE minimization when quantizing
72+
option(LLAMA_NO_RMSE "llama: disable RMSE minimization" OFF)
73+
7174
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7275
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
7376

@@ -99,6 +102,10 @@ if (NOT MSVC)
99102
endif()
100103
endif()
101104

105+
if (LLAMA_NO_RMSE)
106+
add_compile_definitions(GGML_NO_RMSE)
107+
endif()
108+
102109
if (APPLE AND LLAMA_ACCELERATE)
103110
find_library(ACCELERATE_FRAMEWORK Accelerate)
104111
if (ACCELERATE_FRAMEWORK)

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
128128
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
129129
endif
130130

131+
ifdef LLAMA_NO_RMSE
132+
CFLAGS += -DGGML_NO_RMSE
133+
endif
134+
131135
#
132136
# Print build information
133137
#

0 commit comments

Comments
 (0)