
Commit 644aa9f

Correction: tensor embeddings too small to quantize
IQ2_XS doesn't seem to work in this case, so back to IQ2_S.
1 parent 32f6ead commit 644aa9f

1 file changed (+2, -5 lines)


src/llama.cpp

Lines changed: 2 additions & 5 deletions
@@ -15964,11 +15964,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = GGML_TYPE_Q4_K;
             }
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
-            if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ2_XS;
-            else new_type = GGML_TYPE_IQ2_S;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
             new_type = GGML_TYPE_IQ2_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
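For clarity, below is a minimal, self-contained C++ sketch of what this branch decides after the commit. It is not the real llama_tensor_get_type: the enum identifiers and the quantization choice mirror the diff above, but the helper name (embedding_type_for) and its reduced parameter list are hypothetical. The point it illustrates is that the whole IQ1/IQ2 family now maps the embedding tensor to GGML_TYPE_IQ2_S, and the old n_vocab >= 127999 switch to GGML_TYPE_IQ2_XS is gone.

// Minimal sketch of the post-commit behavior (assumed helper, not llama.cpp code).
#include <cstdio>

enum ggml_type   { GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S, GGML_TYPE_Q4_K };
enum llama_ftype {
    LLAMA_FTYPE_MOSTLY_IQ1_XS, LLAMA_FTYPE_MOSTLY_IQ1_S,   LLAMA_FTYPE_MOSTLY_IQ1_M,
    LLAMA_FTYPE_MOSTLY_IQ1_XL, LLAMA_FTYPE_MOSTLY_IQ2_XXS, LLAMA_FTYPE_MOSTLY_IQ2_XS,
    LLAMA_FTYPE_OTHER,
};

// Before this commit the branch also consulted n_vocab and picked GGML_TYPE_IQ2_XS
// for vocabularies of 127999 tokens or more; that special case was removed, so the
// vocabulary size no longer influences the result here.
static ggml_type embedding_type_for(llama_ftype ftype) {
    if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S   || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
        ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
        return GGML_TYPE_IQ2_S; // single outcome for the whole IQ1/IQ2 family
    }
    return GGML_TYPE_Q4_K;      // placeholder fallthrough; the real function handles many more ftypes
}

int main() {
    // Large vocabulary or not, an IQ1_S quant now gets IQ2_S embeddings (enum value 1 here).
    printf("%d\n", embedding_type_for(LLAMA_FTYPE_MOSTLY_IQ1_S));
    return 0;
}

Per the commit message, the motivation is simply that IQ2_XS did not appear to work for these embedding tensors, so the large-vocabulary special case was dropped in favour of IQ2_S everywhere.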
