
Commit 644aa9f

Correction: tensor embeddings too small to quantize
IQ2_XS doesn't seem to work in this case, so back to IQ2_S.
1 parent 32f6ead commit 644aa9f

1 file changed (+2, -5 lines)


src/llama.cpp

Lines changed: 2 additions & 5 deletions
@@ -15964,11 +15964,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = GGML_TYPE_Q4_K;
             }
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
-            if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ2_XS;
-            else new_type = GGML_TYPE_IQ2_S;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
             new_type = GGML_TYPE_IQ2_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
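For clarity, below is a minimal, self-contained C++ sketch of what this branch decides after the commit. It is not the real llama_tensor_get_type: the enum identifiers and the quantization choice mirror the diff above, but the helper name (embedding_type_for) and its reduced parameter list are hypothetical. The point it illustrates is that the whole IQ1/IQ2 family now maps the embedding tensor to GGML_TYPE_IQ2_S, and the old n_vocab >= 127999 switch to GGML_TYPE_IQ2_XS is gone.

// Minimal sketch of the post-commit behavior (assumed helper, not llama.cpp code).
#include <cstdio>

enum ggml_type   { GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S, GGML_TYPE_Q4_K };
enum llama_ftype {
    LLAMA_FTYPE_MOSTLY_IQ1_XS, LLAMA_FTYPE_MOSTLY_IQ1_S,   LLAMA_FTYPE_MOSTLY_IQ1_M,
    LLAMA_FTYPE_MOSTLY_IQ1_XL, LLAMA_FTYPE_MOSTLY_IQ2_XXS, LLAMA_FTYPE_MOSTLY_IQ2_XS,
    LLAMA_FTYPE_OTHER,
};

// Before this commit the branch also consulted n_vocab and picked GGML_TYPE_IQ2_XS
// for vocabularies of 127999 tokens or more; that special case was removed, so the
// vocabulary size no longer influences the result here.
static ggml_type embedding_type_for(llama_ftype ftype) {
    if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S   || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
        ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
        return GGML_TYPE_IQ2_S; // single outcome for the whole IQ1/IQ2 family
    }
    return GGML_TYPE_Q4_K;      // placeholder fallthrough; the real function handles many more ftypes
}

int main() {
    // Large vocabulary or not, an IQ1_S quant now gets IQ2_S embeddings (enum value 1 here).
    printf("%d\n", embedding_type_for(LLAMA_FTYPE_MOSTLY_IQ1_S));
    return 0;
}

Per the commit message, the motivation is simply that IQ2_XS did not appear to work for these embedding tensors, so the large-vocabulary special case was dropped in favour of IQ2_S everywhere.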
