Skip to content

Commit c6732bf

Browse files
committed
Bump output a bit for big models in IQ2 and IQ3
1 parent cce61d3 commit c6732bf

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

src/llama.cpp

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17601,23 +17601,22 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1760117601
else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
1760217602
else new_type = GGML_TYPE_IQ4_XS;
1760317603
}
17604-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
17605-
ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) {
17604+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
1760617605
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
1760717606
else if (qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ4_XS;
1760817607
else new_type = GGML_TYPE_Q4_K;
1760917608
}
17610-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
17609+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
1761117610
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17612-
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
17611+
else if (qs.model.hparams.n_vocab >= 127999 && qs.model.hparams.n_gqa() < 12) new_type = GGML_TYPE_Q4_K;
1761317612
else new_type = GGML_TYPE_Q5_K;
1761417613
}
17615-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
17616-
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17614+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
17615+
if (qs.model.hparams.n_expert >= 4 && qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q6_K;
1761717616
else new_type = GGML_TYPE_Q5_K;
1761817617
}
1761917618
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML) {
17620-
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17619+
if (qs.model.hparams.n_expert >= 4 && qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q6_K;
1762117620
else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_Q5_K;
1762217621
else new_type = GGML_TYPE_Q6_K;
1762317622
}

0 commit comments

Comments
 (0)