Skip to content

Commit 1ad18f8

Browse files
committed
Adjustments on attn_k
1 parent 8c2c03f commit 1ad18f8

File tree

1 file changed

+12
-14
lines changed

1 file changed

+12
-14
lines changed

src/llama.cpp

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15531,7 +15531,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1553115531
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
1553215532
}
1553315533
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
15534-
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_IQ3_XXS;
15534+
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
1553515535
}
1553615536
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
1553715537
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_IQ3_S;
@@ -15573,23 +15573,21 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1557315573
}
1557415574
else new_type = GGML_TYPE_Q8_0;
1557515575
}
15576-
else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) &&
15577-
(qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
15578-
new_type = GGML_TYPE_IQ1_M;
15579-
}
15580-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
15581-
new_type = GGML_TYPE_IQ2_XXS;
15576+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
15577+
if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_XS;
15578+
else new_type = GGML_TYPE_IQ2_XXS;
1558215579
}
1558315580
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) {
15584-
if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_XS;
15585-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL) new_type = GGML_TYPE_IQ2_XXS;
15581+
if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_S;
15582+
else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_IQ2_XS;
15583+
else new_type = GGML_TYPE_IQ2_XXS;
1558615584
}
15587-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
15588-
new_type = GGML_TYPE_IQ2_S;
15585+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
15586+
if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_S;
1558915587
}
15590-
else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) &&
15591-
(qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
15592-
new_type = GGML_TYPE_IQ3_XXS;
15588+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
15589+
if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ3_XXS;
15590+
else new_type = GGML_TYPE_IQ2_S;
1559315591
}
1559415592
else if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) &&
1559515593
(qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {

0 commit comments

Comments
 (0)