@@ -15531,7 +15531,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
15531
15531
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
15532
15532
}
15533
15533
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
15534
- new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_IQ3_XXS;
15534
+ new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
15535
15535
}
15536
15536
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
15537
15537
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_IQ3_S;
@@ -15573,23 +15573,21 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
15573
15573
}
15574
15574
else new_type = GGML_TYPE_Q8_0;
15575
15575
}
15576
- else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) &&
15577
- (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
15578
- new_type = GGML_TYPE_IQ1_M;
15579
- }
15580
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
15581
- new_type = GGML_TYPE_IQ2_XXS;
15576
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
15577
+ if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_XS;
15578
+ else new_type = GGML_TYPE_IQ2_XXS;
15582
15579
}
15583
15580
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) {
15584
- if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_XS;
15585
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL) new_type = GGML_TYPE_IQ2_XXS;
15581
+ if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_S;
15582
+ else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_IQ2_XS;
15583
+ else new_type = GGML_TYPE_IQ2_XXS;
15586
15584
}
15587
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ) {
15588
- new_type = GGML_TYPE_IQ2_S;
15585
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
15586
+ if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_S;
15589
15587
}
15590
- else if (( ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) &&
15591
- (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
15592
- new_type = GGML_TYPE_IQ3_XXS ;
15588
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
15589
+ if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ3_XXS;
15590
+ else new_type = GGML_TYPE_IQ2_S ;
15593
15591
}
15594
15592
else if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) &&
15595
15593
(qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
0 commit comments