@@ -17601,23 +17601,22 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
17601
17601
else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
17602
17602
else new_type = GGML_TYPE_IQ4_XS;
17603
17603
}
17604
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
17605
- ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) {
17604
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
17606
17605
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17607
17606
else if (qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ4_XS;
17608
17607
else new_type = GGML_TYPE_Q4_K;
17609
17608
}
17610
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ) {
17609
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ) {
17611
17610
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17612
- else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
17611
+ else if (qs.model.hparams.n_vocab >= 127999 && qs.model.hparams.n_gqa() < 12 ) new_type = GGML_TYPE_Q4_K;
17613
17612
else new_type = GGML_TYPE_Q5_K;
17614
17613
}
17615
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
17616
- if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17614
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
17615
+ if (qs.model.hparams.n_expert >= 4 && qs.model.hparams.n_gqa() >= 12 ) new_type = GGML_TYPE_Q6_K;
17617
17616
else new_type = GGML_TYPE_Q5_K;
17618
17617
}
17619
17618
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML) {
17620
- if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17619
+ if (qs.model.hparams.n_expert >= 4 && qs.model.hparams.n_gqa() >= 12 ) new_type = GGML_TYPE_Q6_K;
17621
17620
else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_Q5_K;
17622
17621
else new_type = GGML_TYPE_Q6_K;
17623
17622
}
0 commit comments