Skip to content

Commit b302561

Browse files
committed
IQ3_UXL for test
1 parent 8c6e408 commit b302561

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

src/llama.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18744,7 +18744,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18744 18744
}
18745 18745
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
18746 18746
if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
18747-
new_type = difquant_seven_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_S;
18747+
new_type = difquant_first_last_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
18748 18748
else new_type = GGML_TYPE_Q3_K;
18749 18749
}
18750 18750
// else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {
@@ -18933,7 +18933,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18933 18933
}
18934 18934
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
18935 18935
if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
18936-
new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
18936+
new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ4_XS;
18937 18937
else new_type = GGML_TYPE_IQ4_XS;
18938 18938
}
18939 18939
else if (i_layer < n_layer/8 && (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) && !qs.has_imatrix) {
@@ -19133,11 +19133,11 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
19133 19133
// new_type = GGML_TYPE_Q3_K;
19134 19134
// else new_type = difquant_seven_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19135 19135
// }
19136-
// else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
19137-
// if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
19138-
// new_type = difquant_seven_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19139-
// else new_type = GGML_TYPE_Q3_K;
19140-
// }
19136+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
19137+
if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
19138+
new_type = difquant_seven_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ4_XS;
19139+
else new_type = GGML_TYPE_Q3_K;
19140+
}
19141 19141
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {
19142 19142
if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) {
19143 19143
new_type = qs.i_attention_wq < qs.n_attention_wq/8 ? GGML_TYPE_Q5_K :
new_type = qs.i_attention_wq < qs.n_attention_wq/8 ? GGML_TYPE_Q5_K :
@@ -19496,7 +19496,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
19496 19496
}
19497 19497
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
19498 19498
if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
19499-
new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19499+
new_type = (difquant_first_last_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19500 19500
else new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19501 19501
}
19502 19502
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {
@@ -19640,7 +19640,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
19640 19640
}
19641 19641
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
19642 19642
if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
19643-
new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19643+
new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ4_XS;
19644 19644
else new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19645 19645
}
19646 19646
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {

0 commit comments

Comments
 (0)