
Commit 380b53d

Fix IQ4_XSR
1 parent 6081085 commit 380b53d
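
This change fixes the same bug in five branches of llama_tensor_get_type: an else branch evaluated a difquant_* ternary but never assigned its result, so new_type silently kept its previous value and the intended higher-precision quantization type was never applied. A minimal, self-contained C++ sketch of that pattern is shown here; the enum, helper, and values are illustrative stand-ins, not the llama.cpp code itself.

#include <cstdio>

enum sketch_type { TYPE_Q5_K, TYPE_Q6_K };

// Hypothetical stand-in for a difquant_* predicate: selects the first 3/8 of tensors.
static bool three_eighths(int i, int n) { return i < 3 * n / 8; }

int main() {
    int i = 1, n = 8;
    sketch_type new_type = TYPE_Q5_K;

    // Buggy pattern (pre-fix): the ternary is evaluated and its value discarded,
    // so new_type is never upgraded. Compilers typically warn with -Wunused-value.
    three_eighths(i, n) ? TYPE_Q6_K : TYPE_Q5_K;

    // Fixed pattern (what this commit does): assign the result so the choice takes effect.
    new_type = three_eighths(i, n) ? TYPE_Q6_K : TYPE_Q5_K;

    std::printf("new_type = %s\n", new_type == TYPE_Q6_K ? "Q6_K" : "Q5_K");
    return 0;
}

In the diff below, the fix is simply restoring the missing new_type = assignment on each of the five else branches.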

File tree

1 file changed (+5, -5)


src/llama.cpp

Lines changed: 5 additions & 5 deletions
@@ -16567,7 +16567,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = qs.i_attention_wv < qs.n_attention_wv/8 ? GGML_TYPE_Q6_K :
                     difquant_fl_more_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
             }
-            else difquant_three_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
+            else new_type = difquant_three_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
         }
         ++qs.i_attention_wv;
     } else if (name.find("attn_k.weight") != std::string::npos) {
@@ -16710,7 +16710,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = qs.i_attention_wq < qs.n_attention_wq/8 ? GGML_TYPE_IQ4_XS :
                     difquant_first_last_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
             }
-            else difquant_fl_more_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+            else new_type = difquant_fl_more_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
         }
         ++qs.i_attention_wq;
     } else if (name.find("attn_output.weight") != std::string::npos) {
@@ -16737,7 +16737,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = qs.i_attention_wq < qs.n_attention_wq/8 ? GGML_TYPE_Q5_K :
                     difquant_first_last_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
             }
-            else difquant_three_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
+            else new_type = difquant_three_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
         }
         else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) {
             if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M)
@@ -16787,7 +16787,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = qs.i_attention_wv < qs.n_attention_wv/8 ? GGML_TYPE_Q5_K :
                     difquant_fl_more_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
             }
-            else difquant_three_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
+            else new_type = difquant_three_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
         }
         ++qs.i_attention_wv;
     } else if (name.find("ffn_gate") != std::string::npos) {
@@ -16983,7 +16983,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = (difquant_first_last_tensors(i_layer, n_layer)) ? GGML_TYPE_Q5_K :
                     difquant_fl_more_tensors(i_layer, n_layer) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
             }
-            else difquant_three_eights_tensors(i_layer, n_layer) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
+            else new_type = difquant_three_eights_tensors(i_layer, n_layer) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)

0 commit comments
