@@ -16275,8 +16275,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
16275
16275
if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q4_K;
16276
16276
}
16277
16277
++qs.i_attention_wo;
16278
- }
16279
- else if (name.find("attn_qkv.weight") != std::string::npos) {
16278
+ } else if (name.find("attn_qkv.weight") != std::string::npos) {
16280
16279
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
16281
16280
new_type = GGML_TYPE_Q3_K;
16282
16281
}
@@ -16300,8 +16299,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
16300
16299
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) new_type = GGML_TYPE_Q4_K;
16301
16300
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) new_type = GGML_TYPE_Q5_K;
16302
16301
++qs.i_attention_wv;
16303
- }
16304
- else if (name.find("ffn_gate") != std::string::npos) {
16302
+ } else if (name.find("ffn_gate") != std::string::npos) {
16305
16303
auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
16306
16304
int i_layer = info.first, n_layer = info.second;
16307
16305
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
@@ -16319,8 +16317,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
16319
16317
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ4_XS;
16320
16318
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ4_XS;
16321
16319
++qs.i_ffn_gate;
16322
- }
16323
- else if (name.find("ffn_up") != std::string::npos) {
16320
+ } else if (name.find("ffn_up") != std::string::npos) {
16324
16321
auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
16325
16322
int i_layer = info.first, n_layer = info.second;
16326
16323
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
0 commit comments