
Commit d2871f8

Author: Bodhi Hu

remove MoE expert_weights_scale from llama.cpp

1 parent edc1630 · commit d2871f8

2 files changed: +1 −6 lines


src/llama-model.cpp

Lines changed: 0 additions & 5 deletions
@@ -412,7 +412,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     ml.get_key(LLM_KV_BLOCK_COUNT,       hparams.n_layer);
     ml.get_key(LLM_KV_EXPERT_COUNT,      hparams.n_expert,      false);
     ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false);
-    ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale, false);

     if (arch == LLM_ARCH_WAVTOKENIZER_DEC) {
         ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features);
@@ -3702,10 +3701,6 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: f_attention_scale = %f\n", __func__, hparams.f_attention_scale);
     }

-    if (arch == LLM_ARCH_LLAMA) {
-        LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
-    }
-
     vocab.print_info();
 }
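
For context, expert_weights_scale was read from optional GGUF metadata. A minimal sketch of the call pattern being deleted, assuming upstream llama.cpp's llama_model_loader::get_key semantics, where the third argument marks the key as optional so a missing key returns false instead of throwing:

    // Sketch only (assumption: upstream llama.cpp loader semantics).
    // With required == false, a missing LLM_KV_EXPERT_WEIGHTS_SCALE key
    // leaves the destination value untouched and get_key returns false.
    float expert_weights_scale = 0.0f; // default used when the key is absent
    const bool found = ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE,
                                  expert_weights_scale,
                                  /*required =*/ false);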

src/llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -1596,7 +1596,7 @@ struct llm_build_context {
                         nullptr,
                         n_expert, n_expert_used,
                         LLM_FFN_SILU, true,
-                        hparams.expert_weights_scale > 0, hparams.expert_weights_scale,
+                        false, 0.0,
                         LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
                         cb, il);
                 cb(cur, "ffn_moe_out", il);
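
The two replaced arguments correspond to the scale_w and w_scale parameters of llm_build_moe_ffn. A minimal sketch of how that helper applies them, assuming the upstream llama.cpp implementation; passing false, 0.0 therefore disables the extra scaling of the expert routing weights entirely:

    // Inside llm_build_moe_ffn (sketch, assuming upstream llama.cpp):
    // weights holds the routing probabilities of the selected experts.
    if (scale_w) {
        // multiply each selected expert's routing weight by w_scale
        weights = ggml_scale(ctx, weights, w_scale);
        cb(weights, "ffn_moe_weights_scaled", il);
    }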
