Commit eb190c6

Address PR feedback
1 parent fe3b812 commit eb190c6

2 files changed: 10 additions, 8 deletions


convert_hf_to_gguf.py

Lines changed: 9 additions & 7 deletions
@@ -8837,7 +8837,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
 
 @ModelBase.register("Lfm2MoeForCausalLM")
-class LFM2MOEModel(TextModel):
+class LFM2MoeModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LFM2MOE
 
     def set_gguf_parameters(self):
@@ -8865,18 +8865,20 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if 'conv.conv' in name:
             data_torch = data_torch.squeeze(1)
 
+        if name.endswith(".expert_bias"):
+            name = name.replace(".expert_bias", ".expert_bias.bias")
+
         # merge expert weights
         if 'experts' in name:
             n_experts = self.hparams["num_experts"]
             assert bid is not None
 
-            if bid not in self._experts_cache:
-                self._experts_cache[bid] = {}
-            self._experts_cache[bid][name] = data_torch
+            expert_cache = self._experts_cache.setdefault(bid, {})
+            expert_cache[name] = data_torch
             expert_weights = ["w1", "w2", "w3"]
 
             # not enough expert weights to merge
-            if len(self._experts_cache[bid]) < n_experts * len(expert_weights):
+            if len(expert_cache) < n_experts * len(expert_weights):
                 return []
 
             tensors: list[tuple[str, Tensor]] = []
@@ -8885,8 +8887,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
                 for xid in range(n_experts):
                     ename = f"model.layers.{bid}.feed_forward.experts.{xid}.{w_name}.weight"
-                    datas.append(self._experts_cache[bid][ename])
-                    del self._experts_cache[bid][ename]
+                    datas.append(expert_cache[ename])
+                    del expert_cache[ename]
 
                 data_torch = torch.stack(datas, dim=0)
                 merged_name = f"layers.{bid}.feed_forward.experts.{w_name}.weight"
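Note on the caching change above: the explicit "if bid not in ... / create / assign" sequence is replaced by dict.setdefault, which creates the per-layer dict only when it is missing and always returns the stored one. A minimal, self-contained sketch of the pattern (toy strings stand in for tensors; the helper name is made up for illustration, it is not part of the commit):

# Hypothetical sketch of the setdefault-based expert cache used above;
# plain strings replace torch tensors so it runs without dependencies.
_experts_cache: dict[int, dict[str, str]] = {}

def cache_expert_tensor(bid: int, name: str, tensor: str) -> None:
    # setdefault(bid, {}) inserts an empty dict only if bid is new and
    # returns the same stored dict on every later call for that bid.
    expert_cache = _experts_cache.setdefault(bid, {})
    expert_cache[name] = tensor

cache_expert_tensor(0, "experts.0.w1.weight", "t0")
cache_expert_tensor(0, "experts.1.w1.weight", "t1")
assert _experts_cache[0] == {"experts.0.w1.weight": "t0", "experts.1.w1.weight": "t1"}

Because setdefault returns the live dict rather than a copy, the later length check and the per-expert deletions in the merge loop still operate on self._experts_cache[bid] itself, so behaviour matches the removed three-line version.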

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
@@ -5853,7 +5853,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, hparams.n_ff_exp, n_expert}, 0);
             layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {hparams.n_ff_exp, n_embd, n_expert}, 0);
             layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, hparams.n_ff_exp, n_expert}, 0);
-            layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, i), {n_expert}, 0);
+            layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert}, 0);
         } else { // dense
             layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
             layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0);
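Why this one-line change pairs with the ".expert_bias" rename in convert_hf_to_gguf.py: the loader now builds the expected tensor name with a "bias" suffix via tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), so the converter must export the tensor under a ".bias"-suffixed name as well. A hedged Python sketch of that handshake; the BASE_NAMES dict, the map_tensor_name helper, and the "blk.0.exp_probs_b" base name are illustrative stand-ins for gguf-py's TensorNameMap, not the real implementation:

# Hypothetical suffix-aware name mapping: strip a known suffix, map the
# base name, then re-append the suffix. Renaming the HF tensor so it ends
# in ".bias" is what makes the exported GGUF name end in ".bias" too.
BASE_NAMES = {"model.layers.0.feed_forward.expert_bias": "blk.0.exp_probs_b"}  # illustrative only

def map_tensor_name(name: str, suffixes: tuple[str, ...] = (".weight", ".bias")) -> str:
    for suffix in suffixes:
        if name.endswith(suffix):
            base = name.removesuffix(suffix)
            if base in BASE_NAMES:
                return BASE_NAMES[base] + suffix
    raise ValueError(f"cannot map tensor name {name!r}")

hf_name = "model.layers.0.feed_forward.expert_bias"
hf_name = hf_name.replace(".expert_bias", ".expert_bias.bias")  # the rename from this commit
print(map_tensor_name(hf_name))  # -> "blk.0.exp_probs_b.bias", matching the loader's "bias"-suffixed lookup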
