@@ -8837,7 +8837,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
88378837
88388838
88398839@ModelBase .register ("Lfm2MoeForCausalLM" )
8840- class LFM2MOEModel (TextModel ):
8840+ class LFM2MoeModel (TextModel ):
88418841 model_arch = gguf .MODEL_ARCH .LFM2MOE
88428842
88438843 def set_gguf_parameters (self ):
@@ -8865,18 +8865,20 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
88658865 if 'conv.conv' in name :
88668866 data_torch = data_torch .squeeze (1 )
88678867
8868+ if name .endswith (".expert_bias" ):
8869+ name = name .replace (".expert_bias" , ".expert_bias.bias" )
8870+
88688871 # merge expert weights
88698872 if 'experts' in name :
88708873 n_experts = self .hparams ["num_experts" ]
88718874 assert bid is not None
88728875
8873- if bid not in self ._experts_cache :
8874- self ._experts_cache [bid ] = {}
8875- self ._experts_cache [bid ][name ] = data_torch
8876+ expert_cache = self ._experts_cache .setdefault (bid , {})
8877+ expert_cache [name ] = data_torch
88768878 expert_weights = ["w1" , "w2" , "w3" ]
88778879
88788880 # not enough expert weights to merge
8879- if len (self . _experts_cache [ bid ] ) < n_experts * len (expert_weights ):
8881+ if len (expert_cache ) < n_experts * len (expert_weights ):
88808882 return []
88818883
88828884 tensors : list [tuple [str , Tensor ]] = []
@@ -8885,8 +8887,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
88858887
88868888 for xid in range (n_experts ):
88878889 ename = f"model.layers.{ bid } .feed_forward.experts.{ xid } .{ w_name } .weight"
8888- datas .append (self . _experts_cache [ bid ] [ename ])
8889- del self . _experts_cache [ bid ] [ename ]
8890+ datas .append (expert_cache [ename ])
8891+ del expert_cache [ename ]
88908892
88918893 data_torch = torch .stack (datas , dim = 0 )
88928894 merged_name = f"layers.{ bid } .feed_forward.experts.{ w_name } .weight"
0 commit comments