Skip to content

Commit dac2ad7

Browse files
Fix Qwen3OmniMoE weight init (#42531)
* module.router -> module.gate
* i am not smart today
1 parent 063b431 commit dac2ad7

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def _init_weights(self, module):
 84  84           if isinstance(module, Qwen3OmniMoeThinkerTextSparseMoeBlock):
 85  85               init.normal_(module.experts.gate_up_proj, mean=0.0, std=std)
 86  86               init.normal_(module.experts.down_proj, mean=0.0, std=std)
 87     -             init.normal_(module.router.weight, mean=0.0, std=std)
     87 +             init.normal_(module.gate.weight, mean=0.0, std=std)
 88  88
 89  89
 90  90   def _get_feat_extract_output_lengths(input_lengths):

src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ def _init_weights(self, module):
899 899           if isinstance(module, Qwen3OmniMoeThinkerTextSparseMoeBlock):
900 900               init.normal_(module.experts.gate_up_proj, mean=0.0, std=std)
901 901               init.normal_(module.experts.down_proj, mean=0.0, std=std)
902     -             init.normal_(module.router.weight, mean=0.0, std=std)
    902 +             init.normal_(module.gate.weight, mean=0.0, std=std)
903 903
904 904
905 905   class Qwen3OmniMoePreTrainedModelForConditionalGeneration(Qwen2_5OmniPreTrainedModelForConditionalGeneration):

0 commit comments

Comments
 (0)