Skip to content

Commit 65cb8fa

Browse files
authored
[Qwen3VL] fix: hidden_states in place modification error (#41535)
```
  File "transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 941, in forward
    hidden_states = self._deepstack_process(
                    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 960, in _deepstack_process
    hidden_states[visual_pos_masks, :] = local_this
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Output 0 of SliceBackward0 is a view and is being modified inplace. This view was created inside a custom Function (or because an input was returned as-is) and the autograd logic to handle view+inplace would override the custom backward associated with the custom Function, leading to incorrect gradients. This behavior is forbidden. You can fix this by cloning the output of the custom Function.
```

Signed-off-by: Hollow Man <[email protected]>
1 parent 3927ffe commit 65cb8fa

File tree

4 files changed

+10
-5
lines changed

4 files changed

+10
-5
lines changed

src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1694,7 +1694,8 @@ def _deepstack_process(self, hidden_states, visual_pos_masks, visual_embeds):
16941694
visual_pos_masks = visual_pos_masks[..., 0]
16951695
visual_pos_masks = visual_pos_masks.to(hidden_states.device)
16961696
visual_embeds = visual_embeds.to(hidden_states.device, hidden_states.dtype)
1697-
local_this = hidden_states[visual_pos_masks, :].clone() + visual_embeds
1697+
hidden_states = hidden_states.clone()
1698+
local_this = hidden_states[visual_pos_masks, :] + visual_embeds
16981699
hidden_states[visual_pos_masks, :] = local_this
16991700
return hidden_states
17001701

@@ -2888,7 +2889,8 @@ def _deepstack_process(
28882889
):
28892890
visual_pos_masks = visual_pos_masks.to(hidden_states.device)
28902891
visual_embeds = visual_embeds.to(hidden_states.device, hidden_states.dtype)
2891-
local_this = hidden_states[visual_pos_masks, :].clone() + visual_embeds
2892+
hidden_states = hidden_states.clone()
2893+
local_this = hidden_states[visual_pos_masks, :] + visual_embeds
28922894
hidden_states[visual_pos_masks, :] = local_this
28932895
return hidden_states
28942896

src/transformers/models/qwen3_vl/modeling_qwen3_vl.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,8 @@ def _deepstack_process(
876876
):
877877
visual_pos_masks = visual_pos_masks.to(hidden_states.device)
878878
visual_embeds = visual_embeds.to(hidden_states.device, hidden_states.dtype)
879-
local_this = hidden_states[visual_pos_masks, :].clone() + visual_embeds
879+
hidden_states = hidden_states.clone()
880+
local_this = hidden_states[visual_pos_masks, :] + visual_embeds
880881
hidden_states[visual_pos_masks, :] = local_this
881882
return hidden_states
882883

src/transformers/models/qwen3_vl/modular_qwen3_vl.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,8 @@ def _deepstack_process(
746746
):
747747
visual_pos_masks = visual_pos_masks.to(hidden_states.device)
748748
visual_embeds = visual_embeds.to(hidden_states.device, hidden_states.dtype)
749-
local_this = hidden_states[visual_pos_masks, :].clone() + visual_embeds
749+
hidden_states = hidden_states.clone()
750+
local_this = hidden_states[visual_pos_masks, :] + visual_embeds
750751
hidden_states[visual_pos_masks, :] = local_this
751752
return hidden_states
752753

src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,8 @@ def _deepstack_process(
957957
):
958958
visual_pos_masks = visual_pos_masks.to(hidden_states.device)
959959
visual_embeds = visual_embeds.to(hidden_states.device, hidden_states.dtype)
960-
local_this = hidden_states[visual_pos_masks, :].clone() + visual_embeds
960+
hidden_states = hidden_states.clone()
961+
local_this = hidden_states[visual_pos_masks, :] + visual_embeds
961962
hidden_states[visual_pos_masks, :] = local_this
962963
return hidden_states
963964

0 commit comments

Comments
 (0)