@@ -400,9 +400,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
400400; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
401401; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
402402; GFX12-NEXT: s_wait_kmcnt 0x0
403- ; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
403+ ; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
404404; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
405- ; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7 , s1
405+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0 , s1
406406; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
407407; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
408408; GFX12-NEXT: s_wait_alu 0xf1ff
@@ -438,9 +438,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
438438; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
439439; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
440440; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
441- ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
441+ ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
442442; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
443- ; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7 , s1
443+ ; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0 , s1
444444; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
445445; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
446446; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
@@ -531,9 +531,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
531531; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
532532; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
533533; GFX12-NEXT: s_wait_kmcnt 0x0
534- ; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
534+ ; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
535535; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
536- ; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7 , s1
536+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0 , s1
537537; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
538538; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
539539; GFX12-NEXT: s_wait_alu 0xf1ff
@@ -569,9 +569,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
569569; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
570570; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
571571; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
572- ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
572+ ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
573573; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
574- ; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7 , s1
574+ ; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0 , s1
575575; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
576576; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
577577; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
0 commit comments