@@ -6398,8 +6398,10 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
6398
6398
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
6399
6399
; GFX8-NEXT: s_waitcnt vmcnt(0)
6400
6400
; GFX8-NEXT: v_mov_b32_e32 v5, v0
6401
- ; GFX8-NEXT: v_add_f16_sdwa v0, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
6401
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v5
6402
+ ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
6402
6403
; GFX8-NEXT: v_add_f16_e32 v1, v5, v2
6404
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6403
6405
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
6404
6406
; GFX8-NEXT: v_mov_b32_e32 v0, v4
6405
6407
; GFX8-NEXT: v_mov_b32_e32 v1, v5
@@ -6625,8 +6627,10 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fin
6625
6627
; GFX8-NEXT: .LBB20_1: ; %atomicrmw.start
6626
6628
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
6627
6629
; GFX8-NEXT: s_waitcnt vmcnt(0)
6628
- ; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
6630
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2
6631
+ ; GFX8-NEXT: v_add_f16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
6629
6632
; GFX8-NEXT: v_add_f16_e32 v4, v2, v0
6633
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6630
6634
; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
6631
6635
; GFX8-NEXT: v_mov_b32_e32 v5, v2
6632
6636
; GFX8-NEXT: v_mov_b32_e32 v4, v1
@@ -7044,7 +7048,9 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall
7044
7048
; GFX8-NEXT: ; =>This Loop Header: Depth=1
7045
7049
; GFX8-NEXT: ; Child Loop BB21_4 Depth 2
7046
7050
; GFX8-NEXT: s_waitcnt vmcnt(0)
7047
- ; GFX8-NEXT: v_add_f16_sdwa v4, v8, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7051
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v8
7052
+ ; GFX8-NEXT: v_add_f16_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7053
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
7048
7054
; GFX8-NEXT: v_add_f16_e32 v6, v8, v5
7049
7055
; GFX8-NEXT: v_or_b32_e32 v7, v6, v4
7050
7056
; GFX8-NEXT: v_mov_b32_e32 v6, v7
@@ -7390,8 +7396,10 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset(ptr addrsp
7390
7396
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
7391
7397
; GFX8-NEXT: s_waitcnt vmcnt(0)
7392
7398
; GFX8-NEXT: v_mov_b32_e32 v5, v0
7393
- ; GFX8-NEXT: v_add_f16_sdwa v0, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7399
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v5
7400
+ ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7394
7401
; GFX8-NEXT: v_add_f16_e32 v1, v5, v2
7402
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
7395
7403
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
7396
7404
; GFX8-NEXT: v_mov_b32_e32 v0, v4
7397
7405
; GFX8-NEXT: v_mov_b32_e32 v1, v5
@@ -7650,8 +7658,10 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset(ptr addrspace(
7650
7658
; GFX8-NEXT: .LBB23_1: ; %atomicrmw.start
7651
7659
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
7652
7660
; GFX8-NEXT: s_waitcnt vmcnt(0)
7653
- ; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7661
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2
7662
+ ; GFX8-NEXT: v_add_f16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7654
7663
; GFX8-NEXT: v_add_f16_e32 v4, v2, v0
7664
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
7655
7665
; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
7656
7666
; GFX8-NEXT: v_mov_b32_e32 v5, v2
7657
7667
; GFX8-NEXT: v_mov_b32_e32 v4, v1
@@ -7915,8 +7925,10 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
7915
7925
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
7916
7926
; GFX8-NEXT: s_waitcnt vmcnt(0)
7917
7927
; GFX8-NEXT: v_mov_b32_e32 v5, v0
7918
- ; GFX8-NEXT: v_add_f16_sdwa v0, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7928
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v5
7929
+ ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7919
7930
; GFX8-NEXT: v_add_f16_e32 v1, v5, v2
7931
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
7920
7932
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
7921
7933
; GFX8-NEXT: v_mov_b32_e32 v0, v4
7922
7934
; GFX8-NEXT: v_mov_b32_e32 v1, v5
@@ -8175,8 +8187,10 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_rem
8175
8187
; GFX8-NEXT: .LBB25_1: ; %atomicrmw.start
8176
8188
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
8177
8189
; GFX8-NEXT: s_waitcnt vmcnt(0)
8178
- ; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
8190
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2
8191
+ ; GFX8-NEXT: v_add_f16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
8179
8192
; GFX8-NEXT: v_add_f16_e32 v4, v2, v0
8193
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
8180
8194
; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
8181
8195
; GFX8-NEXT: v_mov_b32_e32 v5, v2
8182
8196
; GFX8-NEXT: v_mov_b32_e32 v4, v1
0 commit comments