Skip to content

Commit d213b3c

Browse files
committed
AMDGPU: Add baseline tests for flat-may-alias private atomic expansions
1 parent 2c97d43 commit d213b3c

File tree

4 files changed

+17681
-0
lines changed

4 files changed

+17681
-0
lines changed

llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,29 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
7777
ret void
7878
}
7979

80+
; Test: flat-pointer `atomicrmw fadd` on a double where the result has no users
; and the function returns void.
;   %ptr  - generic `ptr` operand of the atomic.
;   %data - double value that is added.
; The atomicrmw carries `!noalias.addrspace !1` and
; `!amdgpu.no.fine.grained.memory !0`. The expected machine IR below is a single
; no-return FLAT_ATOMIC_ADD_F64 followed by S_ENDPGM 0.
; NOTE(review): the `_noprivate` suffix suggests !1 is meant to rule out the
; private address space - confirm against the metadata definition.
define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate(ptr %ptr, double %data) #0 {
81+
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate
82+
; GFX90A_GFX940: bb.0 (%ir-block.0):
83+
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
84+
; GFX90A_GFX940-NEXT: {{ $}}
85+
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
86+
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
87+
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
88+
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
89+
; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
90+
; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
91+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
92+
; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
93+
; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
94+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
95+
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
96+
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
97+
; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
98+
; GFX90A_GFX940-NEXT: S_ENDPGM 0
99+
; %ret has no users in this function; the expected output above uses the
; FLAT_ATOMIC_ADD_F64 form that defines no result register.
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
100+
ret void
101+
}
102+
80103
define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 {
81104
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
82105
; GFX90A_GFX940: bb.0 (%ir-block.0):
@@ -104,8 +127,36 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
104127
ret double %ret
105128
}
106129

130+
; Test: flat-pointer `atomicrmw fadd` on a double where the result of the atomic
; is returned from the function.
;   %ptr  - generic `ptr` operand of the atomic.
;   %data - double value that is added.
;   returns the value produced by the atomicrmw (%ret).
; The atomicrmw carries `!noalias.addrspace !1` and
; `!amdgpu.no.fine.grained.memory !0`. The expected machine IR below is a
; FLAT_ATOMIC_ADD_F64_RTN whose two 32-bit halves are copied to $sgpr0/$sgpr1
; before SI_RETURN_TO_EPILOG.
; NOTE(review): this name ends in `__noprivate` (two underscores), while the
; void-returning sibling test ends in `_noprivate` - confirm whether the
; difference is intentional; a rename must also update the -LABEL line.
define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw__noprivate(ptr %ptr, double %data) #0 {
131+
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw__noprivate
132+
; GFX90A_GFX940: bb.0 (%ir-block.0):
133+
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
134+
; GFX90A_GFX940-NEXT: {{ $}}
135+
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
136+
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
137+
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
138+
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
139+
; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
140+
; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
141+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
142+
; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
143+
; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
144+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
145+
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
146+
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
147+
; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
148+
; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
149+
; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
150+
; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]]
151+
; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]]
152+
; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
153+
; %ret is returned below, so the expected output above uses the _RTN form of
; the instruction, which defines a 64-bit result register.
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
154+
ret double %ret
155+
}
156+
107157
; Declaration of the flat fadd f64 intrinsic, taking (ptr, double) and returning
; double. No call to it appears in the part of the file visible in this diff.
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double)
108158

109159
; Attribute group #0, referenced by the test function definitions.
attributes #0 = { nounwind }
110160

111161
; Empty metadata node, used as the operand of !amdgpu.no.fine.grained.memory.
!0 = !{}
162+
; Operand of !noalias.addrspace on the *noprivate tests.
; NOTE(review): read as a half-open range [5, 6), this would exclude only
; address space 5 (presumably AMDGPU private) - confirm against the LangRef.
!1 = !{i32 5, i32 6}

0 commit comments

Comments
 (0)