@@ -77,6 +77,29 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
77
77
ret void
78
78
}
79
79
80
+ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate (ptr %ptr , double %data ) #0 {
81
+ ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate
82
+ ; GFX90A_GFX940: bb.0 (%ir-block.0):
83
+ ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
84
+ ; GFX90A_GFX940-NEXT: {{ $}}
85
+ ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
86
+ ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
87
+ ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
88
+ ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
89
+ ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
90
+ ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
91
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
92
+ ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
93
+ ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
94
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
95
+ ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
96
+ ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
97
+ ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
98
+ ; GFX90A_GFX940-NEXT: S_ENDPGM 0
99
+ %ret = atomicrmw fadd ptr %ptr , double %data syncscope("wavefront" ) monotonic , !noalias.addrspace !1 , !amdgpu.no.fine.grained.memory !0
100
+ ret void
101
+ }
102
+
80
103
define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw (ptr %ptr , double %data ) #0 {
81
104
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
82
105
; GFX90A_GFX940: bb.0 (%ir-block.0):
@@ -104,8 +127,36 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
104
127
ret double %ret
105
128
}
106
129
130
+ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw__noprivate (ptr %ptr , double %data ) #0 {
131
+ ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw__noprivate
132
+ ; GFX90A_GFX940: bb.0 (%ir-block.0):
133
+ ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
134
+ ; GFX90A_GFX940-NEXT: {{ $}}
135
+ ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
136
+ ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
137
+ ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
138
+ ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
139
+ ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
140
+ ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
141
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
142
+ ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
143
+ ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
144
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
145
+ ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
146
+ ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
147
+ ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
148
+ ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
149
+ ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
150
+ ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]]
151
+ ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]]
152
+ ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
153
+ %ret = atomicrmw fadd ptr %ptr , double %data syncscope("wavefront" ) monotonic , !noalias.addrspace !1 , !amdgpu.no.fine.grained.memory !0
154
+ ret double %ret
155
+ }
156
+
107
157
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64 (ptr , double )
108
158
109
159
attributes #0 = { nounwind }
110
160
111
161
!0 = !{}
162
+ !1 = !{i32 5 , i32 6 }
0 commit comments