|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
2 |
| -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX90A_GFX940 %s |
3 |
| -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX90A_GFX940 %s |
| 2 | +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX90A %s |
| 3 | +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX940 %s |
4 | 4 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s
|
5 | 5 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s
|
6 | 6 |
|
@@ -149,6 +149,190 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p
|
149 | 149 | }
|
150 | 150 |
|
151 | 151 | define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 {
|
| 152 | + ; GFX90A-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw |
| 153 | + ; GFX90A: bb.1 (%ir-block.0): |
| 154 | + ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) |
| 155 | + ; GFX90A-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 |
| 156 | + ; GFX90A-NEXT: {{ $}} |
| 157 | + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| 158 | + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 |
| 159 | + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 |
| 160 | + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| 161 | + ; GFX90A-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| 162 | + ; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE |
| 163 | + ; GFX90A-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec |
| 164 | + ; GFX90A-NEXT: S_BRANCH %bb.2 |
| 165 | + ; GFX90A-NEXT: {{ $}} |
| 166 | + ; GFX90A-NEXT: bb.2 (%ir-block.5): |
| 167 | + ; GFX90A-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) |
| 168 | + ; GFX90A-NEXT: {{ $}} |
| 169 | + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec |
| 170 | + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 |
| 171 | + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 |
| 172 | + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1 |
| 173 | + ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 |
| 174 | + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 |
| 175 | + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 |
| 176 | + ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 |
| 177 | + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] |
| 178 | + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] |
| 179 | + ; GFX90A-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY8]], [[COPY9]], implicit $exec |
| 180 | + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] |
| 181 | + ; GFX90A-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY10]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec |
| 182 | + ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 |
| 183 | + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] |
| 184 | + ; GFX90A-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec |
| 185 | + ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 |
| 186 | + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 187 | + ; GFX90A-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec |
| 188 | + ; GFX90A-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec |
| 189 | + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 190 | + ; GFX90A-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec |
| 191 | + ; GFX90A-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec |
| 192 | + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 193 | + ; GFX90A-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec |
| 194 | + ; GFX90A-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec |
| 195 | + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 196 | + ; GFX90A-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec |
| 197 | + ; GFX90A-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec |
| 198 | + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 199 | + ; GFX90A-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec |
| 200 | + ; GFX90A-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec |
| 201 | + ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 202 | + ; GFX90A-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec |
| 203 | + ; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec |
| 204 | + ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 205 | + ; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY18]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec |
| 206 | + ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63 |
| 207 | + ; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]] |
| 208 | + ; GFX90A-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] |
| 209 | + ; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY19]], implicit $exec |
| 210 | + ; GFX90A-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] |
| 211 | + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY20]], implicit $exec |
| 212 | + ; GFX90A-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec |
| 213 | + ; GFX90A-NEXT: S_BRANCH %bb.3 |
| 214 | + ; GFX90A-NEXT: {{ $}} |
| 215 | + ; GFX90A-NEXT: bb.3 (%ir-block.36): |
| 216 | + ; GFX90A-NEXT: successors: %bb.5(0x80000000) |
| 217 | + ; GFX90A-NEXT: {{ $}} |
| 218 | + ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| 219 | + ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) |
| 220 | + ; GFX90A-NEXT: S_BRANCH %bb.5 |
| 221 | + ; GFX90A-NEXT: {{ $}} |
| 222 | + ; GFX90A-NEXT: bb.4.Flow: |
| 223 | + ; GFX90A-NEXT: successors: %bb.6(0x80000000) |
| 224 | + ; GFX90A-NEXT: {{ $}} |
| 225 | + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %43, %bb.5, [[DEF]], %bb.1 |
| 226 | + ; GFX90A-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec |
| 227 | + ; GFX90A-NEXT: S_BRANCH %bb.6 |
| 228 | + ; GFX90A-NEXT: {{ $}} |
| 229 | + ; GFX90A-NEXT: bb.5 (%ir-block.39): |
| 230 | + ; GFX90A-NEXT: successors: %bb.4(0x80000000) |
| 231 | + ; GFX90A-NEXT: {{ $}} |
| 232 | + ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 |
| 233 | + ; GFX90A-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec |
| 234 | + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec |
| 235 | + ; GFX90A-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec |
| 236 | + ; GFX90A-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] |
| 237 | + ; GFX90A-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY21]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec |
| 238 | + ; GFX90A-NEXT: S_BRANCH %bb.4 |
| 239 | + ; GFX90A-NEXT: {{ $}} |
| 240 | + ; GFX90A-NEXT: bb.6 (%ir-block.46): |
| 241 | + ; GFX90A-NEXT: $vgpr0 = COPY [[PHI]] |
| 242 | + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 |
| 243 | + ; |
| 244 | + ; GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw |
| 245 | + ; GFX940: bb.1 (%ir-block.0): |
| 246 | + ; GFX940-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) |
| 247 | + ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 |
| 248 | + ; GFX940-NEXT: {{ $}} |
| 249 | + ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| 250 | + ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 |
| 251 | + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 |
| 252 | + ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| 253 | + ; GFX940-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| 254 | + ; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE |
| 255 | + ; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec |
| 256 | + ; GFX940-NEXT: S_BRANCH %bb.2 |
| 257 | + ; GFX940-NEXT: {{ $}} |
| 258 | + ; GFX940-NEXT: bb.2 (%ir-block.5): |
| 259 | + ; GFX940-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) |
| 260 | + ; GFX940-NEXT: {{ $}} |
| 261 | + ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec |
| 262 | + ; GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 |
| 263 | + ; GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 |
| 264 | + ; GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1 |
| 265 | + ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 |
| 266 | + ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 |
| 267 | + ; GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 |
| 268 | + ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 |
| 269 | + ; GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] |
| 270 | + ; GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] |
| 271 | + ; GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY8]], [[COPY9]], implicit $exec |
| 272 | + ; GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] |
| 273 | + ; GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY10]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec |
| 274 | + ; GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 |
| 275 | + ; GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] |
| 276 | + ; GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec |
| 277 | + ; GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 |
| 278 | + ; GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 279 | + ; GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec |
| 280 | + ; GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec |
| 281 | + ; GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 282 | + ; GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec |
| 283 | + ; GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec |
| 284 | + ; GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 285 | + ; GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec |
| 286 | + ; GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec |
| 287 | + ; GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 288 | + ; GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec |
| 289 | + ; GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec |
| 290 | + ; GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 291 | + ; GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec |
| 292 | + ; GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec |
| 293 | + ; GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 294 | + ; GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec |
| 295 | + ; GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec |
| 296 | + ; GFX940-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] |
| 297 | + ; GFX940-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY18]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec |
| 298 | + ; GFX940-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63 |
| 299 | + ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]] |
| 300 | + ; GFX940-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] |
| 301 | + ; GFX940-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY19]], implicit $exec |
| 302 | + ; GFX940-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] |
| 303 | + ; GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY20]], implicit $exec |
| 304 | + ; GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec |
| 305 | + ; GFX940-NEXT: S_BRANCH %bb.3 |
| 306 | + ; GFX940-NEXT: {{ $}} |
| 307 | + ; GFX940-NEXT: bb.3 (%ir-block.36): |
| 308 | + ; GFX940-NEXT: successors: %bb.5(0x80000000) |
| 309 | + ; GFX940-NEXT: {{ $}} |
| 310 | + ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| 311 | + ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) |
| 312 | + ; GFX940-NEXT: S_BRANCH %bb.5 |
| 313 | + ; GFX940-NEXT: {{ $}} |
| 314 | + ; GFX940-NEXT: bb.4.Flow: |
| 315 | + ; GFX940-NEXT: successors: %bb.6(0x80000000) |
| 316 | + ; GFX940-NEXT: {{ $}} |
| 317 | + ; GFX940-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %42, %bb.5, [[DEF]], %bb.1 |
| 318 | + ; GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec |
| 319 | + ; GFX940-NEXT: S_BRANCH %bb.6 |
| 320 | + ; GFX940-NEXT: {{ $}} |
| 321 | + ; GFX940-NEXT: bb.5 (%ir-block.39): |
| 322 | + ; GFX940-NEXT: successors: %bb.4(0x80000000) |
| 323 | + ; GFX940-NEXT: {{ $}} |
| 324 | + ; GFX940-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 |
| 325 | + ; GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec |
| 326 | + ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec |
| 327 | + ; GFX940-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec |
| 328 | + ; GFX940-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] |
| 329 | + ; GFX940-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY21]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec |
| 330 | + ; GFX940-NEXT: S_BRANCH %bb.4 |
| 331 | + ; GFX940-NEXT: {{ $}} |
| 332 | + ; GFX940-NEXT: bb.6 (%ir-block.46): |
| 333 | + ; GFX940-NEXT: $vgpr0 = COPY [[PHI]] |
| 334 | + ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 |
| 335 | + ; |
152 | 336 | ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
|
153 | 337 | ; GFX11: bb.1 (%ir-block.0):
|
154 | 338 | ; GFX11-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
|
|
0 commit comments