Skip to content

Commit dd5af89

Browse files
authored
[AMDGPU] Mark S_NOP as having side effects (#65745)
This prevents S_NOP from being rescheduled past other (side-effecting) instructions, which is useful because it is generally used to introduce a short delay or to avoid hazards. Currently this only affects MIR tests because the compiler itself only inserts nops in PostRAHazardRecognizer, which runs after all scheduling.
1 parent e067b0e commit dd5af89

7 files changed

+167
-94
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1161,7 +1161,9 @@ multiclass SOPP_With_Relaxation <string opName, dag ins,
11611161
def _pad_s_nop : SOPP_Pseudo <opName # "_pad_s_nop", ins, asmOps, pattern, " ", opName>;
11621162
}
11631163

1164-
def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16">;
1164+
def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16"> {
1165+
let hasSideEffects = 1;
1166+
}
11651167

11661168
let isTerminator = 1 in {
11671169
def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins Endpgm:$simm16), "$simm16", [], ""> {

llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@
7272
; GFX908-NEXT: ; implicit-def: $vgpr59
7373
; GFX908-NEXT: ; implicit-def: $vgpr60
7474
; GFX908-NEXT: ; implicit-def: $vgpr61
75-
; GFX908-NEXT: s_nop 0
76-
; GFX908-NEXT: s_nop 0
75+
; GFX908-NEXT: s_nop 1
7776
; GFX908-NEXT: v_accvgpr_write_b32 a64, v63
7877
; GFX908-NEXT: v_accvgpr_read_b32 v63, a97
7978
; GFX908-NEXT: s_nop 1
@@ -214,6 +213,7 @@
214213
; GFX908-NEXT: v_accvgpr_read_b32 v63, a111 ; Reload Reuse
215214
; GFX908-NEXT: s_nop 1
216215
; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
216+
; GFX908-NEXT: s_nop 0
217217
;
218218
; GFX90A-LABEL: test_spill:
219219
; GFX90A: ; %bb.0:

llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,9 +343,41 @@ body: |
343343
; GCN: bb.0:
344344
; GCN-NEXT: successors: %bb.1(0x80000000)
345345
; GCN-NEXT: {{ $}}
346+
; GCN-NEXT: S_NOP 0
347+
; GCN-NEXT: S_NOP 0
348+
; GCN-NEXT: S_NOP 0
349+
; GCN-NEXT: S_NOP 0
350+
; GCN-NEXT: S_NOP 0
351+
; GCN-NEXT: S_NOP 0
352+
; GCN-NEXT: S_NOP 0
353+
; GCN-NEXT: S_NOP 0
354+
; GCN-NEXT: S_NOP 0
355+
; GCN-NEXT: S_NOP 0
356+
; GCN-NEXT: S_NOP 0
357+
; GCN-NEXT: S_NOP 0
358+
; GCN-NEXT: S_NOP 0
359+
; GCN-NEXT: S_NOP 0
360+
; GCN-NEXT: S_NOP 0
361+
; GCN-NEXT: S_NOP 0
362+
; GCN-NEXT: S_NOP 0
363+
; GCN-NEXT: S_NOP 0
364+
; GCN-NEXT: S_NOP 0
365+
; GCN-NEXT: S_NOP 0
366+
; GCN-NEXT: S_NOP 0
367+
; GCN-NEXT: S_NOP 0
368+
; GCN-NEXT: S_NOP 0
369+
; GCN-NEXT: S_NOP 0
370+
; GCN-NEXT: S_NOP 0
371+
; GCN-NEXT: S_NOP 0
372+
; GCN-NEXT: S_NOP 0
373+
; GCN-NEXT: S_NOP 0
374+
; GCN-NEXT: S_NOP 0
375+
; GCN-NEXT: S_NOP 0
346376
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
347377
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
348378
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
379+
; GCN-NEXT: S_NOP 0
380+
; GCN-NEXT: S_NOP 0
349381
; GCN-NEXT: {{ $}}
350382
; GCN-NEXT: bb.1:
351383
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
@@ -406,9 +438,12 @@ body: |
406438
; GCN: bb.0:
407439
; GCN-NEXT: successors: %bb.1(0x80000000)
408440
; GCN-NEXT: {{ $}}
441+
; GCN-NEXT: S_NOP 0, implicit-def $vcc
409442
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
410443
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
411444
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
445+
; GCN-NEXT: S_NOP 0
446+
; GCN-NEXT: S_NOP 0
412447
; GCN-NEXT: {{ $}}
413448
; GCN-NEXT: bb.1:
414449
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
@@ -516,6 +551,34 @@ body: |
516551
; GCN-LABEL: name: vcc_liveness_dbg_value_search_after
517552
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
518553
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
554+
; GCN-NEXT: S_NOP 0
555+
; GCN-NEXT: S_NOP 0
556+
; GCN-NEXT: S_NOP 0
557+
; GCN-NEXT: S_NOP 0
558+
; GCN-NEXT: S_NOP 0
559+
; GCN-NEXT: S_NOP 0
560+
; GCN-NEXT: S_NOP 0
561+
; GCN-NEXT: S_NOP 0
562+
; GCN-NEXT: S_NOP 0
563+
; GCN-NEXT: S_NOP 0
564+
; GCN-NEXT: S_NOP 0
565+
; GCN-NEXT: S_NOP 0
566+
; GCN-NEXT: S_NOP 0
567+
; GCN-NEXT: S_NOP 0
568+
; GCN-NEXT: S_NOP 0
569+
; GCN-NEXT: S_NOP 0
570+
; GCN-NEXT: S_NOP 0
571+
; GCN-NEXT: S_NOP 0
572+
; GCN-NEXT: S_NOP 0
573+
; GCN-NEXT: S_NOP 0
574+
; GCN-NEXT: S_NOP 0
575+
; GCN-NEXT: S_NOP 0
576+
; GCN-NEXT: S_NOP 0
577+
; GCN-NEXT: S_NOP 0
578+
; GCN-NEXT: S_NOP 0
579+
; GCN-NEXT: S_NOP 0
580+
; GCN-NEXT: S_NOP 0
581+
; GCN-NEXT: S_NOP 0
519582
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
520583
; GCN-NEXT: DBG_VALUE $noreg, 0
521584
; GCN-NEXT: DBG_VALUE $noreg, 0

llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ body: |
3131
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
3232
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
3333
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
34-
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
3534
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
36-
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
37-
; CHECK-NEXT: S_NOP 0
3835
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
36+
; CHECK-NEXT: S_NOP 0
37+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
38+
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
3939
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
4040
; CHECK-NEXT: S_ENDPGM 0
4141
%0:sreg_64 = IMPLICIT_DEF
@@ -127,8 +127,8 @@ body: |
127127
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
128128
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
129129
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
130-
; CHECK-NEXT: S_NOP 0
131130
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
131+
; CHECK-NEXT: S_NOP 0
132132
; CHECK-NEXT: SCHED_BARRIER 2
133133
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
134134
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
@@ -211,13 +211,13 @@ body: |
211211
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
212212
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
213213
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
214-
; CHECK-NEXT: S_NOP 0
215214
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
216215
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
217216
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
218217
; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
219218
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
220219
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
220+
; CHECK-NEXT: S_NOP 0
221221
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
222222
; CHECK-NEXT: SCHED_BARRIER 8
223223
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -259,11 +259,11 @@ body: |
259259
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
260260
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
261261
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
262-
; CHECK-NEXT: S_NOP 0
263262
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
263+
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
264+
; CHECK-NEXT: S_NOP 0
264265
; CHECK-NEXT: SCHED_BARRIER 16
265266
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
266-
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
267267
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
268268
; CHECK-NEXT: S_ENDPGM 0
269269
%0:sreg_64 = IMPLICIT_DEF
@@ -292,8 +292,8 @@ body: |
292292
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
293293
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
294294
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
295-
; CHECK-NEXT: S_NOP 0
296295
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
296+
; CHECK-NEXT: S_NOP 0
297297
; CHECK-NEXT: SCHED_BARRIER 32
298298
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
299299
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
@@ -322,12 +322,12 @@ body: |
322322
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
323323
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
324324
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
325-
; CHECK-NEXT: S_NOP 0
326325
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
326+
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
327+
; CHECK-NEXT: S_NOP 0
327328
; CHECK-NEXT: SCHED_BARRIER 64
328329
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
329330
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
330-
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
331331
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
332332
; CHECK-NEXT: S_ENDPGM 0
333333
%0:sreg_64 = IMPLICIT_DEF
@@ -353,13 +353,13 @@ body: |
353353
; CHECK-LABEL: name: sched_barrier_mask_128
354354
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
355355
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
356+
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
356357
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
358+
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
357359
; CHECK-NEXT: S_NOP 0
358-
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
359360
; CHECK-NEXT: SCHED_BARRIER 128
360361
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
361-
; CHECK-NEXT: dead %0:sreg_64 = IMPLICIT_DEF
362-
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
362+
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
363363
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
364364
; CHECK-NEXT: S_ENDPGM 0
365365
%0:sreg_64 = IMPLICIT_DEF
@@ -385,13 +385,13 @@ body: |
385385
; CHECK-LABEL: name: sched_barrier_mask_256
386386
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
387387
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
388-
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
389388
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
390-
; CHECK-NEXT: S_NOP 0
389+
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
391390
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
391+
; CHECK-NEXT: S_NOP 0
392392
; CHECK-NEXT: SCHED_BARRIER 256
393393
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
394-
; CHECK-NEXT: dead %0:sreg_64 = IMPLICIT_DEF
394+
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
395395
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
396396
; CHECK-NEXT: S_ENDPGM 0
397397
%0:sreg_64 = IMPLICIT_DEF
@@ -417,13 +417,13 @@ body: |
417417
; CHECK-LABEL: name: sched_barrier_mask_512
418418
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
419419
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
420-
; CHECK-NEXT: S_NOP 0
421420
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
421+
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
422+
; CHECK-NEXT: S_NOP 0
422423
; CHECK-NEXT: SCHED_BARRIER 512
423424
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
424425
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
425-
; CHECK-NEXT: dead %0:sreg_64 = IMPLICIT_DEF
426-
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
426+
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
427427
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
428428
; CHECK-NEXT: S_ENDPGM 0
429429
%0:sreg_64 = IMPLICIT_DEF
@@ -463,11 +463,11 @@ body: |
463463
; CHECK-NEXT: SCHED_BARRIER 12
464464
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
465465
; CHECK-NEXT: SCHED_BARRIER 8
466+
; CHECK-NEXT: S_NOP 0
466467
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
467468
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
468469
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
469470
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
470-
; CHECK-NEXT: S_NOP 0
471471
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
472472
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
473473
%0:sreg_64 = IMPLICIT_DEF

0 commit comments

Comments
 (0)