Skip to content

Commit 8b706da

Browse files
committed
[DAGCombiner] Freeze maybe poison operands when folding select to logic
Just like for regular IR we need to treat SELECT as conditionally blocking poison. So (unless the condition itself is poison) the result is only poison if the selected true/false value is poison. Thus, when doing DAG combines that turn SELECT into arithmetic/logical operations (e.g. AND/OR) we need to make sure that the new operations aren't more poisonous. One way to do that is to use FREEZE to make sure the operands aren't poison. This patch aims to fix the kind of miscompiles reported in llvm#84653 and llvm#85190. The solution is to make sure that we insert FREEZE, if needed to make the fold sound, when using the foldBoolSelectToLogic and foldVSelectToSignBitSplatMask DAG combines. This may result in some (hopefully minor) regressions since we lack some ways to fold away the freeze (or due to isGuaranteedNotToBePoison being too pessimistic). The focus of this patch is just to avoid miscompiles, but I think some of the regressions can be avoided by general improvements regarding poison/freeze handling in SelectionDAG.
1 parent 39f1b2d commit 8b706da

19 files changed

+499
-511
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11349,28 +11349,28 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
1134911349
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
1135011350
return SDValue();
1135111351

11352-
// select Cond, Cond, F --> or Cond, F
11353-
// select Cond, 1, F --> or Cond, F
11352+
// select Cond, Cond, F --> or Cond, freeze(F)
11353+
// select Cond, 1, F --> or Cond, freeze(F)
1135411354
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11355-
return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11355+
return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, DAG.getFreeze(F));
1135611356

1135711357
// select Cond, T, Cond --> and Cond, T
1135811358
// select Cond, T, 0 --> and Cond, T
1135911359
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11360-
return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11360+
return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, DAG.getFreeze(T));
1136111361

1136211362
// select Cond, T, 1 --> or (not Cond), T
1136311363
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
1136411364
SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
1136511365
DAG.getAllOnesConstant(SDLoc(N), VT));
11366-
return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11366+
return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, DAG.getFreeze(T));
1136711367
}
1136811368

1136911369
// select Cond, 0, F --> and (not Cond), F
1137011370
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
1137111371
SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
1137211372
DAG.getAllOnesConstant(SDLoc(N), VT));
11373-
return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11373+
return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, DAG.getFreeze(F));
1137411374
}
1137511375

1137611376
return SDValue();
@@ -11404,15 +11404,15 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
1140411404
SDLoc DL(N);
1140511405
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1140611406
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11407-
return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11407+
return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
1140811408
}
1140911409

1141011410
// (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
1141111411
if (isAllOnesOrAllOnesSplat(N1)) {
1141211412
SDLoc DL(N);
1141311413
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1141411414
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11415-
return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11415+
return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
1141611416
}
1141711417

1141811418
// If we have to invert the sign bit mask, only do that transform if the
@@ -11424,7 +11424,7 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
1142411424
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1142511425
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
1142611426
SDValue Not = DAG.getNOT(DL, Sra, VT);
11427-
return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11427+
return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
1142811428
}
1142911429

1143011430
// TODO: There's another pattern in this family, but it may require

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
236236
; CHECK-NEXT: mov z7.d, z0.d
237237
; CHECK-NEXT: add x9, x9, x11
238238
; CHECK-NEXT: add x8, x8, x12
239-
; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
239+
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
240+
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
240241
; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
241242
; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
242243
; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]

llvm/test/CodeGen/AArch64/fast-isel-select.ll

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
2-
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
3-
; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
1+
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SISEL
2+
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,FISEL
3+
; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GISEL
44

55
; First test the different supported value types for select.
66
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -295,22 +295,28 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
295295
; Test peephole optimizations for select.
296296
define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
297297
; CHECK-LABEL: select_opt1
298-
; CHECK: orr {{w[0-9]+}}, w0, w1
298+
; SISEL: orr [[REG:w[0-9]+]], w0, w1
299+
; SISEL: and w0, [[REG]], #0x1
300+
; FISEL: orr {{w[0-9]+}}, w0, w1
299301
%1 = select i1 %c, i1 true, i1 %a
300302
ret i1 %1
301303
}
302304

303305
define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
304306
; CHECK-LABEL: select_opt2
305-
; CHECK: eor [[REG:w[0-9]+]], w0, #0x1
306-
; CHECK: orr {{w[0-9]+}}, [[REG]], w1
307+
; SISEL: orn [[REG:w[0-9]+]], w1, w0
308+
; SISEL: and w0, [[REG]], #0x1
309+
; FISEL: eor [[REG:w[0-9]+]], w0, #0x1
310+
; FISEL: orr {{w[0-9]+}}, [[REG]], w1
307311
%1 = select i1 %c, i1 %a, i1 true
308312
ret i1 %1
309313
}
310314

311315
define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
312316
; CHECK-LABEL: select_opt3
313-
; CHECK: bic {{w[0-9]+}}, w1, w0
317+
; SISEL: eor [[REG:w[0-9]+]], w0, #0x1
318+
; SISEL: and w0, [[REG]], w1
319+
; FISEL: bic {{w[0-9]+}}, w1, w0
314320
%1 = select i1 %c, i1 false, i1 %a
315321
ret i1 %1
316322
}

llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,9 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
213213
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
214214
; CHECK-LABEL: ctz_and_nxv16i1:
215215
; CHECK: // %bb.0:
216-
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
217216
; CHECK-NEXT: ptrue p1.b
217+
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
218+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
218219
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
219220
; CHECK-NEXT: cntp x0, p0, p0.b
220221
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0

llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
2424
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
2525
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
2626
; CHECK-NEXT: add z0.d, z2.d, z1.d
27-
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
27+
; CHECK-NEXT: not p2.b, p0/z, p2.b
28+
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
2829
; CHECK-NEXT: mov z0.d, p2/m, z2.d
2930
; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
3031
; CHECK-NEXT: uaddv d0, p0, z0.d

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 24 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -466,28 +466,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
466466
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
467467
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
468468
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
469+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
470+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
471+
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
469472
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
470473
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
471474
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
472475
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
473-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
474-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
475-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
476-
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
477-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
478-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
479-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
480-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
476+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
481477
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
482-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
483-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
484-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
478+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
479+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
485480
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
486-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
487-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
481+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
488482
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
489-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
490-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
483+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
491484
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
492485
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
493486
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -498,7 +491,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
498491
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
499492
; GFX9-O0-NEXT: s_mov_b32 s14, s13
500493
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
501-
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
502494
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
503495
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
504496
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1032,10 +1024,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10321024
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
10331025
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
10341026
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1035-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1036-
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1037-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1038-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1027+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1028+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1029+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1030+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10391031
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10401032
; GFX9-O0-NEXT: s_mov_b32 s5, s6
10411033
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -2737,28 +2729,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
27372729
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
27382730
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
27392731
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
2732+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2733+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2734+
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
27402735
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
27412736
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
27422737
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
27432738
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2744-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
2745-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2746-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2747-
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2748-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2749-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
2750-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2751-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2739+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
27522740
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2753-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2754-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2755-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
2741+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2742+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
27562743
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2757-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2758-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
2744+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
27592745
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
2760-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2761-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
2746+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
27622747
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
27632748
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
27642749
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -2769,7 +2754,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
27692754
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
27702755
; GFX9-O0-NEXT: s_mov_b32 s14, s13
27712756
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
2772-
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
27732757
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
27742758
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
27752759
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -3303,10 +3287,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
33033287
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
33043288
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
33053289
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3306-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3307-
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3308-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3309-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3290+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3291+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3292+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3293+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
33103294
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
33113295
; GFX9-O0-NEXT: s_mov_b32 s5, s6
33123296
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)