Skip to content

Commit 0523eff

Browse files
bjopeyuxuanchen1997
authored and committed
[DAGCombiner] Freeze maybe poison operands when folding select to logic (#84924)
Just like for regular IR we need to treat SELECT as conditionally blocking poison in SelectionDAG. So (unless the condition itself is poison) the result is only poison if the selected true/false value is poison. Thus, when doing DAG combines that turn SELECT into arithmetic/logical operations (e.g. AND/OR) we need to make sure that the new operations aren't more poisonous. One way to do that is to use FREEZE to make sure the operands aren't poison. This patch aims at fixing the kind of miscompiles reported in #84653 and #85190. The solution is to make sure that we insert FREEZE, if needed to make the fold sound, when using the foldBoolSelectToLogic and foldVSelectToSignBitSplatMask DAG combines.
1 parent 0906b7f commit 0523eff

19 files changed

+700
-674
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11501,28 +11501,28 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
1150111501
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
1150211502
return SDValue();
1150311503

11504-
// select Cond, Cond, F --> or Cond, F
11505-
// select Cond, 1, F --> or Cond, F
11504+
// select Cond, Cond, F --> or Cond, freeze(F)
11505+
// select Cond, 1, F --> or Cond, freeze(F)
1150611506
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11507-
return matcher.getNode(ISD::OR, DL, VT, Cond, F);
11507+
return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
1150811508

11509-
// select Cond, T, Cond --> and Cond, T
11510-
// select Cond, T, 0 --> and Cond, T
11509+
// select Cond, T, Cond --> and Cond, freeze(T)
11510+
// select Cond, T, 0 --> and Cond, freeze(T)
1151111511
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11512-
return matcher.getNode(ISD::AND, DL, VT, Cond, T);
11512+
return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
1151311513

11514-
// select Cond, T, 1 --> or (not Cond), T
11514+
// select Cond, T, 1 --> or (not Cond), freeze(T)
1151511515
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
1151611516
SDValue NotCond =
1151711517
matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11518-
return matcher.getNode(ISD::OR, DL, VT, NotCond, T);
11518+
return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
1151911519
}
1152011520

11521-
// select Cond, 0, F --> and (not Cond), F
11521+
// select Cond, 0, F --> and (not Cond), freeze(F)
1152211522
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
1152311523
SDValue NotCond =
1152411524
matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11525-
return matcher.getNode(ISD::AND, DL, VT, NotCond, F);
11525+
return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
1152611526
}
1152711527

1152811528
return SDValue();
@@ -11550,37 +11550,37 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
1155011550
else
1155111551
return SDValue();
1155211552

11553-
// (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11553+
// (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
1155411554
if (isNullOrNullSplat(N2)) {
1155511555
SDLoc DL(N);
1155611556
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1155711557
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11558-
return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11558+
return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
1155911559
}
1156011560

11561-
// (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11561+
// (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
1156211562
if (isAllOnesOrAllOnesSplat(N1)) {
1156311563
SDLoc DL(N);
1156411564
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1156511565
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11566-
return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11566+
return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
1156711567
}
1156811568

1156911569
// If we have to invert the sign bit mask, only do that transform if the
1157011570
// target has a bitwise 'and not' instruction (the invert is free).
11571-
// (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11571+
// (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
1157211572
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1157311573
if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
1157411574
SDLoc DL(N);
1157511575
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1157611576
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
1157711577
SDValue Not = DAG.getNOT(DL, Sra, VT);
11578-
return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11578+
return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
1157911579
}
1158011580

1158111581
// TODO: There's another pattern in this family, but it may require
1158211582
// implementing hasOrNot() to check for profitability:
11583-
// (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11583+
// (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
1158411584

1158511585
return SDValue();
1158611586
}

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
229229
; CHECK-NEXT: mov z6.d, z1.d
230230
; CHECK-NEXT: mov z7.d, z0.d
231231
; CHECK-NEXT: add x2, x2, x11
232-
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
232+
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
233+
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
233234
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
234235
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
235236
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]

llvm/test/CodeGen/AArch64/fast-isel-select.ll

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -500,31 +500,20 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
500500

501501
; Test peephole optimizations for select.
502502
define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
503-
; CHECK-SDAGISEL-LABEL: select_opt1:
504-
; CHECK-SDAGISEL: ; %bb.0:
505-
; CHECK-SDAGISEL-NEXT: orr w0, w0, w1
506-
; CHECK-SDAGISEL-NEXT: ret
507-
;
508-
; CHECK-FASTISEL-LABEL: select_opt1:
509-
; CHECK-FASTISEL: ; %bb.0:
510-
; CHECK-FASTISEL-NEXT: orr w8, w0, w1
511-
; CHECK-FASTISEL-NEXT: and w0, w8, #0x1
512-
; CHECK-FASTISEL-NEXT: ret
513-
;
514-
; CHECK-GISEL-LABEL: select_opt1:
515-
; CHECK-GISEL: ; %bb.0:
516-
; CHECK-GISEL-NEXT: orr w8, w0, w1
517-
; CHECK-GISEL-NEXT: and w0, w8, #0x1
518-
; CHECK-GISEL-NEXT: ret
503+
; CHECK-LABEL: select_opt1:
504+
; CHECK: ; %bb.0:
505+
; CHECK-NEXT: orr w8, w0, w1
506+
; CHECK-NEXT: and w0, w8, #0x1
507+
; CHECK-NEXT: ret
519508
%1 = select i1 %c, i1 true, i1 %a
520509
ret i1 %1
521510
}
522511

523512
define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
524513
; CHECK-SDAGISEL-LABEL: select_opt2:
525514
; CHECK-SDAGISEL: ; %bb.0:
526-
; CHECK-SDAGISEL-NEXT: eor w8, w0, #0x1
527-
; CHECK-SDAGISEL-NEXT: orr w0, w8, w1
515+
; CHECK-SDAGISEL-NEXT: orn w8, w1, w0
516+
; CHECK-SDAGISEL-NEXT: and w0, w8, #0x1
528517
; CHECK-SDAGISEL-NEXT: ret
529518
;
530519
; CHECK-FASTISEL-LABEL: select_opt2:
@@ -547,7 +536,8 @@ define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
547536
define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
548537
; CHECK-SDAGISEL-LABEL: select_opt3:
549538
; CHECK-SDAGISEL: ; %bb.0:
550-
; CHECK-SDAGISEL-NEXT: bic w0, w1, w0
539+
; CHECK-SDAGISEL-NEXT: eor w8, w0, #0x1
540+
; CHECK-SDAGISEL-NEXT: and w0, w8, w1
551541
; CHECK-SDAGISEL-NEXT: ret
552542
;
553543
; CHECK-FASTISEL-LABEL: select_opt3:

llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,9 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
319319
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
320320
; CHECK-LABEL: ctz_and_nxv16i1:
321321
; CHECK: // %bb.0:
322-
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
323322
; CHECK-NEXT: ptrue p1.b
323+
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
324+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
324325
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
325326
; CHECK-NEXT: cntp x0, p0, p0.b
326327
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0

llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
2424
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
2525
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
2626
; CHECK-NEXT: add z0.d, z2.d, z1.d
27-
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
27+
; CHECK-NEXT: not p2.b, p0/z, p2.b
28+
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
2829
; CHECK-NEXT: mov z0.d, p2/m, z2.d
2930
; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
3031
; CHECK-NEXT: uaddv d0, p0, z0.d

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 24 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -482,28 +482,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
482482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
483483
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
484484
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
485+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
486+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
487+
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
485488
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
486489
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
487490
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
488491
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
489-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
490-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
491-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
492-
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
493-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
494-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
495-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
496-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
492+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
497493
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
498-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
499-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
500-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
494+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
495+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
501496
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
502-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
503-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
497+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
504498
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
505-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
506-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
499+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
507500
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
508501
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
509502
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -514,7 +507,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
514507
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
515508
; GFX9-O0-NEXT: s_mov_b32 s14, s13
516509
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
517-
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
518510
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
519511
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
520512
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1048,10 +1040,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10481040
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10491041
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10501042
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1051-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1052-
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1053-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1054-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1043+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1044+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1045+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1046+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10551047
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10561048
; GFX9-O0-NEXT: s_mov_b32 s5, s6
10571049
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -2695,28 +2687,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26952687
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26962688
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
26972689
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
2690+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2691+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2692+
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
26982693
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26992694
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
27002695
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
27012696
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2702-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
2703-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2704-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2705-
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2706-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2707-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
2708-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2709-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2697+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
27102698
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2711-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2712-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2713-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
2699+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2700+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
27142701
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2715-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2716-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
2702+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
27172703
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
2718-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2719-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
2704+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
27202705
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
27212706
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
27222707
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -2727,7 +2712,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
27272712
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
27282713
; GFX9-O0-NEXT: s_mov_b32 s14, s13
27292714
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
2730-
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
27312715
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
27322716
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
27332717
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -3261,10 +3245,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
32613245
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
32623246
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
32633247
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3264-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3265-
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3266-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3267-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3248+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3249+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3250+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3251+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
32683252
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
32693253
; GFX9-O0-NEXT: s_mov_b32 s5, s6
32703254
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)