Skip to content

Commit 16bd10a

Browse files
committed
Revert "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)" and more...
This reverts the following commits:
- b3c55b7 - "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)" (reverted because it updates a test case and I don't know how to resolve the resulting conflict)
- 8e2f649 - "[DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)"
- 73472c5 - "[SelectionDAG] Treat CopyFromReg as freezing the value (#85932)"

The revert is due to a test suite failure on AArch64 when compiling for SVE: https://lab.llvm.org/buildbot/#/builders/197/builds/13955

clang: ../llvm/llvm/include/llvm/CodeGen/ValueTypes.h:307: MVT llvm::EVT::getSimpleVT() const: Assertion `isSimple() && "Expected a SimpleValueType!"' failed.
1 parent f029da5 commit 16bd10a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+2298
-2204
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,6 @@ enum NodeType {
205205
/// CopyFromReg - This node indicates that the input value is a virtual or
206206
/// physical register that is defined outside of the scope of this
207207
/// SelectionDAG. The register is available from the RegisterSDNode object.
208-
/// Note that CopyFromReg is considered as also freezing the value.
209208
CopyFromReg,
210209

211210
/// UNDEF - An undefined node.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15459,12 +15459,6 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1545915459
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
1546015460
return N0;
1546115461

15462-
// We currently avoid folding freeze over SRA/SRL, due to the problems seen
15463-
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15464-
// example https://reviews.llvm.org/D136529#4120959.
15465-
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15466-
return SDValue();
15467-
1546815462
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
1546915463
// Try to push freeze through instructions that propagate but don't produce
1547015464
// poison as far as possible. If an operand of freeze follows three
@@ -15481,26 +15475,6 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1548115475
N0.getOpcode() == ISD::BUILD_PAIR ||
1548215476
N0.getOpcode() == ISD::CONCAT_VECTORS;
1548315477

15484-
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15485-
// ones" or "constant" into something that depends on FrozenUndef. We can
15486-
// instead pick undef values to keep those properties, while at the same time
15487-
// folding away the freeze.
15488-
// If we implement a more general solution for folding away freeze(undef) in
15489-
// the future, then this special handling can be removed.
15490-
if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15491-
SDLoc DL(N0);
15492-
MVT VT = N0.getSimpleValueType();
15493-
if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
15494-
return DAG.getAllOnesConstant(DL, VT);
15495-
if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15496-
SmallVector<SDValue, 8> NewVecC;
15497-
for (const SDValue &Op : N0->op_values())
15498-
NewVecC.push_back(
15499-
Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15500-
return DAG.getBuildVector(VT, DL, NewVecC);
15501-
}
15502-
}
15503-
1550415478
SmallSetVector<SDValue, 8> MaybePoisonOperands;
1550515479
for (SDValue Op : N0->ops()) {
1550615480
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5063,7 +5063,6 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
50635063
case ISD::VALUETYPE:
50645064
case ISD::FrameIndex:
50655065
case ISD::TargetFrameIndex:
5066-
case ISD::CopyFromReg:
50675066
return true;
50685067

50695068
case ISD::UNDEF:
@@ -5137,16 +5136,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51375136
case ISD::FREEZE:
51385137
case ISD::CONCAT_VECTORS:
51395138
case ISD::INSERT_SUBVECTOR:
5140-
case ISD::SADDSAT:
5141-
case ISD::UADDSAT:
5142-
case ISD::SSUBSAT:
5143-
case ISD::USUBSAT:
5144-
case ISD::MULHU:
5145-
case ISD::MULHS:
5146-
case ISD::SMIN:
5147-
case ISD::SMAX:
5148-
case ISD::UMIN:
5149-
case ISD::UMAX:
51505139
case ISD::AND:
51515140
case ISD::XOR:
51525141
case ISD::ROTL:
@@ -5167,7 +5156,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51675156
case ISD::BUILD_PAIR:
51685157
return false;
51695158

5170-
case ISD::SELECT_CC:
51715159
case ISD::SETCC: {
51725160
// Integer setcc cannot create undef or poison.
51735161
if (Op.getOperand(0).getValueType().isInteger())
@@ -5177,8 +5165,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51775165
// based on options and flags. The options and flags also cause special
51785166
// nonan condition codes to be used. Those condition codes may be preserved
51795167
// even if the nonan flag is dropped somewhere.
5180-
unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4;
5181-
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get();
5168+
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
51825169
if (((unsigned)CCCode & 0x10U))
51835170
return true;
51845171

@@ -5195,8 +5182,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51955182
return false;
51965183

51975184
case ISD::SHL:
5198-
case ISD::SRL:
5199-
case ISD::SRA:
52005185
// If the max shift amount isn't in range, then the shift can create poison.
52015186
return !getValidMaximumShiftAmountConstant(Op, DemandedElts);
52025187

llvm/test/CodeGen/AArch64/combine-mul.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
4444
define i64 @combine_mul_self_demandedbits(i64 %x) {
4545
; CHECK-LABEL: combine_mul_self_demandedbits:
4646
; CHECK: // %bb.0:
47-
; CHECK-NEXT: mul x0, x0, x0
47+
; CHECK-NEXT: mul x8, x0, x0
48+
; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
4849
; CHECK-NEXT: ret
4950
%1 = mul i64 %x, %x
5051
%2 = and i64 %1, -3
@@ -76,7 +77,7 @@ define i8 @one_demanded_bit(i8 %x) {
7677
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
7778
; CHECK-LABEL: one_demanded_bit_splat:
7879
; CHECK: // %bb.0:
79-
; CHECK-NEXT: mov w8, #32 // =0x20
80+
; CHECK-NEXT: mov w8, #32
8081
; CHECK-NEXT: shl v0.2d, v0.2d, #5
8182
; CHECK-NEXT: dup v1.2d, x8
8283
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@@ -130,7 +131,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
130131
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
131132
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
132133
; CHECK: // %bb.0:
133-
; CHECK-NEXT: mov x8, #-2 // =0xfffffffffffffffe
134+
; CHECK-NEXT: mov x8, #-2
134135
; CHECK-NEXT: dup v1.2d, x8
135136
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
136137
; CHECK-NEXT: ret

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 43 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -282,21 +282,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
282282
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
283283
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
284284
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
285-
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
285+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v16
286286
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
287-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14
287+
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
288288
; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
289-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
290-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291289
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
290+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
292292
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
293293
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
294-
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
295-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
294+
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
295+
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
296296
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
297297
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
298-
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
299-
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v1
298+
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
299+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1
300300
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
301301
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
302302
; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
@@ -312,21 +312,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
312312
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
313313
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
314314
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
315-
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
316-
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
315+
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
316+
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11
317317
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
318318
; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
319-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v5, vcc
320-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7, v3, vcc
321-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v2, v5, vcc
319+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
320+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v3, vcc
321+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
322322
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
323323
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
324-
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
325-
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
324+
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
325+
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
326326
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
327327
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
328-
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
329-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
328+
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
329+
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
330330
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
331331
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
332332
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
@@ -339,26 +339,18 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
339339
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
340340
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
341341
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
342-
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
343-
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
344-
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
342+
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
345343
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
346-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
347-
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
348-
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
349-
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
344+
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
345+
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
350346
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
351-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
352-
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
353-
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
354-
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
347+
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
348+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
355349
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
356-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
357-
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
358-
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
359-
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
350+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
351+
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
360352
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
361-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
353+
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
362354
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
363355
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
364356
; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
@@ -411,8 +403,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
411403
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
412404
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
413405
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
414-
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
415-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
406+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
416407
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
417408
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
418409
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
@@ -448,8 +439,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
448439
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
449440
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
450441
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
451-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
452-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
442+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
453443
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
454444
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
455445
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
@@ -700,10 +690,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
700690
; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
701691
; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
702692
; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
703-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
704-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
705-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
706-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
693+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
694+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
695+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
696+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
707697
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
708698
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
709699
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
@@ -913,14 +903,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
913903
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
914904
; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
915905
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
916-
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
917-
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
918-
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
919-
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
920-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
921-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
922-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
923-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
906+
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
907+
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
908+
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
909+
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
910+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
911+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
912+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
913+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
924914
; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
925915
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
926916
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -1038,10 +1028,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10381028
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
10391029
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
10401030
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
1041-
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1042-
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1043-
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1044-
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1031+
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1032+
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1033+
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1034+
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10451035
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
10461036
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10471037
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload

0 commit comments

Comments
 (0)