Skip to content

Commit 55c6bda

Browse files
committed
Revert "Revert "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)" and more..."
This reverts commit 16bd10a. It re-applies the following commits, with a fix in DAGCombiner::visitFREEZE:
- b3c55b7 - "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)"
- 8e2f649 - "[DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)"
- 73472c5 - "[SelectionDAG] Treat CopyFromReg as freezing the value (#85932)"
1 parent 95395ee commit 55c6bda

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+2224
-2298
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ enum NodeType {
205205
/// CopyFromReg - This node indicates that the input value is a virtual or
206206
/// physical register that is defined outside of the scope of this
207207
/// SelectionDAG. The register is available from the RegisterSDNode object.
208+
/// Note that CopyFromReg is considered as also freezing the value.
208209
CopyFromReg,
209210

210211
/// UNDEF - An undefined node.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15459,6 +15459,12 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1545915459
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
1546015460
return N0;
1546115461

15462+
// We currently avoid folding freeze over SRA/SRL, due to the problems seen
15463+
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15464+
// example https://reviews.llvm.org/D136529#4120959.
15465+
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15466+
return SDValue();
15467+
1546215468
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
1546315469
// Try to push freeze through instructions that propagate but don't produce
1546415470
// poison as far as possible. If an operand of freeze follows three
@@ -15475,6 +15481,26 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1547515481
N0.getOpcode() == ISD::BUILD_PAIR ||
1547615482
N0.getOpcode() == ISD::CONCAT_VECTORS;
1547715483

15484+
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15485+
// ones" or "constant" into something that depends on FrozenUndef. We can
15486+
// instead pick undef values to keep those properties, while at the same time
15487+
// folding away the freeze.
15488+
// If we implement a more general solution for folding away freeze(undef) in
15489+
// the future, then this special handling can be removed.
15490+
if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15491+
SDLoc DL(N0);
15492+
EVT VT = N0.getValueType();
15493+
if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
15494+
return DAG.getAllOnesConstant(DL, VT);
15495+
if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15496+
SmallVector<SDValue, 8> NewVecC;
15497+
for (const SDValue &Op : N0->op_values())
15498+
NewVecC.push_back(
15499+
Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15500+
return DAG.getBuildVector(VT, DL, NewVecC);
15501+
}
15502+
}
15503+
1547815504
SmallSetVector<SDValue, 8> MaybePoisonOperands;
1547915505
for (SDValue Op : N0->ops()) {
1548015506
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5063,6 +5063,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
50635063
case ISD::VALUETYPE:
50645064
case ISD::FrameIndex:
50655065
case ISD::TargetFrameIndex:
5066+
case ISD::CopyFromReg:
50665067
return true;
50675068

50685069
case ISD::UNDEF:
@@ -5136,6 +5137,16 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51365137
case ISD::FREEZE:
51375138
case ISD::CONCAT_VECTORS:
51385139
case ISD::INSERT_SUBVECTOR:
5140+
case ISD::SADDSAT:
5141+
case ISD::UADDSAT:
5142+
case ISD::SSUBSAT:
5143+
case ISD::USUBSAT:
5144+
case ISD::MULHU:
5145+
case ISD::MULHS:
5146+
case ISD::SMIN:
5147+
case ISD::SMAX:
5148+
case ISD::UMIN:
5149+
case ISD::UMAX:
51395150
case ISD::AND:
51405151
case ISD::XOR:
51415152
case ISD::ROTL:
@@ -5156,6 +5167,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51565167
case ISD::BUILD_PAIR:
51575168
return false;
51585169

5170+
case ISD::SELECT_CC:
51595171
case ISD::SETCC: {
51605172
// Integer setcc cannot create undef or poison.
51615173
if (Op.getOperand(0).getValueType().isInteger())
@@ -5165,7 +5177,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51655177
// based on options and flags. The options and flags also cause special
51665178
// nonan condition codes to be used. Those condition codes may be preserved
51675179
// even if the nonan flag is dropped somewhere.
5168-
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5180+
unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4;
5181+
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get();
51695182
if (((unsigned)CCCode & 0x10U))
51705183
return true;
51715184

@@ -5182,6 +5195,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51825195
return false;
51835196

51845197
case ISD::SHL:
5198+
case ISD::SRL:
5199+
case ISD::SRA:
51855200
// If the max shift amount isn't in range, then the shift can create poison.
51865201
return !getValidMaximumShiftAmountConstant(Op, DemandedElts);
51875202

llvm/test/CodeGen/AArch64/combine-mul.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
4444
define i64 @combine_mul_self_demandedbits(i64 %x) {
4545
; CHECK-LABEL: combine_mul_self_demandedbits:
4646
; CHECK: // %bb.0:
47-
; CHECK-NEXT: mul x8, x0, x0
48-
; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
47+
; CHECK-NEXT: mul x0, x0, x0
4948
; CHECK-NEXT: ret
5049
%1 = mul i64 %x, %x
5150
%2 = and i64 %1, -3
@@ -77,7 +76,7 @@ define i8 @one_demanded_bit(i8 %x) {
7776
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
7877
; CHECK-LABEL: one_demanded_bit_splat:
7978
; CHECK: // %bb.0:
80-
; CHECK-NEXT: mov w8, #32
79+
; CHECK-NEXT: mov w8, #32 // =0x20
8180
; CHECK-NEXT: shl v0.2d, v0.2d, #5
8281
; CHECK-NEXT: dup v1.2d, x8
8382
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@@ -131,7 +130,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
131130
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
132131
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
133132
; CHECK: // %bb.0:
134-
; CHECK-NEXT: mov x8, #-2
133+
; CHECK-NEXT: mov x8, #-2 // =0xfffffffffffffffe
135134
; CHECK-NEXT: dup v1.2d, x8
136135
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
137136
; CHECK-NEXT: ret

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -282,21 +282,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
282282
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
283283
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
284284
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
285-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v16
285+
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
286286
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
287-
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
287+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14
288288
; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
289-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
290-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291289
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
290+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
292292
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
293293
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
294-
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
295-
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
294+
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
295+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
296296
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
297297
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
298-
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
299-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1
298+
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
299+
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v1
300300
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
301301
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
302302
; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
@@ -312,21 +312,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
312312
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
313313
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
314314
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
315-
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
316-
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11
315+
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
316+
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
317317
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
318318
; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
319-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
320-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v3, vcc
321-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
319+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v5, vcc
320+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7, v3, vcc
321+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v2, v5, vcc
322322
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
323323
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
324-
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
325-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
324+
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
325+
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
326326
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
327327
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
328-
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
329-
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
328+
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
329+
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
330330
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
331331
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
332332
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
@@ -339,18 +339,26 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
339339
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
340340
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
341341
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
342-
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
342+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
343+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
344+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
343345
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
344-
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
345-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
346+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
347+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
348+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
349+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
346350
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
347-
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
348-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
351+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
352+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
353+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
354+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
349355
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
350-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
351-
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
356+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
357+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
358+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
359+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
352360
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
353-
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
361+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
354362
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
355363
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
356364
; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
@@ -403,7 +411,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
403411
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
404412
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
405413
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
406-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
414+
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
415+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
407416
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
408417
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
409418
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
@@ -439,7 +448,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
439448
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
440449
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
441450
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
442-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
451+
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
452+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
443453
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
444454
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
445455
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
@@ -690,10 +700,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
690700
; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
691701
; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
692702
; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
693-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
694-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
695-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
696-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
703+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
704+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
705+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
706+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
697707
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
698708
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
699709
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
@@ -903,14 +913,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
903913
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
904914
; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
905915
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
906-
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
907-
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
908-
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
909-
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
910-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
911-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
912-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
913-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
916+
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
917+
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
918+
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
919+
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
920+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
921+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
922+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
923+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
914924
; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
915925
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
916926
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -1028,10 +1038,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10281038
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
10291039
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
10301040
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
1031-
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1032-
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1033-
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1034-
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1041+
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1042+
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1043+
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1044+
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
10351045
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
10361046
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10371047
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload

0 commit comments

Comments
 (0)