Skip to content

Commit 984e005

Browse files
committed
[VPlan] Introduce ExitPhi VPInstruction, use to create phi for FOR.
This patch introduces a new ExitPhi VPInstruction, which creates a phi in a leaf block of a VPlan. The first use is to create the phi node for fixed-order recurrence resume values in the scalar preheader. The VPInstruction takes 2 operands: 1) the incoming value from the middle-block and 2) a default value to be used for all other incoming blocks. In follow-up changes, it will also be used to create phis for reduction and induction resume values.
1 parent 7d466d7 commit 984e005

27 files changed

+343
-272
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 46 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -607,10 +607,6 @@ class InnerLoopVectorizer {
607607
BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
608608
VPlan &Plan, VPTransformState &State);
609609

610-
/// Create the phi node for the resume value of first order recurrences in the
611-
/// scalar preheader and update the users in the scalar loop.
612-
void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);
613-
614610
/// Iteratively sink the scalarized operands of a predicated instruction into
615611
/// the block that was created for it.
616612
void sinkScalarOperands(Instruction *PredInst);
@@ -3327,8 +3323,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
33273323
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
33283324
if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
33293325
continue;
3330-
fixFixedOrderRecurrence(LO, State);
3331-
Plan.removeLiveOut(LO->getPhi());
33323326
}
33333327

33343328
// Forget the original basic block.
@@ -3410,35 +3404,6 @@ static void reorderIncomingBlocks(SmallVectorImpl<BasicBlock *> &Blocks,
34103404
std::swap(Blocks[0], Blocks[1]);
34113405
}
34123406

3413-
void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
3414-
VPTransformState &State) {
3415-
// Extract the last vector element in the middle block. This will be the
3416-
// initial value for the recurrence when jumping to the scalar loop.
3417-
VPValue *VPExtract = LO->getOperand(0);
3418-
using namespace llvm::VPlanPatternMatch;
3419-
assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>(
3420-
m_VPValue(), m_VPValue())) &&
3421-
"FOR LiveOut expects to use an extract from end.");
3422-
Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true);
3423-
3424-
// Fix the initial value of the original recurrence in the scalar loop.
3425-
PHINode *ScalarHeaderPhi = LO->getPhi();
3426-
auto *InitScalarFOR =
3427-
ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader);
3428-
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
3429-
auto *ScalarPreheaderPhi =
3430-
Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init");
3431-
SmallVector<BasicBlock *> Blocks(predecessors(LoopScalarPreHeader));
3432-
reorderIncomingBlocks(Blocks, LoopMiddleBlock);
3433-
for (auto *BB : Blocks) {
3434-
auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR;
3435-
ScalarPreheaderPhi->addIncoming(Incoming, BB);
3436-
}
3437-
ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader,
3438-
ScalarPreheaderPhi);
3439-
ScalarHeaderPhi->setName("scalar.recur");
3440-
}
3441-
34423407
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
34433408
// The basic block and loop containing the predicated instruction.
34443409
auto *PredBB = PredInst->getParent();
@@ -8485,7 +8450,9 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
84858450
Value *IncomingValue =
84868451
ExitPhi.getIncomingValueForBlock(ExitingBB);
84878452
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
8488-
Plan.addLiveOut(&ExitPhi, V);
8453+
Plan.addLiveOut(
8454+
&ExitPhi, V,
8455+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()));
84898456
}
84908457
}
84918458

@@ -8665,6 +8632,49 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
86658632
"VPBasicBlock");
86668633
RecipeBuilder.fixHeaderPhis();
86678634

8635+
auto *MiddleVPBB =
8636+
cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
8637+
8638+
VPBasicBlock *ScalarPH = nullptr;
8639+
for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
8640+
auto *VPIRBB = dyn_cast<VPIRBasicBlock>(Succ);
8641+
if (VPIRBB && VPIRBB->getIRBasicBlock() == OrigLoop->getHeader()) {
8642+
ScalarPH = VPIRBB;
8643+
break;
8644+
}
8645+
}
8646+
8647+
if (ScalarPH) {
8648+
for (auto &H : HeaderVPBB->phis()) {
8649+
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&H);
8650+
if (!FOR)
8651+
continue;
8652+
VPBuilder B(ScalarPH);
8653+
VPBuilder MiddleBuilder;
8654+
// Set insert point so new recipes are inserted before terminator and
8655+
// condition, if there is either the former or both.
8656+
if (MiddleVPBB->getNumSuccessors() != 2)
8657+
MiddleBuilder.setInsertPoint(MiddleVPBB);
8658+
else if (isa<VPInstruction>(MiddleVPBB->getTerminator()->getOperand(0)))
8659+
MiddleBuilder.setInsertPoint(
8660+
&*std::prev(MiddleVPBB->getTerminator()->getIterator()));
8661+
else
8662+
MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator());
8663+
8664+
// Extract the resume value and create a new VPLiveOut for it.
8665+
auto *Resume = MiddleBuilder.createNaryOp(
8666+
VPInstruction::ExtractFromEnd,
8667+
{FOR->getBackedgeValue(),
8668+
Plan->getOrAddLiveIn(
8669+
ConstantInt::get(Plan->getCanonicalIV()->getScalarType(), 1))},
8670+
{}, "vector.recur.extract");
8671+
auto *R =
8672+
B.createNaryOp(VPInstruction::ExitPhi, {Resume, FOR->getStartValue()},
8673+
{}, "scalar.recur.init");
8674+
Plan->addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), R, ScalarPH);
8675+
}
8676+
}
8677+
86688678
// ---------------------------------------------------------------------------
86698679
// Transform initial VPlan: Apply previously taken decisions, in order, to
86708680
// bring the VPlan to its final state.

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -446,9 +446,6 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
446446
assert(getHierarchicalSuccessors().empty() &&
447447
"VPIRBasicBlock cannot have successors at the moment");
448448

449-
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
450-
executeRecipes(State, getIRBasicBlock());
451-
452449
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
453450
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
454451
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
@@ -467,6 +464,9 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
467464
TermBr->setSuccessor(idx, IRBB);
468465
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
469466
}
467+
468+
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
469+
executeRecipes(State, getIRBasicBlock());
470470
}
471471

472472
void VPBasicBlock::execute(VPTransformState *State) {
@@ -1087,9 +1087,9 @@ LLVM_DUMP_METHOD
10871087
void VPlan::dump() const { print(dbgs()); }
10881088
#endif
10891089

1090-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1090+
void VPlan::addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred) {
10911091
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1092-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1092+
LiveOuts.insert({PN, new VPLiveOut(PN, V, Pred)});
10931093
}
10941094

10951095
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
@@ -1158,9 +1158,18 @@ VPlan *VPlan::duplicate() {
11581158
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
11591159
remapOperands(Entry, NewEntry, Old2NewVPValues);
11601160

1161+
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
1162+
VPBlockBase *OldMiddle = getVectorLoopRegion()->getSingleSuccessor();
1163+
VPBlockBase *NewMiddle = NewPlan->getVectorLoopRegion()->getSingleSuccessor();
1164+
Old2NewVPBlocks[OldMiddle] = NewMiddle;
1165+
for (const auto &[Old, New] :
1166+
zip(OldMiddle->getSuccessors(), NewMiddle->getSuccessors()))
1167+
Old2NewVPBlocks[Old] = New;
1168+
11611169
// Clone live-outs.
11621170
for (const auto &[_, LO] : LiveOuts)
1163-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1171+
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)],
1172+
cast<VPBasicBlock>(Old2NewVPBlocks[LO->getPred()]));
11641173

11651174
// Initialize remaining fields of cloned VPlan.
11661175
NewPlan->VFs = VFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -683,9 +683,13 @@ class VPBlockBase {
683683
class VPLiveOut : public VPUser {
684684
PHINode *Phi;
685685

686+
/// Predecessor in VPlan of this live-out. Used as the block to set the
687+
/// incoming value for.
688+
VPBasicBlock *Pred;
689+
686690
public:
687-
VPLiveOut(PHINode *Phi, VPValue *Op)
688-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
691+
VPLiveOut(PHINode *Phi, VPValue *Op, VPBasicBlock *Pred)
692+
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi), Pred(Pred) {}
689693

690694
static inline bool classof(const VPUser *U) {
691695
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
@@ -707,6 +711,9 @@ class VPLiveOut : public VPUser {
707711

708712
PHINode *getPhi() const { return Phi; }
709713

714+
/// Returns the incoming block for which to set the value.
715+
VPBasicBlock *getPred() const { return Pred; }
716+
710717
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
711718
/// Print the VPLiveOut to \p O.
712719
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -1188,6 +1195,10 @@ class VPInstruction : public VPRecipeWithIRFlags {
11881195
SLPStore,
11891196
ActiveLaneMask,
11901197
ExplicitVectorLength,
1198+
/// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1199+
/// The first operand is the incoming value from the predecessor in VPlan,
1200+
/// the second operand is the incoming value for all other predecessors.
1201+
ExitPhi,
11911202
CalculateTripCountMinusVF,
11921203
// Increment the canonical IV separately for each unrolled part.
11931204
CanonicalIVIncrementForPart,
@@ -3324,7 +3335,7 @@ class VPlan {
33243335
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
33253336
}
33263337

3327-
void addLiveOut(PHINode *PN, VPValue *V);
3338+
void addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred);
33283339

33293340
void removeLiveOut(PHINode *PN) {
33303341
delete LiveOuts[PN];

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,12 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
193193
VPValue *ExitValue = getOperand(0);
194194
if (vputils::isUniformAfterVectorization(ExitValue))
195195
Lane = VPLane::getFirstLane();
196-
VPBasicBlock *MiddleVPBB =
197-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
198-
BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
199-
Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
200-
MiddleBB);
196+
BasicBlock *PredBB = State.CFG.VPBB2IRBB[Pred];
197+
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
198+
if (Phi->getBasicBlockIndex(PredBB) != -1)
199+
Phi->setIncomingValueForBlock(PredBB, V);
200+
else
201+
Phi->addIncoming(V, PredBB);
201202
}
202203

203204
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -303,6 +304,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
303304
case VPInstruction::CanonicalIVIncrementForPart:
304305
case VPInstruction::PtrAdd:
305306
case VPInstruction::ExplicitVectorLength:
307+
case VPInstruction::ExitPhi:
306308
return true;
307309
default:
308310
return false;
@@ -319,6 +321,14 @@ Value *VPInstruction::generatePerLane(VPTransformState &State,
319321
State.get(getOperand(1), Lane), Name);
320322
}
321323

324+
static void reorderIncomingBlocks(SmallVectorImpl<BasicBlock *> &Blocks,
325+
BasicBlock *LoopMiddleBlock) {
326+
if (Blocks.front() == LoopMiddleBlock)
327+
std::swap(Blocks.front(), Blocks.back());
328+
if (Blocks.size() == 3)
329+
std::swap(Blocks[0], Blocks[1]);
330+
}
331+
322332
Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
323333
IRBuilderBase &Builder = State.Builder;
324334

@@ -595,13 +605,36 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
595605
Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
596606
return Builder.CreatePtrAdd(Ptr, Addend, Name);
597607
}
608+
case VPInstruction::ExitPhi: {
609+
if (Part != 0)
610+
return State.get(this, 0, /*IsScalar*/ true);
611+
Value *IncomingFromVPlanPred =
612+
State.get(getOperand(0), Part, /* IsScalar */ true);
613+
Value *IncomingForOtherPredecessors =
614+
State.get(getOperand(1), Part, /* IsScalar */ true);
615+
auto *NewPhi =
616+
Builder.CreatePHI(IncomingForOtherPredecessors->getType(), 2, Name);
617+
SmallVector<BasicBlock *> Blocks(predecessors(Builder.GetInsertBlock()));
618+
BasicBlock *VPlanPred =
619+
State.CFG
620+
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
621+
reorderIncomingBlocks(Blocks, VPlanPred);
622+
for (auto *BB : Blocks) {
623+
auto *Incoming = BB == VPlanPred ? IncomingFromVPlanPred
624+
: IncomingForOtherPredecessors;
625+
NewPhi->addIncoming(Incoming, BB);
626+
}
627+
return NewPhi;
628+
}
629+
598630
default:
599631
llvm_unreachable("Unsupported opcode for instruction");
600632
}
601633
}
602634

603635
bool VPInstruction::isVectorToScalar() const {
604636
return getOpcode() == VPInstruction::ExtractFromEnd ||
637+
getOpcode() == VPInstruction::ExitPhi ||
605638
getOpcode() == VPInstruction::ComputeReductionResult;
606639
}
607640

@@ -731,6 +764,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
731764
case VPInstruction::ActiveLaneMask:
732765
O << "active lane mask";
733766
break;
767+
case VPInstruction::ExitPhi:
768+
O << "exit-phi";
769+
break;
734770
case VPInstruction::ExplicitVectorLength:
735771
O << "EXPLICIT-VECTOR-LENGTH";
736772
break;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -939,14 +939,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
939939
{}, "vector.recur.extract.for.phi"));
940940
RecurSplice->replaceUsesWithIf(
941941
Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
942-
943-
// Extract the resume value and create a new VPLiveOut for it.
944-
auto *Resume = MiddleBuilder.createNaryOp(
945-
VPInstruction::ExtractFromEnd,
946-
{FOR->getBackedgeValue(),
947-
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))},
948-
{}, "vector.recur.extract");
949-
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), Resume);
950942
}
951943
return true;
952944
}

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
7373
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3
7474
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
7575
; CHECK: scalar.ph:
76-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
77-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
76+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
7878
; CHECK-NEXT: br label [[LOOP:%.*]]
7979
; CHECK: loop:
8080
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
5151
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5353
; CHECK: scalar.ph:
54-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5554
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
55+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5656
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5757
; CHECK: for.cond.cleanup.loopexit:
5858
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
@@ -160,10 +160,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
160160
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
161161
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
162162
; CHECK: scalar.ph:
163-
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]
164-
; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ]
165-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
166163
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ]
164+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
165+
; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ]
166+
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]
167167
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
168168
; CHECK: for.cond.cleanup.loopexit:
169169
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,13 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
124124
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
125125
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
126126
; CHECK: middle.block:
127-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
128127
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
128+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
129129
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
130130
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
131131
; CHECK: scalar.ph:
132-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
133132
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
133+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
134134
; CHECK-NEXT: br label [[LOOP:%.*]]
135135
; CHECK: loop:
136136
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -187,13 +187,13 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
187187
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
188188
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
189189
; CHECK: middle.block:
190-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
191190
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
191+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
192192
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
193193
; CHECK: scalar.ph:
194-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
195194
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
196195
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
196+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
197197
; CHECK-NEXT: br label [[LOOP:%.*]]
198198
; CHECK: loop:
199199
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

0 commit comments

Comments
 (0)