Skip to content

Commit 1ea4a7c

Browse files
committed
[VPlan] Introduce ExitPhi VPInstruction, use to create phi for FOR.
This patch introduces a new ExitPhi VPInstruction which creates a phi in a leaf block of a VPlan. The first use is to create the phi node for fixed-order recurrence resume values in the scalar preheader. The VPInstruction takes 2 operands: 1) the incoming value from the middle-block and 2) a default value to be used for all other incoming blocks. In follow-up changes, it will also be used to create phis for reduction and induction resume values.
1 parent f92bfca commit 1ea4a7c

28 files changed

+326
-258
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -607,10 +607,6 @@ class InnerLoopVectorizer {
607607
BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
608608
VPlan &Plan, VPTransformState &State);
609609

610-
/// Create the phi node for the resume value of first order recurrences in the
611-
/// scalar preheader and update the users in the scalar loop.
612-
void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);
613-
614610
/// Iteratively sink the scalarized operands of a predicated instruction into
615611
/// the block that was created for it.
616612
void sinkScalarOperands(Instruction *PredInst);
@@ -3315,8 +3311,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
33153311
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
33163312
if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
33173313
continue;
3318-
fixFixedOrderRecurrence(LO, State);
3319-
Plan.removeLiveOut(LO->getPhi());
33203314
}
33213315

33223316
// Forget the original basic block.
@@ -3386,33 +3380,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
33863380
VF.getKnownMinValue() * UF);
33873381
}
33883382

3389-
void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
3390-
VPTransformState &State) {
3391-
// Extract the last vector element in the middle block. This will be the
3392-
// initial value for the recurrence when jumping to the scalar loop.
3393-
VPValue *VPExtract = LO->getOperand(0);
3394-
using namespace llvm::VPlanPatternMatch;
3395-
assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>(
3396-
m_VPValue(), m_VPValue())) &&
3397-
"FOR LiveOut expects to use an extract from end.");
3398-
Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true);
3399-
3400-
// Fix the initial value of the original recurrence in the scalar loop.
3401-
PHINode *ScalarHeaderPhi = LO->getPhi();
3402-
auto *InitScalarFOR =
3403-
ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader);
3404-
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
3405-
auto *ScalarPreheaderPhi =
3406-
Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init");
3407-
for (auto *BB : predecessors(LoopScalarPreHeader)) {
3408-
auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR;
3409-
ScalarPreheaderPhi->addIncoming(Incoming, BB);
3410-
}
3411-
ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader,
3412-
ScalarPreheaderPhi);
3413-
ScalarHeaderPhi->setName("scalar.recur");
3414-
}
3415-
34163383
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
34173384
// The basic block and loop containing the predicated instruction.
34183385
auto *PredBB = PredInst->getParent();
@@ -8463,7 +8430,9 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
84638430
Value *IncomingValue =
84648431
ExitPhi.getIncomingValueForBlock(ExitingBB);
84658432
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
8466-
Plan.addLiveOut(&ExitPhi, V);
8433+
Plan.addLiveOut(
8434+
&ExitPhi, V,
8435+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()));
84678436
}
84688437
}
84698438

@@ -8635,6 +8604,49 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
86358604
"VPBasicBlock");
86368605
RecipeBuilder.fixHeaderPhis();
86378606

8607+
auto *MiddleVPBB =
8608+
cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
8609+
8610+
VPBasicBlock *ScalarPH = nullptr;
8611+
for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
8612+
auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
8613+
if (VPBB && !isa<VPIRBasicBlock>(VPBB)) {
8614+
ScalarPH = VPBB;
8615+
break;
8616+
}
8617+
}
8618+
8619+
if (ScalarPH) {
8620+
for (auto &H : HeaderVPBB->phis()) {
8621+
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&H);
8622+
if (!FOR)
8623+
continue;
8624+
VPBuilder B(ScalarPH);
8625+
VPBuilder MiddleBuilder;
8626+
// Set insert point so new recipes are inserted before terminator and
8627+
// condition, if there is either the former or both.
8628+
if (MiddleVPBB->getNumSuccessors() != 2)
8629+
MiddleBuilder.setInsertPoint(MiddleVPBB);
8630+
else if (isa<VPInstruction>(MiddleVPBB->getTerminator()->getOperand(0)))
8631+
MiddleBuilder.setInsertPoint(
8632+
&*std::prev(MiddleVPBB->getTerminator()->getIterator()));
8633+
else
8634+
MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator());
8635+
8636+
// Extract the resume value and create a new VPLiveOut for it.
8637+
auto *Resume = MiddleBuilder.createNaryOp(
8638+
VPInstruction::ExtractFromEnd,
8639+
{FOR->getBackedgeValue(),
8640+
Plan->getOrAddLiveIn(
8641+
ConstantInt::get(Plan->getCanonicalIV()->getScalarType(), 1))},
8642+
{}, "vector.recur.extract");
8643+
auto *R =
8644+
B.createNaryOp(VPInstruction::ExitPhi, {Resume, FOR->getStartValue()},
8645+
{}, "scalar.recur.init");
8646+
Plan->addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), R, ScalarPH);
8647+
}
8648+
}
8649+
86388650
// ---------------------------------------------------------------------------
86398651
// Transform initial VPlan: Apply previously taken decisions, in order, to
86408652
// bring the VPlan to its final state.

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,11 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
909909
/// VPBB, if any, are rewired to the new VPIRBasicBlock.
910910
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
911911
VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB);
912+
for (auto &[_, LO] : VPBB->getPlan()->getLiveOuts()) {
913+
if (LO->getPred() == VPBB)
914+
LO->setPred(IRMiddleVPBB);
915+
}
916+
912917
for (auto &R : make_early_inc_range(*VPBB))
913918
R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end());
914919
VPBlockBase *PredVPBB = VPBB->getSinglePredecessor();
@@ -1124,9 +1129,9 @@ LLVM_DUMP_METHOD
11241129
void VPlan::dump() const { print(dbgs()); }
11251130
#endif
11261131

1127-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1132+
void VPlan::addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred) {
11281133
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1129-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1134+
LiveOuts.insert({PN, new VPLiveOut(PN, V, Pred)});
11301135
}
11311136

11321137
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
@@ -1195,9 +1200,18 @@ VPlan *VPlan::duplicate() {
11951200
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
11961201
remapOperands(Entry, NewEntry, Old2NewVPValues);
11971202

1203+
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
1204+
VPBlockBase *OldMiddle = getVectorLoopRegion()->getSingleSuccessor();
1205+
VPBlockBase *NewMiddle = NewPlan->getVectorLoopRegion()->getSingleSuccessor();
1206+
Old2NewVPBlocks[OldMiddle] = NewMiddle;
1207+
for (const auto &[Old, New] :
1208+
zip(OldMiddle->getSuccessors(), NewMiddle->getSuccessors()))
1209+
Old2NewVPBlocks[Old] = New;
1210+
11981211
// Clone live-outs.
11991212
for (const auto &[_, LO] : LiveOuts)
1200-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1213+
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)],
1214+
cast<VPBasicBlock>(Old2NewVPBlocks[LO->getPred()]));
12011215

12021216
// Initialize remaining fields of cloned VPlan.
12031217
NewPlan->VFs = VFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -686,9 +686,13 @@ class VPBlockBase {
686686
class VPLiveOut : public VPUser {
687687
PHINode *Phi;
688688

689+
/// Predecessor in VPlan of this live-out. Used as the block to set the
690+
/// incoming value for.
691+
VPBasicBlock *Pred;
692+
689693
public:
690-
VPLiveOut(PHINode *Phi, VPValue *Op)
691-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
694+
VPLiveOut(PHINode *Phi, VPValue *Op, VPBasicBlock *Pred)
695+
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi), Pred(Pred) {}
692696

693697
static inline bool classof(const VPUser *U) {
694698
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
@@ -710,6 +714,11 @@ class VPLiveOut : public VPUser {
710714

711715
PHINode *getPhi() const { return Phi; }
712716

717+
/// Returns the incoming block for which to set the value.
718+
VPBasicBlock *getPred() const { return Pred; }
719+
720+
void setPred(VPBasicBlock *Pred) { this->Pred = Pred; }
721+
713722
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
714723
/// Print the VPLiveOut to \p O.
715724
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -1191,6 +1200,10 @@ class VPInstruction : public VPRecipeWithIRFlags {
11911200
SLPStore,
11921201
ActiveLaneMask,
11931202
ExplicitVectorLength,
1203+
/// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1204+
/// The first operand is the incoming value from the predecessor in VPlan,
1205+
/// the second operand is the incoming value for all other predecessors.
1206+
ExitPhi,
11941207
CalculateTripCountMinusVF,
11951208
// Increment the canonical IV separately for each unrolled part.
11961209
CanonicalIVIncrementForPart,
@@ -3333,7 +3346,7 @@ class VPlan {
33333346
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
33343347
}
33353348

3336-
void addLiveOut(PHINode *PN, VPValue *V);
3349+
void addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred);
33373350

33383351
void removeLiveOut(PHINode *PN) {
33393352
delete LiveOuts[PN];

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,12 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
193193
VPValue *ExitValue = getOperand(0);
194194
if (vputils::isUniformAfterVectorization(ExitValue))
195195
Lane = VPLane::getFirstLane();
196-
VPBasicBlock *MiddleVPBB =
197-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
198-
BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
199-
Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
200-
MiddleBB);
196+
BasicBlock *PredBB = State.CFG.VPBB2IRBB[Pred];
197+
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
198+
if (Phi->getBasicBlockIndex(PredBB) != -1)
199+
Phi->setIncomingValueForBlock(PredBB, V);
200+
else
201+
Phi->addIncoming(V, PredBB);
201202
}
202203

203204
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -303,6 +304,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
303304
case VPInstruction::CanonicalIVIncrementForPart:
304305
case VPInstruction::PtrAdd:
305306
case VPInstruction::ExplicitVectorLength:
307+
case VPInstruction::ExitPhi:
306308
return true;
307309
default:
308310
return false;
@@ -593,13 +595,31 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
593595
Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
594596
return Builder.CreatePtrAdd(Ptr, Addend, Name);
595597
}
598+
case VPInstruction::ExitPhi: {
599+
if (Part != 0)
600+
return State.get(this, 0, /*IsScalar*/ true);
601+
Value *IncomingFromVPlanPred =
602+
State.get(getOperand(0), Part, /* IsScalar */ true);
603+
Value *IncomingFromOtherPred =
604+
State.get(getOperand(1), Part, /* IsScalar */ true);
605+
auto *NewPhi = Builder.CreatePHI(IncomingFromOtherPred->getType(), 2, Name);
606+
BasicBlock *VPlanPred =
607+
State.CFG
608+
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
609+
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
610+
for (auto *BB : predecessors(Builder.GetInsertBlock()))
611+
NewPhi->addIncoming(IncomingFromOtherPred, BB);
612+
return NewPhi;
613+
}
614+
596615
default:
597616
llvm_unreachable("Unsupported opcode for instruction");
598617
}
599618
}
600619

601620
bool VPInstruction::isVectorToScalar() const {
602621
return getOpcode() == VPInstruction::ExtractFromEnd ||
622+
getOpcode() == VPInstruction::ExitPhi ||
603623
getOpcode() == VPInstruction::ComputeReductionResult;
604624
}
605625

@@ -729,6 +749,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
729749
case VPInstruction::ActiveLaneMask:
730750
O << "active lane mask";
731751
break;
752+
case VPInstruction::ExitPhi:
753+
O << "exit-phi";
754+
break;
732755
case VPInstruction::ExplicitVectorLength:
733756
O << "EXPLICIT-VECTOR-LENGTH";
734757
break;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -945,14 +945,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
945945
{}, "vector.recur.extract.for.phi"));
946946
RecurSplice->replaceUsesWithIf(
947947
Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
948-
949-
// Extract the resume value and create a new VPLiveOut for it.
950-
auto *Resume = MiddleBuilder.createNaryOp(
951-
VPInstruction::ExtractFromEnd,
952-
{FOR->getBackedgeValue(),
953-
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))},
954-
{}, "vector.recur.extract");
955-
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), Resume);
956948
}
957949
return true;
958950
}

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
7373
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3
7474
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
7575
; CHECK: scalar.ph:
76-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
76+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
7878
; CHECK-NEXT: br label [[LOOP:%.*]]
7979
; CHECK: loop:
8080
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
5151
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5353
; CHECK: scalar.ph:
54-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
5554
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
55+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
5656
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5757
; CHECK: for.cond.cleanup.loopexit:
5858
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
@@ -160,10 +160,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
160160
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
161161
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
162162
; CHECK: scalar.ph:
163-
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
164-
; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ]
165-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ]
166163
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ]
164+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ]
165+
; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ]
166+
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
167167
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
168168
; CHECK: for.cond.cleanup.loopexit:
169169
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,13 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
124124
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
125125
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
126126
; CHECK: middle.block:
127-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
128127
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
128+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
129129
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
130130
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
131131
; CHECK: scalar.ph:
132-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
133-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
132+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
133+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
134134
; CHECK-NEXT: br label [[LOOP:%.*]]
135135
; CHECK: loop:
136136
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -187,13 +187,13 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
187187
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
188188
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
189189
; CHECK: middle.block:
190-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
191190
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
191+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
192192
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
193193
; CHECK: scalar.ph:
194-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
195-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
194+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
196195
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
196+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
197197
; CHECK-NEXT: br label [[LOOP:%.*]]
198198
; CHECK: loop:
199199
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -789,8 +789,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur
789789
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
790790
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
791791
; CHECK: scalar.ph:
792-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
793792
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
793+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
794794
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
795795
; CHECK: for.cond.cleanup.loopexit:
796796
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
@@ -868,13 +868,13 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
868868
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
869869
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
870870
; CHECK: middle.block:
871-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
872871
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
872+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
873873
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
874874
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
875875
; CHECK: scalar.ph:
876-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
877-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
876+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
877+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
878878
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
879879
; CHECK: for.cond.cleanup:
880880
; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]

0 commit comments

Comments
 (0)