Skip to content

Commit 8c3560a

Browse files
committed
[VPlan] Update induction resume values in VPlan.
Update ILV.createInductionResumeValues to directly update the VPIRInstructions wrapping the original phis with the created resume values. This is the first step towards modeling them completely in VPlan. Subsequent patches will move creation of the resume values completely into VPlan. Builds on top of llvm#109975, which is included in this PR.
1 parent 08b76d3 commit 8c3560a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+886
-852
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 93 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -467,11 +467,12 @@ class InnerLoopVectorizer {
467467
ElementCount MinProfitableTripCount,
468468
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
469469
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
470-
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
470+
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
471+
VPlan &Plan)
471472
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
472473
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
473474
Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
474-
PSI(PSI), RTChecks(RTChecks) {
475+
PSI(PSI), RTChecks(RTChecks), Plan(Plan) {
475476
// Query this against the original loop and save it here because the profile
476477
// of the original loop header may change as the transformation happens.
477478
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
@@ -522,7 +523,7 @@ class InnerLoopVectorizer {
522523
/// and the resume values can come from an additional bypass block, the \p
523524
/// AdditionalBypass pair provides information about the bypass block and the
524525
/// end value on the edge from bypass to this loop.
525-
PHINode *createInductionResumeValue(
526+
void createInductionResumeValue(
526527
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
527528
ArrayRef<BasicBlock *> BypassBlocks,
528529
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
@@ -535,6 +536,11 @@ class InnerLoopVectorizer {
535536
/// count of the original loop for both main loop and epilogue vectorization.
536537
void setTripCount(Value *TC) { TripCount = TC; }
537538

539+
std::pair<BasicBlock *, Value *>
540+
getInductionBypassValue(PHINode *OrigPhi) const {
541+
return InductionBypassValues.find(OrigPhi)->second;
542+
}
543+
538544
protected:
539545
friend class LoopVectorizationPlanner;
540546

@@ -677,6 +683,11 @@ class InnerLoopVectorizer {
677683
/// Structure to hold information about generated runtime checks, responsible
678684
/// for cleaning the checks, if vectorization turns out unprofitable.
679685
GeneratedRTChecks &RTChecks;
686+
687+
/// Mapping of induction phis to their bypass values and bypass blocks.
688+
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
689+
690+
VPlan &Plan;
680691
};
681692

682693
/// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -718,10 +729,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
718729
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
719730
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
720731
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
721-
GeneratedRTChecks &Checks)
732+
GeneratedRTChecks &Checks, VPlan &Plan)
722733
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
723734
EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
724-
CM, BFI, PSI, Checks),
735+
CM, BFI, PSI, Checks, Plan),
725736
EPI(EPI) {}
726737

727738
// Override this function to handle the more complex control flow around the
@@ -758,9 +769,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
758769
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
759770
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
760771
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
761-
GeneratedRTChecks &Check)
772+
GeneratedRTChecks &Check, VPlan &Plan)
762773
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
763-
EPI, LVL, CM, BFI, PSI, Check) {}
774+
EPI, LVL, CM, BFI, PSI, Check, Plan) {}
764775
/// Implements the interface for creating a vectorized skeleton using the
765776
/// *main loop* strategy (ie the first pass of vplan execution).
766777
std::pair<BasicBlock *, Value *>
@@ -787,9 +798,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
787798
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
788799
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
789800
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
790-
GeneratedRTChecks &Checks)
801+
GeneratedRTChecks &Checks, VPlan &Plan)
791802
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
792-
EPI, LVL, CM, BFI, PSI, Checks) {
803+
EPI, LVL, CM, BFI, PSI, Checks, Plan) {
793804
TripCount = EPI.TripCount;
794805
}
795806
/// Implements the interface for creating a vectorized skeleton using the
@@ -2546,7 +2557,18 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25462557
nullptr, Twine(Prefix) + "scalar.ph");
25472558
}
25482559

2549-
PHINode *InnerLoopVectorizer::createInductionResumeValue(
2560+
static void addOperandToPhiInVPIRBasicBlock(VPIRBasicBlock *VPBB, PHINode *P,
2561+
VPValue *Op) {
2562+
for (VPRecipeBase &R : *VPBB) {
2563+
auto *IRI = cast<VPIRInstruction>(&R);
2564+
if (&IRI->getInstruction() == P) {
2565+
IRI->addOperand(Op);
2566+
break;
2567+
}
2568+
}
2569+
}
2570+
2571+
void InnerLoopVectorizer::createInductionResumeValue(
25502572
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
25512573
ArrayRef<BasicBlock *> BypassBlocks,
25522574
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -2581,27 +2603,28 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
25812603
}
25822604
}
25832605

2584-
// Create phi nodes to merge from the backedge-taken check block.
2585-
PHINode *BCResumeVal =
2586-
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
2587-
LoopScalarPreHeader->getFirstNonPHIIt());
2588-
// Copy original phi DL over to the new one.
2589-
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
2606+
VPBasicBlock *MiddleVPBB =
2607+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
25902608

2591-
// The new PHI merges the original incoming value, in case of a bypass,
2592-
// or the value at the end of the vectorized loop.
2593-
BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
2609+
VPBasicBlock *ScalarPHVPBB = nullptr;
2610+
if (MiddleVPBB->getNumSuccessors() == 2) {
2611+
// Order is strict: first is the exit block, second is the scalar preheader.
2612+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2613+
} else {
2614+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2615+
}
25942616

2595-
// Fix the scalar body counter (PHI node).
2596-
// The old induction's phi node in the scalar body needs the truncated
2597-
// value.
2598-
for (BasicBlock *BB : BypassBlocks)
2599-
BCResumeVal->addIncoming(II.getStartValue(), BB);
2617+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2618+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2619+
VPInstruction::ResumePhi,
2620+
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2621+
OrigPhi->getDebugLoc(), "bc.resume.val");
26002622

2601-
if (AdditionalBypass.first)
2602-
BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
2603-
EndValueFromAdditionalBypass);
2604-
return BCResumeVal;
2623+
auto *ScalarLoopHeader =
2624+
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2625+
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
2626+
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
2627+
EndValueFromAdditionalBypass};
26052628
}
26062629

26072630
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2634,10 +2657,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
26342657
for (const auto &InductionEntry : Legal->getInductionVars()) {
26352658
PHINode *OrigPhi = InductionEntry.first;
26362659
const InductionDescriptor &II = InductionEntry.second;
2637-
PHINode *BCResumeVal = createInductionResumeValue(
2638-
OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
2639-
AdditionalBypass);
2640-
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
2660+
createInductionResumeValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2661+
LoopBypassBlocks, AdditionalBypass);
26412662
}
26422663
}
26432664

@@ -7738,6 +7759,25 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
77387759
// the second pass for the scalar loop. The induction resume values for the
77397760
// inductions in the epilogue loop are created before executing the plan for
77407761
// the epilogue loop.
7762+
for (VPRecipeBase &R :
7763+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
7764+
// Create induction resume values for both widened pointer and
7765+
// integer/fp inductions and update the start value of the induction
7766+
// recipes to use the resume value.
7767+
PHINode *IndPhi = nullptr;
7768+
const InductionDescriptor *ID;
7769+
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
7770+
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
7771+
ID = &Ind->getInductionDescriptor();
7772+
} else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7773+
IndPhi = WidenInd->getPHINode();
7774+
ID = &WidenInd->getInductionDescriptor();
7775+
} else
7776+
continue;
7777+
7778+
createInductionResumeValue(IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
7779+
LoopBypassBlocks);
7780+
}
77417781

77427782
return {LoopVectorPreHeader, nullptr};
77437783
}
@@ -8911,14 +8951,9 @@ static void addLiveOutsForFirstOrderRecurrences(
89118951
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
89128952
"scalar.recur.init");
89138953
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8914-
for (VPRecipeBase &R :
8915-
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8916-
auto *IRI = cast<VPIRInstruction>(&R);
8917-
if (&IRI->getInstruction() == FORPhi) {
8918-
IRI->addOperand(ResumePhiRecipe);
8919-
break;
8920-
}
8921-
}
8954+
addOperandToPhiInVPIRBasicBlock(
8955+
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor()), FORPhi,
8956+
ResumePhiRecipe);
89228957

89238958
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
89248959
// Extract the penultimate value of the recurrence and use it as operand for
@@ -9645,7 +9680,7 @@ static bool processLoopInVPlanNativePath(
96459680
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(),
96469681
AddBranchWeights);
96479682
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
9648-
VF.Width, 1, LVL, &CM, BFI, PSI, Checks);
9683+
VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan);
96499684
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
96509685
<< L->getHeader()->getParent()->getName() << "\"\n");
96519686
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
@@ -10133,11 +10168,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1013310168
assert(IC > 1 && "interleave count should not be 1 or 0");
1013410169
// If we decided that it is not legal to vectorize the loop, then
1013510170
// interleave it.
10171+
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
1013610172
InnerLoopVectorizer Unroller(
1013710173
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
10138-
ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks);
10174+
ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan);
1013910175

10140-
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
1014110176
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
1014210177

1014310178
ORE->emit([&]() {
@@ -10159,10 +10194,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1015910194
// to be vectorized by executing the plan (potentially with a different
1016010195
// factor) again shortly afterwards.
1016110196
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
10197+
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
1016210198
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
10163-
EPI, &LVL, &CM, BFI, PSI, Checks);
10199+
EPI, &LVL, &CM, BFI, PSI, Checks,
10200+
*BestMainPlan);
1016410201

10165-
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
1016610202
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
1016710203
*BestMainPlan, MainILV, DT, true);
1016810204
++LoopsVectorized;
@@ -10171,11 +10207,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1017110207
// edges from the first pass.
1017210208
EPI.MainLoopVF = EPI.EpilogueVF;
1017310209
EPI.MainLoopUF = EPI.EpilogueUF;
10210+
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
1017410211
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
1017510212
ORE, EPI, &LVL, &CM, BFI, PSI,
10176-
Checks);
10213+
Checks, BestEpiPlan);
1017710214

10178-
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
1017910215
VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion();
1018010216
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
1018110217
Header->setName("vec.epilog.vector.body");
@@ -10224,23 +10260,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1022410260
RdxDesc.getRecurrenceStartValue());
1022510261
}
1022610262
} else {
10227-
// Create induction resume values for both widened pointer and
10228-
// integer/fp inductions and update the start value of the induction
10229-
// recipes to use the resume value.
10263+
// Retrieve the induction resume values for wide inductions from
10264+
// their original phi nodes in the scalar loop
1023010265
PHINode *IndPhi = nullptr;
10231-
const InductionDescriptor *ID;
1023210266
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1023310267
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
10234-
ID = &Ind->getInductionDescriptor();
1023510268
} else {
1023610269
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1023710270
IndPhi = WidenInd->getPHINode();
10238-
ID = &WidenInd->getInductionDescriptor();
1023910271
}
10240-
10241-
ResumeV = MainILV.createInductionResumeValue(
10242-
IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
10243-
{EPI.MainLoopIterationCountCheck});
10272+
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
1024410273
}
1024510274
assert(ResumeV && "Must have a resume value");
1024610275
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn(ResumeV);
@@ -10252,13 +10281,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1025210281
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1025310282
DT, true, &ExpandedSCEVs);
1025410283
++LoopsEpilogueVectorized;
10284+
BasicBlock *PH = L->getLoopPreheader();
1025510285

10286+
for (const auto &[IVPhi, _] : LVL.getInductionVars()) {
10287+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10288+
const auto &[BB, V] = EpilogILV.getInductionBypassValue(IVPhi);
10289+
Inc->setIncomingValueForBlock(BB, V);
10290+
}
1025610291
if (!MainILV.areSafetyChecksAdded())
1025710292
DisableRuntimeUnroll = true;
1025810293
} else {
1025910294
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
1026010295
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
10261-
PSI, Checks);
10296+
PSI, Checks, BestPlan);
1026210297
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
1026310298
++LoopsVectorized;
1026410299

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
630630
State.CFG
631631
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
632632
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
633-
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
633+
for (auto *OtherPred :
634+
reverse(to_vector(predecessors(Builder.GetInsertBlock())))) {
634635
assert(OtherPred != VPlanPred &&
635636
"VPlan predecessors should not be connected yet");
636637
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
3535
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
3636
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
3737
; CHECK: [[SCALAR_PH]]:
38-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
3938
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
39+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
4040
; CHECK-NEXT: br label %[[LOOP:.*]]
4141
; CHECK: [[LOOP]]:
4242
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,10 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
205205
; CHECK: vector.ph:
206206
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
207207
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
208+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
208209
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
209210
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
210211
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
211-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
212212
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
213213
; CHECK: vector.body:
214214
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -433,7 +433,7 @@ define void @test_widen_extended_induction(ptr %dst) {
433433
; CHECK: vec.epilog.middle.block:
434434
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
435435
; CHECK: vec.epilog.scalar.ph:
436-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
436+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
437437
; CHECK-NEXT: br label [[LOOP:%.*]]
438438
; CHECK: loop:
439439
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
7373
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3
7474
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
7575
; CHECK: scalar.ph:
76-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
76+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
7878
; CHECK-NEXT: br label [[LOOP:%.*]]
7979
; CHECK: loop:
8080
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

0 commit comments

Comments
 (0)