Skip to content

Commit aac0b49

Browse files
committed
[VPlan] Update induction resume values in VPlan.
Updated ILV.createInductionResumeValues to directly update the VPIRInstructions wrapping the original phis with the created resume values. This is the first step towards modeling them completely in VPlan. Subsequent patches will move creation of the resume values completely into VPlan. Builds on top of #109975, which is included in this PR.
1 parent be27cb6 commit aac0b49

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+886
-852
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 93 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -467,11 +467,12 @@ class InnerLoopVectorizer {
467467
ElementCount MinProfitableTripCount,
468468
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
469469
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
470-
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
470+
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
471+
VPlan &Plan)
471472
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
472473
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
473474
Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
474-
PSI(PSI), RTChecks(RTChecks) {
475+
PSI(PSI), RTChecks(RTChecks), Plan(Plan) {
475476
// Query this against the original loop and save it here because the profile
476477
// of the original loop header may change as the transformation happens.
477478
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
@@ -522,7 +523,7 @@ class InnerLoopVectorizer {
522523
/// and the resume values can come from an additional bypass block, the \p
523524
/// AdditionalBypass pair provides information about the bypass block and the
524525
/// end value on the edge from bypass to this loop.
525-
PHINode *createInductionResumeValue(
526+
void createInductionResumeValue(
526527
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
527528
ArrayRef<BasicBlock *> BypassBlocks,
528529
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
@@ -535,6 +536,11 @@ class InnerLoopVectorizer {
535536
/// count of the original loop for both main loop and epilogue vectorization.
536537
void setTripCount(Value *TC) { TripCount = TC; }
537538

539+
std::pair<BasicBlock *, Value *>
540+
getInductionBypassValue(PHINode *OrigPhi) const {
541+
return InductionBypassValues.find(OrigPhi)->second;
542+
}
543+
538544
protected:
539545
friend class LoopVectorizationPlanner;
540546

@@ -680,6 +686,11 @@ class InnerLoopVectorizer {
680686
/// Structure to hold information about generated runtime checks, responsible
681687
/// for cleaning the checks, if vectorization turns out unprofitable.
682688
GeneratedRTChecks &RTChecks;
689+
690+
/// Mapping of induction phis to their bypass values and bypass blocks.
691+
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
692+
693+
VPlan &Plan;
683694
};
684695

685696
/// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -721,10 +732,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
721732
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
722733
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
723734
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
724-
GeneratedRTChecks &Checks)
735+
GeneratedRTChecks &Checks, VPlan &Plan)
725736
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
726737
EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
727-
CM, BFI, PSI, Checks),
738+
CM, BFI, PSI, Checks, Plan),
728739
EPI(EPI) {}
729740

730741
// Override this function to handle the more complex control flow around the
@@ -761,9 +772,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
761772
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
762773
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
763774
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
764-
GeneratedRTChecks &Check)
775+
GeneratedRTChecks &Check, VPlan &Plan)
765776
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
766-
EPI, LVL, CM, BFI, PSI, Check) {}
777+
EPI, LVL, CM, BFI, PSI, Check, Plan) {}
767778
/// Implements the interface for creating a vectorized skeleton using the
768779
/// *main loop* strategy (ie the first pass of vplan execution).
769780
std::pair<BasicBlock *, Value *>
@@ -790,9 +801,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
790801
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
791802
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
792803
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
793-
GeneratedRTChecks &Checks)
804+
GeneratedRTChecks &Checks, VPlan &Plan)
794805
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
795-
EPI, LVL, CM, BFI, PSI, Checks) {
806+
EPI, LVL, CM, BFI, PSI, Checks, Plan) {
796807
TripCount = EPI.TripCount;
797808
}
798809
/// Implements the interface for creating a vectorized skeleton using the
@@ -2555,7 +2566,18 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25552566
nullptr, Twine(Prefix) + "scalar.ph");
25562567
}
25572568

2558-
PHINode *InnerLoopVectorizer::createInductionResumeValue(
2569+
static void addOperandToPhiInVPIRBasicBlock(VPIRBasicBlock *VPBB, PHINode *P,
2570+
VPValue *Op) {
2571+
for (VPRecipeBase &R : *VPBB) {
2572+
auto *IRI = cast<VPIRInstruction>(&R);
2573+
if (&IRI->getInstruction() == P) {
2574+
IRI->addOperand(Op);
2575+
break;
2576+
}
2577+
}
2578+
}
2579+
2580+
void InnerLoopVectorizer::createInductionResumeValue(
25592581
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
25602582
ArrayRef<BasicBlock *> BypassBlocks,
25612583
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -2590,27 +2612,28 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
25902612
}
25912613
}
25922614

2593-
// Create phi nodes to merge from the backedge-taken check block.
2594-
PHINode *BCResumeVal =
2595-
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
2596-
LoopScalarPreHeader->getFirstNonPHIIt());
2597-
// Copy original phi DL over to the new one.
2598-
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
2615+
VPBasicBlock *MiddleVPBB =
2616+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
25992617

2600-
// The new PHI merges the original incoming value, in case of a bypass,
2601-
// or the value at the end of the vectorized loop.
2602-
BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
2618+
VPBasicBlock *ScalarPHVPBB = nullptr;
2619+
if (MiddleVPBB->getNumSuccessors() == 2) {
2620+
// Order is strict: first is the exit block, second is the scalar preheader.
2621+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2622+
} else {
2623+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2624+
}
26032625

2604-
// Fix the scalar body counter (PHI node).
2605-
// The old induction's phi node in the scalar body needs the truncated
2606-
// value.
2607-
for (BasicBlock *BB : BypassBlocks)
2608-
BCResumeVal->addIncoming(II.getStartValue(), BB);
2626+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2627+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2628+
VPInstruction::ResumePhi,
2629+
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2630+
OrigPhi->getDebugLoc(), "bc.resume.val");
26092631

2610-
if (AdditionalBypass.first)
2611-
BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
2612-
EndValueFromAdditionalBypass);
2613-
return BCResumeVal;
2632+
auto *ScalarLoopHeader =
2633+
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2634+
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
2635+
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
2636+
EndValueFromAdditionalBypass};
26142637
}
26152638

26162639
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2643,10 +2666,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
26432666
for (const auto &InductionEntry : Legal->getInductionVars()) {
26442667
PHINode *OrigPhi = InductionEntry.first;
26452668
const InductionDescriptor &II = InductionEntry.second;
2646-
PHINode *BCResumeVal = createInductionResumeValue(
2647-
OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
2648-
AdditionalBypass);
2649-
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
2669+
createInductionResumeValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2670+
LoopBypassBlocks, AdditionalBypass);
26502671
}
26512672
}
26522673

@@ -7678,6 +7699,25 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
76787699
// the second pass for the scalar loop. The induction resume values for the
76797700
// inductions in the epilogue loop are created before executing the plan for
76807701
// the epilogue loop.
7702+
for (VPRecipeBase &R :
7703+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
7704+
// Create induction resume values for both widened pointer and
7705+
// integer/fp inductions and update the start value of the induction
7706+
// recipes to use the resume value.
7707+
PHINode *IndPhi = nullptr;
7708+
const InductionDescriptor *ID;
7709+
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
7710+
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
7711+
ID = &Ind->getInductionDescriptor();
7712+
} else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7713+
IndPhi = WidenInd->getPHINode();
7714+
ID = &WidenInd->getInductionDescriptor();
7715+
} else
7716+
continue;
7717+
7718+
createInductionResumeValue(IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
7719+
LoopBypassBlocks);
7720+
}
76817721

76827722
return {LoopVectorPreHeader, nullptr};
76837723
}
@@ -8848,14 +8888,9 @@ static void addLiveOutsForFirstOrderRecurrences(
88488888
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
88498889
"scalar.recur.init");
88508890
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8851-
for (VPRecipeBase &R :
8852-
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8853-
auto *IRI = cast<VPIRInstruction>(&R);
8854-
if (&IRI->getInstruction() == FORPhi) {
8855-
IRI->addOperand(ResumePhiRecipe);
8856-
break;
8857-
}
8858-
}
8891+
addOperandToPhiInVPIRBasicBlock(
8892+
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor()), FORPhi,
8893+
ResumePhiRecipe);
88598894

88608895
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
88618896
// Extract the penultimate value of the recurrence and use it as operand for
@@ -9582,7 +9617,7 @@ static bool processLoopInVPlanNativePath(
95829617
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
95839618
F->getDataLayout(), AddBranchWeights);
95849619
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
9585-
VF.Width, 1, LVL, &CM, BFI, PSI, Checks);
9620+
VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan);
95869621
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
95879622
<< L->getHeader()->getParent()->getName() << "\"\n");
95889623
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
@@ -10070,11 +10105,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1007010105
assert(IC > 1 && "interleave count should not be 1 or 0");
1007110106
// If we decided that it is not legal to vectorize the loop, then
1007210107
// interleave it.
10108+
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
1007310109
InnerLoopVectorizer Unroller(
1007410110
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
10075-
ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks);
10111+
ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan);
1007610112

10077-
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
1007810113
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
1007910114

1008010115
ORE->emit([&]() {
@@ -10096,10 +10131,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1009610131
// to be vectorized by executing the plan (potentially with a different
1009710132
// factor) again shortly afterwards.
1009810133
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
10134+
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
1009910135
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
10100-
EPI, &LVL, &CM, BFI, PSI, Checks);
10136+
EPI, &LVL, &CM, BFI, PSI, Checks,
10137+
*BestMainPlan);
1010110138

10102-
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
1010310139
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
1010410140
*BestMainPlan, MainILV, DT, true);
1010510141
++LoopsVectorized;
@@ -10108,11 +10144,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1010810144
// edges from the first pass.
1010910145
EPI.MainLoopVF = EPI.EpilogueVF;
1011010146
EPI.MainLoopUF = EPI.EpilogueUF;
10147+
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
1011110148
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
1011210149
ORE, EPI, &LVL, &CM, BFI, PSI,
10113-
Checks);
10150+
Checks, BestEpiPlan);
1011410151

10115-
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
1011610152
VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion();
1011710153
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
1011810154
Header->setName("vec.epilog.vector.body");
@@ -10161,23 +10197,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1016110197
RdxDesc.getRecurrenceStartValue());
1016210198
}
1016310199
} else {
10164-
// Create induction resume values for both widened pointer and
10165-
// integer/fp inductions and update the start value of the induction
10166-
// recipes to use the resume value.
10200+
// Retrieve the induction resume values for wide inductions from
10201+
// their original phi nodes in the scalar loop
1016710202
PHINode *IndPhi = nullptr;
10168-
const InductionDescriptor *ID;
1016910203
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1017010204
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
10171-
ID = &Ind->getInductionDescriptor();
1017210205
} else {
1017310206
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1017410207
IndPhi = WidenInd->getPHINode();
10175-
ID = &WidenInd->getInductionDescriptor();
1017610208
}
10177-
10178-
ResumeV = MainILV.createInductionResumeValue(
10179-
IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
10180-
{EPI.MainLoopIterationCountCheck});
10209+
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
1018110210
}
1018210211
assert(ResumeV && "Must have a resume value");
1018310212
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn(ResumeV);
@@ -10189,13 +10218,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1018910218
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1019010219
DT, true, &ExpandedSCEVs);
1019110220
++LoopsEpilogueVectorized;
10221+
BasicBlock *PH = L->getLoopPreheader();
1019210222

10223+
for (const auto &[IVPhi, _] : LVL.getInductionVars()) {
10224+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10225+
const auto &[BB, V] = EpilogILV.getInductionBypassValue(IVPhi);
10226+
Inc->setIncomingValueForBlock(BB, V);
10227+
}
1019310228
if (!MainILV.areSafetyChecksAdded())
1019410229
DisableRuntimeUnroll = true;
1019510230
} else {
1019610231
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
1019710232
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
10198-
PSI, Checks);
10233+
PSI, Checks, BestPlan);
1019910234
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
1020010235
++LoopsVectorized;
1020110236

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
625625
State.CFG
626626
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
627627
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
628-
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
628+
for (auto *OtherPred :
629+
reverse(to_vector(predecessors(Builder.GetInsertBlock())))) {
629630
assert(OtherPred != VPlanPred &&
630631
"VPlan predecessors should not be connected yet");
631632
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
3535
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
3636
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
3737
; CHECK: [[SCALAR_PH]]:
38-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
3938
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
39+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
4040
; CHECK-NEXT: br label %[[LOOP:.*]]
4141
; CHECK: [[LOOP]]:
4242
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,10 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
205205
; CHECK: vector.ph:
206206
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
207207
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
208+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
208209
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
209210
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
210211
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
211-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
212212
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
213213
; CHECK: vector.body:
214214
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -433,7 +433,7 @@ define void @test_widen_extended_induction(ptr %dst) {
433433
; CHECK: vec.epilog.middle.block:
434434
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
435435
; CHECK: vec.epilog.scalar.ph:
436-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
436+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
437437
; CHECK-NEXT: br label [[LOOP:%.*]]
438438
; CHECK: loop:
439439
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
7373
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3
7474
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
7575
; CHECK: scalar.ph:
76-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
76+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
7878
; CHECK-NEXT: br label [[LOOP:%.*]]
7979
; CHECK: loop:
8080
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

0 commit comments

Comments
 (0)