Skip to content

Commit 2c87133

Browse files
committed
Reapply "[VPlan] Update final IV exit value via VPlan. (#112147)"
This reverts the revert commit 58326f1. The build failure in sanitizer stage2 builds has been fixed with 0d39fe6. Original commit message: Model updating IV users directly in VPlan, replacing fixupIVUsers. Now simple extracts are created for all phis in the exit block during initial VPlan construction. A later VPlan transform (optimizeInductionExitUsers) replaces extracts of inductions with their pre-computed values if possible. This completes the transition towards modeling all live-outs directly in VPlan. There are a few follow-ups: * emit extracts initially also for resume phis, and optimize them together with IV exit users * support VPlans with multiple exits in optimizeInductionExitUsers. Depends on #110004, #109975 and #112145.
1 parent 8d90473 commit 2c87133

10 files changed

+288
-216
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 21 additions & 193 deletions
Original file line number | Diff line number | Diff line change
@@ -543,11 +543,6 @@ class InnerLoopVectorizer {
543543
protected:
544544
friend class LoopVectorizationPlanner;
545545

546-
/// Set up the values of the IVs correctly when exiting the vector loop.
547-
virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
548-
Value *VectorTripCount, BasicBlock *MiddleBlock,
549-
VPTransformState &State);
550-
551546
/// Iteratively sink the scalarized operands of a predicated instruction into
552547
/// the block that was created for it.
553548
void sinkScalarOperands(Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785780
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
786781
void printDebugTracesAtStart() override;
787782
void printDebugTracesAtEnd() override;
788-
789-
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
790-
Value *VectorTripCount, BasicBlock *MiddleBlock,
791-
VPTransformState &State) override {};
792783
};
793784

794785
// A specialized derived class of inner loop vectorizer that performs
@@ -2782,97 +2773,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27822773
return LoopVectorPreHeader;
27832774
}
27842775

2785-
// Fix up external users of the induction variable. At this point, we are
2786-
// in LCSSA form, with all external PHIs that use the IV having one input value,
2787-
// coming from the remainder loop. We need those PHIs to also have a correct
2788-
// value for the IV when arriving directly from the middle block.
2789-
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2790-
const InductionDescriptor &II,
2791-
Value *VectorTripCount,
2792-
BasicBlock *MiddleBlock,
2793-
VPTransformState &State) {
2794-
// There are two kinds of external IV usages - those that use the value
2795-
// computed in the last iteration (the PHI) and those that use the penultimate
2796-
// value (the value that feeds into the phi from the loop latch).
2797-
// We allow both, but they, obviously, have different values.
2798-
2799-
DenseMap<Value *, Value *> MissingVals;
2800-
2801-
Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2802-
OrigLoop->getLoopPreheader()))
2803-
->getIncomingValueForBlock(MiddleBlock);
2804-
2805-
// An external user of the last iteration's value should see the value that
2806-
// the remainder loop uses to initialize its own IV.
2807-
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2808-
for (User *U : PostInc->users()) {
2809-
Instruction *UI = cast<Instruction>(U);
2810-
if (!OrigLoop->contains(UI)) {
2811-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2812-
MissingVals[UI] = EndValue;
2813-
}
2814-
}
2815-
2816-
// An external user of the penultimate value need to see EndValue - Step.
2817-
// The simplest way to get this is to recompute it from the constituent SCEVs,
2818-
// that is Start + (Step * (CRD - 1)).
2819-
for (User *U : OrigPhi->users()) {
2820-
auto *UI = cast<Instruction>(U);
2821-
if (!OrigLoop->contains(UI)) {
2822-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2823-
IRBuilder<> B(MiddleBlock->getTerminator());
2824-
2825-
// Fast-math-flags propagate from the original induction instruction.
2826-
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2827-
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2828-
2829-
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2830-
assert(StepVPV && "step must have been expanded during VPlan execution");
2831-
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2832-
: State.get(StepVPV, VPLane(0));
2833-
Value *Escape = nullptr;
2834-
if (EndValue->getType()->isIntegerTy())
2835-
Escape = B.CreateSub(EndValue, Step);
2836-
else if (EndValue->getType()->isPointerTy())
2837-
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2838-
else {
2839-
assert(EndValue->getType()->isFloatingPointTy() &&
2840-
"Unexpected induction type");
2841-
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2842-
Instruction::FAdd
2843-
? Instruction::FSub
2844-
: Instruction::FAdd,
2845-
EndValue, Step);
2846-
}
2847-
Escape->setName("ind.escape");
2848-
MissingVals[UI] = Escape;
2849-
}
2850-
}
2851-
2852-
assert((MissingVals.empty() ||
2853-
all_of(MissingVals,
2854-
[MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2855-
return all_of(
2856-
predecessors(cast<Instruction>(P.first)->getParent()),
2857-
[MiddleBlock, this](BasicBlock *Pred) {
2858-
return Pred == MiddleBlock ||
2859-
Pred == OrigLoop->getLoopLatch();
2860-
});
2861-
})) &&
2862-
"Expected escaping values from latch/middle.block only");
2863-
2864-
for (auto &I : MissingVals) {
2865-
PHINode *PHI = cast<PHINode>(I.first);
2866-
// One corner case we have to handle is two IVs "chasing" each-other,
2867-
// that is %IV2 = phi [...], [ %IV1, %latch ]
2868-
// In this case, if IV1 has an external use, we need to avoid adding both
2869-
// "last value of IV1" and "penultimate value of IV2". So, verify that we
2870-
// don't already have an incoming value for the middle block.
2871-
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2872-
PHI->addIncoming(I.second, MiddleBlock);
2873-
}
2874-
}
2875-
28762776
namespace {
28772777

28782778
struct CSEDenseMapInfo {
@@ -2999,24 +2899,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29992899
for (PHINode &PN : Exit->phis())
30002900
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
30012901

3002-
if (Cost->requiresScalarEpilogue(VF.isVector())) {
3003-
// No edge from the middle block to the unique exit block has been inserted
3004-
// and there is nothing to fix from vector loop; phis should have incoming
3005-
// from scalar loop only.
3006-
} else {
3007-
// TODO: Check in VPlan to see if IV users need fixing instead of checking
3008-
// the cost model.
3009-
3010-
// If we inserted an edge from the middle block to the unique exit block,
3011-
// update uses outside the loop (phis) to account for the newly inserted
3012-
// edge.
3013-
3014-
// Fix-up external users of the induction variables.
3015-
for (const auto &Entry : Legal->getInductionVars())
3016-
fixupIVUsers(Entry.first, Entry.second,
3017-
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
3018-
}
3019-
30202902
// Don't apply optimizations below when no vector region remains, as they all
30212903
// require a vector loop at the moment.
30222904
if (!State.Plan->getVectorLoopRegion())
@@ -9049,11 +8931,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
90498931
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
90508932
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
90518933
/// the end value of the induction.
9052-
static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
9053-
VPBuilder &VectorPHBuilder,
9054-
VPBuilder &ScalarPHBuilder,
9055-
VPTypeAnalysis &TypeInfo,
9056-
VPValue *VectorTC) {
8934+
static VPInstruction *addResumePhiRecipeForInduction(
8935+
VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8936+
VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
90578937
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
90588938
// Truncated wide inductions resume from the last lane of their vector value
90598939
// in the last vector iteration which is handled elsewhere.
@@ -9087,8 +8967,10 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
90878967

90888968
/// Create resume phis in the scalar preheader for first-order recurrences,
90898969
/// reductions and inductions, and update the VPIRInstructions wrapping the
9090-
/// original phis in the scalar header.
9091-
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8970+
/// original phis in the scalar header. End values for inductions are added to
8971+
/// \p IVEndValues.
8972+
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8973+
DenseMap<VPValue *, VPValue *> &IVEndValues) {
90928974
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
90938975
auto *ScalarPH = Plan.getScalarPreheader();
90948976
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -9105,11 +8987,16 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
91058987
if (!ScalarPhiI)
91068988
break;
91078989

8990+
// TODO: Extract final value from induction recipe initially, optimize to
8991+
// pre-computed end value together in optimizeInductionExitUsers.
91088992
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
91098993
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
9110-
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
8994+
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
91118995
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
91128996
&Plan.getVectorTripCount())) {
8997+
assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
8998+
"Expected a ResumePhi");
8999+
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
91139000
ScalarPhiIRI->addOperand(ResumePhi);
91149001
continue;
91159002
}
@@ -9140,65 +9027,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
91409027
}
91419028
}
91429029

9143-
/// Return true if \p VPV is an optimizable IV or IV use. That is, if \p VPV is
9144-
/// either an untruncated wide induction, or if it increments a wide induction
9145-
/// by its step.
9146-
static bool isOptimizableIVOrUse(VPValue *VPV) {
9147-
VPRecipeBase *Def = VPV->getDefiningRecipe();
9148-
if (!Def)
9149-
return false;
9150-
auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def);
9151-
if (WideIV) {
9152-
// VPV itself is a wide induction, separately compute the end value for exit
9153-
// users if it is not a truncated IV.
9154-
return isa<VPWidenPointerInductionRecipe>(WideIV) ||
9155-
!cast<VPWidenIntOrFpInductionRecipe>(WideIV)->getTruncInst();
9156-
}
9157-
9158-
// Check if VPV is an optimizable induction increment.
9159-
if (Def->getNumOperands() != 2)
9160-
return false;
9161-
WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
9162-
if (!WideIV)
9163-
WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
9164-
if (!WideIV)
9165-
return false;
9166-
9167-
using namespace VPlanPatternMatch;
9168-
auto &ID = WideIV->getInductionDescriptor();
9169-
9170-
// Check if VPV increments the induction by the induction step.
9171-
VPValue *IVStep = WideIV->getStepValue();
9172-
switch (ID.getInductionOpcode()) {
9173-
case Instruction::Add:
9174-
return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
9175-
m_Specific(IVStep)));
9176-
case Instruction::FAdd:
9177-
return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
9178-
m_Specific(IVStep)));
9179-
case Instruction::FSub:
9180-
return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
9181-
m_Specific(IVStep)));
9182-
case Instruction::Sub: {
9183-
// IVStep will be the negated step of the subtraction. Check if Step == -1 *
9184-
// IVStep.
9185-
VPValue *Step;
9186-
if (!match(VPV, m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
9187-
!Step->isLiveIn() || !IVStep->isLiveIn())
9188-
return false;
9189-
auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
9190-
auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
9191-
return StepCI && IVStepCI &&
9192-
StepCI->getValue() == (-1 * IVStepCI->getValue());
9193-
}
9194-
default:
9195-
return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
9196-
match(VPV, m_GetElementPtr(m_Specific(WideIV),
9197-
m_Specific(WideIV->getStepValue())));
9198-
}
9199-
llvm_unreachable("should have been covered by switch above");
9200-
}
9201-
92029030
// Collect VPIRInstructions for phis in the exit blocks that are modeled
92039031
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
92049032
// modeled explicitly yet and won't be included. Those are un-truncated
@@ -9228,12 +9056,6 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
92289056
}
92299057
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
92309058
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9231-
// Exit values for inductions are computed and updated outside of VPlan
9232-
// and independent of induction recipes.
9233-
// TODO: Compute induction exit values in VPlan.
9234-
if (isOptimizableIVOrUse(V) &&
9235-
ExitVPBB->getSinglePredecessor() == MiddleVPBB)
9236-
continue;
92379059
ExitUsersToFix.insert(ExitIRI);
92389060
ExitIRI->addOperand(V);
92399061
}
@@ -9253,6 +9075,7 @@ addUsersInExitBlocks(VPlan &Plan,
92539075

92549076
auto *MiddleVPBB = Plan.getMiddleBlock();
92559077
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9078+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
92569079

92579080
// Introduce extract for exiting values and update the VPIRInstructions
92589081
// modeling the corresponding LCSSA phis.
@@ -9574,7 +9397,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95749397
VPlanTransforms::handleUncountableEarlyExit(
95759398
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
95769399
}
9577-
addScalarResumePhis(RecipeBuilder, *Plan);
9400+
DenseMap<VPValue *, VPValue *> IVEndValues;
9401+
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
95789402
SetVector<VPIRInstruction *> ExitUsersToFix =
95799403
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
95809404
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9657,6 +9481,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
96579481
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
96589482
WithoutRuntimeCheck);
96599483
}
9484+
VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues);
96609485

96619486
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
96629487
return Plan;
@@ -9708,7 +9533,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
97089533
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
97099534
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
97109535
}
9711-
addScalarResumePhis(RecipeBuilder, *Plan);
9536+
DenseMap<VPValue *, VPValue *> IVEndValues;
9537+
// TODO: IVEndValues are not used yet in the native path, to optimize exit
9538+
// values.
9539+
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
97129540

97139541
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
97149542
return Plan;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,6 +1422,12 @@ class VPIRInstruction : public VPRecipeBase {
14221422
"Op must be an operand of the recipe");
14231423
return true;
14241424
}
1425+
1426+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1427+
assert(is_contained(operands(), Op) &&
1428+
"Op must be an operand of the recipe");
1429+
return true;
1430+
}
14251431
};
14261432

14271433
/// VPWidenRecipe is a recipe for producing a widened instruction using the

0 commit comments

Comments
 (0)