@@ -543,11 +543,6 @@ class InnerLoopVectorizer {
543
543
protected:
544
544
friend class LoopVectorizationPlanner;
545
545
546
- /// Set up the values of the IVs correctly when exiting the vector loop.
547
- virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
548
- Value *VectorTripCount, BasicBlock *MiddleBlock,
549
- VPTransformState &State);
550
-
551
546
/// Iteratively sink the scalarized operands of a predicated instruction into
552
547
/// the block that was created for it.
553
548
void sinkScalarOperands(Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785
780
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
786
781
void printDebugTracesAtStart() override;
787
782
void printDebugTracesAtEnd() override;
788
-
789
- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
790
- Value *VectorTripCount, BasicBlock *MiddleBlock,
791
- VPTransformState &State) override {};
792
783
};
793
784
794
785
// A specialized derived class of inner loop vectorizer that performs
@@ -2775,97 +2766,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2775
2766
return LoopVectorPreHeader;
2776
2767
}
2777
2768
2778
- // Fix up external users of the induction variable. At this point, we are
2779
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2780
- // coming from the remainder loop. We need those PHIs to also have a correct
2781
- // value for the IV when arriving directly from the middle block.
2782
- void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2783
- const InductionDescriptor &II,
2784
- Value *VectorTripCount,
2785
- BasicBlock *MiddleBlock,
2786
- VPTransformState &State) {
2787
- // There are two kinds of external IV usages - those that use the value
2788
- // computed in the last iteration (the PHI) and those that use the penultimate
2789
- // value (the value that feeds into the phi from the loop latch).
2790
- // We allow both, but they, obviously, have different values.
2791
-
2792
- DenseMap<Value *, Value *> MissingVals;
2793
-
2794
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2795
- OrigLoop->getLoopPreheader()))
2796
- ->getIncomingValueForBlock(MiddleBlock);
2797
-
2798
- // An external user of the last iteration's value should see the value that
2799
- // the remainder loop uses to initialize its own IV.
2800
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2801
- for (User *U : PostInc->users()) {
2802
- Instruction *UI = cast<Instruction>(U);
2803
- if (!OrigLoop->contains(UI)) {
2804
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2805
- MissingVals[UI] = EndValue;
2806
- }
2807
- }
2808
-
2809
- // An external user of the penultimate value need to see EndValue - Step.
2810
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2811
- // that is Start + (Step * (CRD - 1)).
2812
- for (User *U : OrigPhi->users()) {
2813
- auto *UI = cast<Instruction>(U);
2814
- if (!OrigLoop->contains(UI)) {
2815
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2816
- IRBuilder<> B(MiddleBlock->getTerminator());
2817
-
2818
- // Fast-math-flags propagate from the original induction instruction.
2819
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2820
- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2821
-
2822
- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2823
- assert(StepVPV && "step must have been expanded during VPlan execution");
2824
- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2825
- : State.get(StepVPV, VPLane(0));
2826
- Value *Escape = nullptr;
2827
- if (EndValue->getType()->isIntegerTy())
2828
- Escape = B.CreateSub(EndValue, Step);
2829
- else if (EndValue->getType()->isPointerTy())
2830
- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2831
- else {
2832
- assert(EndValue->getType()->isFloatingPointTy() &&
2833
- "Unexpected induction type");
2834
- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2835
- Instruction::FAdd
2836
- ? Instruction::FSub
2837
- : Instruction::FAdd,
2838
- EndValue, Step);
2839
- }
2840
- Escape->setName("ind.escape");
2841
- MissingVals[UI] = Escape;
2842
- }
2843
- }
2844
-
2845
- assert((MissingVals.empty() ||
2846
- all_of(MissingVals,
2847
- [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2848
- return all_of(
2849
- predecessors(cast<Instruction>(P.first)->getParent()),
2850
- [MiddleBlock, this](BasicBlock *Pred) {
2851
- return Pred == MiddleBlock ||
2852
- Pred == OrigLoop->getLoopLatch();
2853
- });
2854
- })) &&
2855
- "Expected escaping values from latch/middle.block only");
2856
-
2857
- for (auto &I : MissingVals) {
2858
- PHINode *PHI = cast<PHINode>(I.first);
2859
- // One corner case we have to handle is two IVs "chasing" each-other,
2860
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2861
- // In this case, if IV1 has an external use, we need to avoid adding both
2862
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2863
- // don't already have an incoming value for the middle block.
2864
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2865
- PHI->addIncoming(I.second, MiddleBlock);
2866
- }
2867
- }
2868
-
2869
2769
namespace {
2870
2770
2871
2771
struct CSEDenseMapInfo {
@@ -2994,24 +2894,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2994
2894
for (PHINode &PN : Exit->phis())
2995
2895
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
2996
2896
2997
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
2998
- // No edge from the middle block to the unique exit block has been inserted
2999
- // and there is nothing to fix from vector loop; phis should have incoming
3000
- // from scalar loop only.
3001
- } else {
3002
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
3003
- // the cost model.
3004
-
3005
- // If we inserted an edge from the middle block to the unique exit block,
3006
- // update uses outside the loop (phis) to account for the newly inserted
3007
- // edge.
3008
-
3009
- // Fix-up external users of the induction variables.
3010
- for (const auto &Entry : Legal->getInductionVars())
3011
- fixupIVUsers(Entry.first, Entry.second,
3012
- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
3013
- }
3014
-
3015
2897
for (Instruction *PI : PredicatedInstructions)
3016
2898
sinkScalarOperands(&*PI);
3017
2899
@@ -8866,11 +8748,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8866
8748
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
8867
8749
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8868
8750
/// the end value of the induction.
8869
- static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8870
- VPBuilder &VectorPHBuilder,
8871
- VPBuilder &ScalarPHBuilder,
8872
- VPTypeAnalysis &TypeInfo,
8873
- VPValue *VectorTC) {
8751
+ static VPValue *addResumePhiRecipeForInduction(
8752
+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8753
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8754
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8874
8755
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8875
8756
// Truncated wide inductions resume from the last lane of their vector value
8876
8757
// in the last vector iteration which is handled elsewhere.
@@ -8895,6 +8776,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8895
8776
ScalarTypeOfWideIV);
8896
8777
}
8897
8778
8779
+ EndValues[WideIV] = EndValue;
8898
8780
auto *ResumePhiRecipe =
8899
8781
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
8900
8782
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8904,7 +8786,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8904
8786
/// Create resume phis in the scalar preheader for first-order recurrences,
8905
8787
/// reductions and inductions, and update the VPIRInstructions wrapping the
8906
8788
/// original phis in the scalar header.
8907
- static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8789
+ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8790
+ Loop *OrigLoop,
8791
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8908
8792
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8909
8793
auto *ScalarPH = Plan.getScalarPreheader();
8910
8794
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8924,7 +8808,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8924
8808
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8925
8809
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
8926
8810
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8927
- &Plan.getVectorTripCount())) {
8811
+ &Plan.getVectorTripCount(), EndValues )) {
8928
8812
ScalarPhiIRI->addOperand(ResumePhi);
8929
8813
continue;
8930
8814
}
@@ -9009,9 +8893,9 @@ static bool isIVUse(VPValue *Incoming) {
9009
8893
// modeled explicitly yet and won't be included. Those are un-truncated
9010
8894
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
9011
8895
// increments.
9012
- static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
9013
- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan
9014
- ) {
8896
+ static SetVector<VPIRInstruction *>
8897
+ collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder,
8898
+ VPlan &Plan ) {
9015
8899
auto *MiddleVPBB = Plan.getMiddleBlock();
9016
8900
SetVector<VPIRInstruction *> ExitUsersToFix;
9017
8901
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -9033,11 +8917,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
9033
8917
}
9034
8918
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
9035
8919
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9036
- // Exit values for inductions are computed and updated outside of VPlan
9037
- // and independent of induction recipes.
9038
- // TODO: Compute induction exit values in VPlan.
9039
- if (isIVUse(V) && ExitVPBB->getSinglePredecessor() == MiddleVPBB)
9040
- continue;
9041
8920
ExitUsersToFix.insert(ExitIRI);
9042
8921
ExitIRI->addOperand(V);
9043
8922
}
@@ -9046,17 +8925,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
9046
8925
return ExitUsersToFix;
9047
8926
}
9048
8927
8928
+ /// If \p Incoming is a user of a non-truncated induction, create recipes to
8929
+ /// compute the final value and update the user \p ExitIRI.
8930
+ static bool addInductionEndValue(
8931
+ VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
8932
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8933
+ DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
8934
+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8935
+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
8936
+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
8937
+ (isa<Instruction>(Incoming->getUnderlyingValue()) &&
8938
+ any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
8939
+ [&Inductions](User *U) {
8940
+ auto *P = dyn_cast<PHINode>(U);
8941
+ return P && Inductions.contains(P);
8942
+ }))) {
8943
+ VPValue *IV;
8944
+ if (auto *WideIV =
8945
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
8946
+ IV = WideIV;
8947
+ else if (auto *WideIV =
8948
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()
8949
+ ->getOperand(0)
8950
+ ->getDefiningRecipe()))
8951
+ IV = WideIV;
8952
+ else
8953
+ IV = Incoming->getDefiningRecipe()->getOperand(1);
8954
+ // Skip phi nodes already updated. This can be the case if 2 induction
8955
+ // phis chase each other.
8956
+ VPValue *EndValue = EndValues[IV];
8957
+ if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
8958
+ IsaPred<VPWidenIntOrFpInductionRecipe,
8959
+ VPWidenPointerInductionRecipe>)) {
8960
+ ExitIRI->setOperand(0, EndValue);
8961
+ return true;
8962
+ }
8963
+
8964
+ VPBuilder B(Plan.getMiddleBlock()->getTerminator());
8965
+ VPValue *Escape = nullptr;
8966
+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
8967
+ VPValue *Step = WideIV->getStepValue();
8968
+ Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
8969
+ if (ScalarTy->isIntegerTy())
8970
+ Escape =
8971
+ B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
8972
+ else if (ScalarTy->isPointerTy())
8973
+ Escape = B.createPtrAdd(
8974
+ EndValue,
8975
+ B.createNaryOp(Instruction::Sub,
8976
+ {Plan.getOrAddLiveIn(ConstantInt::get(
8977
+ Step->getLiveInIRValue()->getType(), 0)),
8978
+ Step}),
8979
+ {}, "ind.escape");
8980
+ else if (ScalarTy->isFloatingPointTy()) {
8981
+ const auto &ID = WideIV->getInductionDescriptor();
8982
+ Escape = B.createNaryOp(
8983
+ ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
8984
+ ? Instruction::FSub
8985
+ : Instruction::FAdd,
8986
+ {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
8987
+ } else {
8988
+ llvm_unreachable("all possible induction types must be handled");
8989
+ }
8990
+ ExitIRI->setOperand(0, Escape);
8991
+ return true;
8992
+ }
8993
+ return false;
8994
+ }
9049
8995
// Add exit values to \p Plan. Extracts are added for each entry in \p
9050
8996
// ExitUsersToFix if needed and their operands are updated. Returns true if all
9051
8997
// exit users can be handled, otherwise return false.
9052
- static bool
9053
- addUsersInExitBlocks(VPlan &Plan,
9054
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8998
+ static bool addUsersInExitBlocks(
8999
+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
9000
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
9001
+ DenseMap<VPValue *, VPValue *> &EndValues) {
9055
9002
if (ExitUsersToFix.empty())
9056
9003
return true;
9057
9004
9058
9005
auto *MiddleVPBB = Plan.getMiddleBlock();
9059
9006
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9007
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
9060
9008
9061
9009
// Introduce extract for exiting values and update the VPIRInstructions
9062
9010
// modeling the corresponding LCSSA phis.
@@ -9072,11 +9020,16 @@ addUsersInExitBlocks(VPlan &Plan,
9072
9020
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9073
9021
return false;
9074
9022
9023
+ VPValue *Incoming = ExitIRI->getOperand(0);
9024
+ if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
9025
+ TypeInfo))
9026
+ continue;
9027
+
9075
9028
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9076
9029
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9077
9030
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
9078
9031
IntegerType::get(Ctx, 32), 1))});
9079
- ExitIRI->setOperand(Idx , Ext);
9032
+ ExitIRI->setOperand(0 , Ext);
9080
9033
}
9081
9034
}
9082
9035
return true;
@@ -9371,11 +9324,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9371
9324
VPlanTransforms::handleUncountableEarlyExit(
9372
9325
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9373
9326
}
9374
- addScalarResumePhis(RecipeBuilder, *Plan);
9375
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9376
- OrigLoop, RecipeBuilder, *Plan);
9327
+ DenseMap<VPValue *, VPValue *> EndValues;
9328
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
9329
+ SetVector<VPIRInstruction *> ExitUsersToFix =
9330
+ collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
9377
9331
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9378
- if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9332
+ if (!addUsersInExitBlocks(*Plan, ExitUsersToFix,
9333
+ EndValues)) {
9379
9334
reportVectorizationFailure(
9380
9335
"Some exit values in loop with uncountable exit not supported yet",
9381
9336
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
@@ -9502,7 +9457,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9502
9457
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9503
9458
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
9504
9459
}
9505
- addScalarResumePhis(RecipeBuilder, *Plan);
9460
+ DenseMap<VPValue *, VPValue *> EndValues;
9461
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
9506
9462
9507
9463
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
9508
9464
return Plan;
0 commit comments