@@ -542,11 +542,6 @@ class InnerLoopVectorizer {
542
542
protected:
543
543
friend class LoopVectorizationPlanner;
544
544
545
- /// Set up the values of the IVs correctly when exiting the vector loop.
546
- virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
547
- Value *VectorTripCount, BasicBlock *MiddleBlock,
548
- VPTransformState &State);
549
-
550
545
/// Iteratively sink the scalarized operands of a predicated instruction into
551
546
/// the block that was created for it.
552
547
void sinkScalarOperands(Instruction *PredInst);
@@ -775,10 +770,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
775
770
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
776
771
void printDebugTracesAtStart() override;
777
772
void printDebugTracesAtEnd() override;
778
-
779
- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
780
- Value *VectorTripCount, BasicBlock *MiddleBlock,
781
- VPTransformState &State) override {};
782
773
};
783
774
784
775
// A specialized derived class of inner loop vectorizer that performs
@@ -2751,97 +2742,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2751
2742
return LoopVectorPreHeader;
2752
2743
}
2753
2744
2754
- // Fix up external users of the induction variable. At this point, we are
2755
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2756
- // coming from the remainder loop. We need those PHIs to also have a correct
2757
- // value for the IV when arriving directly from the middle block.
2758
- void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2759
- const InductionDescriptor &II,
2760
- Value *VectorTripCount,
2761
- BasicBlock *MiddleBlock,
2762
- VPTransformState &State) {
2763
- // There are two kinds of external IV usages - those that use the value
2764
- // computed in the last iteration (the PHI) and those that use the penultimate
2765
- // value (the value that feeds into the phi from the loop latch).
2766
- // We allow both, but they, obviously, have different values.
2767
-
2768
- DenseMap<Value *, Value *> MissingVals;
2769
-
2770
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2771
- OrigLoop->getLoopPreheader()))
2772
- ->getIncomingValueForBlock(MiddleBlock);
2773
-
2774
- // An external user of the last iteration's value should see the value that
2775
- // the remainder loop uses to initialize its own IV.
2776
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2777
- for (User *U : PostInc->users()) {
2778
- Instruction *UI = cast<Instruction>(U);
2779
- if (!OrigLoop->contains(UI)) {
2780
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2781
- MissingVals[UI] = EndValue;
2782
- }
2783
- }
2784
-
2785
- // An external user of the penultimate value need to see EndValue - Step.
2786
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2787
- // that is Start + (Step * (CRD - 1)).
2788
- for (User *U : OrigPhi->users()) {
2789
- auto *UI = cast<Instruction>(U);
2790
- if (!OrigLoop->contains(UI)) {
2791
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2792
- IRBuilder<> B(MiddleBlock->getTerminator());
2793
-
2794
- // Fast-math-flags propagate from the original induction instruction.
2795
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2796
- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2797
-
2798
- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2799
- assert(StepVPV && "step must have been expanded during VPlan execution");
2800
- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2801
- : State.get(StepVPV, VPLane(0));
2802
- Value *Escape = nullptr;
2803
- if (EndValue->getType()->isIntegerTy())
2804
- Escape = B.CreateSub(EndValue, Step);
2805
- else if (EndValue->getType()->isPointerTy())
2806
- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2807
- else {
2808
- assert(EndValue->getType()->isFloatingPointTy() &&
2809
- "Unexpected induction type");
2810
- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2811
- Instruction::FAdd
2812
- ? Instruction::FSub
2813
- : Instruction::FAdd,
2814
- EndValue, Step);
2815
- }
2816
- Escape->setName("ind.escape");
2817
- MissingVals[UI] = Escape;
2818
- }
2819
- }
2820
-
2821
- assert((MissingVals.empty() ||
2822
- all_of(MissingVals,
2823
- [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2824
- return all_of(
2825
- predecessors(cast<Instruction>(P.first)->getParent()),
2826
- [MiddleBlock, this](BasicBlock *Pred) {
2827
- return Pred == MiddleBlock ||
2828
- Pred == OrigLoop->getLoopLatch();
2829
- });
2830
- })) &&
2831
- "Expected escaping values from latch/middle.block only");
2832
-
2833
- for (auto &I : MissingVals) {
2834
- PHINode *PHI = cast<PHINode>(I.first);
2835
- // One corner case we have to handle is two IVs "chasing" each-other,
2836
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2837
- // In this case, if IV1 has an external use, we need to avoid adding both
2838
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2839
- // don't already have an incoming value for the middle block.
2840
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2841
- PHI->addIncoming(I.second, MiddleBlock);
2842
- }
2843
- }
2844
-
2845
2745
namespace {
2846
2746
2847
2747
struct CSEDenseMapInfo {
@@ -2986,24 +2886,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2986
2886
for (PHINode &PN : Exit->phis())
2987
2887
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
2988
2888
2989
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
2990
- // No edge from the middle block to the unique exit block has been inserted
2991
- // and there is nothing to fix from vector loop; phis should have incoming
2992
- // from scalar loop only.
2993
- } else {
2994
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
2995
- // the cost model.
2996
-
2997
- // If we inserted an edge from the middle block to the unique exit block,
2998
- // update uses outside the loop (phis) to account for the newly inserted
2999
- // edge.
3000
-
3001
- // Fix-up external users of the induction variables.
3002
- for (const auto &Entry : Legal->getInductionVars())
3003
- fixupIVUsers(Entry.first, Entry.second,
3004
- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
3005
- }
3006
-
3007
2889
for (Instruction *PI : PredicatedInstructions)
3008
2890
sinkScalarOperands(&*PI);
3009
2891
@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8857
8739
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
8858
8740
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8859
8741
/// the end value of the induction.
8860
- static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8861
- VPBuilder &VectorPHBuilder,
8862
- VPBuilder &ScalarPHBuilder,
8863
- VPTypeAnalysis &TypeInfo,
8864
- VPValue *VectorTC) {
8742
+ static VPValue *addResumePhiRecipeForInduction(
8743
+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8744
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8745
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8865
8746
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8866
8747
// Truncated wide inductions resume from the last lane of their vector value
8867
8748
// in the last vector iteration which is handled elsewhere.
@@ -8886,6 +8767,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8886
8767
ScalarTypeOfWideIV);
8887
8768
}
8888
8769
8770
+ EndValues[WideIV] = EndValue;
8889
8771
auto *ResumePhiRecipe =
8890
8772
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
8891
8773
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8895
8777
/// Create resume phis in the scalar preheader for first-order recurrences,
8896
8778
/// reductions and inductions, and update the VPIRInstructions wrapping the
8897
8779
/// original phis in the scalar header.
8898
- static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8780
+ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8781
+ Loop *OrigLoop,
8782
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8899
8783
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8900
8784
auto *ScalarPH = Plan.getScalarPreheader();
8901
8785
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8915
8799
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8916
8800
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
8917
8801
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8918
- &Plan.getVectorTripCount())) {
8802
+ &Plan.getVectorTripCount(), EndValues )) {
8919
8803
ScalarPhiIRI->addOperand(ResumePhi);
8920
8804
continue;
8921
8805
}
@@ -8949,9 +8833,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8949
8833
// modeled explicitly yet and won't be included. Those are un-truncated
8950
8834
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
8951
8835
// increments.
8952
- static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8953
- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan ,
8954
- const MapVector<PHINode *, InductionDescriptor> &Inductions ) {
8836
+ static SetVector<VPIRInstruction *>
8837
+ collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder,
8838
+ VPlan &Plan ) {
8955
8839
auto *MiddleVPBB = Plan.getMiddleBlock();
8956
8840
SetVector<VPIRInstruction *> ExitUsersToFix;
8957
8841
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8976,18 +8860,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8976
8860
// Exit values for inductions are computed and updated outside of VPlan
8977
8861
// and independent of induction recipes.
8978
8862
// TODO: Compute induction exit values in VPlan.
8979
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8980
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8981
- isa<VPWidenPointerInductionRecipe>(V) ||
8982
- (isa<Instruction>(IncomingValue) &&
8983
- OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8984
- any_of(IncomingValue->users(), [&Inductions](User *U) {
8985
- auto *P = dyn_cast<PHINode>(U);
8986
- return P && Inductions.contains(P);
8987
- }))) {
8988
- if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
8989
- continue;
8990
- }
8991
8863
ExitUsersToFix.insert(ExitIRI);
8992
8864
ExitIRI->addOperand(V);
8993
8865
}
@@ -8996,17 +8868,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8996
8868
return ExitUsersToFix;
8997
8869
}
8998
8870
8871
+ /// If \p Incoming is a non-truncated wide induction, or its underlying IR instruction has an induction phi from \p Inductions among its users,
8872
+ /// set operand 0 of \p ExitIRI to the matching \p EndValues entry or to a new end-value-minus-step recipe in the middle block and return true; else return false.
8873
+ static bool addInductionEndValue(
8874
+ VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
8875
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8876
+ DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
8877
+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8878
+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
8879
+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
8880
+ (isa<Instruction>(Incoming->getUnderlyingValue()) &&
8881
+ any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
8882
+ [&Inductions](User *U) {
8883
+ auto *P = dyn_cast<PHINode>(U);
8884
+ return P && Inductions.contains(P);
8885
+ }))) {
8886
+ VPValue *IV; // Key used to look up the end value in EndValues below.
8887
+ if (auto *WideIV =
8888
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe())) // NOTE(review): presumably getDefiningRecipe() is null for live-ins, which dyn_cast does not accept - confirm Incoming is never a live-in here.
8889
+ IV = WideIV; // Incoming is itself the wide induction.
8890
+ else if (auto *WideIV =
8891
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()
8892
+ ->getOperand(0)
8893
+ ->getDefiningRecipe()))
8894
+ IV = WideIV; // Operand 0 of Incoming's recipe is the wide induction.
8895
+ else
8896
+ IV = Incoming->getDefiningRecipe()->getOperand(1); // Otherwise operand 1 is used; the cast below requires it to be defined by a wide induction.
8897
+ // If a wide induction is an operand of Incoming's recipe, the exit user receives the recorded end value as is.
8898
+ // NOTE(review): this comment used to say already-updated phis are skipped when 2 induction phis chase each other; no such check is visible here - confirm.
8899
+ VPValue *EndValue = EndValues[IV]; // NOTE(review): operator[] inserts a null entry if IV was never recorded - confirm it always is.
8900
+ if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
8901
+ IsaPred<VPWidenIntOrFpInductionRecipe,
8902
+ VPWidenPointerInductionRecipe>)) {
8903
+ ExitIRI->setOperand(0, EndValue);
8904
+ return true;
8905
+ }
8906
+
8907
+ VPBuilder B(Plan.getMiddleBlock()->getTerminator()); // Insert point: the middle block's terminator.
8908
+ VPValue *Escape = nullptr; // End value stepped back by one Step, built per induction type below.
8909
+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
8910
+ VPValue *Step = WideIV->getStepValue();
8911
+ Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
8912
+ if (ScalarTy->isIntegerTy())
8913
+ Escape =
8914
+ B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
8915
+ else if (ScalarTy->isPointerTy())
8916
+ Escape = B.createPtrAdd(
8917
+ EndValue,
8918
+ B.createNaryOp(Instruction::Sub,
8919
+ {Plan.getOrAddLiveIn(ConstantInt::get(
8920
+ Step->getLiveInIRValue()->getType(), 0)), // NOTE(review): assumes Step is a live-in - confirm.
8921
+ Step}), // Offset is 0 - Step, i.e. the negated step.
8922
+ {}, "ind.escape");
8923
+ else if (ScalarTy->isFloatingPointTy()) {
8924
+ const auto &ID = WideIV->getInductionDescriptor();
8925
+ Escape = B.createNaryOp(
8926
+ ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
8927
+ ? Instruction::FSub
8928
+ : Instruction::FAdd, // FSub for an FAdd induction, FAdd otherwise.
8929
+ {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()}); // Reuses the induction op's fast-math flags. NOTE(review): no "ind.escape" name is passed here, unlike the other cases.
8930
+ } else {
8931
+ llvm_unreachable("all possible induction types must be handled");
8932
+ }
8933
+ ExitIRI->setOperand(0, Escape);
8934
+ return true;
8935
+ }
8936
+ return false;
8937
+ }
8999
8938
// Add exit values to \p Plan. Extracts are added for each entry in \p
9000
8939
// ExitUsersToFix if needed and their operands are updated. Returns true if all
9001
8940
// exit users can be handled, otherwise return false.
9002
- static bool
9003
- addUsersInExitBlocks(VPlan &Plan,
9004
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8941
+ static bool addUsersInExitBlocks(
8942
+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8943
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8944
+ DenseMap<VPValue *, VPValue *> &EndValues) {
9005
8945
if (ExitUsersToFix.empty())
9006
8946
return true;
9007
8947
9008
8948
auto *MiddleVPBB = Plan.getMiddleBlock();
9009
8949
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8950
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
9010
8951
9011
8952
// Introduce extract for exiting values and update the VPIRInstructions
9012
8953
// modeling the corresponding LCSSA phis.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
9022
8963
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9023
8964
return false;
9024
8965
8966
+ VPValue *Incoming = ExitIRI->getOperand(0);
8967
+ if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
8968
+ TypeInfo))
8969
+ continue;
8970
+
9025
8971
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9026
8972
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9027
8973
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
9028
8974
IntegerType::get(Ctx, 32), 1))});
9029
- ExitIRI->setOperand(Idx , Ext);
8975
+ ExitIRI->setOperand(0 , Ext);
9030
8976
}
9031
8977
}
9032
8978
return true;
@@ -9307,11 +9253,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9307
9253
VPlanTransforms::handleUncountableEarlyExit(
9308
9254
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9309
9255
}
9310
- addScalarResumePhis(RecipeBuilder, *Plan);
9311
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9312
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9256
+ DenseMap<VPValue *, VPValue *> EndValues;
9257
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
9258
+ SetVector<VPIRInstruction *> ExitUsersToFix =
9259
+ collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
9313
9260
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9314
- if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9261
+ if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
9262
+ EndValues)) {
9315
9263
reportVectorizationFailure(
9316
9264
"Some exit values in loop with uncountable exit not supported yet",
9317
9265
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
@@ -9438,7 +9386,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9438
9386
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9439
9387
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
9440
9388
}
9441
- addScalarResumePhis(RecipeBuilder, *Plan);
9389
+ DenseMap<VPValue *, VPValue *> EndValues;
9390
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
9442
9391
9443
9392
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
9444
9393
return Plan;
0 commit comments