@@ -552,11 +552,6 @@ class InnerLoopVectorizer {
552
552
protected:
553
553
friend class LoopVectorizationPlanner;
554
554
555
- /// Set up the values of the IVs correctly when exiting the vector loop.
556
- virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
557
- Value *VectorTripCount, BasicBlock *MiddleBlock,
558
- VPTransformState &State);
559
-
560
555
/// Iteratively sink the scalarized operands of a predicated instruction into
561
556
/// the block that was created for it.
562
557
void sinkScalarOperands(Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785
780
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
786
781
void printDebugTracesAtStart() override;
787
782
void printDebugTracesAtEnd() override;
788
-
789
- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
790
- Value *VectorTripCount, BasicBlock *MiddleBlock,
791
- VPTransformState &State) override {};
792
783
};
793
784
794
785
// A specialized derived class of inner loop vectorizer that performs
@@ -2768,88 +2759,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2768
2759
return LoopVectorPreHeader;
2769
2760
}
2770
2761
2771
- // Fix up external users of the induction variable. At this point, we are
2772
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2773
- // coming from the remainder loop. We need those PHIs to also have a correct
2774
- // value for the IV when arriving directly from the middle block.
2775
- void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2776
- const InductionDescriptor &II,
2777
- Value *VectorTripCount,
2778
- BasicBlock *MiddleBlock,
2779
- VPTransformState &State) {
2780
- // There are two kinds of external IV usages - those that use the value
2781
- // computed in the last iteration (the PHI) and those that use the penultimate
2782
- // value (the value that feeds into the phi from the loop latch).
2783
- // We allow both, but they, obviously, have different values.
2784
-
2785
- DenseMap<Value *, Value *> MissingVals;
2786
-
2787
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2788
- OrigLoop->getLoopPreheader()))
2789
- ->getIncomingValueForBlock(MiddleBlock);
2790
-
2791
- // An external user of the last iteration's value should see the value that
2792
- // the remainder loop uses to initialize its own IV.
2793
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2794
- for (User *U : PostInc->users()) {
2795
- Instruction *UI = cast<Instruction>(U);
2796
- if (!OrigLoop->contains(UI)) {
2797
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2798
- MissingVals[UI] = EndValue;
2799
- }
2800
- }
2801
-
2802
- // An external user of the penultimate value needs to see EndValue - Step.
2803
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2804
- // that is Start + (Step * (CRD - 1)).
2805
- for (User *U : OrigPhi->users()) {
2806
- auto *UI = cast<Instruction>(U);
2807
- if (!OrigLoop->contains(UI)) {
2808
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2809
- IRBuilder<> B(MiddleBlock->getTerminator());
2810
-
2811
- // Fast-math-flags propagate from the original induction instruction.
2812
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2813
- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2814
-
2815
- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2816
- assert(StepVPV && "step must have been expanded during VPlan execution");
2817
- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2818
- : State.get(StepVPV, VPLane(0));
2819
- Value *Escape = nullptr;
2820
- if (EndValue->getType()->isIntegerTy())
2821
- Escape = B.CreateSub(EndValue, Step);
2822
- else if (EndValue->getType()->isPointerTy())
2823
- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2824
- else {
2825
- assert(EndValue->getType()->isFloatingPointTy() &&
2826
- "Unexpected induction type");
2827
- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2828
- Instruction::FAdd
2829
- ? Instruction::FSub
2830
- : Instruction::FAdd,
2831
- EndValue, Step);
2832
- }
2833
- Escape->setName("ind.escape");
2834
- MissingVals[UI] = Escape;
2835
- }
2836
- }
2837
-
2838
- assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) &&
2839
- "Expected a single exit block for escaping values");
2840
-
2841
- for (auto &I : MissingVals) {
2842
- PHINode *PHI = cast<PHINode>(I.first);
2843
- // One corner case we have to handle is two IVs "chasing" each other,
2844
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2845
- // In this case, if IV1 has an external use, we need to avoid adding both
2846
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2847
- // don't already have an incoming value for the middle block.
2848
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2849
- PHI->addIncoming(I.second, MiddleBlock);
2850
- }
2851
- }
2852
-
2853
2762
namespace {
2854
2763
2855
2764
struct CSEDenseMapInfo {
@@ -2978,24 +2887,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2978
2887
for (PHINode &PN : Exit->phis())
2979
2888
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
2980
2889
2981
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
2982
- // No edge from the middle block to the unique exit block has been inserted
2983
- // and there is nothing to fix from vector loop; phis should have incoming
2984
- // from scalar loop only.
2985
- } else {
2986
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
2987
- // the cost model.
2988
-
2989
- // If we inserted an edge from the middle block to the unique exit block,
2990
- // update uses outside the loop (phis) to account for the newly inserted
2991
- // edge.
2992
-
2993
- // Fix-up external users of the induction variables.
2994
- for (const auto &Entry : Legal->getInductionVars())
2995
- fixupIVUsers(Entry.first, Entry.second,
2996
- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
2997
- }
2998
-
2999
2890
for (Instruction *PI : PredicatedInstructions)
3000
2891
sinkScalarOperands(&*PI);
3001
2892
@@ -8839,11 +8730,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8839
8730
/// Create a ResumePhi for \p PhiR, if it is a wide induction recipe. If the
8840
8731
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8841
8732
/// the end value of the induction.
8842
- static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8843
- VPBuilder &VectorPHBuilder,
8844
- VPBuilder &ScalarPHBuilder,
8845
- VPTypeAnalysis &TypeInfo,
8846
- VPValue *VectorTC) {
8733
+ static VPValue *addResumeValuesForInduction(
8734
+ VPHeaderPHIRecipe *PhiR, VPBuilder &VectorPHBuilder,
8735
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8736
+ Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
8847
8737
auto *WideIV = dyn_cast<VPWidenInductionRecipe>(PhiR);
8848
8738
if (!WideIV)
8849
8739
return nullptr;
@@ -8875,6 +8765,7 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8875
8765
ScalarTy);
8876
8766
}
8877
8767
8768
+ EndValues[PhiR] = EndValue;
8878
8769
auto *ResumePhiRecipe =
8879
8770
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
8880
8771
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8886,7 +8777,8 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8886
8777
/// original phis in the scalar header.
8887
8778
static void addScalarResumePhis(
8888
8779
VPlan &Plan,
8889
- function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
8780
+ function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe,
8781
+ Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
8890
8782
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8891
8783
auto *ScalarPH = Plan.getScalarPreheader();
8892
8784
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8905,7 +8797,7 @@ static void addScalarResumePhis(
8905
8797
8906
8798
if (VPValue *ResumePhi = addResumeValuesForInduction(
8907
8799
VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8908
- &Plan.getVectorTripCount())) {
8800
+ &Plan.getVectorTripCount(), OrigLoop, EndValues )) {
8909
8801
ScalarPhiIRI->addOperand(ResumePhi);
8910
8802
continue;
8911
8803
}
@@ -8937,9 +8829,9 @@ static void addScalarResumePhis(
8937
8829
// modeled explicitly yet and won't be included. Those are un-truncated
8938
8830
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
8939
8831
// increments.
8940
- static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8941
- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan ,
8942
- const MapVector<PHINode *, InductionDescriptor> &Inductions ) {
8832
+ static SetVector<VPIRInstruction *>
8833
+ collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder,
8834
+ VPlan &Plan ) {
8943
8835
auto *MiddleVPBB = Plan.getMiddleBlock();
8944
8836
SetVector<VPIRInstruction *> ExitUsersToFix;
8945
8837
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8964,18 +8856,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8964
8856
// Exit values for inductions are computed and updated outside of VPlan
8965
8857
// and independent of induction recipes.
8966
8858
// TODO: Compute induction exit values in VPlan.
8967
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8968
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8969
- isa<VPWidenPointerInductionRecipe>(V) ||
8970
- (isa<Instruction>(IncomingValue) &&
8971
- OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8972
- any_of(IncomingValue->users(), [&Inductions](User *U) {
8973
- auto *P = dyn_cast<PHINode>(U);
8974
- return P && Inductions.contains(P);
8975
- }))) {
8976
- if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
8977
- continue;
8978
- }
8979
8859
ExitUsersToFix.insert(ExitIRI);
8980
8860
ExitIRI->addOperand(V);
8981
8861
}
@@ -8987,14 +8867,16 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8987
8867
// Add exit values to \p Plan. Extracts are added for each entry in \p
8988
8868
// ExitUsersToFix if needed and their operands are updated. Returns true if all
8989
8869
// exit users can be handled, otherwise return false.
8990
- static bool
8991
- addUsersInExitBlocks(VPlan &Plan,
8992
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8870
+ static bool addUsersInExitBlocks(
8871
+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8872
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8873
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8993
8874
if (ExitUsersToFix.empty())
8994
8875
return true;
8995
8876
8996
8877
auto *MiddleVPBB = Plan.getMiddleBlock();
8997
8878
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8879
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8998
8880
8999
8881
// Introduce extract for exiting values and update the VPIRInstructions
9000
8882
// modeling the corresponding LCSSA phis.
@@ -9010,6 +8892,69 @@ addUsersInExitBlocks(VPlan &Plan,
9010
8892
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9011
8893
return false;
9012
8894
8895
+ VPValue *Incoming = ExitIRI->getOperand(0);
8896
+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8897
+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
8898
+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
8899
+ (isa<Instruction>(Incoming->getUnderlyingValue()) &&
8900
+ // OrigLoop->contains(cast<Instruction>(Incoming->getUnderlyingValue()))
8901
+ // &&
8902
+ any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
8903
+ [&Inductions](User *U) {
8904
+ auto *P = dyn_cast<PHINode>(U);
8905
+ return P && Inductions.contains(P);
8906
+ }))) {
8907
+ VPValue *IV;
8908
+ if (auto *WideIV =
8909
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
8910
+ IV = WideIV;
8911
+ else if (auto *WideIV = dyn_cast<VPWidenInductionRecipe>(
8912
+ Incoming->getDefiningRecipe()
8913
+ ->getOperand(0)
8914
+ ->getDefiningRecipe()))
8915
+ IV = WideIV;
8916
+ else
8917
+ IV = Incoming->getDefiningRecipe()->getOperand(1);
8918
+ // Skip phi nodes already updated. This can be the case if 2 induction
8919
+ // phis chase each other.
8920
+ VPValue *EndValue = EndValues[IV];
8921
+ if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
8922
+ IsaPred<VPWidenIntOrFpInductionRecipe,
8923
+ VPWidenPointerInductionRecipe>)) {
8924
+ ExitIRI->setOperand(0, EndValue);
8925
+ continue;
8926
+ }
8927
+
8928
+ VPBuilder B(Plan.getMiddleBlock()->getTerminator());
8929
+ VPValue *Escape = nullptr;
8930
+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
8931
+ VPValue *Step = WideIV->getStepValue();
8932
+ Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
8933
+ if (ScalarTy->isIntegerTy())
8934
+ Escape = B.createNaryOp(Instruction::Sub, {EndValue, Step}, {},
8935
+ "ind.escape");
8936
+ else if (ScalarTy->isPointerTy())
8937
+ Escape = B.createPtrAdd(
8938
+ EndValue,
8939
+ B.createNaryOp(Instruction::Sub,
8940
+ {Plan.getOrAddLiveIn(ConstantInt::get(
8941
+ Step->getLiveInIRValue()->getType(), 0)),
8942
+ Step}),
8943
+ {}, "ind.escape");
8944
+ else if (ScalarTy->isFloatingPointTy()) {
8945
+ const auto &ID = WideIV->getInductionDescriptor();
8946
+ Escape = B.createNaryOp(
8947
+ ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
8948
+ ? Instruction::FSub
8949
+ : Instruction::FAdd,
8950
+ {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
8951
+ } else {
8952
+ llvm_unreachable("all possible induction types must be handled");
8953
+ }
8954
+ ExitIRI->setOperand(0, Escape);
8955
+ continue;
8956
+ }
8957
+
9013
8958
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9014
8959
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9015
8960
{V, Plan.getOrAddLiveIn(ConstantInt::get(
@@ -9294,13 +9239,18 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9294
9239
VPlanTransforms::handleUncountableEarlyExit(
9295
9240
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9296
9241
}
9297
- addScalarResumePhis(*Plan, [&RecipeBuilder](PHINode *P) {
9298
- return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe(P));
9299
- });
9300
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9301
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9242
+ DenseMap<VPValue *, VPValue *> EndValues;
9243
+ addScalarResumePhis(
9244
+ *Plan,
9245
+ [&RecipeBuilder](PHINode *P) {
9246
+ return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe(P));
9247
+ },
9248
+ OrigLoop, EndValues);
9249
+ SetVector<VPIRInstruction *> ExitUsersToFix =
9250
+ collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
9302
9251
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9303
- if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9252
+ if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
9253
+ EndValues)) {
9304
9254
reportVectorizationFailure(
9305
9255
"Some exit values in loop with uncountable exit not supported yet",
9306
9256
"Some exit values in loop with uncountable exit not supported yet",
@@ -9419,6 +9369,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9419
9369
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
9420
9370
DebugLoc());
9421
9371
9372
+ DenseMap<VPValue *, VPValue *> EndValues;
9422
9373
addScalarResumePhis(
9423
9374
*Plan,
9424
9375
[&Plan](PHINode *P) {
@@ -9428,9 +9379,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9428
9379
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9429
9380
return HeaderR->getUnderlyingValue() == P ? HeaderR : nullptr;
9430
9381
});
9431
- }
9432
-
9433
- );
9382
+ },
9383
+ OrigLoop, EndValues);
9434
9384
9435
9385
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
9436
9386
return Plan;
0 commit comments