@@ -779,10 +779,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
779
779
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
780
780
void printDebugTracesAtStart() override;
781
781
void printDebugTracesAtEnd() override;
782
-
783
- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
784
- Value *VectorTripCount, BasicBlock *MiddleBlock,
785
- VPlan &Plan, VPTransformState &State) override {};
786
782
};
787
783
788
784
// A specialized derived class of inner loop vectorizer that performs
@@ -2697,87 +2693,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
2697
2693
return {LoopVectorPreHeader, nullptr};
2698
2694
}
2699
2695
2700
- // Fix up external users of the induction variable. At this point, we are
2701
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2702
- // coming from the remainder loop. We need those PHIs to also have a correct
2703
- // value for the IV when arriving directly from the middle block.
2704
- void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2705
- const InductionDescriptor &II,
2706
- Value *VectorTripCount,
2707
- BasicBlock *MiddleBlock, VPlan &Plan,
2708
- VPTransformState &State) {
2709
- // There are two kinds of external IV usages - those that use the value
2710
- // computed in the last iteration (the PHI) and those that use the penultimate
2711
- // value (the value that feeds into the phi from the loop latch).
2712
- // We allow both, but they, obviously, have different values.
2713
-
2714
- assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block");
2715
-
2716
- DenseMap<Value *, Value *> MissingVals;
2717
-
2718
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2719
- OrigLoop->getLoopPreheader()))
2720
- ->getIncomingValueForBlock(MiddleBlock);
2721
-
2722
- // An external user of the last iteration's value should see the value that
2723
- // the remainder loop uses to initialize its own IV.
2724
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2725
- for (User *U : PostInc->users()) {
2726
- Instruction *UI = cast<Instruction>(U);
2727
- if (!OrigLoop->contains(UI)) {
2728
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2729
- MissingVals[UI] = EndValue;
2730
- }
2731
- }
2732
-
2733
- // An external user of the penultimate value need to see EndValue - Step.
2734
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2735
- // that is Start + (Step * (CRD - 1)).
2736
- for (User *U : OrigPhi->users()) {
2737
- auto *UI = cast<Instruction>(U);
2738
- if (!OrigLoop->contains(UI)) {
2739
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2740
- IRBuilder<> B(MiddleBlock->getTerminator());
2741
-
2742
- // Fast-math-flags propagate from the original induction instruction.
2743
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2744
- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2745
-
2746
- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2747
- assert(StepVPV && "step must have been expanded during VPlan execution");
2748
- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2749
- : State.get(StepVPV, VPLane(0));
2750
- Value *Escape = nullptr;
2751
- if (EndValue->getType()->isIntegerTy())
2752
- Escape = B.CreateSub(EndValue, Step);
2753
- else if (EndValue->getType()->isPointerTy())
2754
- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2755
- else if (EndValue->getType()->isFloatingPointTy()) {
2756
- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2757
- Instruction::FAdd
2758
- ? Instruction::FSub
2759
- : Instruction::FAdd,
2760
- EndValue, Step);
2761
- } else {
2762
- llvm_unreachable("all possible induction types must be handled");
2763
- }
2764
- Escape->setName("ind.escape");
2765
- MissingVals[UI] = Escape;
2766
- }
2767
- }
2768
-
2769
- for (auto &I : MissingVals) {
2770
- PHINode *PHI = cast<PHINode>(I.first);
2771
- // One corner case we have to handle is two IVs "chasing" each-other,
2772
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2773
- // In this case, if IV1 has an external use, we need to avoid adding both
2774
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2775
- // don't already have an incoming value for the middle block.
2776
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2777
- PHI->addIncoming(I.second, MiddleBlock);
2778
- }
2779
- }
2780
-
2781
2696
namespace {
2782
2697
2783
2698
struct CSEDenseMapInfo {
@@ -2907,25 +2822,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2907
2822
for (PHINode &PN : Exit->phis())
2908
2823
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
2909
2824
2910
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
2911
- // No edge from the middle block to the unique exit block has been inserted
2912
- // and there is nothing to fix from vector loop; phis should have incoming
2913
- // from scalar loop only.
2914
- } else {
2915
- // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2916
- // the cost model.
2917
-
2918
- // If we inserted an edge from the middle block to the unique exit block,
2919
- // update uses outside the loop (phis) to account for the newly inserted
2920
- // edge.
2921
-
2922
- // Fix-up external users of the induction variables.
2923
- for (const auto &Entry : Legal->getInductionVars())
2924
- fixupIVUsers(Entry.first, Entry.second,
2925
- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, Plan,
2926
- State);
2927
- }
2928
-
2929
2825
for (Instruction *PI : PredicatedInstructions)
2930
2826
sinkScalarOperands(&*PI);
2931
2827
@@ -8821,7 +8717,7 @@ addUsersInExitBlock(VPlan &Plan,
8821
8717
}
8822
8718
}
8823
8719
8824
- static void addResumeValuesForInductions(VPlan &Plan) {
8720
+ static void addResumeValuesForInductions(VPlan &Plan, Loop *OrigLoop ) {
8825
8721
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8826
8722
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
8827
8723
@@ -8870,9 +8766,11 @@ static void addResumeValuesForInductions(VPlan &Plan) {
8870
8766
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8871
8767
8872
8768
VPBasicBlock *ScalarPHVPBB = nullptr;
8769
+ VPBasicBlock *ExitVPBB = nullptr;
8873
8770
if (MiddleVPBB->getNumSuccessors() == 2) {
8874
8771
// Order is strict: first is the exit block, second is the scalar
8875
8772
// preheader.
8773
+ ExitVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[0]);
8876
8774
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8877
8775
} else {
8878
8776
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
@@ -8886,6 +8784,53 @@ static void addResumeValuesForInductions(VPlan &Plan) {
8886
8784
auto *ScalarLoopHeader =
8887
8785
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
8888
8786
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8787
+
8788
+ if (ExitVPBB) {
8789
+
8790
+ Value *PostInc =
8791
+ OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
8792
+ for (auto &R : *ExitVPBB) {
8793
+ auto *VPIRInst = cast<VPIRInstruction>(&R);
8794
+ auto *IRI = &VPIRInst->getInstruction();
8795
+ if (!isa<PHINode>(IRI))
8796
+ break;
8797
+ // Skip phi nodes already updated. This can be the case if 2 induction
8798
+ // phis chase each other.
8799
+ if (VPIRInst->getNumOperands() == 1)
8800
+ continue;
8801
+ if (any_of(IRI->operands(),
8802
+ [PostInc](Value *Op) { return Op == PostInc; })) {
8803
+ VPIRInst->addOperand(EndValue);
8804
+ continue;
8805
+ }
8806
+ // A phi using OrigPhi itself needs the penultimate value: EndValue - Step.
8807
+ if (any_of(IRI->operands(),
8808
+ [OrigPhi](Value *Op) { return Op == OrigPhi; })) {
8809
+ VPBuilder B(MiddleVPBB->getTerminator());
8810
+ VPValue *Escape = nullptr;
8811
+ if (ScalarTy->isIntegerTy())
8812
+ Escape = B.createNaryOp(Instruction::Sub, {EndValue, Step});
8813
+ else if (ScalarTy->isPointerTy())
8814
+ // Negate as 0 - Step; xor with -1 would yield ~Step == -Step - 1.
8815
+ Escape = B.createPtrAdd(
8816
+ EndValue,
8817
+ B.createNaryOp(Instruction::Sub,
8818
+ {Plan.getOrAddLiveIn(ConstantInt::get(
8819
+ Step->getLiveInIRValue()->getType(), 0)), Step}));
8820
+ else if (ScalarTy->isFloatingPointTy()) {
8821
+ Escape = B.createNaryOp(
8822
+ ID->getInductionBinOp()->getOpcode() == Instruction::FAdd
8823
+ ? Instruction::FSub
8824
+ : Instruction::FAdd,
8825
+ {EndValue, Step},
8826
+ {ID->getInductionBinOp()->getFastMathFlags()});
8827
+ } else {
8828
+ llvm_unreachable("all possible induction types must be handled");
8829
+ }
8830
+ VPIRInst->addOperand(Escape);
8831
+ }
8832
+ }
8833
+ }
8889
8834
}
8890
8835
}
8891
8836
@@ -9199,7 +9144,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9199
9144
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9200
9145
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9201
9146
addUsersInExitBlock(*Plan, ExitUsersToFix);
9202
- addResumeValuesForInductions(*Plan);
9147
+ addResumeValuesForInductions(*Plan, OrigLoop );
9203
9148
9204
9149
// ---------------------------------------------------------------------------
9205
9150
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9305,7 +9250,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9305
9250
bool HasNUW = true;
9306
9251
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
9307
9252
DebugLoc());
9308
- addResumeValuesForInductions(*Plan);
9253
+ addResumeValuesForInductions(*Plan, OrigLoop );
9309
9254
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
9310
9255
return Plan;
9311
9256
}
0 commit comments