@@ -2775,6 +2775,23 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2775
2775
return LoopVectorPreHeader;
2776
2776
}
2777
2777
2778
/// Returns true if the PHI node \p UI has an incoming entry for the block
/// \p ExitingBB and the value of that entry is \p V; returns false if
/// \p ExitingBB is not an incoming block of \p UI or the value differs.
/// \p UI is asserted to be a PHINode ("Expected LCSSA form").
/// NOTE(review): the dyn_cast result is only guarded by the assert, so a
/// non-PHI \p UI would presumably be dereferenced as null in builds without
/// assertions -- confirm callers always pass LCSSA phis.
+ static bool isValueIncomingFromBlock(BasicBlock *ExitingBB, Value *V,
2779
+ Instruction *UI) {
2780
+ PHINode *PHI = dyn_cast<PHINode>(UI);
2781
+ assert(PHI && "Expected LCSSA form");
2782
+
2783
+ // If this loop has an uncountable early exit then there could be
2784
+ // different users of V outside the loop, with either:
2785
+ // 1. Multiple users, because each exiting block (countable or
2786
+ // uncountable) jumps to the same exit block, or ...
2787
+ // 2. A single user with an incoming value from a countable or
2788
+ // uncountable exiting block.
2789
+ // In both cases there is no guarantee that V came from the countable
2790
+ // exiting block, i.e. the latch, so check the incoming edge explicitly.
2791
+ int Index = PHI->getBasicBlockIndex(ExitingBB);
2792
+ return Index != -1 && PHI->getIncomingValue(Index) == V;
2793
+ }
2794
+
2778
2795
// Fix up external users of the induction variable. At this point, we are
2779
2796
// in LCSSA form, with all external PHIs that use the IV having one input value,
2780
2797
// coming from the remainder loop. We need those PHIs to also have a correct
@@ -2797,12 +2814,13 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2797
2814
2798
2815
// An external user of the last iteration's value should see the value that
2799
2816
// the remainder loop uses to initialize its own IV.
2800
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2817
+ BasicBlock *OrigLoopLatch = OrigLoop->getLoopLatch();
2818
+ Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoopLatch);
2801
2819
for (User *U : PostInc->users()) {
2802
2820
Instruction *UI = cast<Instruction>(U);
2803
2821
if (!OrigLoop->contains(UI)) {
2804
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2805
- MissingVals[UI ] = EndValue;
2822
+ if (isValueIncomingFromBlock(OrigLoopLatch, PostInc, UI))
2823
+ MissingVals[cast<PHINode>(UI) ] = EndValue;
2806
2824
}
2807
2825
}
2808
2826
@@ -2812,7 +2830,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2812
2830
for (User *U : OrigPhi->users()) {
2813
2831
auto *UI = cast<Instruction>(U);
2814
2832
if (!OrigLoop->contains(UI)) {
2815
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2833
+ if (!isValueIncomingFromBlock(OrigLoopLatch, OrigPhi, UI))
2834
+ continue;
2816
2835
IRBuilder<> B(MiddleBlock->getTerminator());
2817
2836
2818
2837
// Fast-math-flags propagate from the original induction instruction.
@@ -2842,18 +2861,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2842
2861
}
2843
2862
}
2844
2863
2845
- assert((MissingVals.empty() ||
2846
- all_of(MissingVals,
2847
- [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2848
- return all_of(
2849
- predecessors(cast<Instruction>(P.first)->getParent()),
2850
- [MiddleBlock, this](BasicBlock *Pred) {
2851
- return Pred == MiddleBlock ||
2852
- Pred == OrigLoop->getLoopLatch();
2853
- });
2854
- })) &&
2855
- "Expected escaping values from latch/middle.block only");
2856
-
2857
2864
for (auto &I : MissingVals) {
2858
2865
PHINode *PHI = cast<PHINode>(I.first);
2859
2866
// One corner case we have to handle is two IVs "chasing" each-other,
@@ -7774,6 +7781,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7774
7781
State.LVer->prepareNoAliasMetadata();
7775
7782
}
7776
7783
7784
+ // Set the uncountable early exit block in the VPTransformState.
7785
+ State.CFG.UncountableEarlyExitBB = ILV.Legal->getUncountableEarlyExitBlock();
7786
+
7777
7787
ILV.printDebugTracesAtStart();
7778
7788
7779
7789
//===------------------------------------------------===//
@@ -8958,6 +8968,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8958
8968
// start value provides the value if the loop is bypassed.
8959
8969
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8960
8970
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8971
+ assert(!Plan.getEarlyExit() &&
8972
+ "Cannot handle reductions or first-order recurrences with "
8973
+ "uncountable early exits");
8961
8974
if (IsFOR)
8962
8975
ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
8963
8976
VPInstruction::ExtractFromEnd, {ResumeFromVectorLoop, OneVPV}, {},
@@ -9075,14 +9088,20 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
9075
9088
// Add exit values to \p Plan. Extracts are added for each entry in \p
9076
9089
// ExitUsersToFix if needed and their operands are updated. Operands coming from
9077
9090
// the middle block use ExtractFromEnd; all others use ExtractFirstActive.
9078
- static bool
9091
+ static void
9079
9092
addUsersInExitBlocks(VPlan &Plan,
9080
9093
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
9081
9094
if (ExitUsersToFix.empty())
9082
- return true ;
9095
+ return;
9083
9096
9084
9097
auto *MiddleVPBB = Plan.getMiddleBlock();
9085
- VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9098
+ VPBuilder MiddleB(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9099
+ VPBuilder EarlyExitB;
9100
+ VPBasicBlock *VectorEarlyExitVPBB = Plan.getEarlyExit();
9101
+ VPValue *EarlyExitMask = nullptr;
9102
+ if (VectorEarlyExitVPBB)
9103
+ EarlyExitB.setInsertPoint(VectorEarlyExitVPBB,
9104
+ VectorEarlyExitVPBB->getFirstNonPhi());
9086
9105
9087
9106
// Introduce extract for exiting values and update the VPIRInstructions
9088
9107
// modeling the corresponding LCSSA phis.
@@ -9093,19 +9112,38 @@ addUsersInExitBlocks(VPlan &Plan,
9093
9112
if (Op->isLiveIn())
9094
9113
continue;
9095
9114
9096
- // Currently only live-ins can be used by exit values from blocks not
9097
- // exiting via the vector latch through to the middle block.
9098
- if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9099
- return false;
9100
-
9101
9115
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9102
- VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9103
- {Op, Plan.getOrAddLiveIn(ConstantInt::get(
9104
- IntegerType::get(Ctx, 32), 1))});
9116
+ VPValue *Ext;
9117
+ VPBasicBlock *PredVPBB =
9118
+ cast<VPBasicBlock>(ExitIRI->getParent()->getPredecessors()[Idx]);
9119
+ if (PredVPBB != MiddleVPBB) {
9120
+ assert(ExitIRI->getParent()->getNumPredecessors() <= 2);
9121
+
9122
+ // Cache the early exit mask
9123
+ if (!EarlyExitMask) {
9124
+ VPBasicBlock *MiddleSplitVPBB =
9125
+ cast<VPBasicBlock>(VectorEarlyExitVPBB->getSinglePredecessor());
9126
+ VPInstruction *PredTerm =
9127
+ cast<VPInstruction>(MiddleSplitVPBB->getTerminator());
9128
+ assert(PredTerm->getOpcode() == VPInstruction::BranchOnCond &&
9129
+ "Unexpected middle split block terminator");
9130
+ VPInstruction *ScalarCond =
9131
+ cast<VPInstruction>(PredTerm->getOperand(0));
9132
+ assert(
9133
+ ScalarCond->getOpcode() == VPInstruction::AnyOf &&
9134
+ "Unexpected condition for middle split block terminator branch");
9135
+ EarlyExitMask = ScalarCond->getOperand(0);
9136
+ }
9137
+ Ext = EarlyExitB.createNaryOp(VPInstruction::ExtractFirstActive,
9138
+ {Op, EarlyExitMask});
9139
+ } else {
9140
+ Ext = MiddleB.createNaryOp(VPInstruction::ExtractFromEnd,
9141
+ {Op, Plan.getOrAddLiveIn(ConstantInt::get(
9142
+ IntegerType::get(Ctx, 32), 1))});
9143
+ }
9105
9144
ExitIRI->setOperand(Idx, Ext);
9106
9145
}
9107
9146
}
9108
- return true;
9109
9147
}
9110
9148
9111
9149
/// Handle users in the exit block for first order recurrences in the original
@@ -9401,12 +9439,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9401
9439
SetVector<VPIRInstruction *> ExitUsersToFix =
9402
9440
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
9403
9441
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9404
- if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9405
- reportVectorizationFailure(
9406
- "Some exit values in loop with uncountable exit not supported yet",
9407
- "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9408
- return nullptr;
9409
- }
9442
+ addUsersInExitBlocks(*Plan, ExitUsersToFix);
9410
9443
9411
9444
// ---------------------------------------------------------------------------
9412
9445
// Transform initial VPlan: Apply previously taken decisions, in order, to
0 commit comments