Skip to content

Commit 844aa2a

Browse files
committed
[VPlan] Update final exit value via VPlan.
Model updating IV users directly in VPlan, replacing fixupIVUsers. Depends on #110004, #109975 and #112145.
1 parent 2764023 commit 844aa2a

File tree

5 files changed

+118
-155
lines changed

5 files changed

+118
-155
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 98 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -543,11 +543,6 @@ class InnerLoopVectorizer {
543543
protected:
544544
friend class LoopVectorizationPlanner;
545545

546-
/// Set up the values of the IVs correctly when exiting the vector loop.
547-
virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
548-
Value *VectorTripCount, BasicBlock *MiddleBlock,
549-
VPTransformState &State);
550-
551546
/// Iteratively sink the scalarized operands of a predicated instruction into
552547
/// the block that was created for it.
553548
void sinkScalarOperands(Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785780
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
786781
void printDebugTracesAtStart() override;
787782
void printDebugTracesAtEnd() override;
788-
789-
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
790-
Value *VectorTripCount, BasicBlock *MiddleBlock,
791-
VPTransformState &State) override {};
792783
};
793784

794785
// A specialized derived class of inner loop vectorizer that performs
@@ -2775,97 +2766,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27752766
return LoopVectorPreHeader;
27762767
}
27772768

2778-
// Fix up external users of the induction variable. At this point, we are
2779-
// in LCSSA form, with all external PHIs that use the IV having one input value,
2780-
// coming from the remainder loop. We need those PHIs to also have a correct
2781-
// value for the IV when arriving directly from the middle block.
2782-
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2783-
const InductionDescriptor &II,
2784-
Value *VectorTripCount,
2785-
BasicBlock *MiddleBlock,
2786-
VPTransformState &State) {
2787-
// There are two kinds of external IV usages - those that use the value
2788-
// computed in the last iteration (the PHI) and those that use the penultimate
2789-
// value (the value that feeds into the phi from the loop latch).
2790-
// We allow both, but they, obviously, have different values.
2791-
2792-
DenseMap<Value *, Value *> MissingVals;
2793-
2794-
Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2795-
OrigLoop->getLoopPreheader()))
2796-
->getIncomingValueForBlock(MiddleBlock);
2797-
2798-
// An external user of the last iteration's value should see the value that
2799-
// the remainder loop uses to initialize its own IV.
2800-
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2801-
for (User *U : PostInc->users()) {
2802-
Instruction *UI = cast<Instruction>(U);
2803-
if (!OrigLoop->contains(UI)) {
2804-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2805-
MissingVals[UI] = EndValue;
2806-
}
2807-
}
2808-
2809-
// An external user of the penultimate value needs to see EndValue - Step.
2810-
// The simplest way to get this is to recompute it from the constituent SCEVs,
2811-
// that is Start + (Step * (CRD - 1)).
2812-
for (User *U : OrigPhi->users()) {
2813-
auto *UI = cast<Instruction>(U);
2814-
if (!OrigLoop->contains(UI)) {
2815-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2816-
IRBuilder<> B(MiddleBlock->getTerminator());
2817-
2818-
// Fast-math-flags propagate from the original induction instruction.
2819-
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2820-
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2821-
2822-
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2823-
assert(StepVPV && "step must have been expanded during VPlan execution");
2824-
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2825-
: State.get(StepVPV, VPLane(0));
2826-
Value *Escape = nullptr;
2827-
if (EndValue->getType()->isIntegerTy())
2828-
Escape = B.CreateSub(EndValue, Step);
2829-
else if (EndValue->getType()->isPointerTy())
2830-
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2831-
else {
2832-
assert(EndValue->getType()->isFloatingPointTy() &&
2833-
"Unexpected induction type");
2834-
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2835-
Instruction::FAdd
2836-
? Instruction::FSub
2837-
: Instruction::FAdd,
2838-
EndValue, Step);
2839-
}
2840-
Escape->setName("ind.escape");
2841-
MissingVals[UI] = Escape;
2842-
}
2843-
}
2844-
2845-
assert((MissingVals.empty() ||
2846-
all_of(MissingVals,
2847-
[MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2848-
return all_of(
2849-
predecessors(cast<Instruction>(P.first)->getParent()),
2850-
[MiddleBlock, this](BasicBlock *Pred) {
2851-
return Pred == MiddleBlock ||
2852-
Pred == OrigLoop->getLoopLatch();
2853-
});
2854-
})) &&
2855-
"Expected escaping values from latch/middle.block only");
2856-
2857-
for (auto &I : MissingVals) {
2858-
PHINode *PHI = cast<PHINode>(I.first);
2859-
// One corner case we have to handle is two IVs "chasing" each-other,
2860-
// that is %IV2 = phi [...], [ %IV1, %latch ]
2861-
// In this case, if IV1 has an external use, we need to avoid adding both
2862-
// "last value of IV1" and "penultimate value of IV2". So, verify that we
2863-
// don't already have an incoming value for the middle block.
2864-
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2865-
PHI->addIncoming(I.second, MiddleBlock);
2866-
}
2867-
}
2868-
28692769
namespace {
28702770

28712771
struct CSEDenseMapInfo {
@@ -2994,24 +2894,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29942894
for (PHINode &PN : Exit->phis())
29952895
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
29962896

2997-
if (Cost->requiresScalarEpilogue(VF.isVector())) {
2998-
// No edge from the middle block to the unique exit block has been inserted
2999-
// and there is nothing to fix from vector loop; phis should have incoming
3000-
// from scalar loop only.
3001-
} else {
3002-
// TODO: Check in VPlan to see if IV users need fixing instead of checking
3003-
// the cost model.
3004-
3005-
// If we inserted an edge from the middle block to the unique exit block,
3006-
// update uses outside the loop (phis) to account for the newly inserted
3007-
// edge.
3008-
3009-
// Fix-up external users of the induction variables.
3010-
for (const auto &Entry : Legal->getInductionVars())
3011-
fixupIVUsers(Entry.first, Entry.second,
3012-
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
3013-
}
3014-
30152897
for (Instruction *PI : PredicatedInstructions)
30162898
sinkScalarOperands(&*PI);
30172899

@@ -8866,11 +8748,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
88668748
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
88678749
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
88688750
/// the end value of the induction.
8869-
static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8870-
VPBuilder &VectorPHBuilder,
8871-
VPBuilder &ScalarPHBuilder,
8872-
VPTypeAnalysis &TypeInfo,
8873-
VPValue *VectorTC) {
8751+
static VPValue *addResumePhiRecipeForInduction(
8752+
VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8753+
VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8754+
DenseMap<VPValue *, VPValue *> &EndValues) {
88748755
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
88758756
// Truncated wide inductions resume from the last lane of their vector value
88768757
// in the last vector iteration which is handled elsewhere.
@@ -8895,6 +8776,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
88958776
ScalarTypeOfWideIV);
88968777
}
88978778

8779+
EndValues[WideIV] = EndValue;
88988780
auto *ResumePhiRecipe =
88998781
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
89008782
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8904,7 +8786,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
89048786
/// Create resume phis in the scalar preheader for first-order recurrences,
89058787
/// reductions and inductions, and update the VPIRInstructions wrapping the
89068788
/// original phis in the scalar header.
8907-
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8789+
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8790+
Loop *OrigLoop,
8791+
DenseMap<VPValue *, VPValue *> &EndValues) {
89088792
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
89098793
auto *ScalarPH = Plan.getScalarPreheader();
89108794
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8924,7 +8808,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89248808
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
89258809
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
89268810
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8927-
&Plan.getVectorTripCount())) {
8811+
&Plan.getVectorTripCount(), EndValues)) {
89288812
ScalarPhiIRI->addOperand(ResumePhi);
89298813
continue;
89308814
}
@@ -9009,9 +8893,9 @@ static bool isIVUse(VPValue *Incoming) {
90098893
// modeled explicitly yet and won't be included. Those are un-truncated
90108894
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
90118895
// increments.
9012-
static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
9013-
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan
9014-
) {
8896+
static SetVector<VPIRInstruction *>
8897+
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
8898+
VPlan &Plan) {
90158899
auto *MiddleVPBB = Plan.getMiddleBlock();
90168900
SetVector<VPIRInstruction *> ExitUsersToFix;
90178901
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -9033,11 +8917,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
90338917
}
90348918
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
90358919
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9036-
// Exit values for inductions are computed and updated outside of VPlan
9037-
// and independent of induction recipes.
9038-
// TODO: Compute induction exit values in VPlan.
9039-
if (isIVUse(V) && ExitVPBB->getSinglePredecessor() == MiddleVPBB)
9040-
continue;
90418920
ExitUsersToFix.insert(ExitIRI);
90428921
ExitIRI->addOperand(V);
90438922
}
@@ -9046,17 +8925,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
90468925
return ExitUsersToFix;
90478926
}
90488927

8928+
/// If \p Incoming is a non-truncated wide induction, or an instruction used by
8929+
/// an induction phi, create recipes to compute its exit value and update the user \p ExitIRI. Returns true if \p ExitIRI was updated, false otherwise.
8930+
static bool addInductionEndValue(
8931+
VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
8932+
const MapVector<PHINode *, InductionDescriptor> &Inductions,
8933+
DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
8934+
if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8935+
!cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
8936+
isa<VPWidenPointerInductionRecipe>(Incoming) ||
8937+
(isa<Instruction>(Incoming->getUnderlyingValue()) &&
8938+
any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
8939+
[&Inductions](User *U) {
8940+
auto *P = dyn_cast<PHINode>(U);
8941+
return P && Inductions.contains(P);
8942+
}))) {
8943+
VPValue *IV;
8944+
if (auto *WideIV =
8945+
dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
8946+
IV = WideIV;
8947+
else if (auto *WideIV =
8948+
dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()
8949+
->getOperand(0)
8950+
->getDefiningRecipe()))
8951+
IV = WideIV;
8952+
else
8953+
IV = Incoming->getDefiningRecipe()->getOperand(1);
8954+
// Skip phi nodes already updated. This can be the case if 2 induction
8955+
// phis chase each other.
8956+
VPValue *EndValue = EndValues[IV];
8957+
if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
8958+
IsaPred<VPWidenIntOrFpInductionRecipe,
8959+
VPWidenPointerInductionRecipe>)) {
8960+
ExitIRI->setOperand(0, EndValue);
8961+
return true;
8962+
}
8963+
8964+
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
8965+
VPValue *Escape = nullptr;
8966+
auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
8967+
VPValue *Step = WideIV->getStepValue();
8968+
Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
8969+
if (ScalarTy->isIntegerTy())
8970+
Escape =
8971+
B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
8972+
else if (ScalarTy->isPointerTy())
8973+
Escape = B.createPtrAdd(
8974+
EndValue,
8975+
B.createNaryOp(Instruction::Sub,
8976+
{Plan.getOrAddLiveIn(ConstantInt::get(
8977+
Step->getLiveInIRValue()->getType(), 0)),
8978+
Step}),
8979+
{}, "ind.escape");
8980+
else if (ScalarTy->isFloatingPointTy()) {
8981+
const auto &ID = WideIV->getInductionDescriptor();
8982+
Escape = B.createNaryOp(
8983+
ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
8984+
? Instruction::FSub
8985+
: Instruction::FAdd,
8986+
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
8987+
} else {
8988+
llvm_unreachable("all possible induction types must be handled");
8989+
}
8990+
ExitIRI->setOperand(0, Escape);
8991+
return true;
8992+
}
8993+
return false;
8994+
}
90498995
// Add exit values to \p Plan. Extracts are added for each entry in \p
90508996
// ExitUsersToFix if needed and their operands are updated. Returns true if all
90518997
// exit users can be handled, otherwise returns false.
9052-
static bool
9053-
addUsersInExitBlocks(VPlan &Plan,
9054-
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8998+
static bool addUsersInExitBlocks(
8999+
VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
9000+
const MapVector<PHINode *, InductionDescriptor> &Inductions,
9001+
DenseMap<VPValue *, VPValue *> &EndValues) {
90559002
if (ExitUsersToFix.empty())
90569003
return true;
90579004

90589005
auto *MiddleVPBB = Plan.getMiddleBlock();
90599006
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9007+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
90609008

90619009
// Introduce extract for exiting values and update the VPIRInstructions
90629010
// modeling the corresponding LCSSA phis.
@@ -9072,11 +9020,16 @@ addUsersInExitBlocks(VPlan &Plan,
90729020
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
90739021
return false;
90749022

9023+
VPValue *Incoming = ExitIRI->getOperand(0);
9024+
if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
9025+
TypeInfo))
9026+
continue;
9027+
90759028
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
90769029
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
90779030
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
90789031
IntegerType::get(Ctx, 32), 1))});
9079-
ExitIRI->setOperand(Idx, Ext);
9032+
ExitIRI->setOperand(0, Ext);
90809033
}
90819034
}
90829035
return true;
@@ -9371,11 +9324,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93719324
VPlanTransforms::handleUncountableEarlyExit(
93729325
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93739326
}
9374-
addScalarResumePhis(RecipeBuilder, *Plan);
9375-
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9376-
OrigLoop, RecipeBuilder, *Plan);
9327+
DenseMap<VPValue *, VPValue *> EndValues;
9328+
addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
9329+
SetVector<VPIRInstruction *> ExitUsersToFix =
9330+
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
93779331
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9378-
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9332+
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
9333+
EndValues)) {
93799334
reportVectorizationFailure(
93809335
"Some exit values in loop with uncountable exit not supported yet",
93819336
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
@@ -9502,7 +9457,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
95029457
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
95039458
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
95049459
}
9505-
addScalarResumePhis(RecipeBuilder, *Plan);
9460+
DenseMap<VPValue *, VPValue *> EndValues;
9461+
addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
95069462

95079463
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
95089464
return Plan;

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -311,16 +311,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
311311
LastLane = 0;
312312
}
313313

314-
auto *LastInst = cast<Instruction>(get(Def, LastLane));
315-
// Set the insert point after the last scalarized instruction or after the
316-
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
317-
// will directly follow the scalar definitions.
314+
auto *LastDef = get(Def, LastLane);
318315
auto OldIP = Builder.saveIP();
319-
auto NewIP =
320-
isa<PHINode>(LastInst)
321-
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
322-
: std::next(BasicBlock::iterator(LastInst));
323-
Builder.SetInsertPoint(&*NewIP);
316+
if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
317+
// TODO: Remove once VPDerivedIVRecipe can be simplified, which requires
318+
// vector trip count being modeled in VPlan.
319+
// Set the insert point after the last scalarized instruction or after the
320+
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
321+
// will directly follow the scalar definitions.
322+
auto NewIP =
323+
isa<PHINode>(LastInst)
324+
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
325+
: std::next(BasicBlock::iterator(LastInst));
326+
Builder.SetInsertPoint(&*NewIP);
327+
}
324328

325329
// However, if we are vectorizing, we need to construct the vector values.
326330
// If the value is known to be uniform after vectorization, we can just
@@ -335,7 +339,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
335339
} else {
336340
// Initialize packing with insertelements to start from undef.
337341
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
338-
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
342+
Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
339343
set(Def, Undef);
340344
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
341345
packScalarIntoVectorValue(Def, Lane);

0 commit comments

Comments
 (0)