Skip to content

Commit 9b6fa7c

Browse files
committed
[VPlan] Update final exit value via VPlan.
Model updating IV users directly in VPlan, replacing fixupIVUsers. Depends on #110004, #109975 and #112145.
1 parent 7f3428d commit 9b6fa7c

File tree

5 files changed

+118
-162
lines changed

5 files changed

+118
-162
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 98 additions & 149 deletions
Original file line number | Diff line number | Diff line change
@@ -542,11 +542,6 @@ class InnerLoopVectorizer {
542542
protected:
543543
friend class LoopVectorizationPlanner;
544544

545-
/// Set up the values of the IVs correctly when exiting the vector loop.
546-
virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
547-
Value *VectorTripCount, BasicBlock *MiddleBlock,
548-
VPTransformState &State);
549-
550545
/// Iteratively sink the scalarized operands of a predicated instruction into
551546
/// the block that was created for it.
552547
void sinkScalarOperands(Instruction *PredInst);
@@ -775,10 +770,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
775770
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
776771
void printDebugTracesAtStart() override;
777772
void printDebugTracesAtEnd() override;
778-
779-
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
780-
Value *VectorTripCount, BasicBlock *MiddleBlock,
781-
VPTransformState &State) override {};
782773
};
783774

784775
// A specialized derived class of inner loop vectorizer that performs
@@ -2751,97 +2742,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27512742
return LoopVectorPreHeader;
27522743
}
27532744

2754-
// Fix up external users of the induction variable. At this point, we are
2755-
// in LCSSA form, with all external PHIs that use the IV having one input value,
2756-
// coming from the remainder loop. We need those PHIs to also have a correct
2757-
// value for the IV when arriving directly from the middle block.
2758-
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2759-
const InductionDescriptor &II,
2760-
Value *VectorTripCount,
2761-
BasicBlock *MiddleBlock,
2762-
VPTransformState &State) {
2763-
// There are two kinds of external IV usages - those that use the value
2764-
// computed in the last iteration (the PHI) and those that use the penultimate
2765-
// value (the value that feeds into the phi from the loop latch).
2766-
// We allow both, but they, obviously, have different values.
2767-
2768-
DenseMap<Value *, Value *> MissingVals;
2769-
2770-
Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2771-
OrigLoop->getLoopPreheader()))
2772-
->getIncomingValueForBlock(MiddleBlock);
2773-
2774-
// An external user of the last iteration's value should see the value that
2775-
// the remainder loop uses to initialize its own IV.
2776-
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2777-
for (User *U : PostInc->users()) {
2778-
Instruction *UI = cast<Instruction>(U);
2779-
if (!OrigLoop->contains(UI)) {
2780-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2781-
MissingVals[UI] = EndValue;
2782-
}
2783-
}
2784-
2785-
// An external user of the penultimate value needs to see EndValue - Step.
2786-
// The simplest way to get this is to recompute it from the constituent SCEVs,
2787-
// that is Start + (Step * (CRD - 1)).
2788-
for (User *U : OrigPhi->users()) {
2789-
auto *UI = cast<Instruction>(U);
2790-
if (!OrigLoop->contains(UI)) {
2791-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2792-
IRBuilder<> B(MiddleBlock->getTerminator());
2793-
2794-
// Fast-math-flags propagate from the original induction instruction.
2795-
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2796-
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2797-
2798-
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2799-
assert(StepVPV && "step must have been expanded during VPlan execution");
2800-
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2801-
: State.get(StepVPV, VPLane(0));
2802-
Value *Escape = nullptr;
2803-
if (EndValue->getType()->isIntegerTy())
2804-
Escape = B.CreateSub(EndValue, Step);
2805-
else if (EndValue->getType()->isPointerTy())
2806-
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2807-
else {
2808-
assert(EndValue->getType()->isFloatingPointTy() &&
2809-
"Unexpected induction type");
2810-
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2811-
Instruction::FAdd
2812-
? Instruction::FSub
2813-
: Instruction::FAdd,
2814-
EndValue, Step);
2815-
}
2816-
Escape->setName("ind.escape");
2817-
MissingVals[UI] = Escape;
2818-
}
2819-
}
2820-
2821-
assert((MissingVals.empty() ||
2822-
all_of(MissingVals,
2823-
[MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2824-
return all_of(
2825-
predecessors(cast<Instruction>(P.first)->getParent()),
2826-
[MiddleBlock, this](BasicBlock *Pred) {
2827-
return Pred == MiddleBlock ||
2828-
Pred == OrigLoop->getLoopLatch();
2829-
});
2830-
})) &&
2831-
"Expected escaping values from latch/middle.block only");
2832-
2833-
for (auto &I : MissingVals) {
2834-
PHINode *PHI = cast<PHINode>(I.first);
2835-
// One corner case we have to handle is two IVs "chasing" each-other,
2836-
// that is %IV2 = phi [...], [ %IV1, %latch ]
2837-
// In this case, if IV1 has an external use, we need to avoid adding both
2838-
// "last value of IV1" and "penultimate value of IV2". So, verify that we
2839-
// don't already have an incoming value for the middle block.
2840-
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2841-
PHI->addIncoming(I.second, MiddleBlock);
2842-
}
2843-
}
2844-
28452745
namespace {
28462746

28472747
struct CSEDenseMapInfo {
@@ -2986,24 +2886,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29862886
for (PHINode &PN : Exit->phis())
29872887
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
29882888

2989-
if (Cost->requiresScalarEpilogue(VF.isVector())) {
2990-
// No edge from the middle block to the unique exit block has been inserted
2991-
// and there is nothing to fix from vector loop; phis should have incoming
2992-
// from scalar loop only.
2993-
} else {
2994-
// TODO: Check in VPlan to see if IV users need fixing instead of checking
2995-
// the cost model.
2996-
2997-
// If we inserted an edge from the middle block to the unique exit block,
2998-
// update uses outside the loop (phis) to account for the newly inserted
2999-
// edge.
3000-
3001-
// Fix-up external users of the induction variables.
3002-
for (const auto &Entry : Legal->getInductionVars())
3003-
fixupIVUsers(Entry.first, Entry.second,
3004-
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
3005-
}
3006-
30072889
for (Instruction *PI : PredicatedInstructions)
30082890
sinkScalarOperands(&*PI);
30092891

@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
88578739
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
88588740
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
88598741
/// the end value of the induction.
8860-
static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8861-
VPBuilder &VectorPHBuilder,
8862-
VPBuilder &ScalarPHBuilder,
8863-
VPTypeAnalysis &TypeInfo,
8864-
VPValue *VectorTC) {
8742+
static VPValue *addResumePhiRecipeForInduction(
8743+
VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8744+
VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8745+
DenseMap<VPValue *, VPValue *> &EndValues) {
88658746
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
88668747
// Truncated wide inductions resume from the last lane of their vector value
88678748
// in the last vector iteration which is handled elsewhere.
@@ -8886,6 +8767,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
88868767
ScalarTypeOfWideIV);
88878768
}
88888769

8770+
EndValues[WideIV] = EndValue;
88898771
auto *ResumePhiRecipe =
88908772
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
88918773
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
88958777
/// Create resume phis in the scalar preheader for first-order recurrences,
88968778
/// reductions and inductions, and update the VPIRInstructions wrapping the
88978779
/// original phis in the scalar header.
8898-
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8780+
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8781+
Loop *OrigLoop,
8782+
DenseMap<VPValue *, VPValue *> &EndValues) {
88998783
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
89008784
auto *ScalarPH = Plan.getScalarPreheader();
89018785
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89158799
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
89168800
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
89178801
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8918-
&Plan.getVectorTripCount())) {
8802+
&Plan.getVectorTripCount(), EndValues)) {
89198803
ScalarPhiIRI->addOperand(ResumePhi);
89208804
continue;
89218805
}
@@ -8949,9 +8833,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89498833
// modeled explicitly yet and won't be included. Those are un-truncated
89508834
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
89518835
// increments.
8952-
static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8953-
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8954-
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8836+
static SetVector<VPIRInstruction *>
8837+
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
8838+
VPlan &Plan) {
89558839
auto *MiddleVPBB = Plan.getMiddleBlock();
89568840
SetVector<VPIRInstruction *> ExitUsersToFix;
89578841
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8976,18 +8860,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89768860
// Exit values for inductions are computed and updated outside of VPlan
89778861
// and independent of induction recipes.
89788862
// TODO: Compute induction exit values in VPlan.
8979-
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8980-
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8981-
isa<VPWidenPointerInductionRecipe>(V) ||
8982-
(isa<Instruction>(IncomingValue) &&
8983-
OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8984-
any_of(IncomingValue->users(), [&Inductions](User *U) {
8985-
auto *P = dyn_cast<PHINode>(U);
8986-
return P && Inductions.contains(P);
8987-
}))) {
8988-
if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
8989-
continue;
8990-
}
89918863
ExitUsersToFix.insert(ExitIRI);
89928864
ExitIRI->addOperand(V);
89938865
}
@@ -8996,17 +8868,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89968868
return ExitUsersToFix;
89978869
}
89988870

8871+
/// If \p Incoming is a user of a non-truncated induction, create recipes to
8872+
/// compute the final value and update the user \p ExitIRI.
8873+
static bool addInductionEndValue(
8874+
VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
8875+
const MapVector<PHINode *, InductionDescriptor> &Inductions,
8876+
DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
8877+
if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8878+
!cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
8879+
isa<VPWidenPointerInductionRecipe>(Incoming) ||
8880+
(isa<Instruction>(Incoming->getUnderlyingValue()) &&
8881+
any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
8882+
[&Inductions](User *U) {
8883+
auto *P = dyn_cast<PHINode>(U);
8884+
return P && Inductions.contains(P);
8885+
}))) {
8886+
VPValue *IV;
8887+
if (auto *WideIV =
8888+
dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
8889+
IV = WideIV;
8890+
else if (auto *WideIV =
8891+
dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()
8892+
->getOperand(0)
8893+
->getDefiningRecipe()))
8894+
IV = WideIV;
8895+
else
8896+
IV = Incoming->getDefiningRecipe()->getOperand(1);
8897+
// Skip phi nodes already updated. This can be the case if 2 induction
8898+
// phis chase each other.
8899+
VPValue *EndValue = EndValues[IV];
8900+
if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
8901+
IsaPred<VPWidenIntOrFpInductionRecipe,
8902+
VPWidenPointerInductionRecipe>)) {
8903+
ExitIRI->setOperand(0, EndValue);
8904+
return true;
8905+
}
8906+
8907+
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
8908+
VPValue *Escape = nullptr;
8909+
auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
8910+
VPValue *Step = WideIV->getStepValue();
8911+
Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
8912+
if (ScalarTy->isIntegerTy())
8913+
Escape =
8914+
B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
8915+
else if (ScalarTy->isPointerTy())
8916+
Escape = B.createPtrAdd(
8917+
EndValue,
8918+
B.createNaryOp(Instruction::Sub,
8919+
{Plan.getOrAddLiveIn(ConstantInt::get(
8920+
Step->getLiveInIRValue()->getType(), 0)),
8921+
Step}),
8922+
{}, "ind.escape");
8923+
else if (ScalarTy->isFloatingPointTy()) {
8924+
const auto &ID = WideIV->getInductionDescriptor();
8925+
Escape = B.createNaryOp(
8926+
ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
8927+
? Instruction::FSub
8928+
: Instruction::FAdd,
8929+
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
8930+
} else {
8931+
llvm_unreachable("all possible induction types must be handled");
8932+
}
8933+
ExitIRI->setOperand(0, Escape);
8934+
return true;
8935+
}
8936+
return false;
8937+
}
89998938
// Add exit values to \p Plan. Extracts are added for each entry in \p
90008939
// ExitUsersToFix if needed and their operands are updated. Returns true if all
90018940
// exit users can be handled, otherwise returns false.
9002-
static bool
9003-
addUsersInExitBlocks(VPlan &Plan,
9004-
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8941+
static bool addUsersInExitBlocks(
8942+
VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8943+
const MapVector<PHINode *, InductionDescriptor> &Inductions,
8944+
DenseMap<VPValue *, VPValue *> &EndValues) {
90058945
if (ExitUsersToFix.empty())
90068946
return true;
90078947

90088948
auto *MiddleVPBB = Plan.getMiddleBlock();
90098949
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8950+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
90108951

90118952
// Introduce extract for exiting values and update the VPIRInstructions
90128953
// modeling the corresponding LCSSA phis.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
90228963
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
90238964
return false;
90248965

8966+
VPValue *Incoming = ExitIRI->getOperand(0);
8967+
if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
8968+
TypeInfo))
8969+
continue;
8970+
90258971
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
90268972
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
90278973
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
90288974
IntegerType::get(Ctx, 32), 1))});
9029-
ExitIRI->setOperand(Idx, Ext);
8975+
ExitIRI->setOperand(0, Ext);
90308976
}
90318977
}
90328978
return true;
@@ -9307,11 +9253,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93079253
VPlanTransforms::handleUncountableEarlyExit(
93089254
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93099255
}
9310-
addScalarResumePhis(RecipeBuilder, *Plan);
9311-
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9312-
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9256+
DenseMap<VPValue *, VPValue *> EndValues;
9257+
addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
9258+
SetVector<VPIRInstruction *> ExitUsersToFix =
9259+
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
93139260
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9314-
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9261+
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
9262+
EndValues)) {
93159263
reportVectorizationFailure(
93169264
"Some exit values in loop with uncountable exit not supported yet",
93179265
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
@@ -9438,7 +9386,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94389386
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
94399387
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
94409388
}
9441-
addScalarResumePhis(RecipeBuilder, *Plan);
9389+
DenseMap<VPValue *, VPValue *> EndValues;
9390+
addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
94429391

94439392
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
94449393
return Plan;

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -315,16 +315,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
315315
LastLane = 0;
316316
}
317317

318-
auto *LastInst = cast<Instruction>(get(Def, LastLane));
319-
// Set the insert point after the last scalarized instruction or after the
320-
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
321-
// will directly follow the scalar definitions.
318+
auto *LastDef = get(Def, LastLane);
322319
auto OldIP = Builder.saveIP();
323-
auto NewIP =
324-
isa<PHINode>(LastInst)
325-
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
326-
: std::next(BasicBlock::iterator(LastInst));
327-
Builder.SetInsertPoint(&*NewIP);
320+
if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
321+
// TODO: Remove once VPDerivedIVRecipe can be simplified, which requires
322+
// vector trip count being modeled in VPlan.
323+
// Set the insert point after the last scalarized instruction or after the
324+
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
325+
// will directly follow the scalar definitions.
326+
auto NewIP =
327+
isa<PHINode>(LastInst)
328+
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
329+
: std::next(BasicBlock::iterator(LastInst));
330+
Builder.SetInsertPoint(&*NewIP);
331+
}
328332

329333
// However, if we are vectorizing, we need to construct the vector values.
330334
// If the value is known to be uniform after vectorization, we can just
@@ -339,7 +343,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
339343
} else {
340344
// Initialize packing with insertelements to start from undef.
341345
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
342-
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
346+
Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
343347
set(Def, Undef);
344348
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
345349
packScalarIntoVectorValue(Def, Lane);

0 commit comments

Comments
 (0)