Skip to content

Commit 8ecee6b

Browse files
committed
[VPlan] Update final exit value via VPlan.
Model updating IV users directly in VPlan, replacing fixupIVUsers. Depends on #110004, #109975 and #112145.
1 parent d74aca2 commit 8ecee6b

File tree

4 files changed

+113
-154
lines changed

4 files changed

+113
-154
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 93 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -552,11 +552,6 @@ class InnerLoopVectorizer {
552552
protected:
553553
friend class LoopVectorizationPlanner;
554554

555-
/// Set up the values of the IVs correctly when exiting the vector loop.
556-
virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
557-
Value *VectorTripCount, BasicBlock *MiddleBlock,
558-
VPTransformState &State);
559-
560555
/// Iteratively sink the scalarized operands of a predicated instruction into
561556
/// the block that was created for it.
562557
void sinkScalarOperands(Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785780
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
786781
void printDebugTracesAtStart() override;
787782
void printDebugTracesAtEnd() override;
788-
789-
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
790-
Value *VectorTripCount, BasicBlock *MiddleBlock,
791-
VPTransformState &State) override {};
792783
};
793784

794785
// A specialized derived class of inner loop vectorizer that performs
@@ -2768,88 +2759,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27682759
return LoopVectorPreHeader;
27692760
}
27702761

2771-
// Fix up external users of the induction variable. At this point, we are
2772-
// in LCSSA form, with all external PHIs that use the IV having one input value,
2773-
// coming from the remainder loop. We need those PHIs to also have a correct
2774-
// value for the IV when arriving directly from the middle block.
2775-
//
// \p OrigPhi is the induction phi of the original loop and \p II its
// induction descriptor (used here for the step and the induction binary op).
// \p MiddleBlock is the block that receives the new incoming edges, and
// \p State is only consulted to materialize the step when it is not a live-in.
// NOTE(review): \p VectorTripCount is not referenced anywhere in this body.
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2776-
const InductionDescriptor &II,
2777-
Value *VectorTripCount,
2778-
BasicBlock *MiddleBlock,
2779-
VPTransformState &State) {
2780-
// There are two kinds of external IV usages - those that use the value
2781-
// computed in the last iteration (the PHI) and those that use the penultimate
2782-
// value (the value that feeds into the phi from the loop latch).
2783-
// We allow both, but they, obviously, have different values.
// NOTE(review): in the code below, external users of the latch
// (post-increment) value receive EndValue, while external users of the phi
// itself receive EndValue adjusted back by one step.
2784-
2785-
// Maps each external LCSSA phi to the value it must receive on the edge from
// MiddleBlock.
DenseMap<Value *, Value *> MissingVals;
2786-
2787-
// EndValue is looked up through the phi feeding OrigPhi from the original
// loop's preheader: it is that phi's incoming value for MiddleBlock. The
// cast<> requires the preheader incoming value to be a PHINode.
Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2788-
OrigLoop->getLoopPreheader()))
2789-
->getIncomingValueForBlock(MiddleBlock);
2790-
2791-
// An external user of the last iteration's value should see the value that
2792-
// the remainder loop uses to initialize its own IV.
2793-
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2794-
for (User *U : PostInc->users()) {
2795-
Instruction *UI = cast<Instruction>(U);
2796-
if (!OrigLoop->contains(UI)) {
2797-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2798-
MissingVals[UI] = EndValue;
2799-
}
2800-
}
2801-
2802-
// An external user of the penultimate value needs to see EndValue - Step.
2803-
// The simplest way to get this is to recompute it from the constituent SCEVs,
2804-
// that is Start + (Step * (CRD - 1)).
// NOTE(review): the code below does not start from Start; it takes EndValue
// and steps it back once using the expanded step value.
2805-
for (User *U : OrigPhi->users()) {
2806-
auto *UI = cast<Instruction>(U);
2807-
if (!OrigLoop->contains(UI)) {
2808-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2809-
// The escape value is emitted right before MiddleBlock's terminator. A
// fresh builder (and a fresh escape computation) is created for every
// external user found.
IRBuilder<> B(MiddleBlock->getTerminator());
2810-
2811-
// Fast-math-flags propagate from the original induction instruction.
2812-
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2813-
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2814-
2815-
// The step is either a live-in IR value or the lane-0 value generated for
// the expanded SCEV.
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2816-
assert(StepVPV && "step must have been expanded during VPlan execution");
2817-
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2818-
: State.get(StepVPV, VPLane(0));
2819-
Value *Escape = nullptr;
2820-
// Integer IVs: plain subtraction of the step.
if (EndValue->getType()->isIntegerTy())
2821-
Escape = B.CreateSub(EndValue, Step);
2822-
// Pointer IVs: step back by adding the negated step to the pointer.
else if (EndValue->getType()->isPointerTy())
2823-
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2824-
else {
2825-
assert(EndValue->getType()->isFloatingPointTy() &&
2826-
"Unexpected induction type");
2827-
// FP IVs: apply the inverse of the induction's binary op, i.e. FSub when
// the induction uses FAdd and FAdd otherwise.
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2828-
Instruction::FAdd
2829-
? Instruction::FSub
2830-
: Instruction::FAdd,
2831-
EndValue, Step);
2832-
}
2833-
Escape->setName("ind.escape");
2834-
MissingVals[UI] = Escape;
2835-
}
2836-
}
2837-
2838-
assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) &&
2839-
"Expected a single exit block for escaping values");
2840-
2841-
// Wire the collected values into the external phis as incoming values from
// MiddleBlock.
for (auto &I : MissingVals) {
2842-
PHINode *PHI = cast<PHINode>(I.first);
2843-
// One corner case we have to handle is two IVs "chasing" each-other,
2844-
// that is %IV2 = phi [...], [ %IV1, %latch ]
2845-
// In this case, if IV1 has an external use, we need to avoid adding both
2846-
// "last value of IV1" and "penultimate value of IV2". So, verify that we
2847-
// don't already have an incoming value for the middle block.
2848-
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2849-
PHI->addIncoming(I.second, MiddleBlock);
2850-
}
2851-
}
2852-
28532762
namespace {
28542763

28552764
struct CSEDenseMapInfo {
@@ -2978,24 +2887,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29782887
for (PHINode &PN : Exit->phis())
29792888
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
29802889

2981-
if (Cost->requiresScalarEpilogue(VF.isVector())) {
2982-
// No edge from the middle block to the unique exit block has been inserted
2983-
// and there is nothing to fix from vector loop; phis should have incoming
2984-
// from scalar loop only.
2985-
} else {
2986-
// TODO: Check in VPlan to see if IV users need fixing instead of checking
2987-
// the cost model.
2988-
2989-
// If we inserted an edge from the middle block to the unique exit block,
2990-
// update uses outside the loop (phis) to account for the newly inserted
2991-
// edge.
2992-
2993-
// Fix-up external users of the induction variables.
2994-
for (const auto &Entry : Legal->getInductionVars())
2995-
fixupIVUsers(Entry.first, Entry.second,
2996-
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
2997-
}
2998-
29992890
for (Instruction *PI : PredicatedInstructions)
30002891
sinkScalarOperands(&*PI);
30012892

@@ -8839,11 +8730,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
88398730
/// Create a ResumePhi for \p PhiR, if it is a wide induction recipe. If the
88408731
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
88418732
/// the end value of the induction.
8842-
static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8843-
VPBuilder &VectorPHBuilder,
8844-
VPBuilder &ScalarPHBuilder,
8845-
VPTypeAnalysis &TypeInfo,
8846-
VPValue *VectorTC) {
8733+
static VPValue *addResumeValuesForInduction(
8734+
VPHeaderPHIRecipe *PhiR, VPBuilder &VectorPHBuilder,
8735+
VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8736+
Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
88478737
auto *WideIV = dyn_cast<VPWidenInductionRecipe>(PhiR);
88488738
if (!WideIV)
88498739
return nullptr;
@@ -8875,6 +8765,7 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
88758765
ScalarTy);
88768766
}
88778767

8768+
EndValues[PhiR] = EndValue;
88788769
auto *ResumePhiRecipe =
88798770
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
88808771
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8886,7 +8777,8 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
88868777
/// original phis in the scalar header.
88878778
static void addScalarResumePhis(
88888779
VPlan &Plan,
8889-
function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
8780+
function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe,
8781+
Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
88908782
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
88918783
auto *ScalarPH = Plan.getScalarPreheader();
88928784
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8905,7 +8797,7 @@ static void addScalarResumePhis(
89058797

89068798
if (VPValue *ResumePhi = addResumeValuesForInduction(
89078799
VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8908-
&Plan.getVectorTripCount())) {
8800+
&Plan.getVectorTripCount(), OrigLoop, EndValues)) {
89098801
ScalarPhiIRI->addOperand(ResumePhi);
89108802
continue;
89118803
}
@@ -8937,9 +8829,9 @@ static void addScalarResumePhis(
89378829
// modeled explicitly yet and won't be included. Those are un-truncated
89388830
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
89398831
// increments.
8940-
static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8941-
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8942-
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8832+
static SetVector<VPIRInstruction *>
8833+
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
8834+
VPlan &Plan) {
89438835
auto *MiddleVPBB = Plan.getMiddleBlock();
89448836
SetVector<VPIRInstruction *> ExitUsersToFix;
89458837
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8964,18 +8856,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89648856
// Exit values for inductions are computed and updated outside of VPlan
89658857
// and independent of induction recipes.
89668858
// TODO: Compute induction exit values in VPlan.
8967-
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8968-
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8969-
isa<VPWidenPointerInductionRecipe>(V) ||
8970-
(isa<Instruction>(IncomingValue) &&
8971-
OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8972-
any_of(IncomingValue->users(), [&Inductions](User *U) {
8973-
auto *P = dyn_cast<PHINode>(U);
8974-
return P && Inductions.contains(P);
8975-
}))) {
8976-
if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
8977-
continue;
8978-
}
89798859
ExitUsersToFix.insert(ExitIRI);
89808860
ExitIRI->addOperand(V);
89818861
}
@@ -8987,14 +8867,16 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89878867
// Add exit values to \p Plan. Extracts are added for each entry in \p
89888868
// ExitUsersToFix if needed and their operands are updated. Returns true if all
89898869
// exit users can be handled, otherwise return false.
8990-
static bool
8991-
addUsersInExitBlocks(VPlan &Plan,
8992-
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8870+
static bool addUsersInExitBlocks(
8871+
VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8872+
const MapVector<PHINode *, InductionDescriptor> &Inductions,
8873+
DenseMap<VPValue *, VPValue *> &EndValues) {
89938874
if (ExitUsersToFix.empty())
89948875
return true;
89958876

89968877
auto *MiddleVPBB = Plan.getMiddleBlock();
89978878
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8879+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
89988880

89998881
// Introduce extract for exiting values and update the VPIRInstructions
90008882
// modeling the corresponding LCSSA phis.
@@ -9010,6 +8892,69 @@ addUsersInExitBlocks(VPlan &Plan,
90108892
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
90118893
return false;
90128894

8895+
VPValue *Incoming = ExitIRI->getOperand(0);
8896+
if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8897+
!cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
8898+
isa<VPWidenPointerInductionRecipe>(Incoming) ||
8899+
(isa<Instruction>(Incoming->getUnderlyingValue()) &&
8900+
// OrigLoop->contains(cast<Instruction>(Incoming->getUnderlyingValue()))
8901+
// &&
8902+
any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
8903+
[&Inductions](User *U) {
8904+
auto *P = dyn_cast<PHINode>(U);
8905+
return P && Inductions.contains(P);
8906+
}))) {
8907+
VPValue *IV;
8908+
if (auto *WideIV =
8909+
dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
8910+
IV = WideIV;
8911+
else if (auto *WideIV = dyn_cast<VPWidenInductionRecipe>(
8912+
Incoming->getDefiningRecipe()
8913+
->getOperand(0)
8914+
->getDefiningRecipe()))
8915+
IV = WideIV;
8916+
else
8917+
IV = Incoming->getDefiningRecipe()->getOperand(1);
8918+
// Skip phi nodes already updated. This can be the case if 2 induction
8919+
// phis chase each other.
8920+
VPValue *EndValue = EndValues[IV];
8921+
if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
8922+
IsaPred<VPWidenIntOrFpInductionRecipe,
8923+
VPWidenPointerInductionRecipe>)) {
8924+
ExitIRI->setOperand(0, EndValue);
8925+
continue;
8926+
}
8927+
8928+
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
8929+
VPValue *Escape = nullptr;
8930+
auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
8931+
VPValue *Step = WideIV->getStepValue();
8932+
Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
8933+
if (ScalarTy->isIntegerTy())
8934+
Escape = B.createNaryOp(Instruction::Sub, {EndValue, Step}, {},
8935+
"ind.escape");
8936+
else if (ScalarTy->isPointerTy())
8937+
Escape = B.createPtrAdd(
8938+
EndValue,
8939+
B.createNaryOp(Instruction::Sub,
8940+
{Plan.getOrAddLiveIn(ConstantInt::get(
8941+
Step->getLiveInIRValue()->getType(), 0)),
8942+
Step}),
8943+
{}, "ind.escape");
8944+
else if (ScalarTy->isFloatingPointTy()) {
8945+
const auto &ID = WideIV->getInductionDescriptor();
8946+
Escape = B.createNaryOp(
8947+
ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
8948+
? Instruction::FSub
8949+
: Instruction::FAdd,
8950+
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
8951+
} else {
8952+
llvm_unreachable("all possible induction types must be handled");
8953+
}
8954+
ExitIRI->setOperand(0, Escape);
8955+
continue;
8956+
}
8957+
90138958
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
90148959
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
90158960
{V, Plan.getOrAddLiveIn(ConstantInt::get(
@@ -9294,13 +9239,18 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92949239
VPlanTransforms::handleUncountableEarlyExit(
92959240
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
92969241
}
9297-
addScalarResumePhis(*Plan, [&RecipeBuilder](PHINode *P) {
9298-
return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe(P));
9299-
});
9300-
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9301-
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9242+
DenseMap<VPValue *, VPValue *> EndValues;
9243+
addScalarResumePhis(
9244+
*Plan,
9245+
[&RecipeBuilder](PHINode *P) {
9246+
return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe(P));
9247+
},
9248+
OrigLoop, EndValues);
9249+
SetVector<VPIRInstruction *> ExitUsersToFix =
9250+
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
93029251
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9303-
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9252+
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
9253+
EndValues)) {
93049254
reportVectorizationFailure(
93059255
"Some exit values in loop with uncountable exit not supported yet",
93069256
"Some exit values in loop with uncountable exit not supported yet",
@@ -9419,6 +9369,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94199369
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
94209370
DebugLoc());
94219371

9372+
DenseMap<VPValue *, VPValue *> EndValues;
94229373
addScalarResumePhis(
94239374
*Plan,
94249375
[&Plan](PHINode *P) {
@@ -9428,9 +9379,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94289379
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
94299380
return HeaderR->getUnderlyingValue() == P ? HeaderR : nullptr;
94309381
});
9431-
}
9432-
9433-
);
9382+
},
9383+
OrigLoop, EndValues);
94349384

94359385
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
94369386
return Plan;

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -315,16 +315,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
315315
LastLane = 0;
316316
}
317317

318-
auto *LastInst = cast<Instruction>(get(Def, LastLane));
319-
// Set the insert point after the last scalarized instruction or after the
320-
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
321-
// will directly follow the scalar definitions.
318+
auto *LastDef = get(Def, LastLane);
322319
auto OldIP = Builder.saveIP();
323-
auto NewIP =
324-
isa<PHINode>(LastInst)
325-
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
326-
: std::next(BasicBlock::iterator(LastInst));
327-
Builder.SetInsertPoint(&*NewIP);
320+
if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
321+
// TODO: Remove once VPDerivedIVRecipe can be simplified, which requires
322+
// vector trip count being modeled in VPlan.
323+
// Set the insert point after the last scalarized instruction or after the
324+
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
325+
// will directly follow the scalar definitions.
326+
auto NewIP =
327+
isa<PHINode>(LastInst)
328+
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
329+
: std::next(BasicBlock::iterator(LastInst));
330+
Builder.SetInsertPoint(&*NewIP);
331+
}
328332

329333
// However, if we are vectorizing, we need to construct the vector values.
330334
// If the value is known to be uniform after vectorization, we can just
@@ -339,7 +343,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
339343
} else {
340344
// Initialize packing with insertelements to start from undef.
341345
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
342-
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
346+
Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
343347
set(Def, Undef);
344348
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
345349
packScalarIntoVectorValue(Def, Lane);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,11 @@ class VPIRInstruction : public VPRecipeBase {
14421442
"Op must be an operand of the recipe");
14431443
return true;
14441444
}
1445+
/// Returns true unconditionally for any operand of this recipe, i.e. only the
/// first lane of \p Op is reported as used. Asserts that \p Op is in fact one
/// of the recipe's operands.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1446+
assert(is_contained(operands(), Op) &&
1447+
"Op must be an operand of the recipe");
1448+
return true;
1449+
}
14451450
};
14461451

14471452
/// VPWidenRecipe is a recipe for producing a widened instruction using the

0 commit comments

Comments
 (0)