Skip to content

Commit f1f3c34

Browse files
committed
Revert "Recommit "[VPlan] First step towards VPlan cost modeling. (#92555)""
This reverts commits 242cc20 and eea150c, as they are causing a build bot failure and a number of crashes have been reported at #92555.
1 parent 39048b6 commit f1f3c34

File tree

10 files changed

+28
-498
lines changed

10 files changed

+28
-498
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -344,16 +344,6 @@ class LoopVectorizationPlanner {
344344
/// A builder used to construct the current plan.
345345
VPBuilder Builder;
346346

347-
/// Computes the cost of \p Plan for vectorization factor \p VF.
348-
///
349-
/// The current implementation requires access to the
350-
/// LoopVectorizationLegality to handle inductions and reductions, which is
351-
/// why it is kept separate from the VPlan-only cost infrastructure.
352-
///
353-
/// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
354-
/// been retired.
355-
InstructionCost cost(VPlan &Plan, ElementCount VF) const;
356-
357347
public:
358348
LoopVectorizationPlanner(
359349
Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
@@ -375,9 +365,6 @@ class LoopVectorizationPlanner {
375365
/// Return the best VPlan for \p VF.
376366
VPlan &getBestPlanFor(ElementCount VF) const;
377367

378-
/// Return the most profitable plan and fix its VF to the most profitable one.
379-
VPlan &getBestPlan() const;
380-
381368
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
382369
/// according to the best selected \p VF and \p UF.
383370
///
@@ -456,9 +443,7 @@ class LoopVectorizationPlanner {
456443
ElementCount MinVF);
457444

458445
/// \return The most profitable vectorization factor and the cost of that VF.
459-
/// This method checks every VF in \p CandidateVFs. This is now only used to
460-
/// verify the decisions by the new VPlan-based cost-model and will be retired
461-
/// once the VPlan-based cost-model is stabilized.
446+
/// This method checks every VF in \p CandidateVFs.
462447
VectorizationFactor
463448
selectVectorizationFactor(const ElementCountSet &CandidateVFs);
464449

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 22 additions & 207 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
290290
cl::desc("A flag that overrides the target's max interleave factor for "
291291
"vectorized loops."));
292292

293-
cl::opt<unsigned> ForceTargetInstructionCost(
293+
static cl::opt<unsigned> ForceTargetInstructionCost(
294294
"force-target-instruction-cost", cl::init(0), cl::Hidden,
295295
cl::desc("A flag that overrides the target's expected cost for "
296296
"an instruction to a single constant value. Mostly "
@@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
412412
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
413413
}
414414

415+
/// A helper function that returns the reciprocal of the block probability of
416+
/// predicated blocks. If we return X, we are assuming the predicated block
417+
/// will execute once for every X iterations of the loop header.
418+
///
419+
/// TODO: We should use actual block probability here, if available. Currently,
420+
/// we always assume predicated blocks have a 50% chance of executing.
421+
static unsigned getReciprocalPredBlockProb() { return 2; }
422+
415423
/// Returns "best known" trip count for the specified loop \p L as defined by
416424
/// the following procedure:
417425
/// 1) Returns exact trip count if it is known.
@@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel {
16131621
/// \p VF is the vectorization factor chosen for the original loop.
16141622
bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
16151623

1616-
/// Return the cost of instructions in an inloop reduction pattern, if I is
1617-
/// part of that pattern.
1618-
std::optional<InstructionCost>
1619-
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
1620-
TTI::TargetCostKind CostKind) const;
1621-
1622-
/// Returns the execution time cost of an instruction for a given vector
1623-
/// width. Vector width of one means scalar.
1624-
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
1625-
16261624
private:
16271625
unsigned NumPredStores = 0;
16281626

@@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel {
16481646
/// of elements.
16491647
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
16501648

1649+
/// Returns the execution time cost of an instruction for a given vector
1650+
/// width. Vector width of one means scalar.
1651+
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
1652+
16511653
/// The cost-computation logic from getInstructionCost which provides
16521654
/// the vector type as an output parameter.
16531655
InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
16541656
Type *&VectorTy);
16551657

1658+
/// Return the cost of instructions in an inloop reduction pattern, if I is
1659+
/// part of that pattern.
1660+
std::optional<InstructionCost>
1661+
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
1662+
TTI::TargetCostKind CostKind) const;
1663+
16561664
/// Calculate vectorization cost of memory instruction \p I.
16571665
InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);
16581666

@@ -7289,10 +7297,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
72897297
if (!MaxFactors.hasVector())
72907298
return VectorizationFactor::Disabled();
72917299

7292-
// Select the optimal vectorization factor according to the legacy cost-model.
7293-
// This is now only used to verify the decisions by the new VPlan-based
7294-
// cost-model and will be retired once the VPlan-based cost-model is
7295-
// stabilized.
7300+
// Select the optimal vectorization factor.
72967301
VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
72977302
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
72987303
if (!hasPlanWithVF(VF.Width)) {
@@ -7303,189 +7308,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
73037308
return VF;
73047309
}
73057310

7306-
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
7307-
ElementCount VF) const {
7308-
return CM.getInstructionCost(UI, VF).first;
7309-
}
7310-
7311-
bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
7312-
return (IsVector && CM.VecValuesToIgnore.contains(UI)) ||
7313-
SkipCostComputation.contains(UI);
7314-
}
7315-
7316-
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
7317-
ElementCount VF) const {
7318-
InstructionCost Cost = 0;
7319-
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
7320-
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
7321-
7322-
// Cost modeling for inductions is inaccurate in the legacy cost model
7323-
// compared to the recipes that are generated. To match here initially during
7324-
// VPlan cost model bring up directly use the induction costs from the legacy
7325-
// cost model. Note that we do this as pre-processing; the VPlan may not have
7326-
// any recipes associated with the original induction increment instruction
7327-
// and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute
7328-
// the cost of induction phis and increments (both that are represented by
7329-
// recipes and those that are not), to avoid distinguishing between them here,
7330-
// and skip all recipes that represent induction phis and increments (the
7331-
// former case) later on, if they exist, to avoid counting them twice.
7332-
// Similarly we pre-compute the cost of any optimized truncates.
7333-
// TODO: Switch to more accurate costing based on VPlan.
7334-
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
7335-
Instruction *IVInc = cast<Instruction>(
7336-
IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
7337-
SmallVector<Instruction *> IVInsts = {IV, IVInc};
7338-
for (User *U : IV->users()) {
7339-
auto *CI = cast<Instruction>(U);
7340-
if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF))
7341-
continue;
7342-
IVInsts.push_back(CI);
7343-
}
7344-
for (Instruction *IVInst : IVInsts) {
7345-
if (!CostCtx.SkipCostComputation.insert(IVInst).second)
7346-
continue;
7347-
InstructionCost InductionCost = CostCtx.getLegacyCost(IVInst, VF);
7348-
LLVM_DEBUG({
7349-
dbgs() << "Cost of " << InductionCost << " for VF " << VF
7350-
<< ": induction instruction " << *IVInst << "\n";
7351-
});
7352-
Cost += InductionCost;
7353-
}
7354-
}
7355-
7356-
/// Compute the cost of all exiting conditions of the loop using the legacy
7357-
/// cost model. This is to match the legacy behavior, which adds the cost of
7358-
/// all exit conditions. Note that this over-estimates the cost, as there will
7359-
/// be a single condition to control the vector loop.
7360-
SmallVector<BasicBlock *> Exiting;
7361-
CM.TheLoop->getExitingBlocks(Exiting);
7362-
SetVector<Instruction *> ExitInstrs;
7363-
// Collect all exit conditions.
7364-
for (BasicBlock *EB : Exiting) {
7365-
auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
7366-
if (!Term)
7367-
continue;
7368-
if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
7369-
ExitInstrs.insert(CondI);
7370-
}
7371-
}
7372-
// Compute the cost of all instructions only feeding the exit conditions.
7373-
for (unsigned I = 0; I != ExitInstrs.size(); ++I) {
7374-
Instruction *CondI = ExitInstrs[I];
7375-
if (!OrigLoop->contains(CondI) ||
7376-
!CostCtx.SkipCostComputation.insert(CondI).second)
7377-
continue;
7378-
Cost += CostCtx.getLegacyCost(CondI, VF);
7379-
for (Value *Op : CondI->operands()) {
7380-
auto *OpI = dyn_cast<Instruction>(Op);
7381-
if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) {
7382-
return !ExitInstrs.contains(cast<Instruction>(U));
7383-
}))
7384-
continue;
7385-
ExitInstrs.insert(OpI);
7386-
}
7387-
}
7388-
7389-
// The legacy cost model has special logic to compute the cost of in-loop
7390-
// reductions, which may be smaller than the sum of all instructions involved
7391-
// in the reduction. For AnyOf reductions, VPlan codegen may remove the select
7392-
// which the legacy cost model uses to assign cost. Pre-compute their costs
7393-
// for now.
7394-
// TODO: Switch to costing based on VPlan once the logic has been ported.
7395-
for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) {
7396-
if (!CM.isInLoopReduction(RedPhi) &&
7397-
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
7398-
RdxDesc.getRecurrenceKind()))
7399-
continue;
7400-
7401-
// AnyOf reduction codegen may remove the select. To match the legacy cost
7402-
// model, pre-compute the cost for AnyOf reductions here.
7403-
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
7404-
RdxDesc.getRecurrenceKind())) {
7405-
auto *Select = cast<SelectInst>(*find_if(
7406-
RedPhi->users(), [](User *U) { return isa<SelectInst>(U); }));
7407-
assert(!CostCtx.SkipCostComputation.contains(Select) &&
7408-
"reduction op visited multiple times");
7409-
CostCtx.SkipCostComputation.insert(Select);
7410-
auto ReductionCost = CostCtx.getLegacyCost(Select, VF);
7411-
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
7412-
<< ":\n any-of reduction " << *Select << "\n");
7413-
Cost += ReductionCost;
7414-
continue;
7415-
}
7416-
7417-
const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
7418-
SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
7419-
ChainOps.end());
7420-
// Also include the operands of instructions in the chain, as the cost-model
7421-
// may mark extends as free.
7422-
for (auto *ChainOp : ChainOps) {
7423-
for (Value *Op : ChainOp->operands()) {
7424-
if (auto *I = dyn_cast<Instruction>(Op))
7425-
ChainOpsAndOperands.insert(I);
7426-
}
7427-
}
7428-
7429-
// Pre-compute the cost for I, if it has a reduction pattern cost.
7430-
for (Instruction *I : ChainOpsAndOperands) {
7431-
auto ReductionCost = CM.getReductionPatternCost(
7432-
I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput);
7433-
if (!ReductionCost)
7434-
continue;
7435-
7436-
assert(!CostCtx.SkipCostComputation.contains(I) &&
7437-
"reduction op visited multiple times");
7438-
CostCtx.SkipCostComputation.insert(I);
7439-
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
7440-
<< ":\n in-loop reduction " << *I << "\n");
7441-
Cost += *ReductionCost;
7442-
}
7443-
}
7444-
7445-
// Now compute and add the VPlan-based cost.
7446-
Cost += Plan.cost(VF, CostCtx);
7447-
LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n");
7448-
return Cost;
7449-
}
7450-
7451-
VPlan &LoopVectorizationPlanner::getBestPlan() const {
7452-
// If there is a single VPlan with a single VF, return it directly.
7453-
VPlan &FirstPlan = *VPlans[0];
7454-
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
7455-
return FirstPlan;
7456-
7457-
VPlan *BestPlan = &FirstPlan;
7458-
ElementCount ScalarVF = ElementCount::getFixed(1);
7459-
assert(hasPlanWithVF(ScalarVF) &&
7460-
"More than a single plan/VF w/o any plan having scalar VF");
7461-
7462-
InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF);
7463-
VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost);
7464-
7465-
bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
7466-
if (ForceVectorization) {
7467-
// Ignore scalar width, because the user explicitly wants vectorization.
7468-
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
7469-
// evaluation.
7470-
BestFactor.Cost = InstructionCost::getMax();
7471-
}
7472-
7473-
for (auto &P : VPlans) {
7474-
for (ElementCount VF : P->vectorFactors()) {
7475-
if (VF.isScalar())
7476-
continue;
7477-
InstructionCost Cost = cost(*P, VF);
7478-
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
7479-
if (isMoreProfitable(CurrentFactor, BestFactor)) {
7480-
BestFactor = CurrentFactor;
7481-
BestPlan = &*P;
7482-
}
7483-
}
7484-
}
7485-
BestPlan->setVF(BestFactor.Width);
7486-
return *BestPlan;
7487-
}
7488-
74897311
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
74907312
assert(count_if(VPlans,
74917313
[VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
@@ -10344,15 +10166,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1034410166
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
1034510167
PSI, Checks);
1034610168

10347-
VPlan &BestPlan = LVP.getBestPlan();
10348-
assert(size(BestPlan.vectorFactors()) == 1 &&
10349-
"Plan should have a single VF");
10350-
ElementCount Width = *BestPlan.vectorFactors().begin();
10351-
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
10352-
<< "\n");
10353-
assert(VF.Width == Width &&
10354-
"VPlan cost model and legacy cost model disagreed");
10355-
LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
10169+
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
10170+
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
1035610171
++LoopsVectorized;
1035710172

1035810173
// Add metadata to disable runtime unrolling a scalar loop when there

0 commit comments

Comments
 (0)