Skip to content

Commit 6f538f6

Browse files
committed
Revert "Recommit "[VPlan] First step towards VPlan cost modeling. (#92555)""
This reverts commit 90fd99c. This reverts commit 43e6f46. Causes crashes, see comments on #92555.
1 parent: eca988a; commit: 6f538f6

File tree

8 files changed

+27
-428
lines changed

8 files changed

+27
-428
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -344,16 +344,6 @@ class LoopVectorizationPlanner {
344344
/// A builder used to construct the current plan.
345345
VPBuilder Builder;
346346

347-
/// Computes the cost of \p Plan for vectorization factor \p VF.
348-
///
349-
/// The current implementation requires access to the
350-
/// LoopVectorizationLegality to handle inductions and reductions, which is
351-
/// why it is kept separate from the VPlan-only cost infrastructure.
352-
///
353-
/// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
354-
/// been retired.
355-
InstructionCost cost(VPlan &Plan, ElementCount VF) const;
356-
357347
public:
358348
LoopVectorizationPlanner(
359349
Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
@@ -375,9 +365,6 @@ class LoopVectorizationPlanner {
375365
/// Return the best VPlan for \p VF.
376366
VPlan &getBestPlanFor(ElementCount VF) const;
377367

378-
/// Return the most profitable plan and fix its VF to the most profitable one.
379-
VPlan &getBestPlan() const;
380-
381368
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
382369
/// according to the best selected \p VF and \p UF.
383370
///
@@ -456,9 +443,7 @@ class LoopVectorizationPlanner {
456443
ElementCount MinVF);
457444

458445
/// \return The most profitable vectorization factor and the cost of that VF.
459-
/// This method checks every VF in \p CandidateVFs. This is now only used to
460-
/// verify the decisions by the new VPlan-based cost-model and will be retired
461-
/// once the VPlan-based cost-model is stabilized.
446+
/// This method checks every VF in \p CandidateVFs.
462447
VectorizationFactor
463448
selectVectorizationFactor(const ElementCountSet &CandidateVFs);
464449

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 22 additions & 214 deletions
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,7 @@ static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
290290
cl::desc("A flag that overrides the target's max interleave factor for "
291291
"vectorized loops."));
292292

293-
cl::opt<unsigned> ForceTargetInstructionCost(
293+
static cl::opt<unsigned> ForceTargetInstructionCost(
294294
"force-target-instruction-cost", cl::init(0), cl::Hidden,
295295
cl::desc("A flag that overrides the target's expected cost for "
296296
"an instruction to a single constant value. Mostly "
@@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
412412
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
413413
}
414414

415+
/// A helper function that returns the reciprocal of the block probability of
416+
/// predicated blocks. If we return X, we are assuming the predicated block
417+
/// will execute once for every X iterations of the loop header.
418+
///
419+
/// TODO: We should use actual block probability here, if available. Currently,
420+
/// we always assume predicated blocks have a 50% chance of executing.
421+
static unsigned getReciprocalPredBlockProb() { return 2; }
422+
415423
/// Returns "best known" trip count for the specified loop \p L as defined by
416424
/// the following procedure:
417425
/// 1) Returns exact trip count if it is known.
@@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel {
16131621
/// \p VF is the vectorization factor chosen for the original loop.
16141622
bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
16151623

1616-
/// Return the cost of instructions in an inloop reduction pattern, if I is
1617-
/// part of that pattern.
1618-
std::optional<InstructionCost>
1619-
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
1620-
TTI::TargetCostKind CostKind) const;
1621-
1622-
/// Returns the execution time cost of an instruction for a given vector
1623-
/// width. Vector width of one means scalar.
1624-
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
1625-
16261624
private:
16271625
unsigned NumPredStores = 0;
16281626

@@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel {
16481646
/// of elements.
16491647
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
16501648

1649+
/// Returns the execution time cost of an instruction for a given vector
1650+
/// width. Vector width of one means scalar.
1651+
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
1652+
16511653
/// The cost-computation logic from getInstructionCost which provides
16521654
/// the vector type as an output parameter.
16531655
InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
16541656
Type *&VectorTy);
16551657

1658+
/// Return the cost of instructions in an inloop reduction pattern, if I is
1659+
/// part of that pattern.
1660+
std::optional<InstructionCost>
1661+
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
1662+
TTI::TargetCostKind CostKind) const;
1663+
16561664
/// Calculate vectorization cost of memory instruction \p I.
16571665
InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);
16581666

@@ -7280,10 +7288,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
72807288
if (!MaxFactors.hasVector())
72817289
return VectorizationFactor::Disabled();
72827290

7283-
// Select the optimal vectorization factor according to the legacy cost-model.
7284-
// This is now only used to verify the decisions by the new VPlan-based
7285-
// cost-model and will be retired once the VPlan-based cost-model is
7286-
// stabilized.
7291+
// Select the optimal vectorization factor.
72877292
VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
72887293
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
72897294
if (!hasPlanWithVF(VF.Width)) {
@@ -7294,196 +7299,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
72947299
return VF;
72957300
}
72967301

7297-
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
7298-
ElementCount VF) const {
7299-
return CM.getInstructionCost(UI, VF).first;
7300-
}
7301-
7302-
bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
7303-
return (IsVector && CM.VecValuesToIgnore.contains(UI)) ||
7304-
SkipCostComputation.contains(UI);
7305-
}
7306-
7307-
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
7308-
ElementCount VF) const {
7309-
InstructionCost Cost = 0;
7310-
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
7311-
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
7312-
7313-
// Cost modeling for inductions is inaccurate in the legacy cost model
7314-
// compared to the recipes that are generated. To match here initially during
7315-
// VPlan cost model bring up directly use the induction costs from the legacy
7316-
// cost model. Note that we do this as pre-processing; the VPlan may not have
7317-
// any recipes associated with the original induction increment instruction
7318-
// and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute
7319-
// the cost of both induction increment instructions that are represented by
7320-
// recipes and those that are not, to avoid distinguishing between them here,
7321-
// and skip all recipes that represent induction increments (the former case)
7322-
// later on, if they exist, to avoid counting them twice. Similarly we
7323-
// pre-compute the cost of any optimized truncates.
7324-
// TODO: Switch to more accurate costing based on VPlan.
7325-
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
7326-
Instruction *IVInc = cast<Instruction>(
7327-
IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
7328-
if (CostCtx.SkipCostComputation.insert(IVInc).second) {
7329-
InstructionCost InductionCost = CostCtx.getLegacyCost(IVInc, VF);
7330-
LLVM_DEBUG({
7331-
dbgs() << "Cost of " << InductionCost << " for VF " << VF
7332-
<< ":\n induction increment " << *IVInc << "\n";
7333-
IVInc->dump();
7334-
});
7335-
Cost += InductionCost;
7336-
}
7337-
for (User *U : IV->users()) {
7338-
auto *CI = cast<Instruction>(U);
7339-
if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF))
7340-
continue;
7341-
assert(!CostCtx.SkipCostComputation.contains(CI) &&
7342-
"Same cast for multiple inductions?");
7343-
CostCtx.SkipCostComputation.insert(CI);
7344-
InstructionCost CastCost = CostCtx.getLegacyCost(CI, VF);
7345-
LLVM_DEBUG({
7346-
dbgs() << "Cost of " << CastCost << " for VF " << VF
7347-
<< ":\n induction cast " << *CI << "\n";
7348-
CI->dump();
7349-
});
7350-
Cost += CastCost;
7351-
}
7352-
}
7353-
7354-
/// Compute the cost of all exiting conditions of the loop using the legacy
7355-
/// cost model. This is to match the legacy behavior, which adds the cost of
7356-
/// all exit conditions. Note that this over-estimates the cost, as there will
7357-
/// be a single condition to control the vector loop.
7358-
SmallVector<BasicBlock *> Exiting;
7359-
CM.TheLoop->getExitingBlocks(Exiting);
7360-
SetVector<Instruction *> ExitInstrs;
7361-
// Collect all exit conditions.
7362-
for (BasicBlock *EB : Exiting) {
7363-
auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
7364-
if (!Term)
7365-
continue;
7366-
if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
7367-
ExitInstrs.insert(CondI);
7368-
}
7369-
}
7370-
// Compute the cost of all instructions only feeding the exit conditions.
7371-
for (unsigned I = 0; I != ExitInstrs.size(); ++I) {
7372-
Instruction *CondI = ExitInstrs[I];
7373-
if (!OrigLoop->contains(CondI) ||
7374-
!CostCtx.SkipCostComputation.insert(CondI).second)
7375-
continue;
7376-
Cost += CostCtx.getLegacyCost(CondI, VF);
7377-
for (Value *Op : CondI->operands()) {
7378-
auto *OpI = dyn_cast<Instruction>(Op);
7379-
if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) {
7380-
return !ExitInstrs.contains(cast<Instruction>(U));
7381-
}))
7382-
continue;
7383-
ExitInstrs.insert(OpI);
7384-
}
7385-
}
7386-
7387-
// The legacy cost model has special logic to compute the cost of in-loop
7388-
// reductions, which may be smaller than the sum of all instructions involved
7389-
// in the reduction. For AnyOf reductions, VPlan codegen may remove the select
7390-
// which the legacy cost model uses to assign cost. Pre-compute their costs
7391-
// for now.
7392-
// TODO: Switch to costing based on VPlan once the logic has been ported.
7393-
for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) {
7394-
if (!CM.isInLoopReduction(RedPhi) &&
7395-
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
7396-
RdxDesc.getRecurrenceKind()))
7397-
continue;
7398-
7399-
// AnyOf reduction codegen may remove the select. To match the legacy cost
7400-
// model, pre-compute the cost for AnyOf reductions here.
7401-
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
7402-
RdxDesc.getRecurrenceKind())) {
7403-
auto *Select = cast<SelectInst>(*find_if(
7404-
RedPhi->users(), [](User *U) { return isa<SelectInst>(U); }));
7405-
assert(!CostCtx.SkipCostComputation.contains(Select) &&
7406-
"reduction op visited multiple times");
7407-
CostCtx.SkipCostComputation.insert(Select);
7408-
auto ReductionCost = CostCtx.getLegacyCost(Select, VF);
7409-
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
7410-
<< ":\n any-of reduction " << *Select << "\n");
7411-
Cost += ReductionCost;
7412-
continue;
7413-
}
7414-
7415-
const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
7416-
SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
7417-
ChainOps.end());
7418-
// Also include the operands of instructions in the chain, as the cost-model
7419-
// may mark extends as free.
7420-
for (auto *ChainOp : ChainOps) {
7421-
for (Value *Op : ChainOp->operands()) {
7422-
if (auto *I = dyn_cast<Instruction>(Op))
7423-
ChainOpsAndOperands.insert(I);
7424-
}
7425-
}
7426-
7427-
// Pre-compute the cost for I, if it has a reduction pattern cost.
7428-
for (Instruction *I : ChainOpsAndOperands) {
7429-
auto ReductionCost = CM.getReductionPatternCost(
7430-
I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput);
7431-
if (!ReductionCost)
7432-
continue;
7433-
7434-
assert(!CostCtx.SkipCostComputation.contains(I) &&
7435-
"reduction op visited multiple times");
7436-
CostCtx.SkipCostComputation.insert(I);
7437-
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
7438-
<< ":\n in-loop reduction " << *I << "\n");
7439-
Cost += *ReductionCost;
7440-
}
7441-
}
7442-
7443-
// Now compute and add the VPlan-based cost.
7444-
Cost += Plan.cost(VF, CostCtx);
7445-
LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n");
7446-
return Cost;
7447-
}
7448-
7449-
VPlan &LoopVectorizationPlanner::getBestPlan() const {
7450-
// If there is a single VPlan with a single VF, return it directly.
7451-
VPlan &FirstPlan = *VPlans[0];
7452-
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
7453-
return FirstPlan;
7454-
7455-
VPlan *BestPlan = &FirstPlan;
7456-
ElementCount ScalarVF = ElementCount::getFixed(1);
7457-
assert(hasPlanWithVF(ScalarVF) &&
7458-
"More than a single plan/VF w/o any plan having scalar VF");
7459-
7460-
InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF);
7461-
VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost);
7462-
7463-
bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
7464-
if (ForceVectorization) {
7465-
// Ignore scalar width, because the user explicitly wants vectorization.
7466-
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
7467-
// evaluation.
7468-
BestFactor.Cost = InstructionCost::getMax();
7469-
}
7470-
7471-
for (auto &P : VPlans) {
7472-
for (ElementCount VF : P->vectorFactors()) {
7473-
if (VF.isScalar())
7474-
continue;
7475-
InstructionCost Cost = cost(*P, VF);
7476-
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
7477-
if (isMoreProfitable(CurrentFactor, BestFactor)) {
7478-
BestFactor = CurrentFactor;
7479-
BestPlan = &*P;
7480-
}
7481-
}
7482-
}
7483-
BestPlan->setVF(BestFactor.Width);
7484-
return *BestPlan;
7485-
}
7486-
74877302
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
74887303
assert(count_if(VPlans,
74897304
[VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
@@ -10342,15 +10157,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1034210157
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
1034310158
PSI, Checks);
1034410159

10345-
VPlan &BestPlan = LVP.getBestPlan();
10346-
assert(size(BestPlan.vectorFactors()) == 1 &&
10347-
"Plan should have a single VF");
10348-
ElementCount Width = *BestPlan.vectorFactors().begin();
10349-
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
10350-
<< "\n");
10351-
assert(VF.Width == Width &&
10352-
"VPlan cost model and legacy cost model disagreed");
10353-
LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
10160+
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
10161+
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
1035410162
++LoopsVectorized;
1035510163

1035610164
// Add metadata to disable runtime unrolling a scalar loop when there

0 commit comments

Comments
 (0)