-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Only use selectVectorizationFactor for cross-check (NFCI). #103033
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8608a2e
8847712
543f0d3
38d762f
7c5c96f
1199254
580ff54
0775eca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4546,6 +4546,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, | |
return false; | ||
} | ||
|
||
#ifndef NDEBUG | ||
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { | ||
InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1)); | ||
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n"); | ||
|
@@ -4578,7 +4579,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { | |
InstructionCost C = CM.expectedCost(VF); | ||
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost); | ||
|
||
#ifndef NDEBUG | ||
unsigned AssumedMinimumVscale = | ||
getVScaleForTuning(OrigLoop, TTI).value_or(1); | ||
unsigned Width = | ||
|
@@ -4591,7 +4591,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { | |
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of " | ||
<< AssumedMinimumVscale << ")"); | ||
LLVM_DEBUG(dbgs() << ".\n"); | ||
#endif | ||
|
||
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) { | ||
LLVM_DEBUG( | ||
|
@@ -4621,6 +4620,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { | |
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n"); | ||
return ChosenFactor; | ||
} | ||
#endif | ||
|
||
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization( | ||
ElementCount VF) const { | ||
|
@@ -6985,15 +6985,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { | |
return VectorizationFactor::Disabled(); | ||
} | ||
|
||
std::optional<VectorizationFactor> | ||
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { | ||
void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { | ||
assert(OrigLoop->isInnermost() && "Inner loop expected."); | ||
CM.collectValuesToIgnore(); | ||
CM.collectElementTypesForWidening(); | ||
|
||
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC); | ||
if (!MaxFactors) // Cases that should not to be vectorized nor interleaved. | ||
return std::nullopt; | ||
return; | ||
|
||
// Invalidate interleave groups if all blocks of loop will be predicated. | ||
if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) && | ||
|
@@ -7028,14 +7027,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { | |
if (CM.selectUserVectorizationFactor(UserVF)) { | ||
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); | ||
buildVPlansWithVPRecipes(UserVF, UserVF); | ||
if (!hasPlanWithVF(UserVF)) { | ||
LLVM_DEBUG(dbgs() | ||
<< "LV: No VPlan could be built for " << UserVF << ".\n"); | ||
return std::nullopt; | ||
} | ||
|
||
LLVM_DEBUG(printPlans(dbgs())); | ||
return {{UserVF, 0, 0}}; | ||
return; | ||
} else | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noting another else-after-return, below, but perhaps there it overall looks better(?):
|
||
reportVectorizationInfo("UserVF ignored because of invalid costs.", | ||
"InvalidCost", ORE, OrigLoop); | ||
|
@@ -7066,24 +7059,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { | |
buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF); | ||
|
||
LLVM_DEBUG(printPlans(dbgs())); | ||
if (VPlans.empty()) | ||
return std::nullopt; | ||
if (all_of(VPlans, | ||
[](std::unique_ptr<VPlan> &P) { return P->hasScalarVFOnly(); })) | ||
return VectorizationFactor::Disabled(); | ||
|
||
// Select the optimal vectorization factor according to the legacy cost-model. | ||
// This is now only used to verify the decisions by the new VPlan-based | ||
// cost-model and will be retired once the VPlan-based cost-model is | ||
// stabilized. | ||
VectorizationFactor VF = selectVectorizationFactor(); | ||
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero."); | ||
if (!hasPlanWithVF(VF.Width)) { | ||
LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width | ||
<< ".\n"); | ||
return std::nullopt; | ||
} | ||
return VF; | ||
} | ||
|
||
InstructionCost VPCostContext::getLegacyCost(Instruction *UI, | ||
|
@@ -7255,18 +7230,21 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, | |
return Cost; | ||
} | ||
|
||
ElementCount LoopVectorizationPlanner::computeBestVF() { | ||
VectorizationFactor LoopVectorizationPlanner::computeBestVF() { | ||
if (VPlans.empty()) | ||
return VectorizationFactor::Disabled(); | ||
// If there is a single VPlan with a single VF, return it directly. | ||
VPlan &FirstPlan = *VPlans[0]; | ||
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1) | ||
return *FirstPlan.vectorFactors().begin(); | ||
return {*FirstPlan.vectorFactors().begin(), 0, 0}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Returning zeroes retains current behavior: the single VPlan case corresponds to a UserVF whose cost and scalar-cost are ignored, although the former does have valid cost (the latter is not calculated).) |
||
|
||
ElementCount ScalarVF = ElementCount::getFixed(1); | ||
assert(hasPlanWithVF(ScalarVF) && | ||
"More than a single plan/VF w/o any plan having scalar VF"); | ||
|
||
// TODO: Compute scalar cost using VPlan-based cost model. | ||
InstructionCost ScalarCost = CM.expectedCost(ScalarVF); | ||
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n"); | ||
VectorizationFactor ScalarFactor(ScalarVF, ScalarCost, ScalarCost); | ||
VectorizationFactor BestFactor = ScalarFactor; | ||
|
||
|
@@ -7300,7 +7278,20 @@ ElementCount LoopVectorizationPlanner::computeBestVF() { | |
ProfitableVFs.push_back(CurrentFactor); | ||
} | ||
} | ||
return BestFactor.Width; | ||
|
||
#ifndef NDEBUG | ||
// Select the optimal vectorization factor according to the legacy cost-model. | ||
// This is now only used to verify the decisions by the new VPlan-based | ||
// cost-model and will be retired once the VPlan-based cost-model is | ||
// stabilized. | ||
VectorizationFactor LegacyVF = selectVectorizationFactor(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. selectVectorizationFactor() should also be declared and defined under #ifndef NDEBUG. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks |
||
assert(BestFactor.Width == LegacyVF.Width && | ||
" VPlan cost model and legacy cost model disagreed"); | ||
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) && | ||
"when vectorizing, the scalar cost must be computed."); | ||
#endif | ||
|
||
return BestFactor; | ||
} | ||
|
||
static void AddRuntimeUnrollDisableMetaData(Loop *L) { | ||
|
@@ -9828,21 +9819,19 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
ElementCount UserVF = Hints.getWidth(); | ||
unsigned UserIC = Hints.getInterleave(); | ||
|
||
// Plan how to best vectorize, return the best VF and its cost. | ||
std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC); | ||
// Plan how to best vectorize. | ||
LVP.plan(UserVF, UserIC); | ||
VectorizationFactor VF = LVP.computeBestVF(); | ||
unsigned IC = 1; | ||
|
||
if (ORE->allowExtraAnalysis(LV_NAME)) | ||
LVP.emitInvalidCostRemarks(ORE); | ||
|
||
VectorizationFactor VF = VectorizationFactor::Disabled(); | ||
unsigned IC = 1; | ||
|
||
bool AddBranchWeights = | ||
hasBranchWeightMD(*L->getLoopLatch()->getTerminator()); | ||
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI, | ||
F->getDataLayout(), AddBranchWeights); | ||
if (MaybeVF) { | ||
VF = *MaybeVF; | ||
if (LVP.hasPlanWithVF(VF.Width)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better to ask instead if VF is valid, i.e., not Disabled? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment, VF.Width may be 1 (disabled), but it is still considered valid, if a plan with VF = 1 exists, for interleaving only. We could update Either would probably be done independently? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, right, VectorizationFactor::Disabled (Width == 1) is distinct from "invalid" (can be Width == 0). |
||
// Select the interleave count. | ||
IC = CM.selectInterleaveCount(VF.Width, VF.Cost); | ||
|
||
|
@@ -9882,7 +9871,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
VectorizeLoop = false; | ||
} | ||
|
||
if (!MaybeVF && UserIC > 1) { | ||
if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better to ask instead if VF is valid? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Left as is for now, see comment above. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, would be good to simplify, as a follow-up. |
||
// Tell the user interleaving was avoided up-front, despite being explicitly | ||
// requested. | ||
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and " | ||
|
@@ -9964,11 +9953,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, | ||
&CM, BFI, PSI, Checks); | ||
|
||
ElementCount BestVF = LVP.computeBestVF(); | ||
assert(BestVF.isScalar() && | ||
"VPlan cost model and legacy cost model disagreed"); | ||
VPlan &BestPlan = LVP.getPlanFor(BestVF); | ||
LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false); | ||
VPlan &BestPlan = LVP.getPlanFor(VF.Width); | ||
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Perhaps a cast of VectorizationFactor to ElementCount, returning its first field, could simplify using the former wherever the latter is required, although seems better to make sure the latter only is expected, by passing .Width explicitly.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Left as explicitly passing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
ORE->emit([&]() { | ||
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), | ||
|
@@ -9979,20 +9965,16 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
} else { | ||
// If we decided that it is *legal* to vectorize the loop, then do it. | ||
|
||
ElementCount BestVF = LVP.computeBestVF(); | ||
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n"); | ||
assert(VF.Width == BestVF && | ||
"VPlan cost model and legacy cost model disagreed"); | ||
VPlan &BestPlan = LVP.getPlanFor(BestVF); | ||
VPlan &BestPlan = LVP.getPlanFor(VF.Width); | ||
// Consider vectorizing the epilogue too if it's profitable. | ||
VectorizationFactor EpilogueVF = | ||
LVP.selectEpilogueVectorizationFactor(BestVF, IC); | ||
LVP.selectEpilogueVectorizationFactor(VF.Width, IC); | ||
if (EpilogueVF.Width.isVector()) { | ||
|
||
// The first pass vectorizes the main loop and creates a scalar epilogue | ||
// to be vectorized by executing the plan (potentially with a different | ||
// factor) again shortly afterwards. | ||
EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1); | ||
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1); | ||
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, | ||
EPI, &LVL, &CM, BFI, PSI, Checks); | ||
|
||
|
@@ -10087,10 +10069,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
if (!MainILV.areSafetyChecksAdded()) | ||
DisableRuntimeUnroll = true; | ||
} else { | ||
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF, | ||
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, | ||
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, | ||
PSI, Checks); | ||
LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false); | ||
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); | ||
++LoopsVectorized; | ||
|
||
// Add metadata to disable runtime unrolling a scalar loop when there | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As @alexey-bataev noted,
else
should be dropped afterreturn
(can be done independently). OTOH, perhaps it's better to treat the simpler additional "report UserVF ignored" case first -if (!CM.selectUserVectorizationFactor(UserVF))
- otherwise build a VPlan for UserVF and return.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, will adjust that separately.