@@ -4546,6 +4546,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
4546
4546
return false;
4547
4547
}
4548
4548
4549
+ #ifndef NDEBUG
4549
4550
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4550
4551
InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1));
4551
4552
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
@@ -4578,7 +4579,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4578
4579
InstructionCost C = CM.expectedCost(VF);
4579
4580
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
4580
4581
4581
- #ifndef NDEBUG
4582
4582
unsigned AssumedMinimumVscale =
4583
4583
getVScaleForTuning(OrigLoop, TTI).value_or(1);
4584
4584
unsigned Width =
@@ -4591,7 +4591,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4591
4591
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
4592
4592
<< AssumedMinimumVscale << ")");
4593
4593
LLVM_DEBUG(dbgs() << ".\n");
4594
- #endif
4595
4594
4596
4595
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
4597
4596
LLVM_DEBUG(
@@ -4621,6 +4620,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4621
4620
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
4622
4621
return ChosenFactor;
4623
4622
}
4623
+ #endif
4624
4624
4625
4625
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
4626
4626
ElementCount VF) const {
@@ -6985,15 +6985,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
6985
6985
return VectorizationFactor::Disabled();
6986
6986
}
6987
6987
6988
- std::optional<VectorizationFactor>
6989
- LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
6988
+ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
6990
6989
assert(OrigLoop->isInnermost() && "Inner loop expected.");
6991
6990
CM.collectValuesToIgnore();
6992
6991
CM.collectElementTypesForWidening();
6993
6992
6994
6993
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
6995
6994
if (!MaxFactors) // Cases that should be neither vectorized nor interleaved.
6996
- return std::nullopt ;
6995
+ return;
6997
6996
6998
6997
// Invalidate interleave groups if all blocks of loop will be predicated.
6999
6998
if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
@@ -7028,14 +7027,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
7028
7027
if (CM.selectUserVectorizationFactor(UserVF)) {
7029
7028
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
7030
7029
buildVPlansWithVPRecipes(UserVF, UserVF);
7031
- if (!hasPlanWithVF(UserVF)) {
7032
- LLVM_DEBUG(dbgs()
7033
- << "LV: No VPlan could be built for " << UserVF << ".\n");
7034
- return std::nullopt;
7035
- }
7036
-
7037
7030
LLVM_DEBUG(printPlans(dbgs()));
7038
- return {{UserVF, 0, 0}} ;
7031
+ return;
7039
7032
} else
7040
7033
reportVectorizationInfo("UserVF ignored because of invalid costs.",
7041
7034
"InvalidCost", ORE, OrigLoop);
@@ -7066,24 +7059,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
7066
7059
buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
7067
7060
7068
7061
LLVM_DEBUG(printPlans(dbgs()));
7069
- if (VPlans.empty())
7070
- return std::nullopt;
7071
- if (all_of(VPlans,
7072
- [](std::unique_ptr<VPlan> &P) { return P->hasScalarVFOnly(); }))
7073
- return VectorizationFactor::Disabled();
7074
-
7075
- // Select the optimal vectorization factor according to the legacy cost-model.
7076
- // This is now only used to verify the decisions by the new VPlan-based
7077
- // cost-model and will be retired once the VPlan-based cost-model is
7078
- // stabilized.
7079
- VectorizationFactor VF = selectVectorizationFactor();
7080
- assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
7081
- if (!hasPlanWithVF(VF.Width)) {
7082
- LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
7083
- << ".\n");
7084
- return std::nullopt;
7085
- }
7086
- return VF;
7087
7062
}
7088
7063
7089
7064
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
@@ -7255,18 +7230,21 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
7255
7230
return Cost;
7256
7231
}
7257
7232
7258
- ElementCount LoopVectorizationPlanner::computeBestVF() {
7233
+ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7234
+ if (VPlans.empty())
7235
+ return VectorizationFactor::Disabled();
7259
7236
// If there is a single VPlan with a single VF, return it directly.
7260
7237
VPlan &FirstPlan = *VPlans[0];
7261
7238
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
7262
- return *FirstPlan.vectorFactors().begin();
7239
+ return { *FirstPlan.vectorFactors().begin(), 0, 0} ;
7263
7240
7264
7241
ElementCount ScalarVF = ElementCount::getFixed(1);
7265
7242
assert(hasPlanWithVF(ScalarVF) &&
7266
7243
"More than a single plan/VF w/o any plan having scalar VF");
7267
7244
7268
7245
// TODO: Compute scalar cost using VPlan-based cost model.
7269
7246
InstructionCost ScalarCost = CM.expectedCost(ScalarVF);
7247
+ LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n");
7270
7248
VectorizationFactor ScalarFactor(ScalarVF, ScalarCost, ScalarCost);
7271
7249
VectorizationFactor BestFactor = ScalarFactor;
7272
7250
@@ -7300,7 +7278,20 @@ ElementCount LoopVectorizationPlanner::computeBestVF() {
7300
7278
ProfitableVFs.push_back(CurrentFactor);
7301
7279
}
7302
7280
}
7303
- return BestFactor.Width;
7281
+
7282
+ #ifndef NDEBUG
7283
+ // Select the optimal vectorization factor according to the legacy cost-model.
7284
+ // This is now only used to verify the decisions by the new VPlan-based
7285
+ // cost-model and will be retired once the VPlan-based cost-model is
7286
+ // stabilized.
7287
+ VectorizationFactor LegacyVF = selectVectorizationFactor();
7288
+ assert(BestFactor.Width == LegacyVF.Width &&
7289
+ " VPlan cost model and legacy cost model disagreed");
7290
+ assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
7291
+ "when vectorizing, the scalar cost must be computed.");
7292
+ #endif
7293
+
7294
+ return BestFactor;
7304
7295
}
7305
7296
7306
7297
static void AddRuntimeUnrollDisableMetaData(Loop *L) {
@@ -9971,21 +9962,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
9971
9962
ElementCount UserVF = Hints.getWidth();
9972
9963
unsigned UserIC = Hints.getInterleave();
9973
9964
9974
- // Plan how to best vectorize, return the best VF and its cost.
9975
- std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
9965
+ // Plan how to best vectorize.
9966
+ LVP.plan(UserVF, UserIC);
9967
+ VectorizationFactor VF = LVP.computeBestVF();
9968
+ unsigned IC = 1;
9976
9969
9977
9970
if (ORE->allowExtraAnalysis(LV_NAME))
9978
9971
LVP.emitInvalidCostRemarks(ORE);
9979
9972
9980
- VectorizationFactor VF = VectorizationFactor::Disabled();
9981
- unsigned IC = 1;
9982
-
9983
9973
bool AddBranchWeights =
9984
9974
hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
9985
9975
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
9986
9976
F->getDataLayout(), AddBranchWeights);
9987
- if (MaybeVF) {
9988
- VF = *MaybeVF;
9977
+ if (LVP.hasPlanWithVF(VF.Width)) {
9989
9978
// Select the interleave count.
9990
9979
IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
9991
9980
@@ -10025,7 +10014,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10025
10014
VectorizeLoop = false;
10026
10015
}
10027
10016
10028
- if (!MaybeVF && UserIC > 1) {
10017
+ if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
10029
10018
// Tell the user interleaving was avoided up-front, despite being explicitly
10030
10019
// requested.
10031
10020
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -10107,11 +10096,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10107
10096
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
10108
10097
&CM, BFI, PSI, Checks);
10109
10098
10110
- ElementCount BestVF = LVP.computeBestVF();
10111
- assert(BestVF.isScalar() &&
10112
- "VPlan cost model and legacy cost model disagreed");
10113
- VPlan &BestPlan = LVP.getPlanFor(BestVF);
10114
- LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);
10099
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width);
10100
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
10115
10101
10116
10102
ORE->emit([&]() {
10117
10103
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10122,20 +10108,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10122
10108
} else {
10123
10109
// If we decided that it is *legal* to vectorize the loop, then do it.
10124
10110
10125
- ElementCount BestVF = LVP.computeBestVF();
10126
- LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
10127
- assert(VF.Width == BestVF &&
10128
- "VPlan cost model and legacy cost model disagreed");
10129
- VPlan &BestPlan = LVP.getPlanFor(BestVF);
10111
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width);
10130
10112
// Consider vectorizing the epilogue too if it's profitable.
10131
10113
VectorizationFactor EpilogueVF =
10132
- LVP.selectEpilogueVectorizationFactor(BestVF , IC);
10114
+ LVP.selectEpilogueVectorizationFactor(VF.Width , IC);
10133
10115
if (EpilogueVF.Width.isVector()) {
10134
10116
10135
10117
// The first pass vectorizes the main loop and creates a scalar epilogue
10136
10118
// to be vectorized by executing the plan (potentially with a different
10137
10119
// factor) again shortly afterwards.
10138
- EpilogueLoopVectorizationInfo EPI(BestVF , IC, EpilogueVF.Width, 1);
10120
+ EpilogueLoopVectorizationInfo EPI(VF.Width , IC, EpilogueVF.Width, 1);
10139
10121
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
10140
10122
EPI, &LVL, &CM, BFI, PSI, Checks);
10141
10123
@@ -10230,10 +10212,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10230
10212
if (!MainILV.areSafetyChecksAdded())
10231
10213
DisableRuntimeUnroll = true;
10232
10214
} else {
10233
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF ,
10215
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
10234
10216
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
10235
10217
PSI, Checks);
10236
- LVP.executePlan(BestVF , IC, BestPlan, LB, DT, false);
10218
+ LVP.executePlan(VF.Width , IC, BestPlan, LB, DT, false);
10237
10219
++LoopsVectorized;
10238
10220
10239
10221
// Add metadata to disable runtime unrolling a scalar loop when there
0 commit comments