Skip to content

Commit 03f2026

Browse files
fhahncjdb
authored and committed
[VPlan] Only use selectVectorizationFactor for cross-check (NFCI). (llvm#103033)
Use getBestVF to select the VF up-front, and only use selectVectorizationFactor to get the legacy VF in order to check that the vectorization decision matches the VPlan-based cost model. PR: llvm#103033
1 parent 50384bf commit 03f2026

File tree

6 files changed

+50
-63
lines changed

6 files changed

+50
-63
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -354,9 +354,10 @@ class LoopVectorizationPlanner {
354354
: OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
355355
IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
356356

357-
/// Plan how to best vectorize, return the best VF and its cost, or
358-
/// std::nullopt if vectorization and interleaving should be avoided up front.
359-
std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
357+
/// Build VPlans for the specified \p UserVF and \p UserIC if they are
358+
/// non-zero or all applicable candidate VFs otherwise. If vectorization and
359+
/// interleaving should be avoided up-front, no plans are generated.
360+
void plan(ElementCount UserVF, unsigned UserIC);
360361

361362
/// Use the VPlan-native path to plan how to best vectorize, return the best
362363
/// VF and its cost.
@@ -368,7 +369,7 @@ class LoopVectorizationPlanner {
368369

369370
/// Compute and return the most profitable vectorization factor. Also collect
370371
/// all profitable VFs in ProfitableVFs.
371-
ElementCount computeBestVF();
372+
VectorizationFactor computeBestVF();
372373

373374
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
374375
/// according to the best selected \p VF and \p UF.
@@ -450,12 +451,14 @@ class LoopVectorizationPlanner {
450451
VPRecipeBuilder &RecipeBuilder,
451452
ElementCount MinVF);
452453

454+
#ifndef NDEBUG
453455
/// \return The most profitable vectorization factor for the available VPlans
454456
/// and the cost of that VF.
455457
/// This is now only used to verify the decisions by the new VPlan-based
456458
/// cost-model and will be retired once the VPlan-based cost-model is
457459
/// stabilized.
458460
VectorizationFactor selectVectorizationFactor();
461+
#endif
459462

460463
/// Returns true if the per-lane cost of VectorizationFactor A is lower than
461464
/// that of B.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 37 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4546,6 +4546,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
45464546
return false;
45474547
}
45484548

4549+
#ifndef NDEBUG
45494550
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
45504551
InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1));
45514552
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
@@ -4578,7 +4579,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
45784579
InstructionCost C = CM.expectedCost(VF);
45794580
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
45804581

4581-
#ifndef NDEBUG
45824582
unsigned AssumedMinimumVscale =
45834583
getVScaleForTuning(OrigLoop, TTI).value_or(1);
45844584
unsigned Width =
@@ -4591,7 +4591,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
45914591
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
45924592
<< AssumedMinimumVscale << ")");
45934593
LLVM_DEBUG(dbgs() << ".\n");
4594-
#endif
45954594

45964595
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
45974596
LLVM_DEBUG(
@@ -4621,6 +4620,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
46214620
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
46224621
return ChosenFactor;
46234622
}
4623+
#endif
46244624

46254625
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
46264626
ElementCount VF) const {
@@ -6985,15 +6985,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
69856985
return VectorizationFactor::Disabled();
69866986
}
69876987

6988-
std::optional<VectorizationFactor>
6989-
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
6988+
void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
69906989
assert(OrigLoop->isInnermost() && "Inner loop expected.");
69916990
CM.collectValuesToIgnore();
69926991
CM.collectElementTypesForWidening();
69936992

69946993
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
69956994
if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
6996-
return std::nullopt;
6995+
return;
69976996

69986997
// Invalidate interleave groups if all blocks of loop will be predicated.
69996998
if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
@@ -7028,14 +7027,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
70287027
if (CM.selectUserVectorizationFactor(UserVF)) {
70297028
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
70307029
buildVPlansWithVPRecipes(UserVF, UserVF);
7031-
if (!hasPlanWithVF(UserVF)) {
7032-
LLVM_DEBUG(dbgs()
7033-
<< "LV: No VPlan could be built for " << UserVF << ".\n");
7034-
return std::nullopt;
7035-
}
7036-
70377030
LLVM_DEBUG(printPlans(dbgs()));
7038-
return {{UserVF, 0, 0}};
7031+
return;
70397032
} else
70407033
reportVectorizationInfo("UserVF ignored because of invalid costs.",
70417034
"InvalidCost", ORE, OrigLoop);
@@ -7066,24 +7059,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
70667059
buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
70677060

70687061
LLVM_DEBUG(printPlans(dbgs()));
7069-
if (VPlans.empty())
7070-
return std::nullopt;
7071-
if (all_of(VPlans,
7072-
[](std::unique_ptr<VPlan> &P) { return P->hasScalarVFOnly(); }))
7073-
return VectorizationFactor::Disabled();
7074-
7075-
// Select the optimal vectorization factor according to the legacy cost-model.
7076-
// This is now only used to verify the decisions by the new VPlan-based
7077-
// cost-model and will be retired once the VPlan-based cost-model is
7078-
// stabilized.
7079-
VectorizationFactor VF = selectVectorizationFactor();
7080-
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
7081-
if (!hasPlanWithVF(VF.Width)) {
7082-
LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
7083-
<< ".\n");
7084-
return std::nullopt;
7085-
}
7086-
return VF;
70877062
}
70887063

70897064
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
@@ -7255,18 +7230,21 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
72557230
return Cost;
72567231
}
72577232

7258-
ElementCount LoopVectorizationPlanner::computeBestVF() {
7233+
VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7234+
if (VPlans.empty())
7235+
return VectorizationFactor::Disabled();
72597236
// If there is a single VPlan with a single VF, return it directly.
72607237
VPlan &FirstPlan = *VPlans[0];
72617238
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
7262-
return *FirstPlan.vectorFactors().begin();
7239+
return {*FirstPlan.vectorFactors().begin(), 0, 0};
72637240

72647241
ElementCount ScalarVF = ElementCount::getFixed(1);
72657242
assert(hasPlanWithVF(ScalarVF) &&
72667243
"More than a single plan/VF w/o any plan having scalar VF");
72677244

72687245
// TODO: Compute scalar cost using VPlan-based cost model.
72697246
InstructionCost ScalarCost = CM.expectedCost(ScalarVF);
7247+
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n");
72707248
VectorizationFactor ScalarFactor(ScalarVF, ScalarCost, ScalarCost);
72717249
VectorizationFactor BestFactor = ScalarFactor;
72727250

@@ -7300,7 +7278,20 @@ ElementCount LoopVectorizationPlanner::computeBestVF() {
73007278
ProfitableVFs.push_back(CurrentFactor);
73017279
}
73027280
}
7303-
return BestFactor.Width;
7281+
7282+
#ifndef NDEBUG
7283+
// Select the optimal vectorization factor according to the legacy cost-model.
7284+
// This is now only used to verify the decisions by the new VPlan-based
7285+
// cost-model and will be retired once the VPlan-based cost-model is
7286+
// stabilized.
7287+
VectorizationFactor LegacyVF = selectVectorizationFactor();
7288+
assert(BestFactor.Width == LegacyVF.Width &&
7289+
" VPlan cost model and legacy cost model disagreed");
7290+
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
7291+
"when vectorizing, the scalar cost must be computed.");
7292+
#endif
7293+
7294+
return BestFactor;
73047295
}
73057296

73067297
static void AddRuntimeUnrollDisableMetaData(Loop *L) {
@@ -9971,21 +9962,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
99719962
ElementCount UserVF = Hints.getWidth();
99729963
unsigned UserIC = Hints.getInterleave();
99739964

9974-
// Plan how to best vectorize, return the best VF and its cost.
9975-
std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
9965+
// Plan how to best vectorize.
9966+
LVP.plan(UserVF, UserIC);
9967+
VectorizationFactor VF = LVP.computeBestVF();
9968+
unsigned IC = 1;
99769969

99779970
if (ORE->allowExtraAnalysis(LV_NAME))
99789971
LVP.emitInvalidCostRemarks(ORE);
99799972

9980-
VectorizationFactor VF = VectorizationFactor::Disabled();
9981-
unsigned IC = 1;
9982-
99839973
bool AddBranchWeights =
99849974
hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
99859975
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
99869976
F->getDataLayout(), AddBranchWeights);
9987-
if (MaybeVF) {
9988-
VF = *MaybeVF;
9977+
if (LVP.hasPlanWithVF(VF.Width)) {
99899978
// Select the interleave count.
99909979
IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
99919980

@@ -10025,7 +10014,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1002510014
VectorizeLoop = false;
1002610015
}
1002710016

10028-
if (!MaybeVF && UserIC > 1) {
10017+
if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
1002910018
// Tell the user interleaving was avoided up-front, despite being explicitly
1003010019
// requested.
1003110020
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -10107,11 +10096,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1010710096
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
1010810097
&CM, BFI, PSI, Checks);
1010910098

10110-
ElementCount BestVF = LVP.computeBestVF();
10111-
assert(BestVF.isScalar() &&
10112-
"VPlan cost model and legacy cost model disagreed");
10113-
VPlan &BestPlan = LVP.getPlanFor(BestVF);
10114-
LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);
10099+
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
10100+
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
1011510101

1011610102
ORE->emit([&]() {
1011710103
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10122,20 +10108,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1012210108
} else {
1012310109
// If we decided that it is *legal* to vectorize the loop, then do it.
1012410110

10125-
ElementCount BestVF = LVP.computeBestVF();
10126-
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
10127-
assert(VF.Width == BestVF &&
10128-
"VPlan cost model and legacy cost model disagreed");
10129-
VPlan &BestPlan = LVP.getPlanFor(BestVF);
10111+
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
1013010112
// Consider vectorizing the epilogue too if it's profitable.
1013110113
VectorizationFactor EpilogueVF =
10132-
LVP.selectEpilogueVectorizationFactor(BestVF, IC);
10114+
LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
1013310115
if (EpilogueVF.Width.isVector()) {
1013410116

1013510117
// The first pass vectorizes the main loop and creates a scalar epilogue
1013610118
// to be vectorized by executing the plan (potentially with a different
1013710119
// factor) again shortly afterwards.
10138-
EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1);
10120+
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
1013910121
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
1014010122
EPI, &LVL, &CM, BFI, PSI, Checks);
1014110123

@@ -10230,10 +10212,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023010212
if (!MainILV.areSafetyChecksAdded())
1023110213
DisableRuntimeUnroll = true;
1023210214
} else {
10233-
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF,
10215+
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
1023410216
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
1023510217
PSI, Checks);
10236-
LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false);
10218+
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
1023710219
++LoopsVectorized;
1023810220

1023910221
// Add metadata to disable runtime unrolling a scalar loop when there

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,6 +1695,10 @@ VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
16951695

16961696
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16971697
void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
1698+
if (VPlans.empty()) {
1699+
O << "LV: No VPlans built.\n";
1700+
return;
1701+
}
16981702
for (const auto &Plan : VPlans)
16991703
if (PrintVPlansInDotFormat)
17001704
Plan->printDOT(O);

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
133133
; CHECK-NEXT: LV: Not Interleaving.
134134
; CHECK-NEXT: LV: Interleaving is not beneficial.
135135
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
136-
; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
137136
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
138137
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
139138
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
@@ -336,7 +335,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
336335
; CHECK-NEXT: LV: Not Interleaving.
337336
; CHECK-NEXT: LV: Interleaving is not beneficial.
338337
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
339-
; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
340338
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
341339
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
342340
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ exit:
154154
; FOR (for.y) should be moved which is not currently supported.
155155
define i32 @test_chained_first_order_recurrences_4(ptr %base) {
156156
; CHECK-LABEL: 'test_chained_first_order_recurrences_4'
157-
; CHECK: No VPlan could be built for
157+
; CHECK: No VPlans built.
158158

159159
entry:
160160
br label %loop

llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ target triple = "x86_64-unknown-linux-gnu"
1111
define void @test() {
1212
; CHECK-LABEL: 'test'
1313
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
14+
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
1415
; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
1516
; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
1617
; CHECK: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
1718
; CHECK: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
1819
; CHECK: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
1920
; CHECK: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
20-
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
2121
;
2222
entry:
2323
br label %for.body

0 commit comments

Comments
 (0)