@@ -2663,6 +2663,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2663
2663
return I->second;
2664
2664
}
2665
2665
2666
+ /// Knowing that loop \p L would be fully unrolled after vectorisation, add
2667
+ /// instructions that will get simplified and thus should not have any cost to
2668
+ /// \p InstsToIgnore
2669
+ static void AddFullyUnrolledInstructionsToIgnore(
2670
+ Loop *L, const LoopVectorizationLegality::InductionList &IL,
2671
+ SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
2672
+ auto *Cmp = L->getLatchCmpInst();
2673
+ if (!Cmp)
2674
+ return;
2675
+ InstsToIgnore.insert(Cmp);
2676
+ for (const auto &[IV, IndDesc] : IL) {
2677
+ // Get next iteration value of the induction variable
2678
+ Instruction *IVInst =
2679
+ cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
2680
+ bool IsSimplifiedAway = true;
2681
+ // Check that this value used only to exit the loop
2682
+ for (auto *UIV : IVInst->users()) {
2683
+ if (UIV != IV && UIV != Cmp) {
2684
+ IsSimplifiedAway = false;
2685
+ break;
2686
+ }
2687
+ }
2688
+ if (IsSimplifiedAway)
2689
+ InstsToIgnore.insert(IVInst);
2690
+ }
2691
+ }
2692
+
2666
2693
void InnerLoopVectorizer::createInductionResumeValues(
2667
2694
const SCEV2ValueTy &ExpandedSCEVs,
2668
2695
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -5545,19 +5572,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5545
5572
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
5546
5573
InstructionCost Cost;
5547
5574
5548
- // If with the given VF loop gets fully unrolled, ignore the costs of
5549
- // comparison and induction instructions, as they'll get simplified away
5550
- SmallPtrSet<const Value *, 16 > ValuesToIgnoreForVF;
5575
+ // If with the given fixed width VF loop gets fully unrolled, ignore the costs
5576
+ // of comparison and induction instructions, as they'll get simplified away
5577
+ SmallPtrSet<Instruction *, 2 > ValuesToIgnoreForVF;
5551
5578
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
5552
- auto *Cmp = TheLoop->getLatchCmpInst();
5553
- if (Cmp && TC == VF.getKnownMinValue()) {
5554
- ValuesToIgnoreForVF.insert(Cmp);
5555
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
5556
- Instruction *IVInc = cast<Instruction>(
5557
- IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
5558
- ValuesToIgnoreForVF.insert(IVInc);
5559
- }
5560
- }
5579
+ if (VF.isFixed() && TC == VF.getFixedValue())
5580
+ AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
5581
+ ValuesToIgnoreForVF);
5561
5582
5562
5583
// For each block.
5563
5584
for (BasicBlock *BB : TheLoop->blocks()) {
@@ -7241,16 +7262,10 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
7241
7262
7242
7263
// If with the given VF loop gets fully unrolled, ignore the costs of
7243
7264
// comparison and induction instructions, as they'll get simplified away
7244
- auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
7245
- auto *Cmp = OrigLoop->getLatchCmpInst();
7246
- if (Cmp && TC == VF.getKnownMinValue()) {
7247
- CostCtx.SkipCostComputation.insert(Cmp);
7248
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
7249
- Instruction *IVInc = cast<Instruction>(
7250
- IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
7251
- CostCtx.SkipCostComputation.insert(IVInc);
7252
- }
7253
- }
7265
+ auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
7266
+ if (VF.isFixed() && TC == VF.getFixedValue())
7267
+ AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
7268
+ CostCtx.SkipCostComputation);
7254
7269
7255
7270
for (Instruction *IVInst : IVInsts) {
7256
7271
if (CostCtx.skipCostComputation(IVInst, VF.isVector()))
0 commit comments