@@ -7659,14 +7659,17 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
7659
7659
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
7660
7660
RdxDesc.getRecurrenceKind())) {
7661
7661
using namespace llvm::PatternMatch;
7662
- Value *Cmp, *OrigResumeV;
7662
+ Value *Cmp, *OrigResumeV, *CmpOp ;
7663
7663
bool IsExpectedPattern =
7664
7664
match(MainResumeValue, m_Select(m_OneUse(m_Value(Cmp)),
7665
7665
m_Specific(RdxDesc.getSentinelValue()),
7666
7666
m_Value(OrigResumeV))) &&
7667
- match(Cmp,
7668
- m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
7669
- m_Specific(RdxDesc.getRecurrenceStartValue())));
7667
+ (match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
7668
+ m_Value(CmpOp))) &&
7669
+ (match(CmpOp,
7670
+ m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) ||
7671
+ (CmpOp == RdxDesc.getRecurrenceStartValue() &&
7672
+ isGuaranteedNotToBeUndefOrPoison(CmpOp))));
7670
7673
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
7671
7674
(void)IsExpectedPattern;
7672
7675
MainResumeValue = OrigResumeV;
@@ -10374,6 +10377,36 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
10374
10377
VPlanTransforms::runPass(VPlanTransforms::removeDeadRecipes, MainPlan);
10375
10378
10376
10379
using namespace VPlanPatternMatch;
10380
+ // When vectorizing the epilogue, FindLastIV reductions can introduce multiple
10381
+ // uses of undef/poison. If the reduction start value may be undef or poison,
10382
+ // it needs to be frozen and the frozen start has to be used when computing
10383
+ // the reduction result. We also need to use the frozen value in the resume
10384
+ // phi generated by the main vector loop, as this is also used to compute the
10385
+ // reduction result after the epilogue vector loop.
10386
+ auto AddFreezeForFindLastIVReductions = [](VPlan &Plan,
10387
+ bool UpdateResumePhis) { // UpdateResumePhis: also redirect ResumePhi users of the start value to the frozen copy.
10388
+ VPBuilder Builder(Plan.getEntry()); // Builder constructed from the plan's entry block (presumably where the freeze is inserted - confirm).
10389
+ for (VPRecipeBase &R : *Plan.getMiddleBlock()) { // Scan the recipes of the plan's middle block.
10390
+ auto *VPI = dyn_cast<VPInstruction>(&R);
10391
+ if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult) // Only ComputeFindLastIVResult VPInstructions are of interest.
10392
+ continue;
10393
+ VPValue *OrigStart = VPI->getOperand(1); // Operand 1 is treated as the reduction start value.
10394
+ if (isGuaranteedNotToBeUndefOrPoison(OrigStart->getLiveInIRValue())) // NOTE(review): assumes the start is a live-in backed by an IR value - confirm.
10395
+ continue; // Start is known not to be undef/poison, so no freeze is needed.
10396
+ VPInstruction *Freeze =
10397
+ Builder.createNaryOp(Instruction::Freeze, {OrigStart}, {}, "fr"); // Freeze of the original start, named "fr".
10398
+ VPI->setOperand(1, Freeze); // The result computation now reads the frozen start.
10399
+ if (UpdateResumePhis)
10400
+ OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
10401
+ return Freeze != &U && isa<VPInstruction>(&U) && // Never rewrite the freeze itself; it must keep the original start as its operand.
10402
+ cast<VPInstruction>(&U)->getOpcode() ==
10403
+ VPInstruction::ResumePhi; // Only ResumePhi VPInstructions are redirected to the frozen value.
10404
+ });
10405
+ }
10406
+ };
10407
+ AddFreezeForFindLastIVReductions(MainPlan, true);
10408
+ AddFreezeForFindLastIVReductions(EpiPlan, false);
10409
+
10377
10410
VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader();
10378
10411
VPValue *VectorTC = &MainPlan.getVectorTripCount();
10379
10412
// If there is a suitable resume value for the canonical induction in the
@@ -10401,24 +10434,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10401
10434
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
10402
10435
Header->setName("vec.epilog.vector.body");
10403
10436
10404
- // Re-use the trip count and steps expanded for the main loop, as
10405
- // skeleton creation needs it as a value that dominates both the scalar
10406
- // and vector epilogue loops
10407
- // TODO: This is a workaround needed for epilogue vectorization and it
10408
- // should be removed once induction resume value creation is done
10409
- // directly in VPlan.
10410
- for (auto &R : make_early_inc_range(*Plan.getEntry())) {
10411
- auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10412
- if (!ExpandR)
10413
- continue;
10414
- auto *ExpandedVal =
10415
- Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second);
10416
- ExpandR->replaceAllUsesWith(ExpandedVal);
10417
- if (Plan.getTripCount() == ExpandR)
10418
- Plan.resetTripCount(ExpandedVal);
10419
- ExpandR->eraseFromParent();
10420
- }
10421
-
10437
+ DenseMap<Value *, Value *> ToFrozen;
10422
10438
// Ensure that the start values for all header phi recipes are updated before
10423
10439
// vectorizing the epilogue loop.
10424
10440
for (VPRecipeBase &R : Header->phis()) {
@@ -10484,6 +10500,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10484
10500
ResumeV =
10485
10501
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
10486
10502
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
10503
+ ToFrozen[RdxDesc.getRecurrenceStartValue()] =
10504
+ cast<PHINode>(ResumeV)->getIncomingValueForBlock(
10505
+ EPI.MainLoopIterationCountCheck);
10506
+
10487
10507
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
10488
10508
// to the resume value. The resume value is adjusted to the sentinel
10489
10509
// value when the final value from the main vector loop equals the start
@@ -10492,8 +10512,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10492
10512
// variable.
10493
10513
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
10494
10514
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
10495
- Value *Cmp =
10496
- Builder.CreateICmpEQ( ResumeV, RdxDesc.getRecurrenceStartValue());
10515
+ Value *Cmp = Builder.CreateICmpEQ(
10516
+ ResumeV, ToFrozen[ RdxDesc.getRecurrenceStartValue()] );
10497
10517
ResumeV =
10498
10518
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
10499
10519
}
@@ -10509,6 +10529,35 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10509
10529
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV);
10510
10530
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
10511
10531
}
10532
+
10533
+ // For some VPValues in the epilogue plan we must re-use the generated IR
10534
+ // values from the main plan. Replace them with live-in VPValues.
10535
+ // TODO: This is a workaround needed for epilogue vectorization and it
10536
+ // should be removed once induction resume value creation is done
10537
+ // directly in VPlan.
10538
+ for (auto &R : make_early_inc_range(*Plan.getEntry())) {
10539
+ // Re-use frozen values from the main plan for Freeze VPInstructions in the
10540
+ // epilogue plan. This ensures all users use the same frozen value.
10541
+ auto *VPI = dyn_cast<VPInstruction>(&R);
10542
+ if (VPI && VPI->getOpcode() == Instruction::Freeze) {
10543
+ VPI->replaceAllUsesWith(Plan.getOrAddLiveIn(
10544
+ ToFrozen.lookup(VPI->getOperand(0)->getLiveInIRValue())));
10545
+ continue;
10546
+ }
10547
+
10548
+ // Re-use the trip count and steps expanded for the main loop, as
10549
+ // skeleton creation needs them as values that dominate both the scalar
10550
+ // and vector epilogue loops.
10551
+ auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10552
+ if (!ExpandR)
10553
+ continue;
10554
+ auto *ExpandedVal =
10555
+ Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second);
10556
+ ExpandR->replaceAllUsesWith(ExpandedVal);
10557
+ if (Plan.getTripCount() == ExpandR)
10558
+ Plan.resetTripCount(ExpandedVal);
10559
+ ExpandR->eraseFromParent();
10560
+ }
10512
10561
}
10513
10562
10514
10563
// Generate bypass values from the additional bypass block. Note that when the
0 commit comments