Skip to content

Commit ce214f5

Browse files
committed
!fixup address latest comments, thanks!
1 parent 93f3304 commit ce214f5

26 files changed

+700
-487
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 53 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -519,14 +519,14 @@ class InnerLoopVectorizer {
519519
/// the induction resume value, and the value for the bypass block, if needed.
520520
/// \p Step is the SCEV-expanded induction step to use. In cases where the
521521
/// loop skeleton is more complicated (i.e., epilogue vectorization) and the
522-
/// resume values can come from an additional bypass block, the \p
523-
/// AdditionalBypass pair provides this additional bypass block along with the
524-
/// resume value coming from it.
525-
void createInductionResumeVPValue(
526-
VPIRInstruction *InductionPhiIRI, const InductionDescriptor &ID,
527-
Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
528-
VPBuilder &ScalarPHBuilder,
529-
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
522+
/// resume values can come from an additional bypass block, \p
523+
/// AdditionalBypassValue provides the end value on the edge from bypass to
524+
/// this loop.
525+
void createInductionResumeVPValue(VPIRInstruction *InductionPhiIRI,
526+
const InductionDescriptor &ID, Value *Step,
527+
ArrayRef<BasicBlock *> BypassBlocks,
528+
VPBuilder &ScalarPHBuilder,
529+
Value *AdditionalBypassValue = nullptr);
530530

531531
/// Returns the original loop trip count.
532532
Value *getTripCount() const { return TripCount; }
@@ -539,12 +539,14 @@ class InnerLoopVectorizer {
539539
/// Retrieve the bypass value associated with an original induction header
540540
/// phi.
541541
Value *getInductionAdditionalBypassValue(PHINode *OrigPhi) const {
542-
return Induction2AdditionalBypass.at(OrigPhi).second;
542+
return Induction2AdditionalBypassValue.at(OrigPhi);
543543
}
544544

545545
/// Return the additional bypass block.
546-
BasicBlock *getInductionAdditionalBypassBlock() const {
547-
return Induction2AdditionalBypass.begin()->second.first;
546+
BasicBlock *getAdditionalBypassBlock() const {
547+
assert(AdditionalBypassBlock &&
548+
"Trying to access AdditionalBypassBlock but it has not been set");
549+
return AdditionalBypassBlock;
548550
}
549551

550552
protected:
@@ -584,11 +586,10 @@ class InnerLoopVectorizer {
584586
/// in the scalar epilogue, from where the vectorized loop left off.
585587
/// In cases where the loop skeleton is more complicated (e.g., epilogue
586588
/// vectorization) and the resume values can come from an additional bypass
587-
/// block, the \p AdditionalBypass pair provides information about the bypass
588-
/// block and the end value on the edge from bypass to this loop.
589-
void createInductionResumeVPValues(
590-
const SCEV2ValueTy &ExpandedSCEVs,
591-
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
589+
/// block, the \p AdditionalBypassValue provides the end value on the edge
590+
/// from bypass to this loop.
591+
void createInductionResumeVPValues(const SCEV2ValueTy &ExpandedSCEVs,
592+
Value *AdditionalBypassValue = nullptr);
592593

593594
/// Allow subclasses to override and print debug traces before/after vplan
594595
/// execution, when trace information is requested.
@@ -678,11 +679,15 @@ class InnerLoopVectorizer {
678679
/// for cleaning the checks, if vectorization turns out unprofitable.
679680
GeneratedRTChecks &RTChecks;
680681

681-
/// Mapping of induction phis to their bypass values and bypass blocks. They
682+
/// The additional bypass block which conditionally skips over the epilogue
683+
/// loop after executing the main loop. Needed to resume inductions and
684+
/// reductions during epilogue vectorization.
685+
BasicBlock *AdditionalBypassBlock = nullptr;
686+
687+
/// Mapping of induction phis to their additional bypass values. They
682688
/// need to be added as operands to phi nodes in the scalar loop preheader
683689
/// after the epilogue skeleton has been created.
684-
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>>
685-
Induction2AdditionalBypass;
690+
DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
686691

687692
VPlan &Plan;
688693
};
@@ -2603,14 +2608,14 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
26032608
void InnerLoopVectorizer::createInductionResumeVPValue(
26042609
VPIRInstruction *InductionPhiRI, const InductionDescriptor &II, Value *Step,
26052610
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2606-
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2611+
Value *AdditionalBypassValue) {
26072612
auto *OrigPhi = cast<PHINode>(&InductionPhiRI->getInstruction());
26082613
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
26092614
assert(VectorTripCount && "Expected valid arguments");
26102615

26112616
Instruction *OldInduction = Legal->getPrimaryInduction();
26122617
Value *EndValue = nullptr;
2613-
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
2618+
Value *EndValueFromAdditionalBypass = AdditionalBypassValue;
26142619
if (OrigPhi == OldInduction) {
26152620
// We know what the end value is.
26162621
EndValue = VectorTripCount;
@@ -2626,11 +2631,11 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
26262631
EndValue->setName("ind.end");
26272632

26282633
// Compute the end value for the additional bypass (if applicable).
2629-
if (AdditionalBypass.first) {
2630-
B.SetInsertPoint(AdditionalBypass.first,
2631-
AdditionalBypass.first->getFirstInsertionPt());
2634+
if (AdditionalBypassValue) {
2635+
B.SetInsertPoint(getAdditionalBypassBlock(),
2636+
getAdditionalBypassBlock()->getFirstInsertionPt());
26322637
EndValueFromAdditionalBypass =
2633-
emitTransformedIndex(B, AdditionalBypass.second, II.getStartValue(),
2638+
emitTransformedIndex(B, AdditionalBypassValue, II.getStartValue(),
26342639
Step, II.getKind(), II.getInductionBinOp());
26352640
EndValueFromAdditionalBypass->setName("ind.end");
26362641
}
@@ -2644,14 +2649,13 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
26442649
"InductionPhiRI should not have any operands");
26452650
InductionPhiRI->addOperand(ResumePhiRecipe);
26462651

2647-
if (AdditionalBypass.first) {
2652+
if (AdditionalBypassValue) {
26482653
// Store the bypass value here, as it needs to be added as operand to its
26492654
// scalar preheader phi node after the epilogue skeleton has been created.
26502655
// TODO: Directly add as extra operand to the VPResumePHI recipe.
2651-
assert(!Induction2AdditionalBypass.contains(OrigPhi) &&
2656+
assert(!Induction2AdditionalBypassValue.contains(OrigPhi) &&
26522657
"entry for OrigPhi already exits");
2653-
Induction2AdditionalBypass[OrigPhi] = {AdditionalBypass.first,
2654-
EndValueFromAdditionalBypass};
2658+
Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
26552659
}
26562660
}
26572661

@@ -2670,19 +2674,13 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26702674
}
26712675

26722676
void InnerLoopVectorizer::createInductionResumeVPValues(
2673-
const SCEV2ValueTy &ExpandedSCEVs,
2674-
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2675-
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
2676-
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
2677-
"Inconsistent information about additional bypass.");
2677+
const SCEV2ValueTy &ExpandedSCEVs, Value *AdditionalBypassValue) {
26782678
// We are going to resume the execution of the scalar loop.
26792679
// Go over all of the induction variable PHIs of the scalar loop header and
26802680
// fix their starting values, which depend on the counter of the last
2681-
// iteration of the vectorized loop. The starting values of PHI nodes depend
2682-
// on the counter of the last iteration in the vectorized loop. If we come
2683-
// from one of the LoopBypassBlocks then we need to start from the original
2684-
// start value. If we come from the AdditionalBypass then we need to start
2685-
// from its value.
2681+
// iteration of the vectorized loop. If we come from one of the
2682+
// LoopBypassBlocks then we need to start from the original start value. If we
2683+
// come from the AdditionalBypass then we need to start from its value.
26862684
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
26872685
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
26882686
for (VPRecipeBase &R : *Plan.getScalarHeader()) {
@@ -2695,7 +2693,7 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
26952693
const InductionDescriptor &II = Legal->getInductionVars().find(Phi)->second;
26962694
createInductionResumeVPValue(PhiR, II, getExpandedStep(II, ExpandedSCEVs),
26972695
LoopBypassBlocks, ScalarPHBuilder,
2698-
AdditionalBypass);
2696+
AdditionalBypassValue);
26992697
}
27002698
}
27012699

@@ -7744,7 +7742,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77447742
// 2.5 When vectorizing the epilogue, fix reduction and induction resume
77457743
// values from the additional bypass block.
77467744
if (VectorizingEpilogue) {
7747-
BasicBlock *BypassBlock = ILV.getInductionAdditionalBypassBlock();
7745+
BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock();
77487746
for (VPRecipeBase &R : *ExitVPBB) {
77497747
fixReductionScalarResumeWhenVectorizingEpilog(
77507748
&R, State, State.CFG.VPBB2IRBB[ExitVPBB], BypassBlock);
@@ -7941,6 +7939,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79417939
nullptr, "vec.epilog.iter.check", true);
79427940
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
79437941
VecEpilogueIterationCountCheck);
7942+
AdditionalBypassBlock = VecEpilogueIterationCountCheck;
79447943

79457944
// Adjust the control flow taking the state info from the main loop
79467945
// vectorization into account.
@@ -8017,12 +8016,13 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80178016
// preheader.
80188017
PHINode *EPResumeVal = nullptr;
80198018
Type *IdxTy = Legal->getWidestInductionType();
8019+
Value *TC = EPI.VectorTripCount;
8020+
Constant *Init = ConstantInt::get(IdxTy, 0);
8021+
80208022
for (PHINode &P : LoopVectorPreHeader->phis()) {
80218023
if (P.getType() == IdxTy &&
8022-
P.getIncomingValueForBlock(VecEpilogueIterationCountCheck) ==
8023-
EPI.VectorTripCount &&
8024-
P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck) ==
8025-
ConstantInt::get(IdxTy, 0)) {
8024+
P.getIncomingValueForBlock(VecEpilogueIterationCountCheck) == TC &&
8025+
P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck) == Init) {
80268026
EPResumeVal = &P;
80278027
EPResumeVal->setName("vec.epilog.resume.val");
80288028
break;
@@ -8031,22 +8031,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80318031
if (!EPResumeVal) {
80328032
EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
80338033
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
8034-
EPResumeVal->addIncoming(EPI.VectorTripCount,
8035-
VecEpilogueIterationCountCheck);
8036-
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
8037-
EPI.MainLoopIterationCountCheck);
8034+
EPResumeVal->addIncoming(TC, VecEpilogueIterationCountCheck);
8035+
EPResumeVal->addIncoming(Init, EPI.MainLoopIterationCountCheck);
80388036
}
80398037

80408038
// Generate induction resume values. These variables save the new starting
80418039
// indexes for the scalar loop. They are used to test if there are any tail
80428040
// iterations left once the vector loop has completed.
80438041
// Note that when the vectorized epilogue is skipped due to iteration count
80448042
// check, then the resume value for the induction variable comes from
8045-
// the trip count of the main vector loop, hence passing the AdditionalBypass
8046-
// argument.
8047-
createInductionResumeVPValues(ExpandedSCEVs,
8048-
{VecEpilogueIterationCountCheck,
8049-
EPI.VectorTripCount} /* AdditionalBypass */);
8043+
// the trip count of the main vector loop, hence passing the
8044+
// AdditionalBypassValue argument.
8045+
createInductionResumeVPValues(
8046+
ExpandedSCEVs, EPI.VectorTripCount /* AdditionalBypassValue */);
80508047

80518048
return {LoopVectorPreHeader, EPResumeVal};
80528049
}
@@ -10358,6 +10355,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1035810355
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1035910356
IndPhi = WidenInd->getPHINode();
1036010357
}
10358+
// Hook up to the PHINode generated by a ResumePhi recipe of the main
10359+
// loop VPlan, which feeds the scalar loop.
1036110360
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
1036210361
}
1036310362
assert(ResumeV && "Must have a resume value");

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
6464
case VPInstruction::FirstOrderRecurrenceSplice:
6565
case VPInstruction::LogicalAnd:
6666
case VPInstruction::PtrAdd:
67-
case VPInstruction::ResumePhi:
6867
return false;
6968
default:
7069
return true;

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
9999
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
100100
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
101101
; CHECK: [[VEC_EPILOG_PH]]:
102-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
102+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
103103
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
104104
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
105105
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[START]], 4
@@ -120,12 +120,12 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
120120
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[START]], [[N_VEC3]]
121121
; CHECK-NEXT: br i1 [[CMP_N11]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
122122
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
123-
; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
124-
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ]
123+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
124+
; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ]
125125
; CHECK-NEXT: br label %[[LOOP:.*]]
126126
; CHECK: [[LOOP]]:
127-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ]
128-
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ]
127+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ]
128+
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL7]], %[[VEC_EPILOG_SCALAR_PH]] ]
129129
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
130130
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1
131131
; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1

0 commit comments

Comments
 (0)