Skip to content

Commit df4a615

Browse files
committed
[VPlan] Convert induction increment check to be VPlan-based.
Check the VPlan directly to determine if a VPValue is an optimizable IV or IV use instead of checking the underlying IR instructions. Split off from #112147. This refactoring enables moving IV end value creation from the legacy fixupIVUsers to a VPlan-based transform. There is one case we now won't optimize: IVs with subtracts and non-constant steps. But as this is a minor optimization and doesn't impact correctness, the benefits of performing the check in VPlan should outweigh the missed case.
1 parent ba93ecc commit df4a615

File tree

4 files changed

+98
-19
lines changed

4 files changed

+98
-19
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8953,14 +8953,73 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89538953
}
89548954
}
89558955

8956+
/// Return true if \p VPV is an optimizable IV or IV use. That is, if \p VPV is
8957+
/// either an untruncated wide induction, or if it increments a wide induction
8958+
/// by its step.
/// Integer subtract increments are only recognized when both the subtracted
/// value and the induction step are ConstantInt live-ins; other subtracts
/// return false.
8959+
static bool isOptimizableIVOrUse(VPValue *VPV) {
8960+
VPRecipeBase *Def = VPV->getDefiningRecipe();
8961+
// No defining recipe: VPV is neither a wide induction nor an increment of one.
if (!Def)
8962+
return false;
8963+
auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def);
8964+
if (WideIV) {
8965+
// VPV itself is a wide induction, separately compute the end value for exit
8966+
// users if it is not a truncated IV.
8967+
return isa<VPWidenPointerInductionRecipe>(WideIV) ||
8968+
!cast<VPWidenIntOrFpInductionRecipe>(WideIV)->getTruncInst();
8969+
}
8970+
8971+
// Check if VPV is an optimizable induction increment.
8972+
// Only recipes with exactly two operands are considered as increments.
if (Def->getNumOperands() != 2)
8973+
return false;
8974+
// The wide induction may be either operand; try operand 0 first, then 1.
WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
8975+
if (!WideIV)
8976+
WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
8977+
if (!WideIV)
8978+
return false;
8979+
8980+
using namespace VPlanPatternMatch;
8981+
auto &ID = WideIV->getInductionDescriptor();
8982+
8983+
// Check if VPV increments the induction by the induction step.
8984+
VPValue *IVStep = WideIV->getStepValue();
8985+
// Dispatch on the opcode the induction descriptor reports for the IV update.
switch (ID.getInductionOpcode()) {
8986+
case Instruction::Add:
8987+
return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
8988+
m_Specific(IVStep)));
8989+
case Instruction::FAdd:
8990+
return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
8991+
m_Specific(IVStep)));
8992+
case Instruction::FSub:
8993+
// NOTE(review): m_Binary rather than m_c_Binary is used here, presumably so
// that only `IV - Step` (not `Step - IV`) matches; confirm that m_Binary
// defaults to non-commutative matching.
return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
8994+
m_Specific(IVStep)));
8995+
case Instruction::Sub: {
8996+
// IVStep will be the negated step of the subtraction. Check if Step == -1 *
8997+
// IVStep.
8998+
VPValue *Step;
8999+
// The first operand is matched with a wildcard rather than
// m_Specific(WideIV); WideIV is already known to be one of the defining
// recipe's two operands from the checks above.
if (!match(VPV, m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
9000+
!Step->isLiveIn() || !IVStep->isLiveIn())
9001+
return false;
9002+
// Both steps must be ConstantInt live-ins; any other step is not treated as
// optimizable.
auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
9003+
auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
9004+
return StepCI && IVStepCI &&
9005+
StepCI->getValue() == (-1 * IVStepCI->getValue());
9006+
}
9007+
default:
9008+
// Any other opcode: only a pointer induction advanced by a GetElementPtr of
// the IV by its step qualifies.
return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
9009+
match(VPV, m_GetElementPtr(m_Specific(WideIV),
9010+
m_Specific(WideIV->getStepValue())));
9011+
}
9012+
// Every path through the switch above returns.
llvm_unreachable("should have been covered by switch above");
9013+
}
9014+
89569015
// Collect VPIRInstructions for phis in the exit blocks that are modeled
89579016
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
89589017
// modeled explicitly yet and won't be included. Those are un-truncated
89599018
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
89609019
// increments.
8961-
static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8962-
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8963-
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
9020+
static SetVector<VPIRInstruction *>
9021+
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
9022+
VPlan &Plan) {
89649023
auto *MiddleVPBB = Plan.getMiddleBlock();
89659024
SetVector<VPIRInstruction *> ExitUsersToFix;
89669025
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8985,18 +9044,9 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89859044
// Exit values for inductions are computed and updated outside of VPlan
89869045
// and independent of induction recipes.
89879046
// TODO: Compute induction exit values in VPlan.
8988-
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8989-
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8990-
isa<VPWidenPointerInductionRecipe>(V) ||
8991-
(isa<Instruction>(IncomingValue) &&
8992-
OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8993-
any_of(IncomingValue->users(), [&Inductions](User *U) {
8994-
auto *P = dyn_cast<PHINode>(U);
8995-
return P && Inductions.contains(P);
8996-
}))) {
8997-
if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
8998-
continue;
8999-
}
9047+
if (isOptimizableIVOrUse(V) &&
9048+
ExitVPBB->getSinglePredecessor() == MiddleVPBB)
9049+
continue;
90009050
ExitUsersToFix.insert(ExitIRI);
90019051
ExitIRI->addOperand(V);
90029052
}
@@ -9331,8 +9381,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93319381
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93329382
}
93339383
addScalarResumePhis(RecipeBuilder, *Plan);
9334-
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9335-
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9384+
SetVector<VPIRInstruction *> ExitUsersToFix =
9385+
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
93369386
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
93379387
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
93389388
reportVectorizationFailure(

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2095,6 +2095,15 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
20952095
R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
20962096
}
20972097

2098+
/// classof overload taking a VPValue: returns true when \p V has a defining
/// recipe and that recipe is accepted by the recipe-based classof overload.
/// Returns false for values without a defining recipe.
static inline bool classof(const VPValue *V) {
2099+
auto *R = V->getDefiningRecipe();
2100+
return R && classof(R);
2101+
}
2102+
2103+
/// classof overload taking a VPHeaderPHIRecipe: forwards to the
/// VPRecipeBase-based overload via an explicit upcast.
static inline bool classof(const VPHeaderPHIRecipe *R) {
2104+
return classof(static_cast<const VPRecipeBase *>(R));
2105+
}
2106+
20982107
virtual void execute(VPTransformState &State) override = 0;
20992108

21002109
/// Returns the step value of the induction.

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ struct MatchRecipeAndOpcode<Opcode, RecipeTy> {
139139
if constexpr (std::is_same<RecipeTy, VPScalarIVStepsRecipe>::value ||
140140
std::is_same<RecipeTy, VPCanonicalIVPHIRecipe>::value ||
141141
std::is_same<RecipeTy, VPWidenSelectRecipe>::value ||
142-
std::is_same<RecipeTy, VPDerivedIVRecipe>::value)
142+
std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
143+
std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
143144
return DefR;
144145
else
145146
return DefR && DefR->getOpcode() == Opcode;
@@ -309,6 +310,12 @@ m_Binary(const Op0_t &Op0, const Op1_t &Op1) {
309310
return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>(Op0, Op1);
310311
}
311312

313+
/// Build a matcher for a binary recipe with opcode \p Opcode and operand
/// matchers \p Op0 and \p Op1, with the matcher's Commutative template
/// argument set to true.
template <unsigned Opcode, typename Op0_t, typename Op1_t>
314+
inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, true>
315+
m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) {
316+
return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, true>(Op0, Op1);
317+
}
318+
312319
template <typename Op0_t, typename Op1_t>
313320
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Mul>
314321
m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
@@ -339,6 +346,18 @@ m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
339346
return m_BinaryOr<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
340347
}
341348

349+
/// Two-operand matcher type for Instruction::GetElementPtr, accepting the
/// recipe kinds VPWidenRecipe, VPReplicateRecipe, VPWidenGEPRecipe and
/// VPInstruction.
/// NOTE(review): the `false` argument is presumably BinaryRecipe_match's
/// Commutative flag, i.e. operand order is significant; confirm against
/// BinaryRecipe_match's declaration.
template <typename Op0_t, typename Op1_t>
350+
using GEPLikeRecipe_match =
351+
BinaryRecipe_match<Op0_t, Op1_t, Instruction::GetElementPtr, false,
352+
VPWidenRecipe, VPReplicateRecipe, VPWidenGEPRecipe,
353+
VPInstruction>;
354+
355+
/// Build a GEPLikeRecipe_match from the operand matchers \p Op0 and \p Op1.
template <typename Op0_t, typename Op1_t>
356+
inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
357+
const Op1_t &Op1) {
358+
return GEPLikeRecipe_match<Op0_t, Op1_t>(Op0, Op1);
359+
}
360+
342361
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
343362
using AllTernaryRecipe_match =
344363
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, false,

llvm/test/Transforms/LoopVectorize/X86/induction-step.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
115115
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
116116
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
117117
; CHECK: middle.block:
118+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
118119
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
119120
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
120121
; CHECK: scalar.ph:
@@ -131,7 +132,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
131132
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
132133
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
133134
; CHECK: exit:
134-
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
135+
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
135136
; CHECK-NEXT: ret i16 [[SUB_LCSSA]]
136137
;
137138
entry:

0 commit comments

Comments
 (0)