From 0f66ee14c952c25dac8a5542994f62f2bb905ef5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 31 Dec 2024 12:08:43 +0000 Subject: [PATCH 1/9] [LV] Add test with FP induction and increment operands swapped. --- .../Transforms/Vectorize/LoopVectorize.cpp | 14 ++ .../LoopVectorize/X86/induction-step.ll | 6 +- .../LoopVectorize/iv_outside_user.ll | 120 ++++++++++++++++++ 3 files changed, 136 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7ef5295bb1276..5b75f6b26b6c5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9311,6 +9311,20 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); + // Update wide induction increments to use the same step as the corresponding + // wide induction. This enables detecting induction increments directly in + // VPlan and removes redundant splats. + for (const auto &[Phi, ID] : Legal->getInductionVars()) { + auto *IVInc = cast( + Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); + if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add) + continue; + VPWidenInductionRecipe *WideIV = + cast(RecipeBuilder.getRecipe(Phi)); + VPRecipeBase *R = RecipeBuilder.getRecipe(IVInc); + R->setOperand(1, WideIV->getStepValue()); + } + if (auto *UncountableExitingBlock = Legal->getUncountableEarlyExitingBlock()) { VPlanTransforms::handleUncountableEarlyExit( diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index 6aac11a579719..f6a9767c7f87d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -21,16 +21,14 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index e9f67036faf2b..66fdbc6a98bd0 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -860,6 +860,126 @@ exit: ret float %add } +define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture %A, i64 %N, float %fpinc) { +; VEC-LABEL: define float @fp_postinc_use_fadd_ops_swapped( +; VEC-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) { +; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VEC: [[VECTOR_PH]]: +; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] +; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] +; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> , [[DOTSPLAT2]] +; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]] +; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 +; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC-NEXT: br label %[[VECTOR_BODY:.*]] +; VEC: [[VECTOR_BODY]]: +; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VEC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] +; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 +; VEC-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP6]], align 4 +; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], [[DOTSPLAT4]] +; VEC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; VEC: [[MIDDLE_BLOCK]]: +; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VEC: [[SCALAR_PH]]: +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; VEC-NEXT: br label %[[LOOP:.*]] +; VEC: [[LOOP]]: +; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 +; VEC-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]] +; VEC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; VEC-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}} +; VEC: [[EXIT]]: +; VEC-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: ret float [[ADD_LCSSA]] +; +; INTERLEAVE-LABEL: define float @fp_postinc_use_fadd_ops_swapped( +; INTERLEAVE-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) { +; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; INTERLEAVE: [[VECTOR_PH]]: +; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] +; INTERLEAVE-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] +; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] +; INTERLEAVE: [[VECTOR_BODY]]: +; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 +; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float +; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]] +; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]] +; INTERLEAVE-NEXT: [[TMP5:%.*]] = fmul fast float 0.000000e+00, [[FPINC]] +; INTERLEAVE-NEXT: [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP5]] +; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]] +; INTERLEAVE-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP7]] +; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; INTERLEAVE-NEXT: store float [[TMP6]], ptr [[TMP9]], align 4 +; INTERLEAVE-NEXT: store float [[TMP8]], ptr [[TMP10]], align 4 +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; INTERLEAVE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[MIDDLE_BLOCK]]: +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; INTERLEAVE: [[SCALAR_PH]]: +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; INTERLEAVE-NEXT: br label %[[LOOP:.*]] +; INTERLEAVE: [[LOOP]]: +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; INTERLEAVE-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 +; INTERLEAVE-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]] +; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; INTERLEAVE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[EXIT]]: +; INTERLEAVE-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: ret float [[ADD_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %fp.iv = phi float [ %init, %entry ], [ %add, %loop ] + %gep.A = getelementptr inbounds float, ptr %A, i64 %iv + store float %fp.iv, ptr %gep.A, align 4 + %add = fadd fast float %fpinc, %fp.iv + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %N + br i1 %ec, label %exit, label %loop + +exit: + ret float %add +} + define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, float %fpinc) { ; VEC-LABEL: define float @fp_postinc_use_fsub( ; VEC-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) { From 2764023e9678d7eda6cbe049a5434bf4be359de7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 31 Dec 2024 14:01:48 +0000 Subject: [PATCH 2/9] Step --- .../Transforms/Vectorize/LoopVectorize.cpp | 71 +++++++++++++++---- llvm/lib/Transforms/Vectorize/VPlan.h | 9 +++ .../Transforms/Vectorize/VPlanPatternMatch.h | 21 +++++- .../LoopVectorize/X86/induction-step.ll | 3 +- 4 files changed, 87 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5b75f6b26b6c5..b55e82f217818 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8953,14 +8953,65 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { } } +static bool isIVUse(VPValue *Incoming) { + VPRecipeBase *IncomingDef = Incoming->getDefiningRecipe(); + if (!IncomingDef) + return false; + auto *WideIV = dyn_cast(IncomingDef); + if (WideIV) { + return isa(WideIV) || !cast(WideIV)->getTruncInst(); + } + + if (IncomingDef->getNumOperands() != 2) + return false; + WideIV = dyn_cast(IncomingDef->getOperand(0)); + if (!WideIV) + WideIV = dyn_cast(IncomingDef->getOperand(1)); + if (!WideIV) + return false; + + using namespace VPlanPatternMatch; + auto &ID = WideIV->getInductionDescriptor(); + switch (ID.getInductionOpcode()) { + case Instruction::Add: + return match(Incoming, + m_c_Binary( + m_VPValue(), m_Specific(WideIV->getStepValue()))); + case Instruction::FAdd: + return match(Incoming, + m_c_Binary( + m_VPValue(), m_Specific(WideIV->getStepValue()))); + case Instruction::FSub: + return match(Incoming, + m_Binary( + m_VPValue(), m_Specific(WideIV->getStepValue()))); + case Instruction::Sub: { + VPValue *Step; + return match(Incoming, + m_Binary(m_VPValue(), m_VPValue(Step))) && + Step->isLiveIn() && WideIV->getStepValue()->isLiveIn() && + (cast(Step->getLiveInIRValue())->getValue() + + cast(WideIV->getStepValue()->getLiveInIRValue()) + ->getValue()) + .isZero(); + } + default: + return ID.getKind() == InductionDescriptor::IK_PtrInduction && + match( + Incoming, + m_GetElementPtr(m_VPValue(), m_Specific(WideIV->getStepValue()))); + } + llvm_unreachable("should have been covered by switch above"); +} + // Collect VPIRInstructions for phis in the exit blocks that are modeled // in VPlan and add the exiting VPValue as operand. Some exiting values are not // modeled explicitly yet and won't be included. Those are un-truncated // VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction // increments. static SetVector collectUsersInExitBlocks( - Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, - const MapVector &Inductions) { + Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan + ) { auto *MiddleVPBB = Plan.getMiddleBlock(); SetVector ExitUsersToFix; for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { @@ -8985,18 +9036,8 @@ static SetVector collectUsersInExitBlocks( // Exit values for inductions are computed and updated outside of VPlan // and independent of induction recipes. // TODO: Compute induction exit values in VPlan. - if ((isa(V) && - !cast(V)->getTruncInst()) || - isa(V) || - (isa(IncomingValue) && - OrigLoop->contains(cast(IncomingValue)) && - any_of(IncomingValue->users(), [&Inductions](User *U) { - auto *P = dyn_cast(U); - return P && Inductions.contains(P); - }))) { - if (ExitVPBB->getSinglePredecessor() == MiddleVPBB) - continue; - } + if (isIVUse(V) && ExitVPBB->getSinglePredecessor() == MiddleVPBB) + continue; ExitUsersToFix.insert(ExitIRI); ExitIRI->addOperand(V); } @@ -9332,7 +9373,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { } addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlocks( - OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); + OrigLoop, RecipeBuilder, *Plan); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) { reportVectorizationFailure( diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 88f3f672d3aa3..1be57d23f19cf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2095,6 +2095,15 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { R->getVPDefID() == VPDef::VPWidenPointerInductionSC; } + static inline bool classof(const VPValue *V) { + auto *R = V->getDefiningRecipe(); + return R && classof(R); + } + + static inline bool classof(const VPHeaderPHIRecipe *R) { + return classof(static_cast(R)); + } + virtual void execute(VPTransformState &State) override = 0; /// Returns the step value of the induction. diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index ec3c203a61b38..4866426ad8848 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -139,7 +139,8 @@ struct MatchRecipeAndOpcode { if constexpr (std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value) + std::is_same::value || + std::is_same::value) return DefR; else return DefR && DefR->getOpcode() == Opcode; @@ -309,6 +310,12 @@ m_Binary(const Op0_t &Op0, const Op1_t &Op1) { return AllBinaryRecipe_match(Op0, Op1); } +template +inline AllBinaryRecipe_match +m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) { + return AllBinaryRecipe_match(Op0, Op1); +} + template inline AllBinaryRecipe_match m_Mul(const Op0_t &Op0, const Op1_t &Op1) { @@ -339,6 +346,18 @@ m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) { return m_BinaryOr(Op0, Op1); } +template +using GEPLikeRecipe_match = + BinaryRecipe_match; + +template +inline GEPLikeRecipe_match m_GetElementPtr(const Op0_t &Op0, + const Op1_t &Op1) { + return GEPLikeRecipe_match(Op0, Op1); +} + template using AllTernaryRecipe_match = Recipe_match, Opcode, false, diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index f6a9767c7f87d..1dd2692ba6822 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -115,6 +115,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -131,7 +132,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[SUB_LCSSA]] ; entry: From 844aa2a7995766d7e685bd4b8d03c58575b898ec Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 11 Oct 2024 14:11:42 +0100 Subject: [PATCH 3/9] [VPlan] Update final exit value via VPlan. Model updating IV users directly in VPlan, replace fixupIVUsers. Depends on https://github.com/llvm/llvm-project/pull/110004, https://github.com/llvm/llvm-project/pull/109975 and https://github.com/llvm/llvm-project/pull/112145. --- .../Transforms/Vectorize/LoopVectorize.cpp | 240 +++++++----------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 24 +- llvm/lib/Transforms/Vectorize/VPlan.h | 5 + .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- .../RISCV/riscv-vector-reverse.ll | 2 - 5 files changed, 118 insertions(+), 155 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b55e82f217818..6c0b47a427f68 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -543,11 +543,6 @@ class InnerLoopVectorizer { protected: friend class LoopVectorizationPlanner; - /// Set up the values of the IVs correctly when exiting the vector loop. - virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, - Value *VectorTripCount, BasicBlock *MiddleBlock, - VPTransformState &State); - /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. void sinkScalarOperands(Instruction *PredInst); @@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue); void printDebugTracesAtStart() override; void printDebugTracesAtEnd() override; - - void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, - Value *VectorTripCount, BasicBlock *MiddleBlock, - VPTransformState &State) override {}; }; // A specialized derived class of inner loop vectorizer that performs @@ -2775,97 +2766,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton( return LoopVectorPreHeader; } -// Fix up external users of the induction variable. At this point, we are -// in LCSSA form, with all external PHIs that use the IV having one input value, -// coming from the remainder loop. We need those PHIs to also have a correct -// value for the IV when arriving directly from the middle block. -void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, - const InductionDescriptor &II, - Value *VectorTripCount, - BasicBlock *MiddleBlock, - VPTransformState &State) { - // There are two kinds of external IV usages - those that use the value - // computed in the last iteration (the PHI) and those that use the penultimate - // value (the value that feeds into the phi from the loop latch). - // We allow both, but they, obviously, have different values. - - DenseMap MissingVals; - - Value *EndValue = cast(OrigPhi->getIncomingValueForBlock( - OrigLoop->getLoopPreheader())) - ->getIncomingValueForBlock(MiddleBlock); - - // An external user of the last iteration's value should see the value that - // the remainder loop uses to initialize its own IV. - Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()); - for (User *U : PostInc->users()) { - Instruction *UI = cast(U); - if (!OrigLoop->contains(UI)) { - assert(isa(UI) && "Expected LCSSA form"); - MissingVals[UI] = EndValue; - } - } - - // An external user of the penultimate value need to see EndValue - Step. - // The simplest way to get this is to recompute it from the constituent SCEVs, - // that is Start + (Step * (CRD - 1)). - for (User *U : OrigPhi->users()) { - auto *UI = cast(U); - if (!OrigLoop->contains(UI)) { - assert(isa(UI) && "Expected LCSSA form"); - IRBuilder<> B(MiddleBlock->getTerminator()); - - // Fast-math-flags propagate from the original induction instruction. - if (isa_and_nonnull(II.getInductionBinOp())) - B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); - - VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep()); - assert(StepVPV && "step must have been expanded during VPlan execution"); - Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue() - : State.get(StepVPV, VPLane(0)); - Value *Escape = nullptr; - if (EndValue->getType()->isIntegerTy()) - Escape = B.CreateSub(EndValue, Step); - else if (EndValue->getType()->isPointerTy()) - Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); - else { - assert(EndValue->getType()->isFloatingPointTy() && - "Unexpected induction type"); - Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() == - Instruction::FAdd - ? Instruction::FSub - : Instruction::FAdd, - EndValue, Step); - } - Escape->setName("ind.escape"); - MissingVals[UI] = Escape; - } - } - - assert((MissingVals.empty() || - all_of(MissingVals, - [MiddleBlock, this](const std::pair &P) { - return all_of( - predecessors(cast(P.first)->getParent()), - [MiddleBlock, this](BasicBlock *Pred) { - return Pred == MiddleBlock || - Pred == OrigLoop->getLoopLatch(); - }); - })) && - "Expected escaping values from latch/middle.block only"); - - for (auto &I : MissingVals) { - PHINode *PHI = cast(I.first); - // One corner case we have to handle is two IVs "chasing" each-other, - // that is %IV2 = phi [...], [ %IV1, %latch ] - // In this case, if IV1 has an external use, we need to avoid adding both - // "last value of IV1" and "penultimate value of IV2". So, verify that we - // don't already have an incoming value for the middle block. - if (PHI->getBasicBlockIndex(MiddleBlock) == -1) - PHI->addIncoming(I.second, MiddleBlock); - } -} - namespace { struct CSEDenseMapInfo { @@ -2994,24 +2894,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { for (PHINode &PN : Exit->phis()) PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); - if (Cost->requiresScalarEpilogue(VF.isVector())) { - // No edge from the middle block to the unique exit block has been inserted - // and there is nothing to fix from vector loop; phis should have incoming - // from scalar loop only. - } else { - // TODO: Check in VPlan to see if IV users need fixing instead of checking - // the cost model. - - // If we inserted an edge from the middle block to the unique exit block, - // update uses outside the loop (phis) to account for the newly inserted - // edge. - - // Fix-up external users of the induction variables. - for (const auto &Entry : Legal->getInductionVars()) - fixupIVUsers(Entry.first, Entry.second, - getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State); - } - for (Instruction *PI : PredicatedInstructions) sinkScalarOperands(&*PI); @@ -8866,11 +8748,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute /// the end value of the induction. -static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV, - VPBuilder &VectorPHBuilder, - VPBuilder &ScalarPHBuilder, - VPTypeAnalysis &TypeInfo, - VPValue *VectorTC) { +static VPValue *addResumePhiRecipeForInduction( + VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder, + VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC, + DenseMap &EndValues) { auto *WideIntOrFp = dyn_cast(WideIV); // Truncated wide inductions resume from the last lane of their vector value // in the last vector iteration which is handled elsewhere. @@ -8895,6 +8776,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV, ScalarTypeOfWideIV); } + EndValues[WideIV] = EndValue; auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val"); @@ -8904,7 +8786,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV, /// Create resume phis in the scalar preheader for first-order recurrences, /// reductions and inductions, and update the VPIRInstructions wrapping the /// original phis in the scalar header. -static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { +static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, + Loop *OrigLoop, + DenseMap &EndValues) { VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); auto *ScalarPH = Plan.getScalarPreheader(); auto *MiddleVPBB = cast(ScalarPH->getSinglePredecessor()); @@ -8924,7 +8808,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { if (auto *WideIVR = dyn_cast(VectorPhiR)) { if (VPValue *ResumePhi = addResumePhiRecipeForInduction( WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, - &Plan.getVectorTripCount())) { + &Plan.getVectorTripCount(), EndValues)) { ScalarPhiIRI->addOperand(ResumePhi); continue; } @@ -9009,9 +8893,9 @@ static bool isIVUse(VPValue *Incoming) { // modeled explicitly yet and won't be included. Those are un-truncated // VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction // increments. -static SetVector collectUsersInExitBlocks( - Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan - ) { +static SetVector +collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, + VPlan &Plan) { auto *MiddleVPBB = Plan.getMiddleBlock(); SetVector ExitUsersToFix; for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { @@ -9033,11 +8917,6 @@ static SetVector collectUsersInExitBlocks( } Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); - // Exit values for inductions are computed and updated outside of VPlan - // and independent of induction recipes. - // TODO: Compute induction exit values in VPlan. - if (isIVUse(V) && ExitVPBB->getSinglePredecessor() == MiddleVPBB) - continue; ExitUsersToFix.insert(ExitIRI); ExitIRI->addOperand(V); } @@ -9046,17 +8925,86 @@ static SetVector collectUsersInExitBlocks( return ExitUsersToFix; } +/// If \p Incoming is a user of a non-truncated induction, create recipes to +/// compute the final value and update the user \p ExitIRI. +static bool addInductionEndValue( + VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming, + const MapVector &Inductions, + DenseMap &EndValues, VPTypeAnalysis &TypeInfo) { + if ((isa(Incoming) && + !cast(Incoming)->getTruncInst()) || + isa(Incoming) || + (isa(Incoming->getUnderlyingValue()) && + any_of(cast(Incoming->getUnderlyingValue())->users(), + [&Inductions](User *U) { + auto *P = dyn_cast(U); + return P && Inductions.contains(P); + }))) { + VPValue *IV; + if (auto *WideIV = + dyn_cast(Incoming->getDefiningRecipe())) + IV = WideIV; + else if (auto *WideIV = + dyn_cast(Incoming->getDefiningRecipe() + ->getOperand(0) + ->getDefiningRecipe())) + IV = WideIV; + else + IV = Incoming->getDefiningRecipe()->getOperand(1); + // Skip phi nodes already updated. This can be the case if 2 induction + // phis chase each other. + VPValue *EndValue = EndValues[IV]; + if (any_of(cast(Incoming->getDefiningRecipe())->operands(), + IsaPred)) { + ExitIRI->setOperand(0, EndValue); + return true; + } + + VPBuilder B(Plan.getMiddleBlock()->getTerminator()); + VPValue *Escape = nullptr; + auto *WideIV = cast(IV->getDefiningRecipe()); + VPValue *Step = WideIV->getStepValue(); + Type *ScalarTy = TypeInfo.inferScalarType(WideIV); + if (ScalarTy->isIntegerTy()) + Escape = + B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape"); + else if (ScalarTy->isPointerTy()) + Escape = B.createPtrAdd( + EndValue, + B.createNaryOp(Instruction::Sub, + {Plan.getOrAddLiveIn(ConstantInt::get( + Step->getLiveInIRValue()->getType(), 0)), + Step}), + {}, "ind.escape"); + else if (ScalarTy->isFloatingPointTy()) { + const auto &ID = WideIV->getInductionDescriptor(); + Escape = B.createNaryOp( + ID.getInductionBinOp()->getOpcode() == Instruction::FAdd + ? Instruction::FSub + : Instruction::FAdd, + {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()}); + } else { + llvm_unreachable("all possible induction types must be handled"); + } + ExitIRI->setOperand(0, Escape); + return true; + } + return false; +} // Add exit values to \p Plan. Extracts are added for each entry in \p // ExitUsersToFix if needed and their operands are updated. Returns true if all // exit users can be handled, otherwise return false. -static bool -addUsersInExitBlocks(VPlan &Plan, - const SetVector &ExitUsersToFix) { +static bool addUsersInExitBlocks( + VPlan &Plan, const SetVector &ExitUsersToFix, + const MapVector &Inductions, + DenseMap &EndValues) { if (ExitUsersToFix.empty()) return true; auto *MiddleVPBB = Plan.getMiddleBlock(); VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); + VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); // Introduce extract for exiting values and update the VPIRInstructions // modeling the corresponding LCSSA phis. @@ -9072,11 +9020,16 @@ addUsersInExitBlocks(VPlan &Plan, if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB) return false; + VPValue *Incoming = ExitIRI->getOperand(0); + if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues, + TypeInfo)) + continue; + LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, {Op, Plan.getOrAddLiveIn(ConstantInt::get( IntegerType::get(Ctx, 32), 1))}); - ExitIRI->setOperand(Idx, Ext); + ExitIRI->setOperand(0, Ext); } } return true; @@ -9371,11 +9324,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanTransforms::handleUncountableEarlyExit( *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); } - addScalarResumePhis(RecipeBuilder, *Plan); - SetVector ExitUsersToFix = collectUsersInExitBlocks( - OrigLoop, RecipeBuilder, *Plan); + DenseMap EndValues; + addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues); + SetVector ExitUsersToFix = + collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) { + if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, + EndValues)) { reportVectorizationFailure( "Some exit values in loop with uncountable exit not supported yet", "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop); @@ -9502,7 +9457,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { auto *HeaderR = cast(&R); RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); } - addScalarResumePhis(RecipeBuilder, *Plan); + DenseMap EndValues; + addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 06c36396a17f3..bd8c2e28ab36b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -311,16 +311,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { LastLane = 0; } - auto *LastInst = cast(get(Def, LastLane)); - // Set the insert point after the last scalarized instruction or after the - // last PHI, if LastInst is a PHI. This ensures the insertelement sequence - // will directly follow the scalar definitions. + auto *LastDef = get(Def, LastLane); auto OldIP = Builder.saveIP(); - auto NewIP = - isa(LastInst) - ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI()) - : std::next(BasicBlock::iterator(LastInst)); - Builder.SetInsertPoint(&*NewIP); + if (auto *LastInst = dyn_cast(LastDef)) { + // TODO: Remove once VPDerivedIVReicpe can be simplified, which requires + // vector trip count being modeled in VPlan. + // Set the insert point after the last scalarized instruction or after the + // last PHI, if LastInst is a PHI. This ensures the insertelement sequence + // will directly follow the scalar definitions. + auto NewIP = + isa(LastInst) + ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI()) + : std::next(BasicBlock::iterator(LastInst)); + Builder.SetInsertPoint(&*NewIP); + } // However, if we are vectorizing, we need to construct the vector values. // If the value is known to be uniform after vectorization, we can just @@ -335,7 +339,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { } else { // Initialize packing with insertelements to start from undef. assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF)); + Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF)); set(Def, Undef); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) packScalarIntoVectorValue(Def, Lane); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1be57d23f19cf..a109bc2350799 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1416,6 +1416,11 @@ class VPIRInstruction : public VPRecipeBase { "Op must be an operand of the recipe"); return true; } + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } }; /// VPWidenRecipe is a recipe for producing a widened instruction using the diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 77c08839dbfa9..3514a5a9d4467 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -843,7 +843,7 @@ void VPIRInstruction::execute(VPTransformState &State) { BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. - State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); + State.Builder.SetInsertPoint(&*PredBB->getTerminator()); Value *V = State.get(ExitValue, VPLane(Lane)); auto *Phi = cast(&I); // If there is no existing block for PredBB in the phi, add a new incoming diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 951d833fa941e..f630f4f21e065 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -230,7 +230,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK: LV: Loop does not require scalar epilogue ; entry: %cmp7 = icmp sgt i32 %n, 0 @@ -480,7 +479,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK: LV: Loop does not require scalar epilogue ; entry: %cmp7 = icmp sgt i32 %n, 0 From 4b3ff6332b04b6170b01d7965365423876310485 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 30 Dec 2024 13:47:04 +0000 Subject: [PATCH 4/9] !fixup Turn into VPlan transform --- .../Transforms/Vectorize/LoopVectorize.cpp | 163 +++--------------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 6 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 121 +++++++++++++ .../Transforms/Vectorize/VPlanTransforms.h | 4 + .../single_early_exit_live_outs.ll | 5 +- 6 files changed, 153 insertions(+), 148 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6c0b47a427f68..421d915bf55ca 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8748,10 +8748,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute /// the end value of the induction. -static VPValue *addResumePhiRecipeForInduction( +static VPInstruction *addResumePhiRecipeForInduction( VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder, - VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC, - DenseMap &EndValues) { + VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) { auto *WideIntOrFp = dyn_cast(WideIV); // Truncated wide inductions resume from the last lane of their vector value // in the last vector iteration which is handled elsewhere. @@ -8776,7 +8775,6 @@ static VPValue *addResumePhiRecipeForInduction( ScalarTypeOfWideIV); } - EndValues[WideIV] = EndValue; auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val"); @@ -8785,10 +8783,10 @@ static VPValue *addResumePhiRecipeForInduction( /// Create resume phis in the scalar preheader for first-order recurrences, /// reductions and inductions, and update the VPIRInstructions wrapping the -/// original phis in the scalar header. +/// original phis in the scalar header. End values for inductions are added to +/// \p IVEndValues. static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, - Loop *OrigLoop, - DenseMap &EndValues) { + DenseMap &IVEndValues) { VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); auto *ScalarPH = Plan.getScalarPreheader(); auto *MiddleVPBB = cast(ScalarPH->getSinglePredecessor()); @@ -8806,9 +8804,10 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); if (auto *WideIVR = dyn_cast(VectorPhiR)) { - if (VPValue *ResumePhi = addResumePhiRecipeForInduction( + if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, - &Plan.getVectorTripCount(), EndValues)) { + &Plan.getVectorTripCount())) { + IVEndValues[WideIVR] = ResumePhi->getOperand(0); ScalarPhiIRI->addOperand(ResumePhi); continue; } @@ -8837,57 +8836,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, } } -static bool isIVUse(VPValue *Incoming) { - VPRecipeBase *IncomingDef = Incoming->getDefiningRecipe(); - if (!IncomingDef) - return false; - auto *WideIV = dyn_cast(IncomingDef); - if (WideIV) { - return isa(WideIV) || !cast(WideIV)->getTruncInst(); - } - - if (IncomingDef->getNumOperands() != 2) - return false; - WideIV = dyn_cast(IncomingDef->getOperand(0)); - if (!WideIV) - WideIV = dyn_cast(IncomingDef->getOperand(1)); - if (!WideIV) - return false; - - using namespace VPlanPatternMatch; - auto &ID = WideIV->getInductionDescriptor(); - switch (ID.getInductionOpcode()) { - case Instruction::Add: - return match(Incoming, - m_c_Binary( - m_VPValue(), m_Specific(WideIV->getStepValue()))); - case Instruction::FAdd: - return match(Incoming, - m_c_Binary( - m_VPValue(), m_Specific(WideIV->getStepValue()))); - case Instruction::FSub: - return match(Incoming, - m_Binary( - m_VPValue(), m_Specific(WideIV->getStepValue()))); - case Instruction::Sub: { - VPValue *Step; - return match(Incoming, - m_Binary(m_VPValue(), m_VPValue(Step))) && - Step->isLiveIn() && WideIV->getStepValue()->isLiveIn() && - (cast(Step->getLiveInIRValue())->getValue() + - cast(WideIV->getStepValue()->getLiveInIRValue()) - ->getValue()) - .isZero(); - } - default: - return ID.getKind() == InductionDescriptor::IK_PtrInduction && - match( - Incoming, - m_GetElementPtr(m_VPValue(), m_Specific(WideIV->getStepValue()))); - } - llvm_unreachable("should have been covered by switch above"); -} - // Collect VPIRInstructions for phis in the exit blocks that are modeled // in VPlan and add the exiting VPValue as operand. Some exiting values are not // modeled explicitly yet and won't be included. Those are un-truncated @@ -8925,80 +8873,13 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, return ExitUsersToFix; } -/// If \p Incoming is a user of a non-truncated induction, create recipes to -/// compute the final value and update the user \p ExitIRI. -static bool addInductionEndValue( - VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming, - const MapVector &Inductions, - DenseMap &EndValues, VPTypeAnalysis &TypeInfo) { - if ((isa(Incoming) && - !cast(Incoming)->getTruncInst()) || - isa(Incoming) || - (isa(Incoming->getUnderlyingValue()) && - any_of(cast(Incoming->getUnderlyingValue())->users(), - [&Inductions](User *U) { - auto *P = dyn_cast(U); - return P && Inductions.contains(P); - }))) { - VPValue *IV; - if (auto *WideIV = - dyn_cast(Incoming->getDefiningRecipe())) - IV = WideIV; - else if (auto *WideIV = - dyn_cast(Incoming->getDefiningRecipe() - ->getOperand(0) - ->getDefiningRecipe())) - IV = WideIV; - else - IV = Incoming->getDefiningRecipe()->getOperand(1); - // Skip phi nodes already updated. This can be the case if 2 induction - // phis chase each other. - VPValue *EndValue = EndValues[IV]; - if (any_of(cast(Incoming->getDefiningRecipe())->operands(), - IsaPred)) { - ExitIRI->setOperand(0, EndValue); - return true; - } - - VPBuilder B(Plan.getMiddleBlock()->getTerminator()); - VPValue *Escape = nullptr; - auto *WideIV = cast(IV->getDefiningRecipe()); - VPValue *Step = WideIV->getStepValue(); - Type *ScalarTy = TypeInfo.inferScalarType(WideIV); - if (ScalarTy->isIntegerTy()) - Escape = - B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape"); - else if (ScalarTy->isPointerTy()) - Escape = B.createPtrAdd( - EndValue, - B.createNaryOp(Instruction::Sub, - {Plan.getOrAddLiveIn(ConstantInt::get( - Step->getLiveInIRValue()->getType(), 0)), - Step}), - {}, "ind.escape"); - else if (ScalarTy->isFloatingPointTy()) { - const auto &ID = WideIV->getInductionDescriptor(); - Escape = B.createNaryOp( - ID.getInductionBinOp()->getOpcode() == Instruction::FAdd - ? Instruction::FSub - : Instruction::FAdd, - {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()}); - } else { - llvm_unreachable("all possible induction types must be handled"); - } - ExitIRI->setOperand(0, Escape); - return true; - } - return false; -} // Add exit values to \p Plan. Extracts are added for each entry in \p // ExitUsersToFix if needed and their operands are updated. Returns true if all // exit users can be handled, otherwise return false. -static bool addUsersInExitBlocks( - VPlan &Plan, const SetVector &ExitUsersToFix, - const MapVector &Inductions, - DenseMap &EndValues) { +static bool +addUsersInExitBlocks(VPlan &Plan, + const SetVector &ExitUsersToFix, + DenseMap &IVEndValues) { if (ExitUsersToFix.empty()) return true; @@ -9020,16 +8901,11 @@ static bool addUsersInExitBlocks( if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB) return false; - VPValue *Incoming = ExitIRI->getOperand(0); - if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues, - TypeInfo)) - continue; - LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, {Op, Plan.getOrAddLiveIn(ConstantInt::get( IntegerType::get(Ctx, 32), 1))}); - ExitIRI->setOperand(0, Ext); + ExitIRI->setOperand(Idx, Ext); } } return true; @@ -9324,13 +9200,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanTransforms::handleUncountableEarlyExit( *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); } - DenseMap EndValues; - addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues); + DenseMap IVEndValues; + addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); SetVector ExitUsersToFix = collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, - EndValues)) { + if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, IVEndValues)) { reportVectorizationFailure( "Some exit values in loop with uncountable exit not supported yet", "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop); @@ -9409,6 +9284,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow, WithoutRuntimeCheck); } + + VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues); return Plan; } @@ -9457,8 +9334,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { auto *HeaderR = cast(&R); RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); } - DenseMap EndValues; - addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues); + DenseMap IVEndValues; + addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index bd8c2e28ab36b..5514f494f1826 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -311,9 +311,9 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { LastLane = 0; } - auto *LastDef = get(Def, LastLane); + auto *LastValue = get(Def, LastLane); auto OldIP = Builder.saveIP(); - if (auto *LastInst = dyn_cast(LastDef)) { + if (auto *LastInst = dyn_cast(LastValue)) { // TODO: Remove once VPDerivedIVReicpe can be simplified, which requires // vector trip count being modeled in VPlan. // Set the insert point after the last scalarized instruction or after the @@ -339,7 +339,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { } else { // Initialize packing with insertelements to start from undef. assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF)); + Value *Undef = PoisonValue::get(VectorType::get(LastValue->getType(), VF)); set(Def, Undef); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) packScalarIntoVectorValue(Def, Lane); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 3514a5a9d4467..77c08839dbfa9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -843,7 +843,7 @@ void VPIRInstruction::execute(VPTransformState &State) { BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. - State.Builder.SetInsertPoint(&*PredBB->getTerminator()); + State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); Value *V = State.get(ExitValue, VPLane(Lane)); auto *Phi = cast(&I); // If there is no existing block for PredBB in the phi, add a new incoming diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8ac2bd5160c26..e713d297cb528 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -624,6 +624,127 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { } } +VPWidenInductionRecipe *isIVUse(VPValue *Incoming) { + auto *WideIV = dyn_cast(Incoming); + if (WideIV) { + auto *WideIntOrFpIV = dyn_cast(WideIV); + if (WideIntOrFpIV && WideIntOrFpIV->getTruncInst()) + return nullptr; + return WideIV; + } + + VPRecipeBase *IncomingDef = Incoming->getDefiningRecipe(); + if (!IncomingDef || IncomingDef->getNumOperands() != 2) + return nullptr; + + WideIV = dyn_cast(IncomingDef->getOperand(0)); + if (!WideIV) + WideIV = dyn_cast(IncomingDef->getOperand(1)); + if (!WideIV) + return nullptr; + + auto IsWideIVInc = [&]() { + using namespace VPlanPatternMatch; + auto &ID = WideIV->getInductionDescriptor(); + switch (ID.getInductionOpcode()) { + case Instruction::Add: + return match(Incoming, + m_c_Binary( + m_VPValue(), m_Specific(WideIV->getStepValue()))); + case Instruction::FAdd: + return match(Incoming, + m_c_Binary( + m_VPValue(), m_Specific(WideIV->getStepValue()))); + case Instruction::FSub: + return match(Incoming, + m_Binary( + m_VPValue(), m_Specific(WideIV->getStepValue()))); + case Instruction::Sub: { + VPValue *Step; + return match(Incoming, + m_Binary(m_VPValue(), m_VPValue(Step))) && + Step->isLiveIn() && WideIV->getStepValue()->isLiveIn() && + (cast(Step->getLiveInIRValue())->getValue() + + cast(WideIV->getStepValue()->getLiveInIRValue()) + ->getValue()) + .isZero(); + } + default: + return ID.getKind() == InductionDescriptor::IK_PtrInduction && + match(Incoming, + m_GetElementPtr(m_VPValue(), + m_Specific(WideIV->getStepValue()))); + } + llvm_unreachable("should have been covered by switch above"); + }; + return IsWideIVInc() ? WideIV : nullptr; +} + +void VPlanTransforms::optimizeInductionExitUsers( + VPlan &Plan, DenseMap &EndValues) { + using namespace VPlanPatternMatch; + SmallVector ExitVPBBs(Plan.getExitBlocks()); + if (ExitVPBBs.size() != 1) + return; + + VPIRBasicBlock *ExitVPBB = ExitVPBBs[0]; + VPBlockBase *PredVPBB = ExitVPBB->getSinglePredecessor(); + if (!PredVPBB) + return; + assert(PredVPBB == Plan.getMiddleBlock() && + "predecessor must be the middle block"); + + VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); + VPBuilder B(Plan.getMiddleBlock()->getTerminator()); + for (VPRecipeBase &R : *ExitVPBB) { + auto *ExitIRI = cast(&R); + if (!isa(ExitIRI->getInstruction())) + break; + + VPValue *Incoming; + if (!match(ExitIRI->getOperand(0), + m_VPInstruction( + m_VPValue(Incoming), m_SpecificInt(1)))) + continue; + + auto *WideIV = isIVUse(Incoming); + if (!WideIV) + continue; + VPValue *EndValue = EndValues.lookup(WideIV); + if (!EndValue) + continue; + + if (Incoming != WideIV) { + ExitIRI->setOperand(0, EndValue); + continue; + } + + VPValue *Escape = nullptr; + VPValue *Step = WideIV->getStepValue(); + Type *ScalarTy = TypeInfo.inferScalarType(WideIV); + if (ScalarTy->isIntegerTy()) { + Escape = + B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape"); + } else if (ScalarTy->isPointerTy()) { + auto *Zero = Plan.getOrAddLiveIn( + ConstantInt::get(Step->getLiveInIRValue()->getType(), 0)); + Escape = B.createPtrAdd(EndValue, + B.createNaryOp(Instruction::Sub, {Zero, Step}), + {}, "ind.escape"); + } else if (ScalarTy->isFloatingPointTy()) { + const auto &ID = WideIV->getInductionDescriptor(); + Escape = B.createNaryOp( + ID.getInductionBinOp()->getOpcode() == Instruction::FAdd + ? Instruction::FSub + : Instruction::FAdd, + {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()}); + } else { + llvm_unreachable("all possible induction types must be handled"); + } + ExitIRI->setOperand(0, Escape); + } +} + /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing /// them with already existing recipes expanding the same SCEV expression. static void removeRedundantExpandSCEVRecipes(VPlan &Plan) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index fddde86891166..9a1a90b3eef29 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -137,6 +137,10 @@ struct VPlanTransforms { /// Lower abstract recipes to concrete ones, that can be codegen'd. static void convertToConcreteRecipes(VPlan &Plan); + + static void + optimizeInductionExitUsers(VPlan &Plan, + DenseMap &EndValues); }; } // namespace llvm diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 085438aa80f24..6e542bd873b8c 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -532,6 +532,7 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] @@ -545,11 +546,13 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 ; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] @@ -570,7 +573,7 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ 67, [[MIDDLE_SPLIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL1]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RETVAL2]] ; entry: From 3fdef6070a8584356d1cc8b4be1ffa95242aacf3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 9 Jan 2025 22:48:11 +0000 Subject: [PATCH 5/9] !fixup fixups after recent merges. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 ++--- llvm/lib/Transforms/Vectorize/VPlanUtils.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 52f58510ba863..59e71ddf7c08d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8895,8 +8895,7 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, // exit users can be handled, otherwise return false. static bool addUsersInExitBlocks(VPlan &Plan, - const SetVector &ExitUsersToFix, - DenseMap &IVEndValues) { + const SetVector &ExitUsersToFix) { if (ExitUsersToFix.empty()) return true; @@ -9222,7 +9221,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { SetVector ExitUsersToFix = collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, IVEndValues)) { + if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) { reportVectorizationFailure( "Some exit values in loop with uncountable exit not supported yet", "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 777944264f458..1395202c10d15 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -45,8 +45,8 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) { assert(Def && "Must have definition for value defined inside vector region"); if (auto *Rep = dyn_cast(Def)) return Rep->isUniform(); - if (auto *GEP = dyn_cast(Def)) - return all_of(GEP->operands(), isUniformAfterVectorization); + if (isa(Def)) + return all_of(Def->operands(), isUniformAfterVectorization); if (auto *VPI = dyn_cast(Def)) return VPI->isSingleScalar() || VPI->isVectorToScalar(); // VPExpandSCEVRecipes must be placed in the entry and are alway uniform. From fcd177a1365e2f9f50fbee78f8362455b1daff46 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 10 Jan 2025 15:59:14 +0000 Subject: [PATCH 6/9] !fixup make sure dead recipes are cleaned up before ind legalization --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4bce80147e66f..6cd06070028eb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1445,6 +1445,7 @@ void VPlanTransforms::optimize(VPlan &Plan) { removeRedundantInductionCasts(Plan); simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType()); + removeDeadRecipes(Plan); legalizeAndOptimizeInductions(Plan); removeRedundantExpandSCEVRecipes(Plan); simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType()); From 06e1a5d0be8c2f70a45775d7d90dda054413c1d4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 14 Jan 2025 22:35:13 +0000 Subject: [PATCH 7/9] !fixup address latest comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 3 +++ llvm/lib/Transforms/Vectorize/VPlan.cpp | 26 +++++++++---------- llvm/lib/Transforms/Vectorize/VPlan.h | 1 + .../Transforms/Vectorize/VPlanTransforms.cpp | 16 +++++------- .../LoopVectorize/X86/multi-exit-cost.ll | 14 ---------- .../unused-blend-mask-for-first-operand.ll | 6 ++--- 6 files changed, 25 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f7297a4512e9c..c4443003dab91 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8964,6 +8964,8 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, &Plan.getVectorTripCount())) { + assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi && + "Expected a ResumePhi"); IVEndValues[WideIVR] = ResumePhi->getOperand(0); ScalarPhiIRI->addOperand(ResumePhi); continue; @@ -9497,6 +9499,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); } DenseMap IVEndValues; + // IVEndValues are not used yet in the native path. addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index c537d54363907..153e3cbaeffc5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -311,20 +311,18 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { LastLane = 0; } - auto *LastValue = get(Def, LastLane); + auto *LastInst = cast(get(Def, LastLane)); auto OldIP = Builder.saveIP(); - if (auto *LastInst = dyn_cast(LastValue)) { - // TODO: Remove once VPDerivedIVReicpe can be simplified, which requires - // vector trip count being modeled in VPlan. - // Set the insert point after the last scalarized instruction or after the - // last PHI, if LastInst is a PHI. This ensures the insertelement sequence - // will directly follow the scalar definitions. - auto NewIP = - isa(LastInst) - ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI()) - : std::next(BasicBlock::iterator(LastInst)); - Builder.SetInsertPoint(&*NewIP); - } + // TODO: Remove once VPDerivedRecipe can be simplified, which requires + // vector trip count being modeled in VPlan. + // Set the insert point after the last scalarized instruction or after the + // last PHI, if LastInst is a PHI. This ensures the insertelement sequence + // will directly follow the scalar definitions. + auto NewIP = + isa(LastInst) + ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI()) + : std::next(BasicBlock::iterator(LastInst)); + Builder.SetInsertPoint(&*NewIP); // However, if we are vectorizing, we need to construct the vector values. // If the value is known to be uniform after vectorization, we can just @@ -339,7 +337,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { } else { // Initialize packing with insertelements to start from undef. assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = PoisonValue::get(VectorType::get(LastValue->getType(), VF)); + Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF)); set(Def, Undef); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) packScalarIntoVectorValue(Def, Lane); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4235ecf55464e..08c6cdce1f02f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1420,6 +1420,7 @@ class VPIRInstruction : public VPRecipeBase { "Op must be an operand of the recipe"); return true; } + bool onlyFirstLaneUsed(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9ed85bd7209fd..d5ca87b986be7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -667,18 +667,16 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { } } -/// Return a wide IV, if \p VPV is an optimizable wide IV or wide IV use. That -/// is, if \p VPV is either an untruncated wide induction, or if it increments a -/// wide induction by its step. -static VPWidenInductionRecipe *isOptimizableIVOrUse(VPValue *VPV) { +/// Check if \p VPV is an untruncated wide induction, either before or after the +/// increment. If so return the header IV (before the increment), otherwise +/// return null. +static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) { auto *WideIV = dyn_cast(VPV); if (WideIV) { // VPV itself is a wide induction, separately compute the end value for exit // users if it is not a truncated IV. - if (isa(WideIV) || - !cast(WideIV)->getTruncInst()) - return WideIV; - return nullptr; + auto *IntOrFpIV = dyn_cast(WideIV); + return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV; } // Check if VPV is an optimizable induction increment. @@ -757,7 +755,7 @@ void VPlanTransforms::optimizeInductionExitUsers( m_VPValue(Incoming), m_SpecificInt(1)))) continue; - auto *WideIV = isOptimizableIVOrUse(Incoming); + auto *WideIV = getOptimizableIVOf(Incoming); if (!WideIV) continue; VPValue *EndValue = EndValues.lookup(WideIV); diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index 7b29d0ef7cbb5..6c97ab362fc86 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -29,21 +29,7 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 5 -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 6 -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP10]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP11]], 1 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP12]], 1 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP13]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP14]], 1 -; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP15]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP16]], 1 -; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP17]], 1 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP27]], i32 -7 diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll index d7d7d5d9c5da0..50c1f74d2aacc 100644 --- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -84,8 +84,6 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 % ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -94,8 +92,8 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 % ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP3]], [[X]] ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP4]], ptr poison, ptr [[B]] ; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0 From c33d89abf07f27a75a64a600d9a5189f0f9e065f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 16 Jan 2025 19:53:19 +0000 Subject: [PATCH 8/9] !fixup address latest comments, thanks! --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 ++- llvm/lib/Transforms/Vectorize/VPlan.cpp | 4 +--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +-- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 1 + 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8747accef7576..7ac35ee24ab96 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8977,6 +8977,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, if (!ScalarPhiI) break; + // TODO: Extract final value from induction recipe initially, optimize to pre-computed end value together in optimizeInductionExitUsers. auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); if (auto *WideIVR = dyn_cast(VectorPhiR)) { if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( @@ -9522,7 +9523,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); } DenseMap IVEndValues; - // IVEndValues are not used yet in the native path. + // TODO: IVEndValues are not used yet in the native path, to optimize exit values. addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 153e3cbaeffc5..aa41c41e90c4c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -312,12 +312,10 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { } auto *LastInst = cast(get(Def, LastLane)); - auto OldIP = Builder.saveIP(); - // TODO: Remove once VPDerivedRecipe can be simplified, which requires - // vector trip count being modeled in VPlan. // Set the insert point after the last scalarized instruction or after the // last PHI, if LastInst is a PHI. This ensures the insertelement sequence // will directly follow the scalar definitions. + auto OldIP = Builder.saveIP(); auto NewIP = isa(LastInst) ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI()) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d5ca87b986be7..9febd612c644e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -759,8 +759,7 @@ void VPlanTransforms::optimizeInductionExitUsers( if (!WideIV) continue; VPValue *EndValue = EndValues.lookup(WideIV); - if (!EndValue) - continue; + assert(EndValue && "end value must have been pre-computed"); if (Incoming != WideIV) { ExitIRI->setOperand(0, EndValue); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 9a1a90b3eef29..4dce1fd765b9b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -138,6 +138,7 @@ struct VPlanTransforms { /// Lower abstract recipes to concrete ones, that can be codegen'd. static void convertToConcreteRecipes(VPlan &Plan); + /// If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them precomputed end values instead, possibly taken one step backwards. static void optimizeInductionExitUsers(VPlan &Plan, DenseMap &EndValues); From 04933b5e86f6206419ccdd100db1bed53e4e93b2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 17 Jan 2025 21:27:41 +0000 Subject: [PATCH 9/9] !fixup fix formatting --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 ++++-- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c8e22ff315cc3..34c5bc3312aec 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8987,7 +8987,8 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, if (!ScalarPhiI) break; - // TODO: Extract final value from induction recipe initially, optimize to pre-computed end value together in optimizeInductionExitUsers. + // TODO: Extract final value from induction recipe initially, optimize to + // pre-computed end value together in optimizeInductionExitUsers. auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); if (auto *WideIVR = dyn_cast(VectorPhiR)) { if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( @@ -9533,7 +9534,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); } DenseMap IVEndValues; - // TODO: IVEndValues are not used yet in the native path, to optimize exit values. + // TODO: IVEndValues are not used yet in the native path, to optimize exit + // values. addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 4dce1fd765b9b..a751b8b5e8dc5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -138,7 +138,9 @@ struct VPlanTransforms { /// Lower abstract recipes to concrete ones, that can be codegen'd. static void convertToConcreteRecipes(VPlan &Plan); - /// If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them precomputed end values instead, possibly taken one step backwards. + /// If there's a single exit block, optimize its phi recipes that use exiting + /// IV values by feeding them precomputed end values instead, possibly taken + /// one step backwards. static void optimizeInductionExitUsers(VPlan &Plan, DenseMap &EndValues);