diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 78a3d7216f87e..e7459fb456702 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7722,6 +7722,7 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan.prepareToExecute(ILV.getTripCount(), ILV.getOrCreateVectorTripCount(nullptr), CanonicalIVStartValue, State); + VPlanTransforms::prepareToExecute(BestVPlan); BestVPlan.execute(&State); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 529108a5aaa97..b801d1863e252 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1070,10 +1070,9 @@ void VPlan::execute(VPTransformState *State) { } auto *PhiR = cast(&R); - bool NeedsScalar = - isa(PhiR) || - (isa(PhiR) && - cast(PhiR)->isInLoop()); + bool NeedsScalar = isa(PhiR) || + (isa(PhiR) && + cast(PhiR)->isInLoop()); Value *Phi = State->get(PhiR, NeedsScalar); Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar); cast(Phi)->addIncoming(Val, VectorLatchBB); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1b1630ebc6c23..e1d828f038f9a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2239,6 +2239,45 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe, #endif }; +/// Recipe to generate a scalar PHI. Used to generate code for recipes that +/// produce scalar header phis, including VPCanonicalIVPHIRecipe and +/// VPEVLBasedIVPHIRecipe. 
+class VPScalarPHIRecipe : public VPHeaderPHIRecipe { + std::string Name; + +public: + VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, + StringRef Name) + : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL), + Name(Name.str()) { + addOperand(BackedgeValue); + } + + ~VPScalarPHIRecipe() override = default; + + VPScalarPHIRecipe *clone() override { + llvm_unreachable("cloning not implemented yet"); + } + + VP_CLASSOF_IMPL(VPDef::VPScalarPHISC) + + /// Generate the scalar phi and add its incoming start value. + void execute(VPTransformState &State) override; + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for handling phis that are widened in the vector loop. /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are /// managed in the recipe directly. @@ -3134,8 +3173,10 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { return D->getVPDefID() == VPDef::VPCanonicalIVPHISC; } - /// Generate the canonical scalar induction phi of the vector loop. - void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable( + "cannot execute this recipe, should be replaced by VPScalarPHIRecipe"); + } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. @@ -3231,9 +3272,10 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC; } - /// Generate phi for handling IV based on EVL over iterations correctly. - /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe. 
- void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable( + "cannot execute this recipe, should be replaced by VPScalarPHIRecipe"); + } /// Return the cost of this VPEVLBasedIVPHIRecipe. InstructionCost computeCost(ElementCount VF, diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index cb42cfe8159b0..969d07b229e46 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -213,14 +213,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { TypeSwitch(V->getDefiningRecipe()) .Case( - [this](const auto *R) { - // Handle header phi recipes, except VPWidenIntOrFpInduction - // which needs special handling due it being possibly truncated. - // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) + VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe, + VPScalarPHIRecipe>([this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due to it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. 
+ return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) .CasegetLiveInIRValue(); - PHINode *Phi = PHINode::Create(Start->getType(), 2, "index"); - Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); - - BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - Phi->addIncoming(Start, VectorPH); - Phi->setDebugLoc(getDebugLoc()); - State.set(this, Phi, /*IsScalar*/ true); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -3154,8 +3143,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { assert(!onlyScalarsGenerated(State.VF.isScalable()) && "Recipe should have been replaced"); - auto *IVR = getParent()->getPlan()->getCanonicalIV(); - PHINode *CanonicalIV = cast(State.get(IVR, /*IsScalar*/ true)); unsigned CurrentPart = getUnrollPart(*this); // Build a pointer phi @@ -3165,6 +3152,12 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); PHINode *NewPointerPhi = nullptr; if (CurrentPart == 0) { + auto *IVR = cast(&getParent() + ->getPlan() + ->getVectorLoopRegion() + ->getEntryBasicBlock() + ->front()); + PHINode *CanonicalIV = cast(State.get(IVR, /*IsScalar=*/ true)); NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV->getIterator()); NewPointerPhi->addIncoming(ScalarStartValue, VectorPH); @@ -3478,20 +3471,30 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif 
+void VPScalarPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); Value *Start = State.get(getOperand(0), VPLane(0)); - PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv"); + PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name); Phi->addIncoming(Start, VectorPH); Phi->setDebugLoc(getDebugLoc()); State.set(this, Phi, /*IsScalar=*/true); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; - +void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "SCALAR-PHI "; printAsOperand(O, SlotTracker); O << " = phi "; printOperands(O, SlotTracker); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ba791c679b4ae..81f566d3d06e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1821,3 +1821,24 @@ void VPlanTransforms::createInterleaveGroups( } } } + +void VPlanTransforms::prepareToExecute(VPlan &Plan) { + // Swap each matching header phi for a VPScalarPHIRecipe that keeps its + // start value, backedge value and debug location; users are rewired. + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( + vp_depth_first_deep(Plan.getEntry()))) { + for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) { + if (!isa(&R)) + continue; + auto *PhiR = cast(&R); + StringRef Name = + isa(PhiR) ? 
"index" : "evl.based.iv"; + auto *ScalarR = + new VPScalarPHIRecipe(PhiR->getStartValue(), PhiR->getBackedgeValue(), + PhiR->getDebugLoc(), Name); + ScalarR->insertBefore(PhiR); + PhiR->replaceAllUsesWith(ScalarR); + PhiR->eraseFromParent(); + } + } +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 11e094db6294f..1491e0a8df04d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -123,6 +123,9 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. static void removeDeadRecipes(VPlan &Plan); + + /// Lower abstract recipes to concrete ones that can be codegen'd. + static void prepareToExecute(VPlan &Plan); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 691b0d40823cf..957a602091c73 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -359,6 +359,7 @@ class VPDef { VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, + VPScalarPHISC, VPReductionPHISC, // END: SubclassID for recipes that inherit VPHeaderPHIRecipe // END: Phi-like recipes