From 35c51116c8acd495f87c4e26d8496ea7294979ae Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 24 May 2024 15:55:34 +0100 Subject: [PATCH 1/9] [VPlan] Add VPIRWrapperBlock, use to model pre-preheader. This patch adds a new special type of VPBasicBlock that wraps an existing IR basic block. Recipes of the block get added before the terminator of the wrapped IR basic block. Making it a subclass of VPBasicBlock avoids duplicating various APIs to manage recipes in a block, as well as makes sure the traversals filtering VPBasicBlocks automatically apply as well. Initially VPIRWrapperBlocks are only used for the pre-preheader (wrapping the original preheader of the scalar loop). As follow-up, this will be used to move more parts of the skeleton inside VPlan, starting with the branch and condition in the middle block. Note: This requires updating all VPlan-printing tests, which I will do once we converge on a final version. --- .../Transforms/Vectorize/LoopVectorize.cpp | 4 +- llvm/lib/Transforms/Vectorize/VPlan.cpp | 57 +++++++++++++++- llvm/lib/Transforms/Vectorize/VPlan.h | 65 +++++++++++++++++-- .../vplan-printing-before-execute.ll | 4 +- .../Transforms/Vectorize/VPlanTestBase.h | 14 ++-- 5 files changed, 127 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 48981a6bd39e3..e71a0df1d9c7c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8607,7 +8607,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // loop region contains a header and latch basic blocks. 
VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), - *PSE.getSE()); + *PSE.getSE(), OrigLoop->getLoopPreheader()); VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); @@ -8855,7 +8855,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // Create new empty VPlan auto Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), - *PSE.getSE()); + *PSE.getSE(), OrigLoop->getLoopPreheader()); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index d71d7580e6ba6..8998e392a433e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -442,6 +442,58 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { return NewBB; } +void VPIRWrapperBlock::execute(VPTransformState *State) { + for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { + VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); + auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); + BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB]; + + assert(PredBB && "Predecessor basic-block not found building successor."); + auto *PredBBTerminator = PredBB->getTerminator(); + LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); + + auto *TermBr = dyn_cast(PredBBTerminator); + if (TermBr) { + // Set each forward successor here when it is created, excluding + // backedges. A backward successor is set when the branch is created. + unsigned idx = PredVPSuccessors.front() == this ? 
0 : 1; + assert(!TermBr->getSuccessor(idx) && + "Trying to reset an existing successor block."); + TermBr->setSuccessor(idx, WrappedBlock); + } + } + + assert(getHierarchicalSuccessors().size() == 0 && + "VPIRWrapperBlock cannot have successors"); + State->CFG.VPBB2IRBB[this] = getWrappedBlock(); + State->CFG.PrevVPBB = this; + + auto *Term = cast(getWrappedBlock()->getTerminator()); + State->Builder.SetInsertPoint(Term); + + for (VPRecipeBase &Recipe : *this) + Recipe.execute(*State); + + LLVM_DEBUG(dbgs() << "LV: filled BB:" << *getWrappedBlock()); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + +void VPIRWrapperBlock::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "ir-bb<" << getName() << ">:\n"; + + auto RecipeIndent = Indent + " "; + for (const VPRecipeBase &Recipe : *this) { + Recipe.print(O, RecipeIndent, SlotTracker); + O << '\n'; + } + assert(getSuccessors().empty() && + "Wrapper blocks should not have successors"); + printSuccessors(O, Indent); +} +#endif + void VPBasicBlock::execute(VPTransformState *State) { bool Replica = State->Instance && !State->Instance->isFirstIteration(); VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; @@ -769,8 +821,9 @@ VPlan::~VPlan() { delete BackedgeTakenCount; } -VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) { - VPBasicBlock *Preheader = new VPBasicBlock("ph"); +VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, + BasicBlock *PH) { + VPIRWrapperBlock *Preheader = new VPIRWrapperBlock(PH); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); auto Plan = std::make_unique(Preheader, VecPreheader); Plan->TripCount = diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 3aee17921086d..20a12b571d0c0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -473,7 +473,11 @@ class VPBlockBase { /// that are actually 
instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPBlockBase objects. They are used for concrete /// type identification. - using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC }; + using VPBlockTy = enum { + VPBasicBlockSC, + VPRegionBlockSC, + VPIRWrapperBlockSC + }; using VPBlocksTy = SmallVectorImpl; @@ -2834,6 +2838,10 @@ class VPBasicBlock : public VPBlockBase { /// The VPRecipes held in the order of output instructions to generate. RecipeListTy Recipes; +protected: + VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "") + : VPBlockBase(BlockSC, Name.str()) {} + public: VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) : VPBlockBase(VPBasicBlockSC, Name.str()) { @@ -2882,7 +2890,8 @@ class VPBasicBlock : public VPBlockBase { /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPBlockBase *V) { - return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC; + return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC || + V->getVPBlockID() == VPBlockBase::VPIRWrapperBlockSC; } void insert(VPRecipeBase *Recipe, iterator InsertPt) { @@ -2951,6 +2960,50 @@ class VPBasicBlock : public VPBlockBase { BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG); }; +/// A special type of VPBasicBlock that wraps an existing IR basic block. +/// Recipes of the block get added before the terminator of the wrapped IR basic +/// block. +class VPIRWrapperBlock : public VPBasicBlock { + BasicBlock *WrappedBlock; + +public: + VPIRWrapperBlock(BasicBlock *WrappedBlock) + : VPBasicBlock(VPIRWrapperBlockSC, WrappedBlock->getName()), + WrappedBlock(WrappedBlock) {} + + ~VPIRWrapperBlock() override {} + + static inline bool classof(const VPBlockBase *V) { + return V->getVPBlockID() == VPBlockBase::VPIRWrapperBlockSC; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. 
\p + /// SlotTracker is used to print unnamed VPValue's using consequtive numbers. + /// + /// Note that the numbering is applied to the whole VPlan, so printing + /// individual blocks is consistent with the whole VPlan printing. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; + using VPBlockBase::print; // Get the print(raw_stream &O) version. +#endif + /// The method which generates the output IR instructions that correspond to + /// this VPBasicBlock, thereby "executing" the VPlan. + void execute(VPTransformState *State) override; + + VPIRWrapperBlock *clone() override { + auto *NewBlock = new VPIRWrapperBlock(WrappedBlock); + for (VPRecipeBase &R : *this) + NewBlock->appendRecipe(R.clone()); + return NewBlock; + } + + void dropAllReferences(VPValue *NewValue) override {} + void resetBlock(BasicBlock *N) { WrappedBlock = N; } + + BasicBlock *getWrappedBlock() { return WrappedBlock; } +}; + /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. /// A VPRegionBlock may indicate that its contents are to be replicated several @@ -3139,12 +3192,12 @@ class VPlan { ~VPlan(); /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping - /// original scalar pre-header) which contains SCEV expansions that need to - /// happen before the CFG is modified; a VPBasicBlock for the vector + /// original scalar pre-header \p PH) which contains SCEV expansions that need + /// to happen before the CFG is modified; a VPBasicBlock for the vector /// pre-header, followed by a region for the vector loop, followed by the /// middle VPBasicBlock. static VPlanPtr createInitialVPlan(const SCEV *TripCount, - ScalarEvolution &PSE); + ScalarEvolution &PSE, BasicBlock *PH); /// Prepare the plan for execution, setting up the required live-in values. 
void prepareToExecute(Value *TripCount, Value *VectorTripCount, @@ -3321,6 +3374,8 @@ class VPlanPrinter { /// its successor blocks. void dumpBasicBlock(const VPBasicBlock *BasicBlock); + void dumpIRWrapperBlock(const VPIRWrapperBlock *WrapperBlock); + /// Print a given \p Region of the Plan. void dumpRegion(const VPRegionBlock *Region); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index ca9dfdc6f6d29..2bb3c898c7cda 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -13,7 +13,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -45,7 +45,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index 6cd43f6803130..c658724278fe0 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -67,9 +67,10 @@ class VPlanTestBase : public testing::Test { assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - auto Plan = VPlan::createInitialVPlan( - SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE); - VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); + Loop 
*L = LI->getLoopFor(LoopHeader); + auto Plan = VPlan::createInitialVPlan(SE->getBackedgeTakenCount(L), *SE, + L->getLoopPreheader()); + VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildHierarchicalCFG(); return Plan; } @@ -80,9 +81,10 @@ class VPlanTestBase : public testing::Test { assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - auto Plan = VPlan::createInitialVPlan( - SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE); - VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); + Loop *L = LI->getLoopFor(LoopHeader); + auto Plan = VPlan::createInitialVPlan(SE->getBackedgeTakenCount(L), *SE, + L->getLoopPreheader()); + VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildPlainCFG(); return Plan; } From a7714cb93449c5acf44cc70bb69844bb020951d4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 27 May 2024 22:09:06 -0700 Subject: [PATCH 2/9] !fixup address latest comments, thanks! --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 76 +++++++------------------ llvm/lib/Transforms/Vectorize/VPlan.h | 28 +++------ 2 files changed, 28 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 6846c6219d4fc..089ec88abfaee 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -443,57 +443,15 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { } void VPIRWrapperBlock::execute(VPTransformState *State) { - for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { - VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); - auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); - BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB]; - - assert(PredBB && "Predecessor basic-block not found building successor."); - auto *PredBBTerminator = PredBB->getTerminator(); - LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << 
'\n'); - - auto *TermBr = dyn_cast(PredBBTerminator); - if (TermBr) { - // Set each forward successor here when it is created, excluding - // backedges. A backward successor is set when the branch is created. - unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; - assert(!TermBr->getSuccessor(idx) && - "Trying to reset an existing successor block."); - TermBr->setSuccessor(idx, WrappedBlock); - } - } - - assert(getHierarchicalSuccessors().size() == 0 && + assert(getHierarchicalPredecessors().empty() && + "VPIRWrapperBlock cannot have predecessors at the moment"); + assert(getHierarchicalSuccessors().empty() && "VPIRWrapperBlock cannot have successors"); - State->CFG.VPBB2IRBB[this] = getWrappedBlock(); - State->CFG.PrevVPBB = this; - - auto *Term = cast(getWrappedBlock()->getTerminator()); - State->Builder.SetInsertPoint(Term); - for (VPRecipeBase &Recipe : *this) - Recipe.execute(*State); - - LLVM_DEBUG(dbgs() << "LV: filled BB:" << *getWrappedBlock()); + State->Builder.SetInsertPoint(getWrappedBlock()->getTerminator()); + executeRecipes(State, getWrappedBlock()); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - -void VPIRWrapperBlock::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "ir-bb<" << getName() << ">:\n"; - - auto RecipeIndent = Indent + " "; - for (const VPRecipeBase &Recipe : *this) { - Recipe.print(O, RecipeIndent, SlotTracker); - O << '\n'; - } - assert(getSuccessors().empty() && - "Wrapper blocks should not have successors"); - printSuccessors(O, Indent); -} -#endif - void VPBasicBlock::execute(VPTransformState *State) { bool Replica = State->Instance && !State->Instance->isFirstIteration(); VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; @@ -551,16 +509,7 @@ void VPBasicBlock::execute(VPTransformState *State) { } // 2. Fill the IR basic block with IR instructions. 
- LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() - << " in BB:" << NewBB->getName() << '\n'); - - State->CFG.VPBB2IRBB[this] = NewBB; - State->CFG.PrevVPBB = this; - - for (VPRecipeBase &Recipe : Recipes) - Recipe.execute(*State); - - LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); + executeRecipes(State, NewBB); } void VPBasicBlock::dropAllReferences(VPValue *NewValue) { @@ -573,6 +522,19 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) { } } +void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { + LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() + << " in BB:" << BB->getName() << '\n'); + + State->CFG.VPBB2IRBB[this] = BB; + State->CFG.PrevVPBB = this; + + for (VPRecipeBase &Recipe : Recipes) + Recipe.execute(*State); + + LLVM_DEBUG(dbgs() << "LV: filled BB:" << *BB); +} + VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { assert((SplitAt == end() || SplitAt->getParent() == this) && "can only split at a position in the same block"); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index e61cbdd85c5a5..10b24233b3fe8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2957,6 +2957,9 @@ class VPBasicBlock : public VPBlockBase { return NewBlock; } +protected: + void executeRecipes(VPTransformState *State, BasicBlock *BB); + private: /// Create an IR BasicBlock to hold the output instructions generated by this /// VPBasicBlock, and return it. Update the CFGState accordingly. @@ -2964,14 +2967,16 @@ class VPBasicBlock : public VPBlockBase { }; /// A special type of VPBasicBlock that wraps an existing IR basic block. -/// Recipes of the block get added before the terminator of the wrapped IR basic -/// block. +/// Recipes of the block get added before the first non-phi instruction in the +/// wrapped block. 
class VPIRWrapperBlock : public VPBasicBlock { BasicBlock *WrappedBlock; public: VPIRWrapperBlock(BasicBlock *WrappedBlock) - : VPBasicBlock(VPIRWrapperBlockSC, WrappedBlock->getName()), + : VPBasicBlock( + VPIRWrapperBlockSC, + (Twine("ir-bb<") + WrappedBlock->getName() + Twine(">")).str()), WrappedBlock(WrappedBlock) {} ~VPIRWrapperBlock() override {} @@ -2980,16 +2985,6 @@ class VPIRWrapperBlock : public VPBasicBlock { return V->getVPBlockID() == VPBlockBase::VPIRWrapperBlockSC; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p - /// SlotTracker is used to print unnamed VPValue's using consequtive numbers. - /// - /// Note that the numbering is applied to the whole VPlan, so printing - /// individual blocks is consistent with the whole VPlan printing. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; - using VPBlockBase::print; // Get the print(raw_stream &O) version. -#endif /// The method which generates the output IR instructions that correspond to /// this VPBasicBlock, thereby "executing" the VPlan. void execute(VPTransformState *State) override; @@ -3001,10 +2996,7 @@ class VPIRWrapperBlock : public VPBasicBlock { return NewBlock; } - void dropAllReferences(VPValue *NewValue) override {} - void resetBlock(BasicBlock *N) { WrappedBlock = N; } - - BasicBlock *getWrappedBlock() { return WrappedBlock; } + BasicBlock *getWrappedBlock() const { return WrappedBlock; } }; /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks @@ -3377,8 +3369,6 @@ class VPlanPrinter { /// its successor blocks. void dumpBasicBlock(const VPBasicBlock *BasicBlock); - void dumpIRWrapperBlock(const VPIRWrapperBlock *WrapperBlock); - /// Print a given \p Region of the Plan. 
void dumpRegion(const VPRegionBlock *Region); From f469a3af6fa85e8f2bae818531e68cf20c56cc77 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 May 2024 07:20:39 -0700 Subject: [PATCH 3/9] !fixup VPIRWrapperBlock -> VPIRBasicBlock. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 8 ++++---- llvm/lib/Transforms/Vectorize/VPlan.h | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 089ec88abfaee..52ec89900959a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -442,11 +442,11 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { return NewBB; } -void VPIRWrapperBlock::execute(VPTransformState *State) { +void VPIRBasicBlock::execute(VPTransformState *State) { assert(getHierarchicalPredecessors().empty() && - "VPIRWrapperBlock cannot have predecessors at the moment"); + "VPIRBasicBlock cannot have predecessors at the moment"); assert(getHierarchicalSuccessors().empty() && - "VPIRWrapperBlock cannot have successors"); + "VPIRBasicBlock cannot have successors"); State->Builder.SetInsertPoint(getWrappedBlock()->getTerminator()); executeRecipes(State, getWrappedBlock()); @@ -785,7 +785,7 @@ VPlan::~VPlan() { VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, BasicBlock *PH) { - VPIRWrapperBlock *Preheader = new VPIRWrapperBlock(PH); + VPIRBasicBlock *Preheader = new VPIRBasicBlock(PH); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); auto Plan = std::make_unique(Preheader, VecPreheader); Plan->TripCount = diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 10b24233b3fe8..b53fd385744ce 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -476,7 +476,7 @@ class VPBlockBase { using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC, - VPIRWrapperBlockSC + 
VPIRBasicBlockSC }; using VPBlocksTy = SmallVectorImpl; @@ -2894,7 +2894,7 @@ class VPBasicBlock : public VPBlockBase { /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPBlockBase *V) { return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC || - V->getVPBlockID() == VPBlockBase::VPIRWrapperBlockSC; + V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; } void insert(VPRecipeBase *Recipe, iterator InsertPt) { @@ -2969,28 +2969,28 @@ class VPBasicBlock : public VPBlockBase { /// A special type of VPBasicBlock that wraps an existing IR basic block. /// Recipes of the block get added before the first non-phi instruction in the /// wrapped block. -class VPIRWrapperBlock : public VPBasicBlock { +class VPIRBasicBlock : public VPBasicBlock { BasicBlock *WrappedBlock; public: - VPIRWrapperBlock(BasicBlock *WrappedBlock) + VPIRBasicBlock(BasicBlock *WrappedBlock) : VPBasicBlock( - VPIRWrapperBlockSC, + VPIRBasicBlockSC, (Twine("ir-bb<") + WrappedBlock->getName() + Twine(">")).str()), WrappedBlock(WrappedBlock) {} - ~VPIRWrapperBlock() override {} + ~VPIRBasicBlock() override {} static inline bool classof(const VPBlockBase *V) { - return V->getVPBlockID() == VPBlockBase::VPIRWrapperBlockSC; + return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; } /// The method which generates the output IR instructions that correspond to /// this VPBasicBlock, thereby "executing" the VPlan. 
void execute(VPTransformState *State) override; - VPIRWrapperBlock *clone() override { - auto *NewBlock = new VPIRWrapperBlock(WrappedBlock); + VPIRBasicBlock *clone() override { + auto *NewBlock = new VPIRBasicBlock(WrappedBlock); for (VPRecipeBase &R : *this) NewBlock->appendRecipe(R.clone()); return NewBlock; From 73729d41f120b9620f8df6bc996fc4a2d80e310f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 May 2024 08:23:37 -0700 Subject: [PATCH 4/9] !fixup fix formatting --- llvm/lib/Transforms/Vectorize/VPlan.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b53fd385744ce..df165967faab3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -473,11 +473,7 @@ class VPBlockBase { /// that are actually instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPBlockBase objects. They are used for concrete /// type identification. - using VPBlockTy = enum { - VPBasicBlockSC, - VPRegionBlockSC, - VPIRBasicBlockSC - }; + using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC, VPIRBasicBlockSC }; using VPBlocksTy = SmallVectorImpl; From 872f0b53797047a203d06a6a1e80513722ca6207 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 May 2024 21:52:58 -0700 Subject: [PATCH 5/9] !fixup update tests. 
--- .../AArch64/sve-tail-folding-forced.ll | 2 +- .../RISCV/riscv-vector-reverse.ll | 4 ++-- ...-order-recurrence-sink-replicate-region.ll | 2 +- .../interleave-and-scalarize-only.ll | 4 ++-- .../LoopVectorize/vplan-dot-printing.ll | 2 +- .../LoopVectorize/vplan-printing.ll | 4 ++-- .../vplan-sink-scalars-and-merge.ll | 20 +++++++++---------- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index cc72dfa4ce639..1c2bf7f2ca10b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -13,7 +13,7 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; VPLANS-NEXT: vp<[[TC:%[0-9]+]]> = original trip-count ; VPLANS-EMPTY: -; VPLANS-NEXT: ph: +; VPLANS-NEXT: ir-bb: ; VPLANS-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 umax %n) ; VPLANS-NEXT: No successors ; VPLANS-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index c3374fceb1fb5..b5aa96eb23f5e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -54,7 +54,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count -; CHECK: ph: +; CHECK: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK: vector.ph: @@ -195,7 +195,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in 
vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count -; CHECK: ph: +; CHECK: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 833d55f09294e..116d0f65c235f 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -350,7 +350,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax (1 + (sext i8 %y to i32))) ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 81cc2024bb31a..078d6ca35ba11 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -9,7 +9,7 @@ ; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count ; DBG-EMPTY: -; DBG-NEXT: ph: +; DBG-NEXT: ir-bb: ; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1000 + (-1 * %start)) ; DBG-NEXT: No successors ; DBG-EMPTY: @@ -179,7 +179,7 @@ exit: ; DBG-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count ; DBG-EMPTY: -; DBG-NEXT: ph: +; DBG-NEXT: ir-bb: ; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 (1 smax %n) to i64) ; DBG-NEXT: No successors ; DBG-EMPTY: diff --git 
a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index 12b8e657aabf1..d872fb187a3bc 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -13,7 +13,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: edge [fontname=Courier, fontsize=30] ; CHECK-NEXT: compound=true ; CHECK-NEXT: N0 [label = -; CHECK-NEXT: "ph:\l" + +; CHECK-NEXT: "ir-bb\:\l" + ; CHECK-NEXT: "No successors\l" ; CHECK-NEXT: ] ; CHECK-NEXT: N1 [label = diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index c84191665d949..dd7735584737b 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -215,7 +215,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax %n) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -507,7 +507,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + ((15 + (%y /u 492802768830814060)) /u (1 + (%y /u 492802768830814060)))) ; CHECK-NEXT: EMIT vp<[[EXP_SCEV:%.+]]> = EXPAND SCEV (1 + (%y /u 492802768830814060)) ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index ae5879bb2bae9..41bb3ca8694fa 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ 
b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -16,7 +16,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -85,7 +85,7 @@ exit: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -169,7 +169,7 @@ exit: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -326,7 +326,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -423,7 +423,7 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -529,7 +529,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; 
CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -635,7 +635,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -740,7 +740,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -807,7 +807,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -1094,7 +1094,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ph: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %end to i64)) + (ptrtoint ptr %start to i64)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: From c69dde045e80f3e70d929e5a6631af0910f448ee Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 29 May 2024 07:48:56 -0700 Subject: [PATCH 6/9] !fixup address comments, thanks! 
--- llvm/lib/Transforms/Vectorize/VPlan.cpp | 6 +++--- llvm/lib/Transforms/Vectorize/VPlan.h | 23 +++++++++++------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 52ec89900959a..f17be451e6846 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -446,10 +446,10 @@ void VPIRBasicBlock::execute(VPTransformState *State) { assert(getHierarchicalPredecessors().empty() && "VPIRBasicBlock cannot have predecessors at the moment"); assert(getHierarchicalSuccessors().empty() && - "VPIRBasicBlock cannot have successors"); + "VPIRBasicBlock cannot have successors at the moment"); - State->Builder.SetInsertPoint(getWrappedBlock()->getTerminator()); - executeRecipes(State, getWrappedBlock()); + State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator()); + executeRecipes(State, getIRBasicBlock()); } void VPBasicBlock::execute(VPTransformState *State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index df165967faab3..37d58fc746676 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -473,7 +473,7 @@ class VPBlockBase { /// that are actually instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPBlockBase objects. They are used for concrete /// type identification. - using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC, VPIRBasicBlockSC }; + using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC }; using VPBlocksTy = SmallVectorImpl; @@ -2833,11 +2833,10 @@ class VPBasicBlock : public VPBlockBase { public: using RecipeListTy = iplist; -private: +protected: /// The VPRecipes held in the order of output instructions to generate. 
RecipeListTy Recipes; -protected: VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "") : VPBlockBase(BlockSC, Name.str()) {} @@ -2954,6 +2953,7 @@ class VPBasicBlock : public VPBlockBase { } protected: + /// Execute the recipes in the IR basic block \p BB. void executeRecipes(VPTransformState *State, BasicBlock *BB); private: @@ -2966,14 +2966,13 @@ class VPBasicBlock : public VPBlockBase { /// Recipes of the block get added before the first non-phi instruction in the /// wrapped block. class VPIRBasicBlock : public VPBasicBlock { - BasicBlock *WrappedBlock; + BasicBlock *IRBB; public: - VPIRBasicBlock(BasicBlock *WrappedBlock) - : VPBasicBlock( - VPIRBasicBlockSC, - (Twine("ir-bb<") + WrappedBlock->getName() + Twine(">")).str()), - WrappedBlock(WrappedBlock) {} + VPIRBasicBlock(BasicBlock *IRBB) + : VPBasicBlock(VPIRBasicBlockSC, + (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()), + IRBB(IRBB) {} ~VPIRBasicBlock() override {} @@ -2986,13 +2985,13 @@ class VPIRBasicBlock : public VPBasicBlock { void execute(VPTransformState *State) override; VPIRBasicBlock *clone() override { - auto *NewBlock = new VPIRBasicBlock(WrappedBlock); - for (VPRecipeBase &R : *this) + auto *NewBlock = new VPIRBasicBlock(IRBB); + for (VPRecipeBase &R : Recipes) NewBlock->appendRecipe(R.clone()); return NewBlock; } - BasicBlock *getWrappedBlock() const { return WrappedBlock; } + BasicBlock *getIRBasicBlock() const { return IRBB; } }; /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks From b4381ff42e9188732e1e5413347efc0e7b783d2f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 29 May 2024 21:07:55 -0700 Subject: [PATCH 7/9] !fixup verify the same BB is wrapped only once per VPlan. 
--- llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 9 +++++++++ llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 2fe487f972bb9..888e863abc28c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -17,6 +17,7 @@ #include "VPlanCFG.h" #include "VPlanDominatorTree.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CommandLine.h" #define DEBUG_TYPE "loop-vectorize" @@ -27,6 +28,8 @@ namespace { class VPlanVerifier { const VPDominatorTree &VPDT; + SmallPtrSet WrappedIRBBs; + // Verify that phi-like recipes are at the beginning of \p VPBB, with no // other recipes in between. Also check that only header blocks contain // VPHeaderPHIRecipes. @@ -148,6 +151,12 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { } } } + + auto *IRBB = dyn_cast(VPBB); + if (IRBB && !WrappedIRBBs.insert(IRBB->getIRBasicBlock()).second) { + errs() << "Same IR basic block used by multiple wrapper blocks!\n"; + return false; + } return true; } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index 2b25c62ac2f65..9ab35b74c0b68 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -106,7 +106,7 @@ node [shape=rect, fontname=Courier, fontsize=30] edge [fontname=Courier, fontsize=30] compound=true N0 [label = - "ph:\l" + + "ir-bb\:\l" + " EMIT vp\<%1\> = EXPAND SCEV (-1 + %N)\l" + "No successors\l" ] From 9ef075a2726dbe058ba92bcb47d4df998f602280 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 30 May 2024 07:16:42 -0700 Subject: [PATCH 8/9] !fixup restrict to VPlan preheader only. 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 6 +++--- .../Transforms/Vectorize/VPlanVerifier.cpp | 9 ++++++++- .../AArch64/sve-tail-folding-forced.ll | 2 +- .../RISCV/riscv-vector-reverse.ll | 4 ++-- ...-order-recurrence-sink-replicate-region.ll | 2 +- .../interleave-and-scalarize-only.ll | 4 ++-- .../LoopVectorize/vplan-dot-printing.ll | 2 +- .../vplan-printing-before-execute.ll | 4 ++-- .../LoopVectorize/vplan-printing.ll | 4 ++-- .../vplan-sink-scalars-and-merge.ll | 20 +++++++++---------- 10 files changed, 32 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 37d58fc746676..8734389ceb058 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2965,14 +2965,14 @@ class VPBasicBlock : public VPBlockBase { /// A special type of VPBasicBlock that wraps an existing IR basic block. /// Recipes of the block get added before the first non-phi instruction in the /// wrapped block. +/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's +/// preheader block. 
class VPIRBasicBlock : public VPBasicBlock { BasicBlock *IRBB; public: VPIRBasicBlock(BasicBlock *IRBB) - : VPBasicBlock(VPIRBasicBlockSC, - (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()), - IRBB(IRBB) {} + : VPBasicBlock(VPIRBasicBlockSC, "ph"), IRBB(IRBB) {} ~VPIRBasicBlock() override {} diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 888e863abc28c..56c82b13ccdef 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -153,10 +153,17 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { } auto *IRBB = dyn_cast(VPBB); - if (IRBB && !WrappedIRBBs.insert(IRBB->getIRBasicBlock()).second) { + if (!IRBB) + return true; + + if (!WrappedIRBBs.insert(IRBB->getIRBasicBlock()).second) { errs() << "Same IR basic block used by multiple wrapper blocks!\n"; return false; } + if (IRBB != IRBB->getPlan()->getPreheader()) { + errs() << "VPIRBasicBlock can only be used as pre-header at the moment!\n"; + return false; + } return true; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 1c2bf7f2ca10b..cc72dfa4ce639 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -13,7 +13,7 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; VPLANS-NEXT: vp<[[TC:%[0-9]+]]> = original trip-count ; VPLANS-EMPTY: -; VPLANS-NEXT: ir-bb: +; VPLANS-NEXT: ph: ; VPLANS-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 umax %n) ; VPLANS-NEXT: No successors ; VPLANS-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index b5aa96eb23f5e..c3374fceb1fb5 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -54,7 +54,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count -; CHECK: ir-bb: +; CHECK: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK: vector.ph: @@ -195,7 +195,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count -; CHECK: ir-bb: +; CHECK: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 116d0f65c235f..833d55f09294e 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -350,7 +350,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax (1 + (sext i8 %y to i32))) ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 078d6ca35ba11..81cc2024bb31a 100644 --- 
a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -9,7 +9,7 @@ ; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count ; DBG-EMPTY: -; DBG-NEXT: ir-bb: +; DBG-NEXT: ph: ; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1000 + (-1 * %start)) ; DBG-NEXT: No successors ; DBG-EMPTY: @@ -179,7 +179,7 @@ exit: ; DBG-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count ; DBG-EMPTY: -; DBG-NEXT: ir-bb: +; DBG-NEXT: ph: ; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 (1 smax %n) to i64) ; DBG-NEXT: No successors ; DBG-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index d872fb187a3bc..12b8e657aabf1 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -13,7 +13,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: edge [fontname=Courier, fontsize=30] ; CHECK-NEXT: compound=true ; CHECK-NEXT: N0 [label = -; CHECK-NEXT: "ir-bb\:\l" + +; CHECK-NEXT: "ph:\l" + ; CHECK-NEXT: "No successors\l" ; CHECK-NEXT: ] ; CHECK-NEXT: N1 [label = diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 2bb3c898c7cda..ca9dfdc6f6d29 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -13,7 +13,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to 
i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -45,7 +45,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index dd7735584737b..c84191665d949 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -215,7 +215,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax %n) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -507,7 +507,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + ((15 + (%y /u 492802768830814060)) /u (1 + (%y /u 492802768830814060)))) ; CHECK-NEXT: EMIT vp<[[EXP_SCEV:%.+]]> = EXPAND SCEV (1 + (%y /u 492802768830814060)) ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 41bb3ca8694fa..ae5879bb2bae9 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -16,7 +16,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count 
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -85,7 +85,7 @@ exit: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -169,7 +169,7 @@ exit: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -326,7 +326,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -423,7 +423,7 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -529,7 +529,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -635,7 +635,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; 
CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -740,7 +740,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -807,7 +807,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -1094,7 +1094,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: +; CHECK-NEXT: ph: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %end to i64)) + (ptrtoint ptr %start to i64)) ; CHECK-NEXT: No successors ; CHECK-EMPTY: From 1134c38ba1fe3304285a28089944be387fcc2ff2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 30 May 2024 09:20:36 -0700 Subject: [PATCH 9/9] !fixup also undo unit test change. 
--- llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index 9ab35b74c0b68..2b25c62ac2f65 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -106,7 +106,7 @@ node [shape=rect, fontname=Courier, fontsize=30] edge [fontname=Courier, fontsize=30] compound=true N0 [label = - "ir-bb\:\l" + + "ph:\l" + " EMIT vp\<%1\> = EXPAND SCEV (-1 + %N)\l" + "No successors\l" ]