From d5ba9a38d09e021d9acfb67bf6a2395ce37fc9b4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 1 Mar 2025 21:07:00 +0000 Subject: [PATCH 1/8] [VPlan] Introduce child regions as VPlan transform. Further simplify VPlan CFG builder by moving introduction of inner regions to a VPlan transform, building on https://github.com/llvm/llvm-project/pull/128419 The HCFG builder now only constructs plain CFGs. I will move it to VPlanConstruction as follow-up. --- .../Transforms/Vectorize/LoopVectorize.cpp | 6 +- .../Vectorize/VPlanConstruction.cpp | 69 +++++++---- .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 112 +++--------------- .../Transforms/Vectorize/VPlanHCFGBuilder.h | 8 +- .../vplan-printing-outer-loop.ll | 21 ++-- .../LoopVectorize/vplan_hcfg_stress_test.ll | 2 +- .../Transforms/Vectorize/VPlanTestBase.h | 2 +- 7 files changed, 79 insertions(+), 141 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3fc5e716e3757..54d727e38b633 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9308,10 +9308,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { Range); auto Plan = std::make_unique(OrigLoop); // Build hierarchical CFG. - // Convert to VPlan-transform and consoliate all transforms for VPlan + // TODO: Convert to VPlan-transform and consoliate all transforms for VPlan // creation. VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); - HCFGBuilder.buildHierarchicalCFG(); + HCFGBuilder.buildPlainCFG(); VPlanTransforms::introduceTopLevelVectorLoopRegion( *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, @@ -9615,7 +9615,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { auto Plan = std::make_unique(OrigLoop); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); - HCFGBuilder.buildHierarchicalCFG(); + HCFGBuilder.buildPlainCFG(); VPlanTransforms::introduceTopLevelVectorLoopRegion( *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop); diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index f58f0290b5fa9..17a758682905b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -14,26 +14,57 @@ #include "LoopVectorizationPlanner.h" #include "VPlan.h" #include "VPlanCFG.h" +#include "VPlanDominatorTree.h" #include "VPlanTransforms.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" using namespace llvm; +/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if +/// it is a header of a loop. +static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB, + VPDominatorTree &VPDT) { + if (HeaderVPBB->getNumPredecessors() != 2) + return nullptr; + VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0]; + VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1]; + if (!VPDT.dominates(HeaderVPBB, LatchVPBB)) + return nullptr; + assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) && + "preheader must dominate header"); + VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); + VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB); + VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); + if (Succ) + VPBlockUtils::disconnectBlocks(LatchVPBB, Succ); + + auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "", + false /*isReplicator*/); + // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop, + // because VPlan is expected to end at top level latch. + for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) + VPBB->setParent(R); + + VPBlockUtils::insertBlockAfter(R, PreheaderVPBB); + if (Succ) + VPBlockUtils::connectBlocks(R, Succ); + return R; +} + void VPlanTransforms::introduceTopLevelVectorLoopRegion( VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { - // TODO: Generalize to introduce all loop regions. - auto *HeaderVPBB = cast(Plan.getEntry()->getSingleSuccessor()); - VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB); + VPDominatorTree VPDT; + VPDT.recalculate(Plan); - VPBasicBlock *OriginalLatch = - cast(HeaderVPBB->getSinglePredecessor()); - VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB); - VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph"); - VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader); - assert(OriginalLatch->getNumSuccessors() == 0 && - "Plan should end at top level latch"); + auto *HeaderVPBB = cast(Plan.getEntry()->getSingleSuccessor()); + VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT); + auto *OrigExiting = TopRegion->getExiting(); + VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch"); + VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting); + TopRegion->setExiting(LatchVPBB); + TopRegion->setName("vector loop"); // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which works also when @@ -47,18 +78,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion( Plan.setTripCount( vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); - // Create VPRegionBlock, with existing header and new empty latch block, to be - // filled. - VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch"); - VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch); - auto *TopRegion = Plan.createVPRegionBlock( - HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/); - // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop, - // because VPlan is expected to end at top level latch. - for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) - VPBB->setParent(TopRegion); + VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph"); + VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry()); - VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); @@ -98,4 +120,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion( ScalarLatchTerm->getDebugLoc(), "cmp.n"); Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, ScalarLatchTerm->getDebugLoc()); + + for (VPBlockBase *HeaderVPBB : + vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) { + introduceRegion(Plan, HeaderVPBB, VPDT); + } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 4b8a2420b3037..4e06ce86caad7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -12,9 +12,7 @@ /// components and steps: // /// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that -/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top -/// Region) is created to enclose and serve as parent of all the VPBasicBlocks -/// in the plain CFG. +/// faithfully represents the CFG in the incoming IR. /// NOTE: At this point, there is a direct correspondence between all the /// VPBasicBlocks created for the initial plain CFG and the incoming /// BasicBlocks. However, this might change in the future. @@ -57,12 +55,8 @@ class PlainCFGBuilder { // Hold phi node's that need to be fixed once the plain CFG has been built. SmallVector PhisToFix; - /// Maps loops in the original IR to their corresponding region. - DenseMap Loop2Region; - // Utility functions. void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); - void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB); void fixHeaderPhis(); VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); #ifndef NDEBUG @@ -83,25 +77,6 @@ class PlainCFGBuilder { // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB // must have no predecessors. void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { - auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * { - auto *SinglePred = BB->getSinglePredecessor(); - Loop *LoopForBB = LI->getLoopFor(BB); - if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB) - return nullptr; - // The input IR must be in loop-simplify form, ensuring a single predecessor - // for exit blocks. - assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() && - "SinglePred must be the only loop latch"); - return SinglePred; - }; - if (auto *LatchBB = GetLatchOfExit(BB)) { - auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent(); - assert(VPBB == cast(PredRegion->getSingleSuccessor()) && - "successor must already be set for PredRegion; it must have VPBB " - "as single successor"); - VPBB->setPredecessors({PredRegion}); - return; - } // Collect VPBB predecessors. SmallVector VPBBPreds; for (BasicBlock *Pred : predecessors(BB)) @@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) { return L && BB == L->getHeader(); } -void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region, - BasicBlock *BB) { - // BB is a loop header block. Connect the region to the loop preheader. - Loop *LoopOfBB = LI->getLoopFor(BB); - Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())}); -} - // Add operands to VPInstructions representing phi nodes from the input IR. void PlainCFGBuilder::fixHeaderPhis() { for (auto *Phi : PhisToFix) { @@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) { return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB; } -/// Return true of \p L loop is contained within \p OuterLoop. -static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) { - if (L->getLoopDepth() < OuterLoop->getLoopDepth()) - return false; - const Loop *P = L; - while (P) { - if (P == OuterLoop) - return true; - P = P->getParentLoop(); - } - return false; -} - // Create a new empty VPBasicBlock for an incoming BasicBlock in the region // corresponding to the containing loop or retrieve an existing one if it was // already created. If no region exists yet for the loop containing \p BB, a new @@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n"); VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name); BB2VPBB[BB] = VPBB; - - // Get or create a region for the loop containing BB, except for the top - // region of TheLoop which is created later. - Loop *LoopOfBB = LI->getLoopFor(BB); - if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop)) - return VPBB; - - auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB); - if (!isHeaderBB(BB, LoopOfBB)) { - assert(RegionOfVPBB && - "Region should have been created by visiting header earlier"); - VPBB->setParent(RegionOfVPBB); - return VPBB; - } - - assert(!RegionOfVPBB && - "First visit of a header basic block expects to register its region."); - // Handle a header - take care of its Region. - RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/); - RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); - RegionOfVPBB->setEntry(VPBB); - Loop2Region[LoopOfBB] = RegionOfVPBB; return VPBB; } @@ -351,6 +284,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG( DenseMap &VPB2IRBB) { + VPIRBasicBlock *Entry = cast(Plan.getEntry()); + BB2VPBB[Entry->getIRBasicBlock()] = Entry; // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for @@ -376,15 +311,13 @@ void PlainCFGBuilder::buildPlainCFG( for (BasicBlock *BB : RPO) { // Create or retrieve the VPBasicBlock for this BB. VPBasicBlock *VPBB = getOrCreateVPBB(BB); - VPRegionBlock *Region = VPBB->getParent(); Loop *LoopForBB = LI->getLoopFor(BB); // Set VPBB predecessors in the same order as they are in the incoming BB. if (!isHeaderBB(BB, LoopForBB)) { setVPBBPredsFromBB(VPBB, BB); - } else if (Region) { - // BB is a loop header and there's a corresponding region, set the - // predecessor for it. - setRegionPredsFromBB(Region, BB); + } else { + VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()), + getOrCreateVPBB(LoopForBB->getLoopLatch())}); } // Create VPInstructions for BB. @@ -392,7 +325,7 @@ void PlainCFGBuilder::buildPlainCFG( if (BB == TheLoop->getLoopLatch()) { VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader()); - VPBlockUtils::connectBlocks(VPBB, HeaderVPBB); + VPBB->setOneSuccessor(HeaderVPBB); continue; } @@ -423,21 +356,11 @@ void PlainCFGBuilder::buildPlainCFG( BasicBlock *IRSucc1 = BI->getSuccessor(1); VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0); VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1); - if (BB == LoopForBB->getLoopLatch()) { - // For a latch we need to set the successor of the region rather than that - // of VPBB and it should be set to the exit, i.e., non-header successor, - // except for the top region, which is handled elsewhere. - assert(LoopForBB != TheLoop && - "Latch of the top region should have been handled earlier"); - Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 - : Successor0); - Region->setExiting(VPBB); - continue; - } - // Don't connect any blocks outside the current loop except the latch for - // now. The latch is handled above. - if (LoopForBB) { + // Don't connect any blocks outside the current loop except the latch, which + // is handled below. + if (LoopForBB && + (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) { if (!LoopForBB->contains(IRSucc0)) { VPBB->setOneSuccessor(Successor1); continue; @@ -456,21 +379,16 @@ void PlainCFGBuilder::buildPlainCFG( // corresponding VPlan operands. fixHeaderPhis(); - VPBlockUtils::connectBlocks(Plan.getEntry(), - getOrCreateVPBB(TheLoop->getHeader())); + Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader())); + Plan.getEntry()->setPlan(&Plan); for (const auto &[IRBB, VPB] : BB2VPBB) VPB2IRBB[VPB] = IRBB; + + LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan); } void VPlanHCFGBuilder::buildPlainCFG() { PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); PCFGBuilder.buildPlainCFG(VPB2IRBB); } - -// Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG() { - // Build Top Region enclosing the plain CFG. - buildPlainCFG(); - LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); -} diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index f7f98ed7b1755..f2e90d3f4d9b3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -30,7 +30,6 @@ namespace llvm { class Loop; class LoopInfo; -class VPRegionBlock; class VPlan; class VPlanTestIRBase; class VPBlockBase; @@ -54,15 +53,12 @@ class VPlanHCFGBuilder { /// created for a input IR basic block. DenseMap VPB2IRBB; - /// Build plain CFG for TheLoop and connects it to Plan's entry. - void buildPlainCFG(); - public: VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) : TheLoop(Lp), LI(LI), Plan(P) {} - /// Build H-CFG for TheLoop and update Plan accordingly. - void buildHierarchicalCFG(); + /// Build plain CFG for TheLoop and connects it to Plan's entry. + void buildPlainCFG(); /// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if /// there is no such corresponding block. diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 625a32c098f94..b4b6d3d760349 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -6,7 +6,7 @@ @arr = external global [8 x [8 x i64]], align 16 define void @foo(i64 %n) { -; CHECK: VPlan 'HCFGBuilder: Plain CFG +; CHECK: VPlan 'Plain CFG ; CHECK-NEXT: { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -19,17 +19,14 @@ define void @foo(i64 %n) { ; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> ; CHECK-NEXT: Successor(s): inner ; CHECK-EMPTY: -; CHECK-NEXT: inner: { -; CHECK-NEXT: inner: -; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next> -; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> -; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> -; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> -; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): outer.latch +; CHECK-NEXT: inner: +; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next> +; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> +; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> +; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> +; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> +; CHECK-NEXT: Successor(s): outer.latch, inner ; CHECK-EMPTY: ; CHECK-NEXT: outer.latch: ; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll index 89eaca0cfa8c8..29aeb7c4e97f9 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll @@ -4,7 +4,7 @@ ; Verify that the stress testing flag for the VPlan H-CFG builder works as ; expected with and without enabling the VPlan H-CFG Verifier. -; CHECK: VPlan 'HCFGBuilder: Plain CFG +; CHECK: VPlan 'Plain CFG target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index caf5d2357411d..92961e44c5e54 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test { PredicatedScalarEvolution PSE(*SE, *L); auto Plan = std::make_unique(L); VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); - HCFGBuilder.buildHierarchicalCFG(); + HCFGBuilder.buildPlainCFG(); VPlanTransforms::introduceTopLevelVectorLoopRegion( *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L); return Plan; From a88f03e42b57cdf42bec4a16e06fa78f06eb1b3e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Apr 2025 13:51:30 +0100 Subject: [PATCH 2/8] !fixup address comments, thanks! --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 11 +++++++ llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++++ .../Vectorize/VPlanConstruction.cpp | 32 ++++++++----------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 1e2f70e5c103e..759f1b7091037 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -648,6 +648,17 @@ bool VPBasicBlock::isExiting() const { return getParent() && getParent()->getExitingBasicBlock() == this; } +bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const { + if (getNumPredecessors() != 2) + return false; + VPBlockBase *LatchVPBB = getPredecessors()[1]; + if (!VPDT.dominates(this, LatchVPBB)) + return false; + assert(VPDT.dominates(getPredecessors()[0], this) && + "preheader must dominate header"); + return true; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPBlockBase::print(raw_ostream &O) const { VPSlotTracker SlotTracker(getPlan()); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 50baf220a1002..4f42c595df0f0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -57,6 +57,7 @@ class SCEV; class Type; class VPBasicBlock; class VPBuilder; +class VPDominatorTree; class VPRegionBlock; class VPlan; class VPLane; @@ -3251,6 +3252,11 @@ class VPBasicBlock : public VPBlockBase { /// Returns true if the block is exiting it's parent region. bool isExiting() const; + /// Returns true if the block is a loop header block in the plain CFG; that + /// is, it has exactly 2 predecessors (preheader and latch), where the block + /// dominates the latch and the preheader dominates the block. + bool isHeader(const VPDominatorTree &VPDT) const; + /// Clone the current block and it's recipes, without updating the operands of /// the cloned recipes. VPBasicBlock *clone() override; diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 17a758682905b..0bea53c423760 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -21,28 +21,24 @@ using namespace llvm; -/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if -/// it is a header of a loop. -static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB, - VPDominatorTree &VPDT) { - if (HeaderVPBB->getNumPredecessors() != 2) - return nullptr; +/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB and +/// return it. +static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) { VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0]; VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1]; - if (!VPDT.dominates(HeaderVPBB, LatchVPBB)) - return nullptr; - assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) && - "preheader must dominate header"); VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB); VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); + assert(LatchVPBB->getNumSuccessors() <= 1 && + "Latch has more than one successor"); if (Succ) VPBlockUtils::disconnectBlocks(LatchVPBB, Succ); auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "", false /*isReplicator*/); + R->setParent(HeaderVPBB->getParent()); // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop, - // because VPlan is expected to end at top level latch. + // because VPlan is expected to end at top level latch disconnected above. for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) VPBB->setParent(R); @@ -57,9 +53,14 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion( bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { VPDominatorTree VPDT; VPDT.recalculate(Plan); + for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly( + vp_depth_first_shallow(Plan.getEntry()))) { + if (!HeaderVPBB->isHeader(VPDT)) + continue; + introduceRegion(Plan, HeaderVPBB); + } - auto *HeaderVPBB = cast(Plan.getEntry()->getSingleSuccessor()); - VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT); + VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); auto *OrigExiting = TopRegion->getExiting(); VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting); @@ -120,9 +121,4 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion( ScalarLatchTerm->getDebugLoc(), "cmp.n"); Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, ScalarLatchTerm->getDebugLoc()); - - for (VPBlockBase *HeaderVPBB : - vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) { - introduceRegion(Plan, HeaderVPBB, VPDT); - } } From 29f6ddffd22ed109bac5f94532390c9c77e2d7a7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Apr 2025 16:21:13 +0100 Subject: [PATCH 3/8] !fixup adjust names and comments after recent changes. --- .../Transforms/Vectorize/LoopVectorize.cpp | 10 +++++----- .../Vectorize/VPlanConstruction.cpp | 7 ++++--- .../Transforms/Vectorize/VPlanTransforms.h | 19 +++++++++---------- .../Transforms/Vectorize/VPlanTestBase.h | 4 ++-- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8ec9c003e841b..732cec7db81cb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9332,9 +9332,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceTopLevelVectorLoopRegion( - *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, - CM.foldTailByMasking(), OrigLoop); + VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE, + RequiresScalarEpilogueCheck, + CM.foldTailByMasking(), OrigLoop); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split @@ -9636,8 +9636,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceTopLevelVectorLoopRegion( - *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop); + VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE, + true, false, OrigLoop); for (ElementCount VF : Range) Plan->addVF(VF); diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 0bea53c423760..876333d045f84 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -48,9 +48,10 @@ static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) { return R; } -void VPlanTransforms::introduceTopLevelVectorLoopRegion( - VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { +void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy, + PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop) { VPDominatorTree VPDT; VPDT.recalculate(Plan); for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly( diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index c23ff38265670..ed8b7a08ea187 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -52,20 +52,19 @@ struct VPlanTransforms { verifyVPlanIsValid(Plan); } - /// Introduce the top-level VPRegionBlock for the main loop in \p Plan. Coming - /// into this function, \p Plan's top-level loop is modeled using a plain CFG. - /// This transform wraps the plain CFG of the top-level loop within a - /// VPRegionBlock and creates a VPValue expression for the original trip - /// count. It will also introduce a dedicated VPBasicBlock for the vector - /// pre-header as well a VPBasicBlock as exit block of the region - /// (middle.block). If a check is needed to guard executing the scalar + /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turing \p Plan's + /// flat CFG into a hierarchical CFG. It also creates a VPValue expression for + /// the original trip count. It will also introduce a dedicated VPBasicBlock + /// for the vector pre-header as well a VPBasicBlock as exit block of the + /// region (middle.block). If a check is needed to guard executing the scalar /// epilogue loop, it will be added to the middle block, together with /// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is /// the type of the canonical induction and used for related values, like the /// trip count expression. - static void introduceTopLevelVectorLoopRegion( - VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop); + static void introduceRegions(VPlan &Plan, Type *InductionTy, + PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop); /// Replaces the VPInstructions in \p Plan with corresponding /// widen recipes. Returns false if any VPInstructions could not be converted diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index 92961e44c5e54..f2d3d37b40ba9 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -74,8 +74,8 @@ class VPlanTestIRBase : public testing::Test { auto Plan = std::make_unique(L); VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceTopLevelVectorLoopRegion( - *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L); + VPlanTransforms::introduceRegions(*Plan, IntegerType::get(*Ctx, 64), PSE, + true, false, L); return Plan; } }; From cc818012a9683fd4d1fe4dceb3a5e2fb32d91a44 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 5 Apr 2025 11:39:30 +0100 Subject: [PATCH 4/8] !fixup don't special case header/latch predecessor order. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 23 +++++++++++-------- llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++++---- .../Vectorize/VPlanConstruction.cpp | 12 ++++++---- .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 13 +---------- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 759f1b7091037..c97edcf3ecd3a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -648,15 +648,20 @@ bool VPBasicBlock::isExiting() const { return getParent() && getParent()->getExitingBasicBlock() == this; } -bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const { - if (getNumPredecessors() != 2) - return false; - VPBlockBase *LatchVPBB = getPredecessors()[1]; - if (!VPDT.dominates(this, LatchVPBB)) - return false; - assert(VPDT.dominates(getPredecessors()[0], this) && - "preheader must dominate header"); - return true; +std::optional> +VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const { + ArrayRef Preds = getPredecessors(); + if (Preds.size() != 2) + return std::nullopt; + + for (unsigned Idx : {0, 1}) { + auto *PreheaderVPBB = cast(Preds[Idx]); + auto *LatchVPBB = cast(Preds[1 - Idx]); + if (VPDT.dominates(PreheaderVPBB, this) && VPDT.dominates(this, LatchVPBB)) + return {std::make_pair(PreheaderVPBB, LatchVPBB)}; + } + + return std::nullopt; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5715c17a94b72..814990f6ea00b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3255,10 +3255,13 @@ class VPBasicBlock : public VPBlockBase { /// Returns true if the block is exiting it's parent region. bool isExiting() const; - /// Returns true if the block is a loop header block in the plain CFG; that - /// is, it has exactly 2 predecessors (preheader and latch), where the block - /// dominates the latch and the preheader dominates the block. - bool isHeader(const VPDominatorTree &VPDT) const; + /// Checks if the block is a loop header block in the plain CFG; that is, it + /// has exactly 2 predecessors (preheader and latch), where the block + /// dominates the latch and the preheader dominates the block. If it is a + /// header block, returns a pair with the corresponding preheader and latch + /// blocks. Otherwise return std::nullopt. + std::optional> + isHeader(const VPDominatorTree &VPDT) const; /// Clone the current block and it's recipes, without updating the operands of /// the cloned recipes. diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 876333d045f84..fb6f681497fa6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -23,9 +23,9 @@ using namespace llvm; /// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB and /// return it. -static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) { - VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0]; - VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1]; +static VPRegionBlock *introduceRegion(VPlan &Plan, VPBasicBlock *PreheaderVPBB, + VPBasicBlock *HeaderVPBB, + VPBasicBlock *LatchVPBB) { VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB); VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); @@ -56,9 +56,11 @@ void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy, VPDT.recalculate(Plan); for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(Plan.getEntry()))) { - if (!HeaderVPBB->isHeader(VPDT)) + auto Res = HeaderVPBB->isHeader(VPDT); + if (!Res) continue; - introduceRegion(Plan, HeaderVPBB); + const auto &[PreheaderVPBB, LatchVPBB] = *Res; + introduceRegion(Plan, PreheaderVPBB, HeaderVPBB, LatchVPBB); } VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 4e06ce86caad7..6aa181c5a0fd6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -313,22 +313,11 @@ void PlainCFGBuilder::buildPlainCFG( VPBasicBlock *VPBB = getOrCreateVPBB(BB); Loop *LoopForBB = LI->getLoopFor(BB); // Set VPBB predecessors in the same order as they are in the incoming BB. - if (!isHeaderBB(BB, LoopForBB)) { - setVPBBPredsFromBB(VPBB, BB); - } else { - VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()), - getOrCreateVPBB(LoopForBB->getLoopLatch())}); - } + setVPBBPredsFromBB(VPBB, BB); // Create VPInstructions for BB. createVPInstructionsForVPBB(VPBB, BB); - if (BB == TheLoop->getLoopLatch()) { - VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader()); - VPBB->setOneSuccessor(HeaderVPBB); - continue; - } - // Set VPBB successors. We create empty VPBBs for successors if they don't // exist already. Recipes will be created when the successor is visited // during the RPO traversal. From 2a8344e4435a090afa19368dad3859921ee1bb1f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 5 Apr 2025 11:43:14 +0100 Subject: [PATCH 5/8] !fixup adjust comment --- llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 6aa181c5a0fd6..1dc16d66750ae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -118,10 +118,8 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) { return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB; } -// Create a new empty VPBasicBlock for an incoming BasicBlock in the region -// corresponding to the containing loop or retrieve an existing one if it was -// already created. If no region exists yet for the loop containing \p BB, a new -// one is created. +// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an +// existing one if it was already created. VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { if (auto *VPBB = BB2VPBB.lookup(BB)) { // Retrieve existing VPBB. From 3b1618e85aebd63292d18c9e6a1a6833e72b117f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 7 Apr 2025 13:13:41 +0100 Subject: [PATCH 6/8] !fixup address latest comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 10 +-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 16 ---- llvm/lib/Transforms/Vectorize/VPlan.h | 8 -- .../Vectorize/VPlanConstruction.cpp | 80 +++++++++++++------ .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 17 ++-- .../Transforms/Vectorize/VPlanTransforms.h | 8 +- .../vplan-printing-outer-loop.ll | 4 +- .../Transforms/Vectorize/VPlanTestBase.h | 4 +- 8 files changed, 75 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7d8acbf277f13..3f259ecb90ced 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9317,9 +9317,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE, - RequiresScalarEpilogueCheck, - CM.foldTailByMasking(), OrigLoop); + VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(), + PSE, RequiresScalarEpilogueCheck, + CM.foldTailByMasking(), OrigLoop); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split @@ -9621,8 +9621,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE, - true, false, OrigLoop); + VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(), + PSE, true, false, OrigLoop); for (ElementCount VF : Range) Plan->addVF(VF); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 13d7e1b28c93d..bc3957f573d82 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -652,22 +652,6 @@ bool VPBasicBlock::isExiting() const { return getParent() && getParent()->getExitingBasicBlock() == this; } -std::optional> -VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const { - ArrayRef Preds = getPredecessors(); - if (Preds.size() != 2) - return std::nullopt; - - for (unsigned Idx : {0, 1}) { - auto *PreheaderVPBB = cast(Preds[Idx]); - auto *LatchVPBB = cast(Preds[1 - Idx]); - if (VPDT.dominates(PreheaderVPBB, this) && VPDT.dominates(this, LatchVPBB)) - return {std::make_pair(PreheaderVPBB, LatchVPBB)}; - } - - return std::nullopt; -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPBlockBase::print(raw_ostream &O) const { VPSlotTracker SlotTracker(getPlan()); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 814990f6ea00b..48f5907ef37ae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3255,14 +3255,6 @@ class VPBasicBlock : public VPBlockBase { /// Returns true if the block is exiting it's parent region. bool isExiting() const; - /// Checks if the block is a loop header block in the plain CFG; that is, it - /// has exactly 2 predecessors (preheader and latch), where the block - /// dominates the latch and the preheader dominates the block. If it is a - /// header block, returns a pair with the corresponding preheader and latch - /// blocks. Otherwise return std::nullopt. - std::optional> - isHeader(const VPDominatorTree &VPDT) const; - /// Clone the current block and it's recipes, without updating the operands of /// the cloned recipes. VPBasicBlock *clone() override; diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index fb6f681497fa6..a6748c2db488f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -21,47 +21,79 @@ using namespace llvm; -/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB and -/// return it. -static VPRegionBlock *introduceRegion(VPlan &Plan, VPBasicBlock *PreheaderVPBB, - VPBasicBlock *HeaderVPBB, - VPBasicBlock *LatchVPBB) { - VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); - VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB); +/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it +/// has exactly 2 predecessors (preheader and latch), where the block +/// dominates the latch and the preheader dominates the block. If it is a +/// header block, returns a pair with the corresponding preheader and latch +/// blocks. Otherwise return std::nullopt. +static std::optional> +getPreheaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT) { + ArrayRef Preds = HeaderVPB->getPredecessors(); + if (Preds.size() != 2) + return std::nullopt; + + auto *PreheaderVPBB = cast(Preds[0]); + auto *LatchVPBB = cast(Preds[1]); + if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && + VPDT.dominates(HeaderVPB, LatchVPBB)) + return {std::make_pair(PreheaderVPBB, LatchVPBB)}; + + std::swap(PreheaderVPBB, LatchVPBB); + if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && + VPDT.dominates(HeaderVPB, LatchVPBB)) + return {std::make_pair(PreheaderVPBB, LatchVPBB)}; + + return std::nullopt; +} + +/// Create a new VPRegionBlock if there is a loop starting at \p HeaderVPB. +static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB, + VPDominatorTree &VPDT) { + auto Res = getPreheaderAndLatch(HeaderVPB, VPDT); + if (!Res) + return; + + const auto &[PreheaderVPBB, LatchVPBB] = *Res; + + // Swap the operands of header phis if needed. After creating the region, the + // incoming value from the preheader must be the first operand and the one + // from the latch must be the second operand. + if (HeaderVPB->getPredecessors()[0] != PreheaderVPBB) { + for (VPRecipeBase &R : cast(HeaderVPB)->phis()) { + VPValue *Inc0 = R.getOperand(0); + R.setOperand(0, R.getOperand(1)); + R.setOperand(1, Inc0); + } + } + VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB); + VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB); VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); assert(LatchVPBB->getNumSuccessors() <= 1 && "Latch has more than one successor"); if (Succ) VPBlockUtils::disconnectBlocks(LatchVPBB, Succ); - auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "", + auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "", false /*isReplicator*/); - R->setParent(HeaderVPBB->getParent()); - // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop, + R->setParent(HeaderVPB->getParent()); + // All VPBB's reachable shallowly from HeaderVPB belong to top level loop, // because VPlan is expected to end at top level latch disconnected above. - for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) + for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB)) VPBB->setParent(R); VPBlockUtils::insertBlockAfter(R, PreheaderVPBB); if (Succ) VPBlockUtils::connectBlocks(R, Succ); - return R; } -void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy, - PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, - bool TailFolded, Loop *TheLoop) { +void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, + PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop) { VPDominatorTree VPDT; VPDT.recalculate(Plan); - for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly( - vp_depth_first_shallow(Plan.getEntry()))) { - auto Res = HeaderVPBB->isHeader(VPDT); - if (!Res) - continue; - const auto &[PreheaderVPBB, LatchVPBB] = *Res; - introduceRegion(Plan, PreheaderVPBB, HeaderVPBB, LatchVPBB); - } + for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry())) + createLoopRegion(Plan, HeaderVPB, VPDT); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); auto *OrigExiting = TopRegion->getExiting(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 1dc16d66750ae..a8adc3d165715 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -98,19 +98,14 @@ void PlainCFGBuilder::fixHeaderPhis() { auto *VPPhi = cast(VPVal); assert(VPPhi->getNumOperands() == 0 && "Expected VPInstruction with no operands."); - - Loop *L = LI->getLoopFor(Phi->getParent()); - assert(isHeaderBB(Phi->getParent(), L)); + assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))); // For header phis, make sure the incoming value from the loop // predecessor is the first operand of the recipe. assert(Phi->getNumOperands() == 2 && "header phi must have exactly 2 operands"); - BasicBlock *LoopPred = L->getLoopPredecessor(); - VPPhi->addOperand( - getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopPred))); - BasicBlock *LoopLatch = L->getLoopLatch(); - VPPhi->addOperand( - getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopLatch))); + for (BasicBlock *Pred : predecessors(Phi->getParent())) + VPPhi->addOperand( + getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred))); } } @@ -344,8 +339,8 @@ void PlainCFGBuilder::buildPlainCFG( VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0); VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1); - // Don't connect any blocks outside the current loop except the latch, which - // is handled below. + // Don't connect any blocks outside the current loop except the latches for + // inner loops. if (LoopForBB && (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) { if (!LoopForBB->contains(IRSucc0)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ed8b7a08ea187..405e9eb62d294 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -61,10 +61,10 @@ struct VPlanTransforms { /// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is /// the type of the canonical induction and used for related values, like the /// trip count expression. - static void introduceRegions(VPlan &Plan, Type *InductionTy, - PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, - bool TailFolded, Loop *TheLoop); + static void createLoopRegions(VPlan &Plan, Type *InductionTy, + PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop); /// Replaces the VPInstructions in \p Plan with corresponding /// widen recipes. Returns false if any VPInstructions could not be converted diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index b4b6d3d760349..7f8bfcd507a45 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -13,14 +13,14 @@ define void @foo(i64 %n) { ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> +; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<%outer.iv.next>, ir<0> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> ; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> ; CHECK-NEXT: Successor(s): inner ; CHECK-EMPTY: ; CHECK-NEXT: inner: -; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next> +; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<%inner.iv.next>, ir<0> ; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> ; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index f2d3d37b40ba9..486296535996b 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -74,8 +74,8 @@ class VPlanTestIRBase : public testing::Test { auto Plan = std::make_unique(L); VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceRegions(*Plan, IntegerType::get(*Ctx, 64), PSE, - true, false, L); + VPlanTransforms::createLoopRegions(*Plan, IntegerType::get(*Ctx, 64), PSE, + true, false, L); return Plan; } }; From c6903d64f99a7243cfbf80b009623c083fb2573c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 13 Apr 2025 12:43:09 +0100 Subject: [PATCH 7/8] !fixup adderss latest comments, thanks! --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- .../Transforms/Vectorize/VPlanConstruction.cpp | 10 ++++++---- .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 17 ++++------------- .../LoopVectorize/vplan-printing-outer-loop.ll | 6 +++--- 4 files changed, 14 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fe38a998acf1c..e1f1ac7de932c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9561,7 +9561,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { Range); auto Plan = std::make_unique(OrigLoop); // Build hierarchical CFG. - // TODO: Convert to VPlan-transform and consoliate all transforms for VPlan + // TODO: Convert to VPlan-transform and consolidate all transforms for VPlan // creation. VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); HCFGBuilder.buildPlainCFG(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index a6748c2db488f..f002519c48569 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -46,9 +46,10 @@ getPreheaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT) { return std::nullopt; } -/// Create a new VPRegionBlock if there is a loop starting at \p HeaderVPB. -static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB, - VPDominatorTree &VPDT) { +/// Try to create a new VPRegionBlock if there is a loop starting at \p +/// HeaderVPB. +static void tryToCreateLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB, + VPDominatorTree &VPDT) { auto Res = getPreheaderAndLatch(HeaderVPB, VPDT); if (!Res) return; @@ -93,7 +94,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, VPDominatorTree VPDT; VPDT.recalculate(Plan); for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry())) - createLoopRegion(Plan, HeaderVPB, VPDT); + tryToCreateLoopRegion(Plan, HeaderVPB, VPDT); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); auto *OrigExiting = TopRegion->getExiting(); @@ -101,6 +102,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting); TopRegion->setExiting(LatchVPBB); TopRegion->setName("vector loop"); + TopRegion->getEntryBasicBlock()->setName("vector.body"); // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which works also when diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index a8adc3d165715..81d4a8217756b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -99,8 +99,6 @@ void PlainCFGBuilder::fixHeaderPhis() { assert(VPPhi->getNumOperands() == 0 && "Expected VPInstruction with no operands."); assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))); - // For header phis, make sure the incoming value from the loop - // predecessor is the first operand of the recipe. assert(Phi->getNumOperands() == 2 && "header phi must have exactly 2 operands"); for (BasicBlock *Pred : predecessors(Phi->getParent())) @@ -109,10 +107,6 @@ void PlainCFGBuilder::fixHeaderPhis() { } } -static bool isHeaderVPBB(VPBasicBlock *VPBB) { - return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB; -} - // Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an // existing one if it was already created. VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { @@ -122,7 +116,7 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { } // Create new VPBB. - StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName(); + StringRef Name = BB->getName(); LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n"); VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name); BB2VPBB[BB] = VPBB; @@ -325,10 +319,7 @@ void PlainCFGBuilder::buildPlainCFG( auto *BI = cast(BB->getTerminator()); unsigned NumSuccs = succ_size(BB); if (NumSuccs == 1) { - auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor()); - VPBB->setOneSuccessor(isHeaderVPBB(Successor) - ? Successor->getParent() - : static_cast(Successor)); + VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor())); continue; } assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() && @@ -341,8 +332,8 @@ void PlainCFGBuilder::buildPlainCFG( // Don't connect any blocks outside the current loop except the latches for // inner loops. - if (LoopForBB && - (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) { + // TODO: Also connect exit blocks during initial VPlan construction. + if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) { if (!LoopForBB->contains(IRSucc0)) { VPBB->setOneSuccessor(Successor1); continue; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 7f8bfcd507a45..91a5ea6b7fe36 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -10,9 +10,9 @@ define void @foo(i64 %n) { ; CHECK-NEXT: { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: Successor(s): vector.body +; CHECK-NEXT: Successor(s): outer.header ; CHECK-EMPTY: -; CHECK-NEXT: vector.body: +; CHECK-NEXT: outer.header: ; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<%outer.iv.next>, ir<0> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> @@ -31,7 +31,7 @@ define void @foo(i64 %n) { ; CHECK-NEXT: outer.latch: ; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> ; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> -; CHECK-NEXT: Successor(s): vector.body +; CHECK-NEXT: Successor(s): outer.header ; CHECK-NEXT: } entry: br label %outer.header From 29b7487226bd33ee2bf6ed940ad49ba3be8c4741 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 16 Apr 2025 11:39:00 +0200 Subject: [PATCH 8/8] !fixup address latest comments, thanks --- llvm/lib/Transforms/Vectorize/VPlan.h | 7 +++ .../Vectorize/VPlanConstruction.cpp | 55 ++++++++----------- .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 3 +- llvm/lib/Transforms/Vectorize/VPlanValue.h | 6 ++ 4 files changed, 39 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a950e278d354c..7084676af6d5b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -304,6 +304,13 @@ class VPBlockBase { /// Remove all the successors of this block. void clearSuccessors() { Successors.clear(); } + /// Swap predecessors of the block. The block must have exactly 2 + /// predecessors. + void swapPredecessors() { + assert(Predecessors.size() == 2 && "must have 2 predecessors to swap"); + std::swap(Predecessors[0], Predecessors[1]); + } + /// Swap successors of the block. The block must have exactly 2 successors. // TODO: This should be part of introducing conditional branch recipes rather // than being independent. diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index f002519c48569..1e687d0879f18 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -24,48 +24,40 @@ using namespace llvm; /// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it /// has exactly 2 predecessors (preheader and latch), where the block /// dominates the latch and the preheader dominates the block. If it is a -/// header block, returns a pair with the corresponding preheader and latch -/// blocks. Otherwise return std::nullopt. -static std::optional> -getPreheaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT) { +/// header block return true, making sure the preheader appears first and +/// the latch second. Otherwise return false. +static bool canonicalHeader(VPBlockBase *HeaderVPB, + const VPDominatorTree &VPDT) { ArrayRef Preds = HeaderVPB->getPredecessors(); if (Preds.size() != 2) - return std::nullopt; + return false; - auto *PreheaderVPBB = cast(Preds[0]); - auto *LatchVPBB = cast(Preds[1]); + auto *PreheaderVPBB = Preds[0]; + auto *LatchVPBB = Preds[1]; if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && VPDT.dominates(HeaderVPB, LatchVPBB)) - return {std::make_pair(PreheaderVPBB, LatchVPBB)}; + return true; std::swap(PreheaderVPBB, LatchVPBB); + if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && - VPDT.dominates(HeaderVPB, LatchVPBB)) - return {std::make_pair(PreheaderVPBB, LatchVPBB)}; + VPDT.dominates(HeaderVPB, LatchVPBB)) { + // Canonicalize predecessors of header so that preheader is first and latch + // second. + HeaderVPB->swapPredecessors(); + for (VPRecipeBase &R : cast(HeaderVPB)->phis()) + R.swapOperands(); + return true; + } - return std::nullopt; + return false; } -/// Try to create a new VPRegionBlock if there is a loop starting at \p -/// HeaderVPB. -static void tryToCreateLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB, - VPDominatorTree &VPDT) { - auto Res = getPreheaderAndLatch(HeaderVPB, VPDT); - if (!Res) - return; +/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB. +static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { + auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; + auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; - const auto &[PreheaderVPBB, LatchVPBB] = *Res; - - // Swap the operands of header phis if needed. After creating the region, the - // incoming value from the preheader must be the first operand and the one - // from the latch must be the second operand. - if (HeaderVPB->getPredecessors()[0] != PreheaderVPBB) { - for (VPRecipeBase &R : cast(HeaderVPB)->phis()) { - VPValue *Inc0 = R.getOperand(0); - R.setOperand(0, R.getOperand(1)); - R.setOperand(1, Inc0); - } - } VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB); VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB); VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); @@ -94,7 +86,8 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, VPDominatorTree VPDT; VPDT.recalculate(Plan); for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry())) - tryToCreateLoopRegion(Plan, HeaderVPB, VPDT); + if (canonicalHeader(HeaderVPB, VPDT)) + createLoopRegion(Plan, HeaderVPB); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); auto *OrigExiting = TopRegion->getExiting(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 81d4a8217756b..5bacd2d4e6d88 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -98,7 +98,8 @@ void PlainCFGBuilder::fixHeaderPhis() { auto *VPPhi = cast(VPVal); assert(VPPhi->getNumOperands() == 0 && "Expected VPInstruction with no operands."); - assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))); + assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) && + "Expected Phi in header block."); assert(Phi->getNumOperands() == 2 && "header phi must have exactly 2 operands"); for (BasicBlock *Pred : predecessors(Phi->getParent())) diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index ced60a30ad56e..638156eab7a84 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -246,6 +246,12 @@ class VPUser { New->addUser(*this); } + /// Swap operands of the VPUser. It must have exactly 2 operands. + void swapOperands() { + assert(Operands.size() == 2 && "must have 2 operands to swap"); + std::swap(Operands[0], Operands[1]); + } + /// Replaces all uses of \p From in the VPUser with \p To. void replaceUsesOfWith(VPValue *From, VPValue *To);