diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6d64aaa75922b..52cb8c9f88f94 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7533,8 +7533,9 @@ LoopVectorizationPlanner::executePlan( LLVM_DEBUG(BestVPlan.dump()); // Perform the actual loop transformation. - VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan, - OrigLoop->getHeader()->getContext()); + VPTransformState State(BestVF, BestUF, LI, + EnableVPlanNativePath ? nullptr : DT, ILV.Builder, + &ILV, &BestVPlan, OrigLoop->getHeader()->getContext()); // 0. Generate SCEV-dependent code into the preheader, including TripCount, // before making any changes to the CFG. @@ -10402,6 +10403,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, PA.preserve(); PA.preserve(); } + PA.preserve(); if (Result.MadeCFGChange) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 27f8e239b1c09..d71d7580e6ba6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -218,7 +219,7 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx) - : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan), + : VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan), LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType(), Ctx) {} @@ -436,6 +437,7 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { "Trying to reset an existing successor block."); TermBr->setSuccessor(idx, NewBB); } + CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}}); } return NewBB; } @@ -467,6 +469,7 @@ void VPBasicBlock::execute(VPTransformState *State) { // The Exit block of a loop is always set to be successor 0 of the Exiting // block. cast(ExitingBB->getTerminator())->setSuccessor(0, NewBB); + State->CFG.DTU.applyUpdates({{DominatorTree::Insert, ExitingBB, NewBB}}); } else if (PrevVPBB && /* A */ !((SingleHPred = getSingleHierarchicalPredecessor()) && SingleHPred->getExitingBasicBlock() == PrevVPBB && @@ -829,6 +832,11 @@ void VPlan::execute(VPTransformState *State) { BasicBlock *VectorPreHeader = State->CFG.PrevBB; State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); + // Disconnect VectorPreHeader from ExitBB in both the CFG and DT. + cast(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr); + State->CFG.DTU.applyUpdates( + {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}}); + // Generate code in the loop pre-header and body. for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) Block->execute(State); @@ -891,13 +899,10 @@ void VPlan::execute(VPTransformState *State) { } } - // We do not attempt to preserve DT for outer loop vectorization currently. - if (!EnableVPlanNativePath) { - BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header]; - State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader); - updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB, - State->CFG.ExitBB); - } + State->CFG.DTU.flush(); + // DT is currently updated for non-native path only. + assert(EnableVPlanNativePath || State->CFG.DTU.getDomTree().verify( + DominatorTree::VerificationLevel::Fast)); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -995,44 +1000,6 @@ void VPlan::addLiveOut(PHINode *PN, VPValue *V) { LiveOuts.insert({PN, new VPLiveOut(PN, V)}); } -void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB, - BasicBlock *LoopLatchBB, - BasicBlock *LoopExitBB) { - // The vector body may be more than a single basic-block by this point. - // Update the dominator tree information inside the vector body by propagating - // it from header to latch, expecting only triangular control-flow, if any. - BasicBlock *PostDomSucc = nullptr; - for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { - // Get the list of successors of this block. - std::vector Succs(succ_begin(BB), succ_end(BB)); - assert(Succs.size() <= 2 && - "Basic block in vector loop has more than 2 successors."); - PostDomSucc = Succs[0]; - if (Succs.size() == 1) { - assert(PostDomSucc->getSinglePredecessor() && - "PostDom successor has more than one predecessor."); - DT->addNewBlock(PostDomSucc, BB); - continue; - } - BasicBlock *InterimSucc = Succs[1]; - if (PostDomSucc->getSingleSuccessor() == InterimSucc) { - PostDomSucc = Succs[1]; - InterimSucc = Succs[0]; - } - assert(InterimSucc->getSingleSuccessor() == PostDomSucc && - "One successor of a basic block does not lead to the other."); - assert(InterimSucc->getSinglePredecessor() && - "Interim successor has more than one predecessor."); - assert(PostDomSucc->hasNPredecessors(2) && - "PostDom successor has more than two predecessors."); - DT->addNewBlock(InterimSucc, BB); - DT->addNewBlock(PostDomSucc, BB); - } - // Latch block is a new dominator for the loop exit. - DT->changeImmediateDominator(LoopExitBB, LoopLatchBB); - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); -} - static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap &Old2NewVPValues) { // Update the operands of all cloned recipes starting at NewEntry. This diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4b3cb15b5e1e6..3aee17921086d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -35,6 +35,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/VectorUtils.h" @@ -372,7 +373,11 @@ struct VPTransformState { /// of replication, maps the BasicBlock of the last replica created. SmallDenseMap VPBB2IRBB; - CFGState() = default; + /// Updater for the DominatorTree. + DomTreeUpdater DTU; + + CFGState(DominatorTree *DT) + : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {} /// Returns the BasicBlock* mapped to the pre-header of the loop region /// containing \p R. @@ -382,9 +387,6 @@ struct VPTransformState { /// Hold a pointer to LoopInfo to register new basic blocks in the loop. LoopInfo *LI; - /// Hold a pointer to Dominator Tree to register new basic blocks in the loop. - DominatorTree *DT; - /// Hold a reference to the IRBuilder used to generate output IR code. IRBuilderBase &Builder; @@ -3289,13 +3291,6 @@ class VPlan { /// Clone the current VPlan, update all VPValues of the new VPlan and cloned /// recipes to refer to the clones, and return it. VPlan *duplicate(); - -private: - /// Add to the given dominator tree the header block and every new basic block - /// that was created between it and the latch block, inclusive. - static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB, - BasicBlock *LoopLatchBB, - BasicBlock *LoopExitBB); }; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)