diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8a35afbb73f3c..8636550d4f644 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2781,13 +2781,13 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { PSE.getSE()->forgetLoop(OrigLoop); PSE.getSE()->forgetBlockAndLoopDispositions(); - // Don't apply optimizations below when no vector region remains, as they all - // require a vector loop at the moment. - if (!State.Plan->getVectorLoopRegion()) + // Don't apply optimizations below when no (vector) loop remains, as they all + // require one at the moment. + VPBasicBlock *HeaderVPBB = + vputils::getFirstLoopHeader(*State.Plan, State.VPDT); + if (!HeaderVPBB) return; - VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); - VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB]; // Remove redundant induction instructions. @@ -2812,7 +2812,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { } void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { - auto Iter = vp_depth_first_deep(Plan.getEntry()); + auto Iter = vp_depth_first_shallow(Plan.getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (VPRecipeBase &P : VPBB->phis()) { VPWidenPHIRecipe *VPPhi = dyn_cast(&P); @@ -7623,6 +7623,13 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan, BestVF, TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)); VPlanTransforms::removeDeadRecipes(BestVPlan); + + // Retrieve and store the middle block before dissolving regions. Regions are + // dissolved after optimizing for VF and UF, which completely removes unneeded + // loop regions first. + VPBasicBlock *MiddleVPBB = + BestVPlan.getVectorLoopRegion() ? 
BestVPlan.getMiddleBlock() : nullptr; + VPlanTransforms::dissolveLoopRegions(BestVPlan); VPlanTransforms::convertToConcreteRecipes(BestVPlan, *Legal->getWidestInductionType()); // Perform the actual loop transformation. @@ -7720,14 +7727,14 @@ DenseMap LoopVectorizationPlanner::executePlan( // 2.6. Maintain Loop Hints // Keep all loop hints from the original loop on the vector loop (we'll // replace the vectorizer-specific hints below). - if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) { + VPBasicBlock *HeaderVPBB = vputils::getFirstLoopHeader(BestVPlan, State.VPDT); + if (HeaderVPBB) { MDNode *OrigLoopID = OrigLoop->getLoopID(); std::optional VectorizedLoopID = makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupVectorized}); - VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]); if (VectorizedLoopID) { L->setLoopID(*VectorizedLoopID); @@ -7773,8 +7780,7 @@ DenseMap LoopVectorizationPlanner::executePlan( ILV.printDebugTracesAtEnd(); // 4. Adjust branch weight of the branch in the middle block. - if (BestVPlan.getVectorLoopRegion()) { - auto *MiddleVPBB = BestVPlan.getMiddleBlock(); + if (HeaderVPBB) { auto *MiddleTerm = cast(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator()); if (MiddleTerm->isConditional() && diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 15b4865d22f8e..165b57c87beb1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -207,6 +207,32 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { return Parent->getEnclosingBlockWithPredecessors(); } +bool VPBlockUtils::isHeader(const VPBlockBase *VPB, + const VPDominatorTree &VPDT) { + auto *VPBB = dyn_cast(VPB); + if (!VPBB) + return false; + + // If VPBB is in a region R, VPBB is a loop header if R is a loop region with + // VPBB as its entry, i.e., free of predecessors. 
+ if (auto *R = VPBB->getParent()) + return !R->isReplicator() && VPBB->getNumPredecessors() == 0; + + // A header dominates its second predecessor (the latch), with the other + // predecessor being the preheader + return VPB->getPredecessors().size() == 2 && + VPDT.dominates(VPB, VPB->getPredecessors()[1]); +} + +bool VPBlockUtils::isLatch(const VPBlockBase *VPB, + const VPDominatorTree &VPDT) { + // A latch has a header as its second successor, with its other successor + // leaving the loop. A preheader OTOH has a header as its first (and only) + // successor. + return VPB->getNumSuccessors() == 2 && + VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT); +} + VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { iterator It = begin(); while (It != end() && It->isPhi()) @@ -424,13 +450,21 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) { if (ParentLoop && !State.LI->getLoopFor(NewBB)) ParentLoop->addBasicBlockToLoop(NewBB, *State.LI); + SmallVector Preds; + if (VPBlockUtils::isHeader(this, State.VPDT)) { + // There's no block for the latch yet, connect to the preheader only. + Preds = {getPredecessors()[0]}; + } else { + Preds = to_vector(getPredecessors()); + } + // Hook up the new basic block to its predecessors. 
- for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { + for (VPBlockBase *PredVPBlock : Preds) { VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); + assert(CFG.VPBB2IRBB.contains(PredVPBB) && + "Predecessor basic-block not found building successor."); BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; - - assert(PredBB && "Predecessor basic-block not found building successor."); auto *PredBBTerminator = PredBB->getTerminator(); LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); @@ -491,11 +525,25 @@ void VPBasicBlock::execute(VPTransformState *State) { bool Replica = bool(State->Lane); BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. + if (VPBlockUtils::isHeader(this, State->VPDT)) { + // Create and register the new vector loop. + Loop *PrevParentLoop = State->CurrentParentLoop; + State->CurrentParentLoop = State->LI->AllocateLoop(); + + // Insert the new loop into the loop nest and register the new basic blocks + // before calling any utilities such as SCEV that require valid LoopInfo. + if (PrevParentLoop) + PrevParentLoop->addChildLoop(State->CurrentParentLoop); + else + State->LI->addTopLevelLoop(State->CurrentParentLoop); + } + auto IsReplicateRegion = [](VPBlockBase *BB) { auto *R = dyn_cast_or_null(BB); - return R && R->isReplicator(); + assert((!R || R->isReplicator()) && + "only replicate region blocks should remain"); + return R; }; - // 1. Create an IR basic block. if ((Replica && this == getParent()->getEntry()) || IsReplicateRegion(getSingleHierarchicalPredecessor())) { @@ -518,6 +566,10 @@ void VPBasicBlock::execute(VPTransformState *State) { // 2. Fill the IR basic block with IR instructions. executeRecipes(State, NewBB); + + // If this block is a latch, update CurrentParentLoop. 
+ if (VPBlockUtils::isLatch(this, State->VPDT)) + State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop(); } VPBasicBlock *VPBasicBlock::clone() { @@ -729,35 +781,13 @@ VPRegionBlock *VPRegionBlock::clone() { } void VPRegionBlock::execute(VPTransformState *State) { - ReversePostOrderTraversal> - RPOT(Entry); - - if (!isReplicator()) { - // Create and register the new vector loop. - Loop *PrevParentLoop = State->CurrentParentLoop; - State->CurrentParentLoop = State->LI->AllocateLoop(); - - // Insert the new loop into the loop nest and register the new basic blocks - // before calling any utilities such as SCEV that require valid LoopInfo. - if (PrevParentLoop) - PrevParentLoop->addChildLoop(State->CurrentParentLoop); - else - State->LI->addTopLevelLoop(State->CurrentParentLoop); - - // Visit the VPBlocks connected to "this", starting from it. - for (VPBlockBase *Block : RPOT) { - LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); - Block->execute(State); - } - - State->CurrentParentLoop = PrevParentLoop; - return; - } - + assert(isReplicator() && + "Loop regions should have been lowered to plain CFG"); assert(!State->Lane && "Replicating a Region with non-null instance."); - - // Enter replicating mode. 
assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); + + ReversePostOrderTraversal> RPOT( + Entry); State->Lane = VPLane(0); for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; ++Lane) { @@ -851,6 +881,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, } #endif +void VPRegionBlock::dissolveToCFGLoop() { + auto *Header = cast(getEntry()); + VPBlockBase *Preheader = getSinglePredecessor(); + auto *ExitingLatch = cast(getExiting()); + VPBlockBase *Middle = getSingleSuccessor(); + VPBlockUtils::disconnectBlocks(Preheader, this); + VPBlockUtils::disconnectBlocks(this, Middle); + + for (VPBlockBase *VPB : vp_depth_first_shallow(Entry)) + VPB->setParent(getParent()); + + VPBlockUtils::connectBlocks(Preheader, Header); + VPBlockUtils::connectBlocks(ExitingLatch, Middle); + VPBlockUtils::connectBlocks(ExitingLatch, Header); +} + VPlan::VPlan(Loop *L) { setEntry(createVPIRBasicBlock(L->getLoopPreheader())); ScalarHeader = createVPIRBasicBlock(L->getHeader()); @@ -962,16 +1008,15 @@ void VPlan::execute(VPTransformState *State) { State->CFG.DTU.flush(); - auto *LoopRegion = getVectorLoopRegion(); - if (!LoopRegion) + VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT); + if (!Header) return; - VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock(); + auto *LatchVPBB = cast(Header->getPredecessors()[1]); BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; // Fix the latch value of canonical, reduction and first-order recurrences // phis in the vector loop. - VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { // Skip phi-like recipes that generate their backedege values themselves. 
if (isa(&R)) @@ -1007,8 +1052,10 @@ void VPlan::execute(VPTransformState *State) { bool NeedsScalar = isa(PhiR) || (isa(PhiR) && cast(PhiR)->isInLoop()); + Value *Phi = State->get(PhiR, NeedsScalar); - // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not. + // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does + // not. Value *Val = State->get(PhiR->getOperand(1), NeedsScalar); cast(Phi)->addIncoming(Val, VectorLatchBB); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c19e0298cdad9..c4e66cd89e69c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3872,6 +3872,10 @@ class VPRegionBlock : public VPBlockBase { /// Clone all blocks in the single-entry single-exit region of the block and /// their recipes without updating the operands of the cloned recipes. VPRegionBlock *clone() override; + + /// Remove the current region from its VPlan, connecting its predecessor to + /// its entry, and its exiting block to its successor. + void dissolveToCFGLoop(); }; /// VPlan models a candidate for vectorization, encoding various decisions take diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 14ed40f16683a..5c2ddb62c7155 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -462,6 +462,26 @@ Value *VPInstruction::generatePerLane(VPTransformState &State, State.get(getOperand(1), Lane), Name); } +/// Create a conditional branch using \p Cond branching to the successors of \p +/// VPBB. Note that the first successor is always forward (i.e. not created yet) +/// while the second successor may already have been created (if it is a header +/// block and VPBB is a latch). 
+static BranchInst *createCondBranch(Value *Cond, VPBasicBlock *VPBB, + VPTransformState &State) { + // Replace the temporary unreachable terminator with a new conditional + // branch, hooking it up to backward destination (header) for latch blocks + // now, and to forward destination(s) later when they are created. + // Second successor may be backwards - iff it is already in VPBB2IRBB. + VPBasicBlock *SecondVPSucc = cast(VPBB->getSuccessors()[1]); + BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc); + BasicBlock *IRBB = State.CFG.VPBB2IRBB[VPBB]; + BranchInst *CondBr = State.Builder.CreateCondBr(Cond, IRBB, SecondIRSucc); + // First successor is always forward, reset it to nullptr + CondBr->setSuccessor(0, nullptr); + IRBB->getTerminator()->eraseFromParent(); + return CondBr; +} + Value *VPInstruction::generate(VPTransformState &State) { IRBuilderBase &Builder = State.Builder; @@ -581,43 +601,14 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::BranchOnCond: { Value *Cond = State.get(getOperand(0), VPLane(0)); - // Replace the temporary unreachable terminator with a new conditional - // branch, hooking it up to backward destination for exiting blocks now and - // to forward destination(s) later when they are created. - BranchInst *CondBr = - Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); - CondBr->setSuccessor(0, nullptr); - Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); - - if (!getParent()->isExiting()) - return CondBr; - - VPRegionBlock *ParentRegion = getParent()->getParent(); - VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); - CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); - return CondBr; + return createCondBranch(Cond, getParent(), State); } case VPInstruction::BranchOnCount: { // First create the compare. 
Value *IV = State.get(getOperand(0), /*IsScalar*/ true); Value *TC = State.get(getOperand(1), /*IsScalar*/ true); Value *Cond = Builder.CreateICmpEQ(IV, TC); - - // Now create the branch. - auto *Plan = getParent()->getPlan(); - VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); - VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); - - // Replace the temporary unreachable terminator with a new conditional - // branch, hooking it up to backward destination (the header) now and to the - // forward destination (the exit/middle block) later when it is created. - // Note that CreateCondBr expects a valid BB as first argument, so we need - // to set it to nullptr later. - BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), - State.CFG.VPBB2IRBB[Header]); - CondBr->setSuccessor(0, nullptr); - Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); - return CondBr; + return createCondBranch(Cond, getParent(), State); } case VPInstruction::Broadcast: { return Builder.CreateVectorSplat( @@ -1127,10 +1118,6 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent, void VPPhi::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); - assert(getParent() == - getParent()->getPlan()->getVectorLoopRegion()->getEntry() && - "VPInstructions with PHI opcodes must be used for header phis only " - "at the moment"); BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0)); Value *Start = State.get(getIncomingValue(0), VPLane(0)); PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 34633cd748eb1..52d61d96c8083 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2506,6 +2506,18 @@ void VPlanTransforms::createInterleaveGroups( } } +void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) { + 
 // Replace loop regions with explicit CFG. + SmallVector LoopRegions; + for (VPRegionBlock *R : VPBlockUtils::blocksOnly( + vp_depth_first_deep(Plan.getEntry()))) { + if (!R->isReplicator()) + LoopRegions.push_back(R); + } + for (VPRegionBlock *R : LoopRegions) + R->dissolveToCFGLoop(); +} + // Expand VPExtendedReductionRecipe to VPWidenCastRecipe + VPReductionRecipe. static void expandVPExtendedReduction(VPExtendedReductionRecipe *ExtRed) { VPWidenCastRecipe *Ext; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 36fc78ce566b2..34e2de4eb3b74 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -184,6 +184,9 @@ struct VPlanTransforms { VPBasicBlock *LatchVPBB, VFRange &Range); + /// Replace loop regions with explicit CFG. + static void dissolveLoopRegions(VPlan &Plan); + /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis. 
static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 6438c5437b7e3..81bd21bb904c0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "VPlanUtils.h" +#include "VPlanCFG.h" #include "VPlanPatternMatch.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -126,3 +127,11 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { return false; }); } + +VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) { + auto DepthFirst = vp_depth_first_shallow(Plan.getEntry()); + auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) { + return VPBlockUtils::isHeader(VPB, VPDT); + }); + return I == DepthFirst.end() ? nullptr : cast(*I); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 28c1a6af2570b..1e51949c07746 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -93,6 +93,9 @@ bool isHeaderMask(const VPValue *V, VPlan &Plan); /// VPDerivedIV or VPCanonicalIVPHI). bool isUniformAcrossVFsAndUFs(VPValue *V); +/// Returns the header block of the first, top-level loop, or null if none +/// exist. +VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT); } // namespace vputils //===----------------------------------------------------------------------===// @@ -239,6 +242,13 @@ class VPBlockUtils { VPBlockUtils::connectBlocks(From, BlockPtr, -1, SuccIdx); VPBlockUtils::connectBlocks(BlockPtr, To, PredIx, -1); } + + /// Returns true if \p VPB is a loop header, based on regions or \p VPDT in + /// their absence. 
+ static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT); + + /// Returns true if \p VPB is a loop latch, using isHeader(). + static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 64065edd315f9..6cc792627f60d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -181,7 +181,7 @@ class VPValue { return getUnderlyingValue(); } - /// Returns true if the VPValue is defined outside any loop region. + /// Returns true if the VPValue is defined outside any loop. bool isDefinedOutsideLoopRegions() const; // Set \p Val as the underlying Value of this VPValue. diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 68b35d42e8674..54cf8ac2ed04a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -73,9 +73,7 @@ bool VPlanVerifier::verifyPhiRecipes(const VPBasicBlock *VPBB) { auto RecipeI = VPBB->begin(); auto End = VPBB->end(); unsigned NumActiveLaneMaskPhiRecipes = 0; - const VPRegionBlock *ParentR = VPBB->getParent(); - bool IsHeaderVPBB = ParentR && !ParentR->isReplicator() && - ParentR->getEntryBasicBlock() == VPBB; + bool IsHeaderVPBB = VPBlockUtils::isHeader(VPBB, VPDT); while (RecipeI != End && RecipeI->isPhi()) { if (isa(RecipeI)) NumActiveLaneMaskPhiRecipes++; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index c0806ea16a5fc..d4494089f7083 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -153,11 +153,10 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> 
[[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[TMP4]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]] -; CHECK-NEXT: [[TMP5]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]] -; CHECK-NEXT: [[TMP6]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]] +; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -196,8 +195,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP14]] = select i1 [[TMP13]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]] +; CHECK-NEXT: [[TMP14]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]] 
; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], splat (i32 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 969bb413f9c50..c2fe37ad214c6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -74,10 +74,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: [[TMP13:%.*]] = and [[TMP12]], splat (i32 1) ; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor [[TMP13]], splat (i32 1) ; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext [[TMP14]] to -; VSCALEFORTUNING2-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 -; VSCALEFORTUNING2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP16]] -; VSCALEFORTUNING2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, ptr [[TMP17]], i64 0 -; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], [[TMP15]] ; VSCALEFORTUNING2-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() ; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 4 ; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1 @@ -210,10 +207,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP17:%.*]] = and [[TMP16]], splat (i32 1) ; PRED-NEXT: [[TMP18:%.*]] = xor [[TMP17]], splat (i32 1) ; PRED-NEXT: [[TMP19:%.*]] = zext [[TMP18]] to -; PRED-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 -; PRED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP20]] -; PRED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, ptr 
[[TMP21]], i64 0 -; PRED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PRED-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], [[TMP19]] ; PRED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() ; PRED-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 4 ; PRED-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 567aa63483771..2e9d90f762ccd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -83,27 +83,24 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: Successor(s): vector loop +; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4) -; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]> -; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a> -; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]> -; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32 -; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[EP_IV]]> -; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b> -; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]> -; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32 -; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a> -; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul> -; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16> -; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.block +; 
CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<%index.next>, vector.body ] +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4) +; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]> +; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a> +; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]> +; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32 +; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[EP_IV]]> +; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b> +; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]> +; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32 +; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a> +; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul> +; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16> +; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024> +; CHECK-NEXT: Successor(s): middle.block, vector.body ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 9e77a0ca8bcc9..0d77dfc50dd70 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -193,26 +193,23 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: IR %18 = mul i64 %17, 4 ; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + ir<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + ir<[[VEC_TC]]> * ir<-1> -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ] 
-; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]> -; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: WIDEN ir<%add9> = add ir<[[L]]>, ir<1> -; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1 -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.block +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ] +; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1> +; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]> +; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%add9> = add ir<[[L]]>, ir<1> +; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> +; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]> +; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1 +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]> 
+; CHECK-NEXT: Successor(s): middle.block, vector.body ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]> @@ -444,26 +441,23 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: IR %18 = mul i64 %17, 4 ; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + ir<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + ir<[[VEC_TC]]> * ir<-1> -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ] -; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]> -; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<[[L]]>, ir<1.000000e+00> -; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1 -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.block +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ] +; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1> +; CHECK-NEXT: CLONE 
ir<%idxprom> = zext ir<%i.0> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]> +; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<[[L]]>, ir<1.000000e+00> +; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]> +; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1 +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]> +; CHECK-NEXT: Successor(s): middle.block, vector.body ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll index b2ec86ea3ec53..86647b1386ec5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -28,33 +28,30 @@ ; IF-EVL-NEXT: IR %n.vec = sub i64 %n.rnd.up, %n.mod.vf ; IF-EVL-NEXT: IR %7 = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: IR %8 = mul i64 %7, 4 - ; IF-EVL-NEXT: Successor(s): vector loop - - ; IF-EVL: vector loop: { - ; IF-EVL-NEXT: vector.body: - ; IF-EVL-NEXT: EMIT vp<[[IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[IV_NEXT_EXIT:%.+]]>, vector.body ] - ; IF-EVL-NEXT: EMIT vp<[[EVL_PHI:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[IV_NEX:%.+]]>, vector.body ] - ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> - ; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> - ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[EVL_PHI]]> - ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> 
- ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> - ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[EVL_PHI]]> - ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> - ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> - ; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]> - ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub ir<0>, ir<[[LD2]]> - ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>) - ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add vp<[[SELECT]]>, ir<[[LD1]]> - ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[EVL_PHI]]> - ; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]> - ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> - ; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 - ; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> - ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add vp<[[IV]]>, ir<[[VFUF]]> - ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, ir<[[VTC]]> - ; IF-EVL-NEXT: No successors - ; IF-EVL-NEXT: } + ; IF-EVL-NEXT: Successor(s): vector.body + ; IF-EVL-EMPTY: + ; IF-EVL-NEXT: vector.body: + ; IF-EVL-NEXT: EMIT vp<[[IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[IV_NEXT_EXIT:%.+]]>, vector.body ] + ; IF-EVL-NEXT: EMIT vp<[[EVL_PHI:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[IV_NEX:%.+]]>, vector.body ] + ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> + ; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> + ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[EVL_PHI]]> + ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> + ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> + ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[EVL_PHI]]> + ; IF-EVL-NEXT: 
vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> + ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> + ; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]> + ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub ir<0>, ir<[[LD2]]> + ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>) + ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add vp<[[SELECT]]>, ir<[[LD1]]> + ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[EVL_PHI]]> + ; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]> + ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> + ; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 + ; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> + ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add vp<[[IV]]>, ir<[[VFUF]]> + ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, ir<[[VTC]]> entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index 61a5bd69b7ba3..59e2664cc1402 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -15,75 +15,72 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, 2 ; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf ; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + ir<%n.vec> * ir<1> -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ] -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2> -; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[WIDE_PTR:%.+]]> = vector-pointer 
vp<[[PTR]]> -; CHECK-NEXT: WIDEN ir<%l> = load vp<[[WIDE_PTR]]> -; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12> -; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13> -; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]> -; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]> -; CHECK-NEXT: Successor(s): pred.store -; CHECK-EMPTY: -; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<[[C2]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]> -; CHECK-NEXT: Successor(s): pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): if.then.2.0 +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ] +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2> +; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[WIDE_PTR:%.+]]> = vector-pointer vp<[[PTR]]> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[WIDE_PTR]]> +; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12> +; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13> +; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]> +; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]> +; CHECK-NEXT: Successor(s): pred.store +; CHECK-EMPTY: +; CHECK-NEXT: pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<[[C2]]> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]> +; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: No successors +; 
CHECK-NEXT: } +; CHECK-NEXT: Successor(s): if.then.2.0 ; CHECK-EMPTY: -; CHECK-NEXT: if.then.2.0: -; CHECK-NEXT: Successor(s): pred.store +; CHECK-NEXT: if.then.2.0: +; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: -; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<[[C1]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-NEXT: pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<[[C1]]> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]> -; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]> +; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: -; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): if.then.1.1 +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): if.then.1.1 ; CHECK-EMPTY: -; CHECK-NEXT: if.then.1.1: -; CHECK-NEXT: Successor(s): pred.store +; CHECK-NEXT: if.then.1.1: +; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: -; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<[[DEFAULT_MASK]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-NEXT: pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<[[DEFAULT_MASK]]> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: -; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]> -; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]> +; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: -; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): default.2 -; 
CHECK-EMPTY: -; CHECK-NEXT: default.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VTC]]> +; CHECK-NEXT: pred.store.continue: ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.block +; CHECK-NEXT: Successor(s): default.2 +; CHECK-EMPTY: +; CHECK-NEXT: default.2: +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VTC]]> +; CHECK-NEXT: Successor(s): middle.block, vector.body ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, ir<[[VTC]]>