diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index b2745c81dec88..7787f58683b2a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -506,7 +506,7 @@ class LoopVectorizationPlanner { // instructions leading from the loop exit instr to the phi need to be // converted to reductions, with one operand being vector and the other being // the scalar reduction chain. For other reductions, a select is introduced - // between the phi and live-out recipes when folding the tail. + // between the phi and users outside the vector region when folding the tail. void adjustRecipesForReductions(VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3d638e52328b5..58fcba93f1a18 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -498,7 +498,7 @@ class InnerLoopVectorizer { virtual std::pair createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs); - /// Fix the vectorized code, taking care of header phi's, live-outs, and more. + /// Fix the vectorized code, taking care of header phi's, and more. void fixVectorizedLoop(VPTransformState &State); // Return true if any runtime check is added. @@ -2713,7 +2713,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( | | (opt) v <-- edge from middle to exit iff epilogue is not required. | [ ] \ - | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue). + | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header + | | wrapped in VPIRBasicBlock). \ | \ v >[ ] <-- exit block(s). 
(wrapped in VPIRBasicBlock) @@ -2956,7 +2957,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // and there is nothing to fix from vector loop; phis should have incoming // from scalar loop only. } else { - // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking + // TODO: Check in VPlan to see if IV users need fixing instead of checking // the cost model. // If we inserted an edge from the middle block to the unique exit block, @@ -2970,10 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { IVEndValues[Entry.first], LoopMiddleBlock, State); } - // Fix live-out phis not already fixed earlier. - for (const auto &KV : Plan.getLiveOuts()) - KV.second->fixPhi(Plan, State); - for (Instruction *PI : PredicatedInstructions) sinkScalarOperands(&*PI); @@ -8790,6 +8787,41 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } +/// Create resume phis in the scalar preheader for first-order recurrences and +/// reductions and update the VPIRInstructions wrapping the original phis in the +/// scalar header. +static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { + auto *ScalarPH = Plan.getScalarPreheader(); + auto *MiddleVPBB = cast(ScalarPH->getSinglePredecessor()); + VPBuilder ScalarPHBuilder(ScalarPH); + VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); + VPValue *OneVPV = Plan.getOrAddLiveIn( + ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); + for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) { + auto *ScalarPhiIRI = cast(&ScalarPhiR); + auto *ScalarPhiI = dyn_cast(&ScalarPhiIRI->getInstruction()); + if (!ScalarPhiI) + break; + auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); + if (!isa(VectorPhiR)) + continue; + // The backedge value provides the value to resume coming out of a loop, + // which for FORs is a vector whose last element needs to be extracted. 
The + // start value provides the value if the loop is bypassed. + bool IsFOR = isa(VectorPhiR); + auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue(); + if (IsFOR) + ResumeFromVectorLoop = MiddleBuilder.createNaryOp( + VPInstruction::ExtractFromEnd, {ResumeFromVectorLoop, OneVPV}, {}, + "vector.recur.extract"); + StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx"; + auto *ResumePhiR = ScalarPHBuilder.createNaryOp( + VPInstruction::ResumePhi, + {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name); + ScalarPhiIRI->addOperand(ResumePhiR); + } +} + // Collect VPIRInstructions for phis in the original exit block that are modeled // in VPlan and add the exiting VPValue as operand. Some exiting values are not // modeled explicitly yet and won't be included. Those are un-truncated @@ -8819,8 +8851,7 @@ static SetVector collectUsersInExitBlock( VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); // Exit values for inductions are computed and updated outside of VPlan and // independent of induction recipes. - // TODO: Compute induction exit values in VPlan, use VPLiveOuts to update - // live-outs. + // TODO: Compute induction exit values in VPlan. if ((isa(V) && !cast(V)->getTruncInst()) || isa(V) || @@ -8853,7 +8884,8 @@ addUsersInExitBlock(VPlan &Plan, // modeling the corresponding LCSSA phis. for (VPIRInstruction *ExitIRI : ExitUsersToFix) { VPValue *V = ExitIRI->getOperand(0); - // Pass live-in values used by exit phis directly through to the live-out. + // Pass live-in values used by exit phis directly through to their users in + // the exit block. if (V->isLiveIn()) continue; @@ -8865,39 +8897,17 @@ addUsersInExitBlock(VPlan &Plan, } } -/// Handle live-outs for first order reductions, both in the scalar preheader -/// and the original exit block: -/// 1. 
Feed a resume value for every FOR from the vector loop to the scalar -/// loop, if middle block branches to scalar preheader, by introducing -/// ExtractFromEnd and ResumePhi recipes in each, respectively, and a -/// VPLiveOut which uses the latter and corresponds to the scalar header. -/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in -/// the original exit block using a VPLiveOut. -static void addLiveOutsForFirstOrderRecurrences( +/// Handle users in the exit block for first order recurrences in the original +/// exit block. The penultimate value of recurrences is fed to their LCSSA phi +/// users in the original exit block using the VPIRInstruction wrapping the +/// LCSSA phi. +static void addExitUsersForFirstOrderRecurrences( VPlan &Plan, SetVector &ExitUsersToFix) { VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); - - // Start by finding out if middle block branches to scalar preheader, which is - // not a VPIRBasicBlock, unlike Exit block - the other possible successor of - // middle block. - // TODO: Should be replaced by - // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the - // scalar region is modeled as well. + auto *ScalarPHVPBB = Plan.getScalarPreheader(); auto *MiddleVPBB = cast(VectorRegion->getSingleSuccessor()); - VPBasicBlock *ScalarPHVPBB = nullptr; - if (MiddleVPBB->getNumSuccessors() == 2) { - // Order is strict: first is the exit block, second is the scalar preheader. 
- ScalarPHVPBB = cast(MiddleVPBB->getSuccessors()[1]); - } else if (ExitUsersToFix.empty()) { - ScalarPHVPBB = cast(MiddleVPBB->getSingleSuccessor()); - } else { - llvm_unreachable("unsupported CFG in VPlan"); - } - VPBuilder ScalarPHBuilder(ScalarPHVPBB); VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); - VPValue *OneVPV = Plan.getOrAddLiveIn( - ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); VPValue *TwoVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2)); @@ -8973,26 +8983,16 @@ static void addLiveOutsForFirstOrderRecurrences( // lo = lcssa.phi [s1, scalar.body], // [vector.recur.extract.for.phi, middle.block] // - // Extract the resume value and create a new VPLiveOut for it. - auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd, - {FOR->getBackedgeValue(), OneVPV}, - {}, "vector.recur.extract"); - auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( - VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {}, - "scalar.recur.init"); - auto *FORPhi = cast(FOR->getUnderlyingInstr()); - Plan.addLiveOut(FORPhi, ResumePhiRecipe); - // Now update VPIRInstructions modeling LCSSA phis in the exit block. // Extract the penultimate value of the recurrence and use it as operand for // the VPIRInstruction modeling the phi. 
for (VPIRInstruction *ExitIRI : ExitUsersToFix) { if (ExitIRI->getOperand(0) != FOR) continue; - VPValue *Ext = MiddleBuilder.createNaryOp( + VPValue *PenultimateElement = MiddleBuilder.createNaryOp( VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {}, "vector.recur.extract.for.phi"); - ExitIRI->setOperand(0, Ext); + ExitIRI->setOperand(0, PenultimateElement); ExitUsersToFix.remove(ExitIRI); } } @@ -9166,11 +9166,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); + addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlock( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); - addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix); + addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); addUsersInExitBlock(*Plan, ExitUsersToFix); - // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. @@ -9192,9 +9192,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // Replace VPValues for known constant strides guaranteed by predicate scalar // evolution. auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) { - auto *R = dyn_cast(&U); - if (!R) - return false; + auto *R = cast(&U); return R->getParent()->getParent() || R->getParent() == Plan->getVectorLoopRegion()->getSinglePredecessor(); @@ -9291,7 +9289,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // instructions leading from the loop exit instr to the phi need to be converted // to reductions, with one operand being vector and the other being the scalar // reduction chain. For other reductions, a select is introduced between the phi -// and live-out recipes when folding the tail. +// and users outside the vector region when folding the tail. 
// // A ComputeReductionResult recipe is added to the middle block, also for // in-loop reductions which compute their result in-loop, because generating @@ -9325,8 +9323,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( for (VPUser *U : Cur->users()) { auto *UserRecipe = cast(U); if (!UserRecipe->getParent()->getEnclosingLoopRegion()) { - assert(UserRecipe->getParent() == MiddleVPBB && - "U must be either in the loop region or the middle block."); + assert((UserRecipe->getParent() == MiddleVPBB || + UserRecipe->getParent() == Plan->getScalarPreheader()) && + "U must be either in the loop region, the middle block or the " + "scalar preheader."); continue; } Worklist.insert(UserRecipe); @@ -9440,8 +9440,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); // If tail is folded by masking, introduce selects between the phi - // and the live-out instruction of each reduction, at the beginning of the - // dedicated latch block. + // and the users outside the vector region of each reduction, at the + // beginning of the dedicated latch block. auto *OrigExitingVPV = PhiR->getBackedgeValue(); auto *NewExitingVPV = PhiR->getBackedgeValue(); if (!PhiR->isInLoop() && CM.foldTailByMasking()) { @@ -9513,17 +9513,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( }); FinalReductionResult->insertBefore(*MiddleVPBB, IP); - // Order is strict: if there are multiple successors, the first is the exit - // block, second is the scalar preheader. 
- VPBasicBlock *ScalarPHVPBB = - cast(MiddleVPBB->getSuccessors().back()); - VPBuilder ScalarPHBuilder(ScalarPHVPBB); - auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( - VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()}, - {}, "bc.merge.rdx"); - auto *RedPhi = cast(PhiR->getUnderlyingInstr()); - Plan->addLiveOut(RedPhi, ResumePhiRecipe); - // Adjust AnyOf reductions; replace the reduction phi for the selected value // with a boolean reduction phi node to check if the condition is true in // any iteration. The final value is selected by the final diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 0484543d2d039..ccfe8ac5830e1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -455,11 +455,16 @@ void VPIRBasicBlock::execute(VPTransformState *State) { "VPIRBasicBlock can have at most two successors at the moment!"); State->Builder.SetInsertPoint(IRBB->getTerminator()); executeRecipes(State, IRBB); - if (getSingleSuccessor()) { - assert(isa(IRBB->getTerminator())); + // Create a branch instruction to terminate IRBB if one was not created yet + // and is needed. + if (getSingleSuccessor() && isa(IRBB->getTerminator())) { auto *Br = State->Builder.CreateBr(IRBB); Br->setOperand(0, nullptr); IRBB->getTerminator()->eraseFromParent(); + } else { + assert( + (getNumSuccessors() == 0 || isa(IRBB->getTerminator())) && + "other blocks must be terminated by a branch"); } for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { @@ -474,7 +479,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) { // backedges. A backward successor is set when the branch is created. const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); unsigned idx = PredVPSuccessors.front() == this ? 
0 : 1; - assert(!TermBr->getSuccessor(idx) && + assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) && "Trying to reset an existing successor block."); TermBr->setSuccessor(idx, IRBB); State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}}); @@ -843,10 +848,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, #endif VPlan::~VPlan() { - for (auto &KV : LiveOuts) - delete KV.second; - LiveOuts.clear(); - if (Entry) { VPValue DummyValue; for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) @@ -878,7 +879,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, VPIRBasicBlock *Entry = VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader()); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); - auto Plan = std::make_unique(Entry, VecPreheader); + VPIRBasicBlock *ScalarHeader = + VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader()); + auto Plan = std::make_unique(Entry, VecPreheader, ScalarHeader); // Create SCEV and VPValue for the trip count. @@ -909,6 +912,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); + VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader); if (!RequiresScalarEpilogueCheck) { VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); return Plan; @@ -1032,19 +1036,8 @@ void VPlan::execute(VPTransformState *State) { BasicBlock *MiddleBB = State->CFG.ExitBB; VPBasicBlock *MiddleVPBB = cast(getVectorLoopRegion()->getSingleSuccessor()); - // Find the VPBB for the scalar preheader, relying on the current structure - // when creating the middle block and its successrs: if there's a single - // predecessor, it must be the scalar preheader. Otherwise, the second - // successor is the scalar preheader. 
BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); - auto &MiddleSuccs = MiddleVPBB->getSuccessors(); - assert((MiddleSuccs.size() == 1 || MiddleSuccs.size() == 2) && - "middle block has unexpected successors"); - VPBasicBlock *ScalarPhVPBB = cast( - MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]); - assert(!isa(ScalarPhVPBB) && - "scalar preheader cannot be wrapped already"); - replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh); + replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh); replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); // Disconnect the middle block from its single successor (the scalar loop @@ -1054,6 +1047,9 @@ void VPlan::execute(VPTransformState *State) { BrInst->insertBefore(MiddleBB->getTerminator()); MiddleBB->getTerminator()->eraseFromParent(); State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}}); + // Disconnect scalar preheader and scalar header, as the dominator tree edge will be updated as part of VPlan execution. This allows keeping the DTU logic generic during VPlan execution. + State->CFG.DTU.applyUpdates( + {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}}); // Generate code in the loop pre-header and body. for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) @@ -1172,12 +1168,6 @@ void VPlan::print(raw_ostream &O) const { Block->print(O, "", SlotTracker); } - if (!LiveOuts.empty()) - O << "\n"; - for (const auto &KV : LiveOuts) { - KV.second->print(O, SlotTracker); - } - O << "}\n"; } @@ -1214,11 +1204,6 @@ LLVM_DUMP_METHOD void VPlan::dump() const { print(dbgs()); } #endif -void VPlan::addLiveOut(PHINode *PN, VPValue *V) { - assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists"); - LiveOuts.insert({PN, new VPLiveOut(PN, V)}); -} - static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap &Old2NewVPValues) { // Update the operands of all cloned recipes starting at NewEntry. 
This @@ -1262,8 +1247,15 @@ VPlan *VPlan::duplicate() { VPBasicBlock *NewPreheader = Preheader->clone(); const auto &[NewEntry, __] = cloneFrom(Entry); + BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock(); + VPIRBasicBlock *NewScalarHeader = cast(*find_if( + vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) { + auto *VPIRBB = dyn_cast(VPB); + return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB; + })); // Create VPlan, clone live-ins and remap operands in the cloned blocks. - auto *NewPlan = new VPlan(NewPreheader, cast(NewEntry)); + auto *NewPlan = + new VPlan(NewPreheader, cast(NewEntry), NewScalarHeader); DenseMap Old2NewVPValues; for (VPValue *OldLiveIn : VPLiveInsToFree) { Old2NewVPValues[OldLiveIn] = @@ -1286,10 +1278,6 @@ VPlan *VPlan::duplicate() { remapOperands(Preheader, NewPreheader, Old2NewVPValues); remapOperands(Entry, NewEntry, Old2NewVPValues); - // Clone live-outs. - for (const auto &[_, LO] : LiveOuts) - NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]); - // Initialize remaining fields of cloned VPlan. NewPlan->VFs = VFs; NewPlan->UFs = UFs; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0e0c64f6df9cb..4e5878cae2ddc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -675,48 +675,6 @@ class VPBlockBase { virtual VPBlockBase *clone() = 0; }; -/// A value that is used outside the VPlan. The operand of the user needs to be -/// added to the associated phi node. The incoming block from VPlan is -/// determined by where the VPValue is defined: if it is defined by a recipe -/// outside a region, its parent block is used, otherwise the middle block is -/// used. 
-class VPLiveOut : public VPUser { - PHINode *Phi; - -public: - VPLiveOut(PHINode *Phi, VPValue *Op) - : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {} - - static inline bool classof(const VPUser *U) { - return U->getVPUserID() == VPUser::VPUserID::LiveOut; - } - - /// Fix the wrapped phi node. This means adding an incoming value to exit - /// block phi's from the vector loop via middle block (values from scalar loop - /// already reach these phi's), and updating the value to scalar header phi's - /// from the scalar preheader. - void fixPhi(VPlan &Plan, VPTransformState &State); - - /// Returns true if the VPLiveOut uses scalars of operand \p Op. - bool usesScalars(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } - - PHINode *getPhi() const { return Phi; } - - /// Live-outs are marked as only using the first part during the transition - /// to unrolling directly on VPlan. - /// TODO: Remove after unroller transition. - bool onlyFirstPartUsed(const VPValue *Op) const override { return true; } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the VPLiveOut to \p O. - void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; -#endif -}; - /// Struct to hold various analysis needed for cost computations. struct VPCostContext { const TargetTransformInfo &TTI; @@ -763,12 +721,12 @@ class VPRecipeBase : public ilist_node_with_parent, public: VPRecipeBase(const unsigned char SC, ArrayRef Operands, DebugLoc DL = {}) - : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {} + : VPDef(SC), VPUser(Operands), DL(DL) {} template VPRecipeBase(const unsigned char SC, iterator_range Operands, DebugLoc DL = {}) - : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {} + : VPDef(SC), VPUser(Operands), DL(DL) {} virtual ~VPRecipeBase() = default; /// Clone the current recipe. 
@@ -822,9 +780,7 @@ class VPRecipeBase : public ilist_node_with_parent, return true; } - static inline bool classof(const VPUser *U) { - return U->getVPUserID() == VPUser::VPUserID::Recipe; - } + static inline bool classof(const VPUser *U) { return true; } /// Returns true if the recipe may have side-effects. bool mayHaveSideEffects() const; @@ -1465,6 +1421,12 @@ class VPIRInstruction : public VPRecipeBase { "Op must be an operand of the recipe"); return true; } + + bool onlyFirstPartUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } }; /// VPWidenRecipe is a recipe for producing a widened instruction using the @@ -2801,7 +2763,8 @@ class VPPredInstPHIRecipe : public VPSingleDefRecipe { VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC) - /// Generates phi nodes for live-outs as needed to retain SSA form. + /// Generates phi nodes for live-outs (from a replicate region) as needed to + /// retain SSA form. void execute(VPTransformState &State) override; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3676,6 +3639,9 @@ class VPlan { /// rest of VPlan execution. VPBasicBlock *Preheader; + /// VPIRBasicBlock wrapping the header of the original scalar loop. + VPIRBasicBlock *ScalarHeader; + /// Holds the VFs applicable to this VPlan. SmallSetVector VFs; @@ -3711,37 +3677,38 @@ class VPlan { /// definitions are VPValues that hold a pointer to their underlying IR. SmallVector VPLiveInsToFree; - /// Values used outside the plan. It contains live-outs that need fixing. Any - /// live-out that is fixed outside VPlan needs to be removed. The remaining - /// live-outs are fixed via VPLiveOut::fixPhi. - MapVector LiveOuts; - /// Mapping from SCEVs to the VPValues representing their expansions. /// NOTE: This mapping is temporary and will be removed once all users have /// been modeled in VPlan directly. 
DenseMap SCEVToExpansion; public: - /// Construct a VPlan with original preheader \p Preheader, trip count \p TC - /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to - /// be disconnected, as the bypass blocks between them are not yet modeled in + /// Construct a VPlan with original preheader \p Preheader, trip count \p TC, + /// \p Entry to the plan and with \p ScalarHeader wrapping the original header + /// of the scalar loop. At the moment, \p Preheader and \p Entry need to be + /// disconnected, as the bypass blocks between them are not yet modeled in /// VPlan. - VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry) - : VPlan(Preheader, Entry) { + VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry, + VPIRBasicBlock *ScalarHeader) + : VPlan(Preheader, Entry, ScalarHeader) { TripCount = TC; } - /// Construct a VPlan with original preheader \p Preheader and \p Entry to - /// the plan. At the moment, \p Preheader and \p Entry need to be + /// Construct a VPlan with original preheader \p Preheader, \p Entry to + /// the plan and with \p ScalarHeader wrapping the original header of the + /// scalar loop. At the moment, \p Preheader and \p Entry need to be /// disconnected, as the bypass blocks between them are not yet modeled in /// VPlan. 
- VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry) - : Entry(Entry), Preheader(Preheader) { + VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry, + VPIRBasicBlock *ScalarHeader) + : Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) { Entry->setPlan(this); Preheader->setPlan(this); assert(Preheader->getNumSuccessors() == 0 && Preheader->getNumPredecessors() == 0 && "preheader must be disconnected"); + assert(ScalarHeader->getNumSuccessors() == 0 && + "scalar header must be a leaf node"); } ~VPlan(); @@ -3773,6 +3740,14 @@ class VPlan { VPBasicBlock *getEntry() { return Entry; } const VPBasicBlock *getEntry() const { return Entry; } + /// Return the VPIRBasicBlock wrapping the header of the scalar loop. + VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; } + + /// Return the VPBasicBlock for the preheader of the scalar loop. + VPBasicBlock *getScalarPreheader() const { + return cast(ScalarHeader->getSinglePredecessor()); + } + /// The trip count of the original loop. 
VPValue *getTripCount() const { assert(TripCount && "trip count needs to be set before accessing it"); @@ -3900,12 +3875,6 @@ class VPlan { return cast(&*EntryVPBB->begin()); } - void addLiveOut(PHINode *PN, VPValue *V); - - const MapVector &getLiveOuts() const { - return LiveOuts; - } - VPValue *getSCEVExpansion(const SCEV *S) const { return SCEVToExpansion.lookup(S); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index de7023167df89..2ecd546633825 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -214,35 +214,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { } } -void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { - VPValue *ExitValue = getOperand(0); - VPBasicBlock *MiddleVPBB = - cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); - VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe(); - auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr; - // Values leaving the vector loop reach live out phi's in the exiting block - // via middle block. - auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion() - ? 
MiddleVPBB - : ExitingVPBB; - BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; - Value *V = State.get(ExitValue, VPLane(0)); - if (Phi->getBasicBlockIndex(PredBB) != -1) - Phi->setIncomingValueForBlock(PredBB, V); - else - Phi->addIncoming(V, PredBB); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { - O << "Live-out "; - getPhi()->printAsOperand(O); - O << " = "; - getOperand(0)->printAsOperand(O, SlotTracker); - O << "\n"; -} -#endif - void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && @@ -873,7 +844,12 @@ void VPIRInstruction::execute(VPTransformState &State) { State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); Value *V = State.get(ExitValue, VPLane(Lane)); auto *Phi = cast(&I); - Phi->addIncoming(V, PredBB); + // If there is no existing block for PredBB in the phi, add a new incoming + // value. Otherwise update the existing incoming value for PredBB. + if (Phi->getBasicBlockIndex(PredBB) == -1) + Phi->addIncoming(V, PredBB); + else + Phi->setIncomingValueForBlock(PredBB, V); } // Advance the insert point after the wrapped IR instruction. This allows diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 355781f955052..622b2592f3e09 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -151,9 +151,7 @@ static bool sinkScalarOperands(VPlan &Plan) { // SinkCandidate. 
auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, SinkCandidate](VPUser *U) { - auto *UI = dyn_cast(U); - if (!UI) - return false; + auto *UI = cast(U); if (UI->getParent() == SinkTo) return true; NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate); @@ -280,8 +278,7 @@ static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { cast(&Phi1ToMove)->getOperand(0); VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) { - auto *UI = dyn_cast(&U); - return UI && UI->getParent() == Then2; + return cast(&U)->getParent() == Then2; }); // Remove phi recipes that are unused after merging the regions. @@ -376,10 +373,10 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) { SmallVector WorkList; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { - // Don't fold the exit block of the Plan into its single predecessor for - // now. + // Don't fold the blocks in the skeleton of the Plan into their single + // predecessors for now. // TODO: Remove restriction once more of the skeleton is modeled in VPlan. - if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent()) + if (!VPBB->getParent()) continue; auto *PredVPBB = dyn_cast_or_null(VPBB->getSinglePredecessor()); @@ -750,9 +747,8 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, "only recipes with a single defined value expected"); for (VPUser *User : Current->getVPSingleValue()->users()) { - if (auto *R = dyn_cast(User)) - if (!TryToPushSinkCandidate(R)) - return false; + if (!TryToPushSinkCandidate(cast(User))) + return false; } } @@ -786,16 +782,14 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, // Find the closest hoist point by looking at all users of FOR and selecting // the recipe dominating all other users. 
for (VPUser *U : FOR->users()) { - auto *R = dyn_cast(U); - if (!R) - continue; + auto *R = cast(U); if (!HoistPoint || VPDT.properlyDominates(R, HoistPoint)) HoistPoint = R; } assert(all_of(FOR->users(), [&VPDT, HoistPoint](VPUser *U) { - auto *R = dyn_cast(U); - return !R || HoistPoint == R || + auto *R = cast(U); + return HoistPoint == R || VPDT.properlyDominates(HoistPoint, R); }) && "HoistPoint must dominate all users of FOR"); @@ -922,8 +916,8 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, static SmallVector collectUsersRecursively(VPValue *V) { SetVector Users(V->user_begin(), V->user_end()); for (unsigned I = 0; I != Users.size(); ++I) { - VPRecipeBase *Cur = dyn_cast(Users[I]); - if (!Cur || isa(Cur)) + VPRecipeBase *Cur = cast(Users[I]); + if (isa(Cur)) continue; for (VPValue *V : Cur->definedValues()) Users.insert(V->user_begin(), V->user_end()); @@ -1044,9 +1038,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { R.getParent()->getPlan()->getCanonicalIV()->getScalarType()); assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A)); for (VPUser *U : A->users()) { - auto *R = dyn_cast(U); - if (!R) - continue; + auto *R = cast(U); for (VPValue *VPV : R->definedValues()) assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV)); } @@ -1455,9 +1447,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) { for (VPUser *U : collectUsersRecursively(HeaderMask)) { - auto *CurRecipe = dyn_cast(U); - if (!CurRecipe) - continue; + auto *CurRecipe = cast(U); auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * { assert(OrigMask && "Unmasked recipe when folding tail"); return HeaderMask == OrigMask ? 
nullptr : OrigMask; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 1e32865e8ee57..15dcf4dc0d91e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -449,11 +449,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) { Part++; } - // Remap the operand of live-outs to the last part. - for (const auto &[_, LO] : Plan.getLiveOuts()) { - VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1); - LO->setOperand(0, In); - } - VPlanTransforms::removeDeadRecipes(Plan); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 89b3ed72b8eb6..691b0d40823cf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -39,8 +39,8 @@ class VPRecipeBase; // This is the base class of the VPlan Def/Use graph, used for modeling the data // flow into, within and out of the VPlan. VPValues can stand for live-ins -// coming from the input IR, instructions which VPlan will generate if executed -// and live-outs which the VPlan will need to fix accordingly. +// coming from the input IR and instructions which VPlan will generate if +// executed. class VPValue { friend class VPBuilder; friend class VPDef; @@ -198,34 +198,23 @@ raw_ostream &operator<<(raw_ostream &OS, const VPValue &V); /// This class augments VPValue with operands which provide the inverse def-use /// edges from VPValue's users to their defs. class VPUser { -public: - /// Subclass identifier (for isa/dyn_cast). - enum class VPUserID { - Recipe, - LiveOut, - }; - -private: SmallVector Operands; - VPUserID ID; - protected: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the operands to \p O. 
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const; #endif - VPUser(ArrayRef Operands, VPUserID ID) : ID(ID) { + VPUser(ArrayRef Operands) { for (VPValue *Operand : Operands) addOperand(Operand); } - VPUser(std::initializer_list Operands, VPUserID ID) - : VPUser(ArrayRef(Operands), ID) {} + VPUser(std::initializer_list Operands) + : VPUser(ArrayRef(Operands)) {} - template - VPUser(iterator_range Operands, VPUserID ID) : ID(ID) { + template VPUser(iterator_range Operands) { for (VPValue *Operand : Operands) addOperand(Operand); } @@ -239,8 +228,6 @@ class VPUser { Op->removeUser(*this); } - VPUserID getVPUserID() const { return ID; } - void addOperand(VPValue *Operand) { Operands.push_back(Operand); Operand->addUser(*this); diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 7ea5ee341cc54..3b7ba61454899 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -248,14 +248,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { return false; } - VPBlockBase *MiddleBB = - IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor(); - if (IRBB != IRBB->getPlan()->getPreheader() && - IRBB->getSinglePredecessor() != MiddleBB) { - errs() << "VPIRBasicBlock can only be used as pre-header or a successor of " - "middle-block at the moment!\n"; - return false; - } return true; } @@ -420,12 +412,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) { return false; } - for (const auto &KV : Plan.getLiveOuts()) - if (KV.second->getNumOperands() != 1) { - errs() << "live outs must have a single operand\n"; - return false; - } - return true; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 36eee8d0c98ce..cd7662a657dfe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -117,8 +117,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -175,8 +175,8 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 61bd8c51e1605..1a4ed0f21bf4c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -868,8 +868,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll index 9be068ce880ea..6257d3325f979 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll @@ -45,6 +45,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %N ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -82,6 +87,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %N ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 74fd76df99259..994f2f5e37763 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -43,6 +43,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -80,6 +85,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -122,6 +132,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -159,6 +174,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -200,6 +220,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 
%indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -237,6 +262,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index e9303ec9d3eb7..afc2fd5a049ad 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -41,6 +41,11 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %cmp = icmp ne i64 %iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -78,6 +83,11 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %cmp = icmp ne i64 %iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index f4dfdacac1b32..dd2e75f1f5e21 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -24,8 +24,8 @@ define i64 
@pr97452_scalable_vf1_for(ptr %src) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 3 ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index c7bb1ffab23e7..a38835f5613fd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -96,6 +96,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] +; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] @@ -181,6 +187,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR 
%indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] +; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: LV: Loop does not require scalar epilogue @@ -303,6 +315,12 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] +; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] @@ -388,6 +406,12 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] +; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: LV: Loop does not require scalar epilogue diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 59db6c197ef8c..77a9d105c85f3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -66,9 +66,13 
@@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: ; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> -; IF-EVL-INLOOP-NEXT: No successors +; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb ; IF-EVL-INLOOP-EMPTY: -; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> +; IF-EVL-INLOOP-NEXT: ir-bb: +; IF-EVL-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; IF-EVL-INLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] +; IF-EVL-INLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n +; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-NEXT: } ; @@ -108,9 +112,13 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: ; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> -; NO-VP-OUTLOOP-NEXT: No successors +; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb ; NO-VP-OUTLOOP-EMPTY: -; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> +; NO-VP-OUTLOOP-NEXT: ir-bb: +; NO-VP-OUTLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; NO-VP-OUTLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] +; NO-VP-OUTLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n +; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-NEXT: } ; @@ -150,9 +158,13 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: ; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> -; NO-VP-INLOOP-NEXT: No successors +; NO-VP-INLOOP-NEXT: Successor(s): ir-bb ; NO-VP-INLOOP-EMPTY: -; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> +; NO-VP-INLOOP-NEXT: ir-bb: +; NO-VP-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; NO-VP-INLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] +; NO-VP-INLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n +; 
NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 8dca8302e8714..c0098eb533c00 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -390,8 +390,8 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index fe48008792ff7..bcacfb358ec05 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -45,10 +45,14 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> ; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]> -; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1 +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: 
vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -118,11 +122,15 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> ; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33> ; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]>.2 = resume-phi vp<[[RESUME_3]]>.2, ir<33> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]> -; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1 -; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3_P]]>.2 +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) +; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2) +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -193,10 +201,13 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: EMIT vp<[[RESUME_X:%.+]]> = resume-phi vp<[[EXT_X]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_Y:%.+]]>.1 = resume-phi vp<[[EXT_Y]]>.1, ir<0> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i64 %for.x = vp<[[RESUME_X]]> -; CHECK-NEXT: Live-out i32 %for.y = vp<[[RESUME_Y]]>.1 +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: IR %for.x = phi i64 [ 
%for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) +; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK: No successors ; CHECK-NEXT: } ; entry: @@ -264,10 +275,13 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: EMIT vp<[[RESUME_X:%.+]]> = resume-phi vp<[[EXT_X]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_Y:%.+]]>.1 = resume-phi vp<[[EXT_Y]]>.1, ir<0> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i64 %for.x = vp<[[RESUME_X]]> -; CHECK-NEXT: Live-out i32 %for.y = vp<[[RESUME_Y]]>.1 +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) +; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK: No successors ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index 1e34e1d0d517d..b0ece3980cdf2 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -18,10 +18,10 @@ define i16 @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 
2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; entry: br label %loop @@ -61,10 +61,10 @@ define i16 @test_chained_first_order_recurrences_2(ptr %ptr) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body, !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; entry: br label %loop @@ -107,12 +107,12 @@ define i16 @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: 
[[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; entry: br label %loop @@ -219,12 +219,12 @@ define i16 @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; entry: br label %loop @@ -270,12 +270,12 @@ define i16 @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 -; CHECK-NEXT: 
[[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; entry: br label %loop @@ -321,12 +321,12 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; entry: br label %loop @@ -371,12 +371,12 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr % ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; 
CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; entry: br label %loop @@ -420,10 +420,10 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body, !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; entry: br label %loop @@ -488,8 +488,8 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 ; CHECK-NEXT: 
[[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 ; CHECK-NEXT: br i1 true entry: @@ -528,8 +528,8 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 ; CHECK-NEXT: br i1 true ; entry: @@ -568,8 +568,8 @@ define ptr @test_first_order_recurrences_and_pointer_induction1(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 ; CHECK-NEXT: br i1 true ; entry: @@ -611,8 +611,8 @@ define ptr @test_first_order_recurrences_and_pointer_induction2(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 ; CHECK-NEXT: br i1 true ; entry: @@ -657,8 +657,8 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp 
eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 true, label %End, label %scalar.ph ; CHECK: scalar.ph: ; CHECK-NEXT: %bc.resume.val = phi i64 [ 0, %middle.block ], [ 0, %Entry ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index b05980bef1b38..8ae538cf63986 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -82,9 +82,13 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -165,9 +169,13 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> +; CHECK-NEXT: ir-bb: +; 
CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -233,10 +241,14 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> -; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ] +; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -340,9 +352,13 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -433,9 +449,13 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; 
CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %C = icmp sgt i32 %iv.next, %recur.next +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -515,9 +535,13 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1_P]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK: IR %ec = icmp ugt i64 %iv, 3 +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index c4e3e0b8c5a36..a90594085d3cf 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -907,8 +907,8 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 
[[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: @@ -1000,8 +1000,8 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: @@ -1358,8 +1358,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1427,8 +1427,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: 
[[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1555,8 +1555,8 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.cond.cleanup: ; UNROLL-NO-IC-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] @@ -1615,8 +1615,8 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; 
UNROLL-NO-VF: for.cond.cleanup: ; UNROLL-NO-VF-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] @@ -1684,8 +1684,8 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.cond.cleanup: ; SINK-AFTER-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] @@ -3437,8 +3437,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -3532,8 +3532,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; 
SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index b061cf526b780..8bdba25b1b761 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -5563,8 +5563,8 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] @@ -5625,8 +5625,8 @@ define i64 @trunc_with_first_order_recurrence() { ; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i64 1 ; IND-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ] -; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY]] ] +; 
IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ] +; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: exit: ; IND-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ poison, [[MIDDLE_BLOCK]] ] @@ -5703,8 +5703,8 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD7]], i64 1 ; UNROLL-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ] -; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY]] ] +; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: exit: ; UNROLL-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ poison, [[MIDDLE_BLOCK]] ] @@ -5783,8 +5783,8 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], 
[[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] @@ -5861,8 +5861,8 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD7]], i64 3 ; INTERLEAVE-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ] -; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY]] ] +; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ] +; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ poison, [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index b7f8ddbfa5d7c..a71666d8c3167 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -116,6 +116,12 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: scalar.ph: +; DBG-NEXT: Successor(s): ir-bb +; DBG-EMPTY: +; DBG-NEXT: ir-bb: +; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] +; DBG-NEXT: IR %d = phi i1 [ false, %entry ], [ %d.next, %loop.latch ] +; DBG-NEXT: IR %d.next = xor i1 %d, true ; DBG-NEXT: No successors ; DBG-NEXT: } @@ -217,9 +223,13 @@ exit: ; DBG-EMPTY: ; DBG-NEXT: scalar.ph: ; DBG-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; DBG-NEXT: No successors +; DBG-NEXT: Successor(s): ir-bb ; DBG-EMPTY: -; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_P]]> +; DBG-NEXT: ir-bb: +; DBG-NEXT: IR %iv = 
phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]>) +; DBG: IR %ec = icmp slt i32 %iv.next.trunc, %n +; DBG-NEXT: No successors ; DBG-NEXT: } define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) { diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 6fd5d979724fc..fb174870ed95b 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -23,14 +23,14 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF1: %[[LOAD]] = load , ptr ; CHECK-VF4UF1: %[[SPLICE:.*]] = call @llvm.vector.splice.nxv4i32( %[[VEC_RECUR]], %[[LOAD]], i32 -1) ; CHECK-VF4UF1: middle.block: -; CHECK-VF4UF1: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4 -; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL3]], 1 -; CHECK-VF4UF1: %[[VEC_RECUR_EXT:.*]] = extractelement %[[LOAD]], i32 %[[SUB3]] ; CHECK-VF4UF1: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32() ; CHECK-VF4UF1: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4 ; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2 ; CHECK-VF4UF1: %[[VEC_RECUR_FOR_PHI:.*]] = extractelement %[[LOAD]], i32 %[[SUB3]] +; CHECK-VF4UF1: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32() +; CHECK-VF4UF1: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4 +; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL3]], 1 +; CHECK-VF4UF1: %[[VEC_RECUR_EXT:.*]] = extractelement %[[LOAD]], i32 %[[SUB3]] entry: br label %for.preheader @@ -207,14 +207,14 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[ADD2:.*]], %vector.body ] ; CHECK-VF4UF2: %[[ADD1:.*]] = add %{{.*}}, %[[SPLAT1]] ; CHECK-VF4UF2: middle.block -; 
CHECK-VF4UF2: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4 -; CHECK-VF4UF2: %[[SUB2:.*]] = sub i32 %[[MUL3]], 1 -; CHECK-VF4UF2: %vector.recur.extract = extractelement %[[ADD2]], i32 %[[SUB2]] ; CHECK-VF4UF2: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32() ; CHECK-VF4UF2: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4 ; CHECK-VF4UF2: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2 ; CHECK-VF4UF2: %vector.recur.extract.for.phi = extractelement %[[ADD2]], i32 %[[SUB3]] +; CHECK-VF4UF2: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32() +; CHECK-VF4UF2: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4 +; CHECK-VF4UF2: %[[SUB2:.*]] = sub i32 %[[MUL3]], 1 +; CHECK-VF4UF2: %vector.recur.extract = extractelement %[[ADD2]], i32 %[[SUB2]] entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index cab784b61c544..1f815899ed55c 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -35,6 +35,11 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next.p, %loop.latch ] +; CHECK: IR %iv.next = add i64 %iv, 1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index a1e28999a4002..53f5a5658fb68 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -91,6 +91,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; 
CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ] +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 81c56f7590079..c9612ced3eee0 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -46,6 +46,12 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ] +; CHECK: IR %cmp = icmp eq i64 %iv.next, 0 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -93,6 +99,12 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ] +; CHECK: IR %cmp = icmp eq i64 %iv.next, 0 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 8c7a4e57f9d35..50d406d0c0416 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -51,6 +51,13 @@ define void @foo(i64 %n) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %outer.iv = phi i64 [ 0, %entry 
], [ %outer.iv.next, %outer.latch ] +; CHECK-NEXT: IR %gep.1 = getelementptr inbounds [8 x i64], ptr @arr2, i64 0, i64 %outer.iv +; CHECK-NEXT: IR store i64 %outer.iv, ptr %gep.1, align 4 +; CHECK-NEXT: IR %add = add nsw i64 %outer.iv, %n ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 2247295295663..6bb20a301e0ad 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -42,6 +42,11 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -104,6 +109,11 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -166,9 +176,12 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -225,9 +238,13 @@ define void 
@print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] +; CHECK-NEXT: IR %red = phi float [ %red.next, %for.body ], [ 0.000000e+00, %entry ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -306,6 +323,11 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] +; CHECK-NEXT: IR %cmp = icmp ult i64 %i, 5 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -378,6 +400,11 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; CHECK: IR %cmp = icmp slt i64 %iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -454,9 +481,13 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; CHECK-NEXT: IR %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] +; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, %n +; CHECK-NEXT: No successors ; 
CHECK-NEXT:} entry: @@ -538,7 +569,12 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end ] +; CHECK: IR %cmp1 = icmp slt i32 %lsd, 100 ; CHECK-NEXT: No successors ; CHECK-NEXT:} ; @@ -619,6 +655,11 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK: IR %iv.next = add i64 %iv, %inc ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -679,6 +720,11 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %ec = icmp eq i32 %iv.next, 1000 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -737,6 +783,11 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -797,6 +848,11 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ 
-877,6 +933,11 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.loop ] +; CHECK: IR %ifcond = fcmp oeq float %ld.value, 5.0 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -946,6 +1007,11 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -1038,8 +1104,8 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> ; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-from-end ir<%for.1.next>, ir<2> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -1050,9 +1116,13 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_P]]> +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]>) +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 +; CHECK-NEXT: No successors ; 
CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll index f846ba0166b2c..cdeffeff84d03 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -53,6 +53,12 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ] +; CHECK-NEXT: IR %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ] +; CHECK: IR %tmp5 = trunc i32 %tmp4 to i8 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 446b720ad1ba4..88e7aaccfe2f3 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1 +; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv +; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16 +; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1 +; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv +; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16 +; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -1157,6 +1168,13 @@ define void 
@ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: Successor(s): ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ] +; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1 +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1 +; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0 ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp index bb064b5f63b72..37c505e8b3790 100644 --- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp @@ -41,7 +41,11 @@ TEST(VPDominatorTreeTest, DominanceNoRegionsTest) { VPBlockUtils::connectBlocks(VPBB3, VPBB4); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB0); + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB); + VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -71,6 +75,8 @@ checkDomChildren(VPDominatorTree &VPDT, VPBlockBase *Src, } TEST(VPDominatorTreeTest, DominanceRegionsTest) { + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); { // 2 consecutive regions. 
// VPBB0 @@ -115,7 +121,8 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) { VPBlockUtils::connectBlocks(R1, R2); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB0); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB); VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -195,7 +202,8 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) { VPBlockUtils::connectBlocks(R1, VPBB2); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); VPDominatorTree VPDT; VPDT.recalculate(Plan); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index 01d630124a4bb..93277eed8be12 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -145,6 +145,18 @@ compound=true ] N6 [label = "scalar.ph:\l" + + "Successor(s): ir-bb\\l" + ] + N6 -> N7 [ label=""] + N7 [label = + "ir-bb\:\l" + + " IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" + + " IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" + + " IR %l1 = load i32, ptr %arr.idx, align 4\l" + + " IR %res = add i32 %l1, 10\l" + + " IR store i32 %res, ptr %arr.idx, align 4\l" + + " IR %indvars.iv.next = add i64 %indvars.iv, 1\l" + + " IR %exitcond = icmp ne i64 %indvars.iv.next, %N\l" + "No successors\l" ] } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 0f170efac207b..ae0122cd67916 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -238,6 +238,8 @@ TEST(VPInstructionTest, releaseOperandsAtDeletion) { delete VPV2; } TEST(VPBasicBlockTest, getPlan) { + LLVMContext C; + auto ScalarHeader = 
std::make_unique(BasicBlock::Create(C, "")); { VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB1 = new VPBasicBlock(); @@ -256,7 +258,8 @@ TEST(VPBasicBlockTest, getPlan) { VPBlockUtils::connectBlocks(VPBB3, VPBB4); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, VPBB2->getPlan()); @@ -276,7 +279,8 @@ TEST(VPBasicBlockTest, getPlan) { VPBlockUtils::connectBlocks(VPBB1, R1); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, R1->getPlan()); @@ -306,7 +310,8 @@ TEST(VPBasicBlockTest, getPlan) { VPBlockUtils::connectBlocks(R2, VPBB2); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, R1->getPlan()); @@ -320,6 +325,8 @@ TEST(VPBasicBlockTest, getPlan) { } TEST(VPBasicBlockTest, TraversingIteratorTest) { + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); { // VPBasicBlocks only // VPBB1 @@ -347,7 +354,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { // Use Plan to properly clean up created blocks. auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); } { @@ -447,7 +455,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { // Use Plan to properly clean up created blocks. 
auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB0); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB); } { @@ -530,7 +539,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { // Use Plan to properly clean up created blocks. auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); } { @@ -578,7 +588,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { // Use Plan to properly clean up created blocks. auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); } { @@ -670,7 +681,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { // Use Plan to properly clean up created blocks. auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); } } @@ -708,7 +720,10 @@ TEST(VPBasicBlockTest, print) { EXPECT_EQ("EMIT br , ", I3Dump); } - VPlan Plan(VPBB0, TC, VPBB1); + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB); std::string FullDump; raw_string_ostream OS(FullDump); Plan.printDOT(OS); @@ -790,7 +805,10 @@ TEST(VPBasicBlockTest, printPlanWithVFsAndUFs) { VPBB1->appendRecipe(I1); VPBB1->setName("bb1"); - VPlan Plan(VPBB0, TC, VPBB1); + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB); Plan.setName("TestPlan"); Plan.addVF(ElementCount::getFixed(4)); @@ -1250,9 +1268,10 @@ TEST(VPRecipeTest, 
MayHaveSideEffectsAndMayReadWriteMemory) { TEST(VPRecipeTest, dumpRecipeInPlan) { VPBasicBlock *VPBB0 = new VPBasicBlock("preheader"); VPBasicBlock *VPBB1 = new VPBasicBlock(); - VPlan Plan(VPBB0, VPBB1); - LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB); IntegerType *Int32 = IntegerType::get(C, 32); auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32), @@ -1319,9 +1338,10 @@ TEST(VPRecipeTest, dumpRecipeInPlan) { TEST(VPRecipeTest, dumpRecipeUnnamedVPValuesInPlan) { VPBasicBlock *VPBB0 = new VPBasicBlock("preheader"); VPBasicBlock *VPBB1 = new VPBasicBlock(); - VPlan Plan(VPBB0, VPBB1); - LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB); IntegerType *Int32 = IntegerType::get(C, 32); auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32), diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index 9958d6ea124f8..e70cd271b8d73 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -28,7 +28,11 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) { VPBasicBlock *VPBB2 = new VPBasicBlock(); VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); - VPlan Plan(VPPH, &*TC, VPBB1); + + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -60,7 +64,10 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) { 
VPBlockUtils::connectBlocks(VPBB1, R1); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -103,7 +110,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) { VPBB3->setParent(R1); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -139,7 +148,10 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) { VPBlockUtils::connectBlocks(VPBB1, R1); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -176,7 +188,10 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) { VPBB3->setParent(R1); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -205,7 +220,10 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) { VPBB1->setParent(R1); auto TC = std::make_unique(); - VPlan Plan(VPPH, &*TC, VPBB1); + LLVMContext C; + auto ScalarHeader = std::make_unique(BasicBlock::Create(C, "")); + VPIRBasicBlock *ScalarHeaderVPBB = new 
VPIRBasicBlock(*ScalarHeader); + VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr();