diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3a313a96be7e8..e7ed6ad5d4746 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8630,11 +8630,12 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } -// Collect (ExitPhi, ExitingValue) pairs phis in the original exit block that -// are modeled in VPlan. Some exiting values are not modeled explicitly yet and -// won't be included. Those are un-truncated VPWidenIntOrFpInductionRecipe, -// VPWidenPointerInductionRecipe and induction increments. -static MapVector collectUsersInExitBlock( +// Collect VPIRInstructions for phis in the original exit block that are modeled +// in VPlan and add the exiting VPValue as operand. Some exiting values are not +// modeled explicitly yet and won't be included. Those are un-truncated +// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction +// increments. +static SetVector collectUsersInExitBlock( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { auto *MiddleVPBB = @@ -8644,13 +8645,17 @@ static MapVector collectUsersInExitBlock( // from scalar loop only. 
if (MiddleVPBB->getNumSuccessors() != 2) return {}; - MapVector ExitingValuesToFix; - BasicBlock *ExitBB = - cast(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); + SetVector ExitUsersToFix; + VPBasicBlock *ExitVPBB = cast(MiddleVPBB->getSuccessors()[0]); BasicBlock *ExitingBB = OrigLoop->getExitingBlock(); - for (PHINode &ExitPhi : ExitBB->phis()) { - Value *IncomingValue = - ExitPhi.getIncomingValueForBlock(ExitingBB); + for (VPRecipeBase &R : *ExitVPBB) { + auto *ExitIRI = dyn_cast(&R); + if (!ExitIRI) + continue; + auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); + if (!ExitPhi) + break; + Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); // Exit values for inductions are computed and updated outside of VPlan and // independent of induction recipes. @@ -8666,17 +8671,18 @@ static MapVector collectUsersInExitBlock( return P && Inductions.contains(P); }))) continue; - ExitingValuesToFix.insert({&ExitPhi, V}); + ExitUsersToFix.insert(ExitIRI); + ExitIRI->addOperand(V); } - return ExitingValuesToFix; + return ExitUsersToFix; } -// Add exit values to \p Plan. Extracts and VPLiveOuts are added for each entry -// in \p ExitingValuesToFix. +// Add exit values to \p Plan. Extracts are added for each entry in \p +// ExitUsersToFix if needed and their operands are updated. static void addUsersInExitBlock(VPlan &Plan, - MapVector &ExitingValuesToFix) { - if (ExitingValuesToFix.empty()) + const SetVector &ExitUsersToFix) { + if (ExitUsersToFix.empty()) return; auto *MiddleVPBB = @@ -8685,18 +8691,19 @@ addUsersInExitBlock(VPlan &Plan, cast(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); - // Introduce VPUsers modeling the exit values. - for (const auto &[ExitPhi, V] : ExitingValuesToFix) { + // Introduce extract for exiting values and update the VPIRInstructions + // modeling the corresponding LCSSA phis. 
+ for (VPIRInstruction *ExitIRI : ExitUsersToFix) { + VPValue *V = ExitIRI->getOperand(0); // Pass live-in values used by exit phis directly through to the live-out. - if (V->isLiveIn()) { - Plan.addLiveOut(ExitPhi, V); + if (V->isLiveIn()) continue; - } + VPValue *Ext = B.createNaryOp( VPInstruction::ExtractFromEnd, {V, Plan.getOrAddLiveIn(ConstantInt::get( IntegerType::get(ExitBB->getContext(), 32), 1))}); - Plan.addLiveOut(ExitPhi, Ext); + ExitIRI->setOperand(0, Ext); } } @@ -8709,7 +8716,7 @@ addUsersInExitBlock(VPlan &Plan, /// 2. Feed the penultimate value of recurrences to their LCSSA phi users in /// the original exit block using a VPLiveOut. static void addLiveOutsForFirstOrderRecurrences( - VPlan &Plan, MapVector &ExitingValuesToFix) { + VPlan &Plan, SetVector &ExitUsersToFix) { VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); // Start by finding out if middle block branches to scalar preheader, which is @@ -8726,14 +8733,14 @@ static void addLiveOutsForFirstOrderRecurrences( ExitBB = cast(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); ScalarPHVPBB = cast(MiddleVPBB->getSuccessors()[1]); - } else if (ExitingValuesToFix.empty()) { + } else if (ExitUsersToFix.empty()) { ScalarPHVPBB = cast(MiddleVPBB->getSingleSuccessor()); } else { ExitBB = cast(MiddleVPBB->getSingleSuccessor()) ->getIRBasicBlock(); } if (!ScalarPHVPBB) { - assert(ExitingValuesToFix.empty() && + assert(ExitUsersToFix.empty() && "missed inserting extracts for exiting values"); return; } @@ -8827,24 +8834,17 @@ static void addLiveOutsForFirstOrderRecurrences( auto *FORPhi = cast(FOR->getUnderlyingInstr()); Plan.addLiveOut(FORPhi, ResumePhiRecipe); - // Now create VPLiveOuts for users in the exit block. - // Extract the penultimate value of the recurrence and add VPLiveOut - // users of the recurrence splice. 
- - // No edge from the middle block to the unique exit block has been inserted - // and there is nothing to fix from vector loop; phis should have incoming - // from scalar loop only. - if (ExitingValuesToFix.empty()) - continue; - for (User *U : FORPhi->users()) { - auto *UI = cast(U); - if (UI->getParent() != ExitBB) + // Now update VPIRInstructions modeling LCSSA phis in the exit block: extract + // the penultimate value of the recurrence and use it as their operand. Loop + // over a snapshot, as fixed users are removed from ExitUsersToFix below. + for (VPIRInstruction *ExitIRI : to_vector(ExitUsersToFix)) { + if (ExitIRI->getOperand(0) != FOR) continue; VPValue *Ext = MiddleBuilder.createNaryOp( VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {}, "vector.recur.extract.for.phi"); - Plan.addLiveOut(cast(UI), Ext); - ExitingValuesToFix.erase(cast(UI)); + ExitIRI->setOperand(0, Ext); + ExitUsersToFix.remove(ExitIRI); } } } @@ -9006,11 +9006,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); - MapVector ExitingValuesToFix = collectUsersInExitBlock( + SetVector ExitUsersToFix = collectUsersInExitBlock( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); - - addLiveOutsForFirstOrderRecurrences(*Plan, ExitingValuesToFix); - addUsersInExitBlock(*Plan, ExitingValuesToFix); + addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix); + addUsersInExitBlock(*Plan, ExitUsersToFix); // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to @@ -10128,7 +10127,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { // directly in VPlan.
EpilogILV.setTripCount(MainILV.getTripCount()); for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) { - auto *ExpandR = cast(&R); + auto *ExpandR = dyn_cast(&R); + if (!ExpandR) + continue; auto *ExpandedVal = BestEpiPlan.getOrAddLiveIn( ExpandedSCEVs.find(ExpandR->getSCEV())->second); ExpandR->replaceAllUsesWith(ExpandedVal); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 316468651df1a..e6c7bec873a01 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -860,10 +860,18 @@ VPlan::~VPlan() { delete BackedgeTakenCount; } +static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) { + auto *VPIRBB = new VPIRBasicBlock(BB); + for (Instruction &I : + make_range(BB->begin(), BB->getTerminator()->getIterator())) + VPIRBB->appendRecipe(new VPIRInstruction(I)); + return VPIRBB; +} + VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { - VPIRBasicBlock *Entry = new VPIRBasicBlock(TheLoop->getLoopPreheader()); + VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader()); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); auto Plan = std::make_unique(Entry, VecPreheader); Plan->TripCount = @@ -895,7 +903,7 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, // we unconditionally branch to the scalar preheader. Do nothing. // 3) Otherwise, construct a runtime check. BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); - auto *VPExitBlock = new VPIRBasicBlock(IRExitBlock); + auto *VPExitBlock = createVPIRBasicBlockFor(IRExitBlock); // The connection order corresponds to the operands of the conditional branch. 
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); @@ -972,7 +980,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, /// predecessor, which is rewired to the new VPIRBasicBlock. All successors of /// VPBB, if any, are rewired to the new VPIRBasicBlock. static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { - VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB); + VPIRBasicBlock *IRMiddleVPBB = createVPIRBasicBlockFor(IRBB); for (auto &R : make_early_inc_range(*VPBB)) R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end()); VPBlockBase *PredVPBB = VPBB->getSinglePredecessor(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 64242e43c56bc..cff3b7514857c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -936,8 +936,9 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPReductionPHISC: case VPRecipeBase::VPScalarCastSC: return true; - case VPRecipeBase::VPInterleaveSC: case VPRecipeBase::VPBranchOnMaskSC: + case VPRecipeBase::VPInterleaveSC: + case VPRecipeBase::VPIRInstructionSC: case VPRecipeBase::VPWidenLoadEVLSC: case VPRecipeBase::VPWidenLoadSC: case VPRecipeBase::VPWidenStoreEVLSC: @@ -1405,6 +1406,45 @@ class VPInstruction : public VPRecipeWithIRFlags { bool isSingleScalar() const; }; +/// A recipe to wrap an original IR instruction not to be modified during +/// execution, except for PHIs. For PHIs, a single VPValue operand is allowed, +/// and it is used to add a new incoming value for the single predecessor VPBB. +/// Except for PHIs, VPIRInstructions cannot have any operands.
+class VPIRInstruction : public VPRecipeBase { + Instruction &I; + +public: + VPIRInstruction(Instruction &I) + : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef()), I(I) {} + + ~VPIRInstruction() override = default; + + VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC) + + VPIRInstruction *clone() override { + auto *R = new VPIRInstruction(I); + for (auto *Op : operands()) + R->addOperand(Op); + return R; + } + + void execute(VPTransformState &State) override; + + Instruction &getInstruction() { return I; } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + bool usesScalars(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } +}; + /// VPWidenRecipe is a recipe for producing a widened instruction using the /// opcode and operands of the recipe. This recipe covers most of the /// traditional vectorization cases where each recipe transforms into a diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 351f909ac0279..9068ccf519c55 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -867,6 +867,43 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, } #endif +void VPIRInstruction::execute(VPTransformState &State) { + assert((isa(&I) || getNumOperands() == 0) && + "Only PHINodes can have extra operands"); + if (getNumOperands() == 1) { + VPValue *ExitValue = getOperand(0); + auto Lane = vputils::isUniformAfterVectorization(ExitValue) + ? VPLane::getFirstLane() + : VPLane::getLastLaneForVF(State.VF); + auto *PredVPBB = cast(getParent()->getSinglePredecessor()); + BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; + // Set insertion point in PredBB in case an extract needs to be generated. + // TODO: Model extracts explicitly. 
+ State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); + Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); + auto *Phi = cast(&I); + Phi->addIncoming(V, PredBB); + } + + // Advance the insert point after the wrapped IR instruction. This allows + // interleaving VPIRInstructions and other recipes. + State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator())); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "IR " << I; + + if (getNumOperands() != 0) { + assert(getNumOperands() == 1 && "can have at most 1 operand"); + O << " (extra operand: "; + printOperands(O, SlotTracker); + O << ")"; + } +} +#endif + void VPWidenCallRecipe::execute(VPTransformState &State) { assert(State.VF.isVector() && "not widening"); Function *CalledScalarFn = getCalledScalarFunction(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index b8b2c0bd4d5ff..1dd8d09ff6247 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -339,6 +339,7 @@ class VPDef { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, + VPIRInstructionSC, VPInstructionSC, VPInterleaveSC, VPReductionEVLSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index dfddb5b45f623..0870671e67190 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -126,6 +126,15 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { RecipeNumbering[&R] = Cnt++; for (const VPRecipeBase &R : *VPBB) { + if (isa(&R) ^ isa(VPBB)) { + errs() << "VPIRInstructions "; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + R.dump(); + errs() << " "; +#endif + errs() << "not in a VPIRBasicBlock!\n"; + return false; + } for (const VPValue *V : 
R.definedValues()) { for (const VPUser *U : V->users()) { auto *UI = dyn_cast(U); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 38af580e25c9c..349fd13a58d29 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -58,6 +58,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: vp<%2> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -141,6 +142,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: vp<%2> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -260,6 +262,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: vp<%2> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -343,6 +346,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: vp<%2> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index f14ffe854a3a6..11405a1c91158 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll 
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -60,12 +60,11 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb: +; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) ; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: ; IF-EVL-INLOOP-NEXT: No successors -; IF-EVL-INLOOP-EMPTY: -; IF-EVL-INLOOP-NEXT: Live-out i32 %add.lcssa = vp<[[RDX_EX]]> ; IF-EVL-INLOOP-NEXT: } ; @@ -100,12 +99,11 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: ir-bb: +; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) ; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: ; NO-VP-OUTLOOP-NEXT: No successors -; NO-VP-OUTLOOP-EMPTY: -; NO-VP-OUTLOOP-NEXT: Live-out i32 %add.lcssa = vp<[[RDX_EX]]> ; NO-VP-OUTLOOP-NEXT: } ; @@ -140,12 +138,11 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: ir-bb: +; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) ; NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: ; NO-VP-INLOOP-NEXT: No successors -; NO-VP-INLOOP-EMPTY: -; NO-VP-INLOOP-NEXT: Live-out i32 %add.lcssa = vp<[[RDX_EX]]> ; NO-VP-INLOOP-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 69b851924b711..45545feffd325 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -227,6 +227,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb +; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]>) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -234,7 +235,6 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> -; CHECK-NEXT: Live-out i32 %res = vp<[[RED_EX]]> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/pr36983.ll b/llvm/test/Transforms/LoopVectorize/pr36983.ll index 20689456fa4d1..7e38d60b6f581 100644 --- a/llvm/test/Transforms/LoopVectorize/pr36983.ll +++ b/llvm/test/Transforms/LoopVectorize/pr36983.ll @@ -3,8 +3,8 @@ ; There could be more than one LCSSA PHIs in loop exit block. 
; CHECK-LABEL: bb1.bb3_crit_edge: -; CHECK: %_tmp133.lcssa1 = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi1, %middle.block ] -; CHECK: %_tmp133.lcssa = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ] +; CHECK: %_tmp133.lcssa1 = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ] +; CHECK: %_tmp133.lcssa = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi1, %middle.block ] define void @f1() { bb2.lr.ph: diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index dcc8f3f2f9d8f..008971697775e 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -14,11 +14,12 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: br i1 [[C]], label [[FOR_PREHEADER:%.*]], label [[BB6]] ; CHECK: for.preheader: ; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ] -; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[ARR1]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA2]] to i32 +; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index e4984f52ee6ff..431d14be45857 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -14,6 
+14,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %and = and i64 %N, 15 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -55,6 +56,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %and = and i64 %N, 15 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64) ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index dd2b724efaec8..26974c2307065 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -161,12 +161,11 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb +; CHECK-NEXT: IR %red.next.lcssa = phi float [ %red.next, %for.body ] (extra operand: vp<[[RED_EX]]>) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: Live-out float %red.next.lcssa = vp<[[RED_EX]]> ; CHECK-NEXT: } ; entry: @@ -444,12 +443,11 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb +; CHECK-NEXT: IR %muladd.lcssa = phi float [ %muladd, %for.body ] (extra operand: vp<[[RED_EX]]>) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: Live-out float %muladd.lcssa = vp<[[RED_EX]]> ; CHECK-NEXT:} entry: @@ -577,6 +575,8 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; 
CHECK-NEXT: IR %div = udiv i64 %y, 492802768830814060 +; CHECK-NEXT: IR %inc = add i64 %div, 1 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + ((15 + (%y /u 492802768830814060)) /u (1 + (%y /u 492802768830814060)))) ; CHECK-NEXT: EMIT vp<[[EXP_SCEV:%.+]]> = EXPAND SCEV (1 + (%y /u 492802768830814060)) ; CHECK-NEXT: No successors @@ -666,12 +666,11 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb +; CHECK-NEXT: IR %lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[EXIT]]>) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %lcssa = vp<[[EXIT]]> ; CHECK-NEXT: } ; entry: @@ -1037,6 +1036,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb +; CHECK-NEXT: IR %for.1.lcssa = phi i16 [ %for.1, %loop ] (extra operand: vp<[[FOR_RESULT]]>) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -1044,7 +1044,6 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_P]]> -; CHECK-NEXT: Live-out i16 %for.1.lcssa = vp<[[FOR_RESULT]]> ; CHECK-NEXT: } ; entry: