diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1a7b301c35f2b..7668dc26b1d80 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9220,42 +9220,11 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { "Not a pointer induction according to InductionDescriptor!"); assert(cast(getUnderlyingInstr())->getType()->isPointerTy() && "Unexpected type."); + assert(!onlyScalarsGenerated(State.VF.isScalable()) && + "Recipe should have been replaced"); auto *IVR = getParent()->getPlan()->getCanonicalIV(); PHINode *CanonicalIV = cast(State.get(IVR, 0)); - - if (onlyScalarsGenerated(State.VF.isScalable())) { - // This is the normalized GEP that starts counting at zero. - Value *PtrInd = State.Builder.CreateSExtOrTrunc( - CanonicalIV, IndDesc.getStep()->getType()); - // Determine the number of scalars we need to generate for each unroll - // iteration. If the instruction is uniform, we only need to generate the - // first lane. Otherwise, we generate all VF values. - bool IsUniform = vputils::onlyFirstLaneUsed(this); - assert((IsUniform || !State.VF.isScalable()) && - "Cannot scalarize a scalable VF"); - unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue(); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *PartStart = - createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part); - - for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - Value *Idx = State.Builder.CreateAdd( - PartStart, ConstantInt::get(PtrInd->getType(), Lane)); - Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx); - - Value *Step = State.get(getOperand(1), VPIteration(Part, Lane)); - Value *SclrGep = emitTransformedIndex( - State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, - IndDesc.getKind(), IndDesc.getInductionBinOp()); - SclrGep->setName("next.gep"); - State.set(this, SclrGep, VPIteration(Part, Lane)); - } - } - return; - } - Type *PhiType = IndDesc.getStep()->getType(); // Build a pointer phi diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index e9c499c5cddf6..b5df58bf548cb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -857,11 +857,8 @@ void VPlan::execute(VPTransformState *State) { Phi = cast(State->get(R.getVPSingleValue(), 0)); } else { auto *WidenPhi = cast(&R); - // TODO: Split off the case that all users of a pointer phi are scalar - // from the VPWidenPointerInductionRecipe. - if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable())) - continue; - + assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && + "recipe only generating scalars should have been replaced"); auto *GEP = cast(State->get(WidenPhi, 0)); Phi = cast(GEP->getPointerOperand()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 2363683f1a188..70d6032d1a093 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1156,6 +1156,7 @@ class VPInstruction : public VPRecipeWithIRFlags { BranchOnCount, BranchOnCond, ComputeReductionResult, + PtrAdd, }; private: @@ -1165,11 +1166,14 @@ class VPInstruction : public VPRecipeWithIRFlags { /// An optional name that can be used for the generated IR instruction. const std::string Name; + bool generatesScalars() const; + /// Utility method serving execute(): generates a single instance of the /// modeled instruction. \returns the generated value for \p Part. /// In some cases an existing value is returned rather than a generated /// one. - Value *generateInstruction(VPTransformState &State, unsigned Part); + Value *generatePerPart(VPTransformState &State, unsigned Part); + Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); #if !defined(NDEBUG) /// Return true if the VPInstruction is a floating point math operation, i.e. @@ -2489,11 +2493,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { /// for floating point inductions. const FPMathOperator *FPBinOp; - VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, - const FPMathOperator *FPBinOp, VPValue *Start, - VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) - : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), - Kind(Kind), FPBinOp(FPBinOp) {} public: VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, @@ -2503,6 +2502,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { dyn_cast_or_null(IndDesc.getInductionBinOp()), Start, CanonicalIV, Step) {} + VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, + const FPMathOperator *FPBinOp, VPValue *Start, + VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) + : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), + Kind(Kind), FPBinOp(FPBinOp) {} + ~VPDerivedIVRecipe() override = default; VPRecipeBase *clone() override { diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index b9ffe7e5b7af7..a2bf441ac43d4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -49,6 +49,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { CachedTypes[OtherV] = ResTy; return ResTy; } + case VPInstruction::PtrAdd: + return inferScalarType(R->getOperand(0)); default: break; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ed7cac5b809cf..e089f58f12ea1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPInstruction::Not: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: + case VPInstruction::PtrAdd: return false; default: return true; @@ -273,23 +274,26 @@ VPInstruction::VPInstruction(unsigned Opcode, assert(isFPMathOp() && "this op can't take fast-math flags"); } -Value *VPInstruction::generateInstruction(VPTransformState &State, - unsigned Part) { +bool VPInstruction::generatesScalars() const { + if (Opcode == VPInstruction::FirstOrderRecurrenceSplice || + Opcode == VPInstruction::ActiveLaneMask) + return false; + + return Opcode == VPInstruction::ComputeReductionResult || + Opcode == VPInstruction::PtrAdd || vputils::onlyFirstLaneUsed(this); +} + +Value *VPInstruction::generatePerLane(VPTransformState &State, + const VPIteration &Lane) { IRBuilderBase &Builder = State.Builder; Builder.SetCurrentDebugLocation(getDebugLoc()); - if (Instruction::isBinaryOp(getOpcode())) { - bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); - if (Part != 0 && vputils::onlyFirstPartUsed(this)) - return OnlyFirstLaneUsed ? State.get(this, VPIteration(0, 0)) - : State.get(this, 0); + if (Lane.Part > 0 && vputils::onlyFirstPartUsed(this)) + return State.get(this, VPIteration(0, Lane.Lane.getKnownLane())); - Value *A = OnlyFirstLaneUsed - ? State.get(getOperand(0), VPIteration(Part, 0)) - : State.get(getOperand(0), Part); - Value *B = OnlyFirstLaneUsed - ? State.get(getOperand(1), VPIteration(Part, 0)) - : State.get(getOperand(1), Part); + if (Instruction::isBinaryOp(getOpcode())) { + Value *A = State.get(getOperand(0), Lane); + Value *B = State.get(getOperand(1), Lane); auto *Res = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); if (auto *I = dyn_cast(Res)) @@ -299,78 +303,42 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, switch (getOpcode()) { case VPInstruction::Not: { - Value *A = State.get(getOperand(0), Part); + Value *A = State.get(getOperand(0), Lane); return Builder.CreateNot(A, Name); } - case Instruction::ICmp: { - Value *A = State.get(getOperand(0), Part); - Value *B = State.get(getOperand(1), Part); - return Builder.CreateCmp(getPredicate(), A, B, Name); - } case Instruction::Select: { - Value *Cond = State.get(getOperand(0), Part); - Value *Op1 = State.get(getOperand(1), Part); - Value *Op2 = State.get(getOperand(2), Part); + Value *Cond = State.get(getOperand(0), Lane); + Value *Op1 = State.get(getOperand(1), Lane); + Value *Op2 = State.get(getOperand(2), Lane); return Builder.CreateSelect(Cond, Op1, Op2, Name); } - case VPInstruction::ActiveLaneMask: { - // Get first lane of vector induction variable. - Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); - // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); - - auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); - auto *PredTy = VectorType::get(Int1Ty, State.VF); - return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, - {PredTy, ScalarTC->getType()}, - {VIVElem0, ScalarTC}, nullptr, Name); - } - case VPInstruction::FirstOrderRecurrenceSplice: { - // Generate code to combine the previous and current values in vector v3. - // - // vector.ph: - // v_init = vector(..., ..., ..., a[-1]) - // br vector.body - // - // vector.body - // i = phi [0, vector.ph], [i+4, vector.body] - // v1 = phi [v_init, vector.ph], [v2, vector.body] - // v2 = a[i, i+1, i+2, i+3]; - // v3 = vector(v1(3), v2(0, 1, 2)) - - // For the first part, use the recurrence phi (v1), otherwise v2. - auto *V1 = State.get(getOperand(0), 0); - Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); - if (!PartMinus1->getType()->isVectorTy()) - return PartMinus1; - Value *V2 = State.get(getOperand(1), Part); - return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name); - } case VPInstruction::CalculateTripCountMinusVF: { Value *ScalarTC = State.get(getOperand(0), {0, 0}); Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); Value *Sub = Builder.CreateSub(ScalarTC, Step); - Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); + Value *Cmp = + Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); return Builder.CreateSelect(Cmp, Sub, Zero); } case VPInstruction::CanonicalIVIncrementForPart: { auto *IV = State.get(getOperand(0), VPIteration(0, 0)); - if (Part == 0) + if (Lane.Part == 0) return IV; // The canonical IV is incremented by the vectorization factor (num of SIMD // elements) times the unroll part. - Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); + Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Lane.Part); return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(), hasNoSignedWrap()); } + case VPInstruction::BranchOnCond: { - if (Part != 0) + if (Lane.Part != 0) return nullptr; - Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); + Value *Cond = State.get(getOperand(0), Lane); VPRegionBlock *ParentRegion = getParent()->getParent(); VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); @@ -388,11 +356,11 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, return CondBr; } case VPInstruction::BranchOnCount: { - if (Part != 0) + if (Lane.Part != 0) return nullptr; // First create the compare. - Value *IV = State.get(getOperand(0), VPIteration(0, 0)); - Value *TC = State.get(getOperand(1), VPIteration(0, 0)); + Value *IV = State.get(getOperand(0), Lane); + Value *TC = State.get(getOperand(1), Lane); Value *Cond = Builder.CreateICmpEQ(IV, TC); // Now create the branch. @@ -412,7 +380,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, return CondBr; } case VPInstruction::ComputeReductionResult: { - if (Part != 0) + if (Lane.Part != 0) return State.get(this, VPIteration(0, 0)); // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary @@ -489,6 +457,88 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, return ReducedPartRdx; } + case VPInstruction::PtrAdd: { + auto *P = Builder.CreatePtrAdd(State.get(getOperand(0), Lane), + State.get(getOperand(1), Lane), Name); + return P; + } + default: + llvm_unreachable("Unsupported opcode for instruction"); + } +} + +Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { + IRBuilderBase &Builder = State.Builder; + Builder.SetCurrentDebugLocation(getDebugLoc()); + + if (Instruction::isBinaryOp(getOpcode())) { + bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); + if (Part != 0 && vputils::onlyFirstPartUsed(this)) + return OnlyFirstLaneUsed ? State.get(this, VPIteration(0, 0)) + : State.get(this, 0); + + Value *A = OnlyFirstLaneUsed + ? State.get(getOperand(0), VPIteration(Part, 0)) + : State.get(getOperand(0), Part); + Value *B = OnlyFirstLaneUsed + ? State.get(getOperand(1), VPIteration(Part, 0)) + : State.get(getOperand(1), Part); + auto *Res = + Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); + if (auto *I = dyn_cast(Res)) + setFlags(I); + return Res; + } + + switch (getOpcode()) { + case VPInstruction::Not: { + Value *A = State.get(getOperand(0), Part); + return Builder.CreateNot(A, Name); + } + case Instruction::ICmp: { + Value *A = State.get(getOperand(0), Part); + Value *B = State.get(getOperand(1), Part); + return Builder.CreateCmp(getPredicate(), A, B, Name); + } + case Instruction::Select: { + Value *Cond = State.get(getOperand(0), Part); + Value *Op1 = State.get(getOperand(1), Part); + Value *Op2 = State.get(getOperand(2), Part); + return Builder.CreateSelect(Cond, Op1, Op2, Name); + } + case VPInstruction::ActiveLaneMask: { + // Get first lane of vector induction variable. + Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); + // Get the original loop tripcount. + Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); + + auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); + auto *PredTy = VectorType::get(Int1Ty, State.VF); + return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, + {PredTy, ScalarTC->getType()}, + {VIVElem0, ScalarTC}, nullptr, Name); + } + case VPInstruction::FirstOrderRecurrenceSplice: { + // Generate code to combine the previous and current values in vector v3. + // + // vector.ph: + // v_init = vector(..., ..., ..., a[-1]) + // br vector.body + // + // vector.body + // i = phi [0, vector.ph], [i+4, vector.body] + // v1 = phi [v_init, vector.ph], [v2, vector.body] + // v2 = a[i, i+1, i+2, i+3]; + // v3 = vector(v1(3), v2(0, 1, 2)) + + // For the first part, use the recurrence phi (v1), otherwise v2. + auto *V1 = State.get(getOperand(0), 0); + Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); + if (!PartMinus1->getType()->isVectorTy()) + return PartMinus1; + Value *V2 = State.get(getOperand(1), Part); + return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name); + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -514,18 +564,25 @@ void VPInstruction::execute(VPTransformState &State) { if (hasFastMathFlags()) State.Builder.setFastMathFlags(getFastMathFlags()); for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *GeneratedValue = generateInstruction(State, Part); - if (!hasResult()) + if (generatesScalars()) { + unsigned NumLanes = + vputils::onlyFirstLaneUsed(this) ? 1 : State.VF.getKnownMinValue(); + if (getOpcode() == VPInstruction::ComputeReductionResult) + NumLanes = 1; + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + Value *P = generatePerLane(State, VPIteration(Part, Lane)); + State.set(this, P, VPIteration(Part, Lane)); + } continue; - assert(GeneratedValue && "generateInstruction must produce a value"); - if (GeneratedValue->getType()->isVectorTy()) - State.set(this, GeneratedValue, Part); - else { - assert((getOpcode() == VPInstruction::ComputeReductionResult || - State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) && - "scalar value but not only first lane used"); - State.set(this, GeneratedValue, VPIteration(Part, 0)); } + + Value *GeneratedValue = generatePerPart(State, Part); + if (!hasResult()) + continue; + assert(GeneratedValue && + (State.VF.isScalar() || GeneratedValue->getType()->isVectorTy()) && + "generateInstruction must produce a vector value"); + State.set(this, GeneratedValue, Part); } } bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { @@ -537,6 +594,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { default: return false; case Instruction::ICmp: + case VPInstruction::PtrAdd: // TODO: Cover additional opcodes. return vputils::onlyFirstLaneUsed(this); case VPInstruction::ActiveLaneMask: @@ -594,6 +652,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ComputeReductionResult: O << "compute-reduction-result"; break; + case VPInstruction::PtrAdd: + O << "ptradd"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 71f5285f90236..8448c2bd3dd13 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -489,15 +489,18 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { } } -static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, +static VPValue *createScalarIVSteps(VPlan &Plan, + InductionDescriptor::InductionKind Kind, + Instruction::BinaryOps InductionOpcode, + FPMathOperator *FPBinOp, ScalarEvolution &SE, Instruction *TruncI, VPValue *StartV, VPValue *Step, VPBasicBlock::iterator IP) { VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); VPSingleDefRecipe *BaseIV = CanonicalIV; - if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) { - BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); + if (!CanonicalIV->isCanonical(Kind, StartV, Step)) { + BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step); HeaderVPBB->insert(BaseIV, IP); } @@ -526,7 +529,9 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, VecPreheader->appendRecipe(Step->getDefiningRecipe()); } - VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); + VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( + BaseIV, Step, InductionOpcode, + FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()); HeaderVPBB->insert(Steps, IP); return Steps; } @@ -537,6 +542,32 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi(); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { + // Replace wide pointer inductions which have only their scalars used by + // PtrAdd(IndStart, ScalarIVSteps (0, Step)). + if (auto *PtrIV = dyn_cast(&Phi)) { + if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF())) + continue; + + const InductionDescriptor &ID = PtrIV->getInductionDescriptor(); + VPValue *StartV = Plan.getVPValueOrAddLiveIn( + ConstantInt::get(ID.getStep()->getType(), 0)); + VPValue *StepV = PtrIV->getOperand(1); + VPRecipeBase *Steps = + createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr, SE, + nullptr, StartV, StepV, + InsertPt) + ->getDefiningRecipe(); + + auto *Recipe = + new VPInstruction(VPInstruction::PtrAdd, + {PtrIV->getStartValue(), Steps->getVPSingleValue()}, + PtrIV->getDebugLoc(), "next.gep"); + + Recipe->insertAfter(Steps); + PtrIV->replaceAllUsesWith(Recipe); + continue; + } + auto *WideIV = dyn_cast(&Phi); if (!WideIV) continue; @@ -546,9 +577,10 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { continue; const InductionDescriptor &ID = WideIV->getInductionDescriptor(); - VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(), - WideIV->getStartValue(), - WideIV->getStepValue(), InsertPt); + VPValue *Steps = createScalarIVSteps( + Plan, ID.getKind(), ID.getInductionOpcode(), dyn_cast_or_null(ID.getInductionBinOp()), SE, WideIV->getTruncInst(), WideIV->getStartValue(), + WideIV->getStepValue(), InsertPt + ); // Update scalar users of IV to use Step instead. if (!HasOnlyVectorVFs) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 3bf91115debb7..c59f196e9b050 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -107,6 +107,7 @@ struct VPlanTransforms { /// provide them by building scalar steps off of the canonical scalar IV and /// update the original IV's users. This is an optional optimization to reduce /// the needs of vector extracts. + /// If all users of VPWidenPointerInductionRecipe only use its scalar values, replace it with a PtrAdd (IndStart, ScalarIVSteps (0, Step)). static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE); /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 24c59fdb47b61..00ec396107dcb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -11,76 +11,74 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 10000 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]]) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]]) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]]) +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP17]]) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP4]], i32 2 +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP18]], align 1 +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 +; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 ; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX8]], 0 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX8]], 1 -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP9]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP10]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]]) -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i32 0 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1 -; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT11]], 10000 -; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX3]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX3]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP23]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x ptr> [[TMP25]], ptr [[TMP24]], i32 1 +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne <2 x ptr> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP27]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP28]]) +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[TMP27]], i32 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP29]]) +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP23]], i32 0 +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 2 +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 10000 +; CHECK-NEXT: br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ne ptr [[PTR_IV]], null ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I_I]]) ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 24d2127ee171a..12889c2acc8e5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -146,13 +146,13 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0 ; CHECK-VF8-NEXT: store <8 x i8> , ptr [[TMP21]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 -; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 +; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 +; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -246,13 +246,13 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 ; CHECK-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 +; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -315,13 +315,13 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 ; CHECK-VF8-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 -; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 +; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 +; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -374,66 +374,65 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 32 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 16 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP15]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP18]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP16]], align 1 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 8 +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]] ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8 -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 10000, [[TMP21]] -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 10000, [[N_MOD_VF3]] -; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC4]] -; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8 +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 8 +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP24]] +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 10000, [[N_MOD_VF2]] +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX10]], 0 -; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[NEXT_GEP11]], i32 0 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP25]], align 1 -; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP23]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX7]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX7]], [[TMP26]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i64 10000, [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 10000, [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL8]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 @@ -457,57 +456,56 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] ; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 -; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] ; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 ; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 -; CHECK-VF8-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] -; CHECK-VF8-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] -; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 -; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP13]] -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP11]], align 1 +; CHECK-VF8-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-VF8-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP11]] +; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 +; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP16]] ; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP17]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: -; CHECK-VF8-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] +; CHECK-VF8-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-VF8-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]] ; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK-VF8: vec.epilog.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-VF8-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START]], i64 10000 +; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 10000 ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP16:%.*]] = add i64 [[INDEX7]], 0 -; CHECK-VF8-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP16]] -; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i32 0 -; CHECK-VF8-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP17]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], 8 -; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 10000 -; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF8-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX3]], 0 +; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP19]] +; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i32 0 +; CHECK-VF8-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP21]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 8 +; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 10000 +; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-VF8-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[LOOP:%.*]] ; CHECK-VF8: loop: -; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-VF8-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF8-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-VF8-NEXT: store i8 0, ptr [[PTR_IV]], align 1 ; CHECK-VF8-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1 ; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index cfb0f9e59ecbc..8b64d7a083662 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -23,54 +23,54 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 8, [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP11]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = mul i64 8, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP16:%.*]] = add [[DOTSPLAT]], [[TMP15]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP16]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP11]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP18:%.*]] = add [[DOTSPLAT]], [[TMP17]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP18]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP13]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector [[DOTSPLATINSERT5]], poison, zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP20:%.*]] = add [[DOTSPLAT6]], [[TMP19]] -; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul [[TMP20]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP7]] -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 -; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0 -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]] +; CHECK-NEXT: [[TMP21:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP22:%.*]] = add [[DOTSPLAT6]], [[TMP21]] +; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul [[TMP22]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP7]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2 +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 0 ; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 [[TMP31]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP29]], align 8 +; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 2 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP34]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: store zeroinitializer, ptr [[TMP35]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CMO]], 8 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]] +; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[CMO]], 8 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP37]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 899fcce5c02ab..3bab341e1c248 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -19,10 +19,12 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 8 ; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1 +; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8> +; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8> +; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]> ; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr inbounds ir<%ptr.iv.2>, ir<1> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%ptr.iv.1> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<[[PTR_IV_1]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%ptr.iv.2.next> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%ptr.iv.2> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR2]]> @@ -59,9 +61,6 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1 @@ -73,6 +72,9 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP13:%.*]] = add [[DOTSPLAT]], [[TMP12]] ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, [[TMP14]], i64 1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: store [[TMP15]], ptr [[TMP16]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 7226048c478d4..126ceac7325a4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -32,21 +32,20 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 5 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP8]] -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP9]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load , ptr [[NEXT_GEP2]], align 4 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 5 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[C]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[C]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i64 [[TMP7]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC3]]) -; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 1 +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC2]]) +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC3]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC3]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = add nsw [[TMP11]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP16:%.*]] = add nsw [[TMP13]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] @@ -148,21 +147,21 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX4]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP10]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP7]], i64 [[TMP10]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = shl nsw [[WIDE_LOAD7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = shl nsw [[WIDE_LOAD5]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP15]] -; CHECK-NEXT: store [[TMP12]], ptr [[NEXT_GEP5]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP15]] +; CHECK-NEXT: store [[TMP12]], ptr [[TMP8]], align 4 ; CHECK-NEXT: store [[TMP13]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -246,12 +245,12 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP9]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[TMP10]], i64 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 ; CHECK-NEXT: [[TMP13]] = add [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: store [[TMP10]], ptr [[NEXT_GEP]], align 8 +; CHECK-NEXT: store [[TMP10]], ptr [[TMP11]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index e9541c1ee035f..6516b05ab4ede 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -639,87 +639,84 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512: vector.ph: ; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16 ; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] -; AVX512-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 4 -; AVX512-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP13]] -; AVX512-NEXT: [[TMP14:%.*]] = mul i64 [[N_VEC]], 64 -; AVX512-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP14]] +; AVX512-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 64 +; AVX512-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP13]] ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[DEST]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 -; AVX512-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; AVX512-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP16]] -; AVX512-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <16 x i64> -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP19]], align 4, !alias.scope !8 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP17]], i32 4, <16 x i1> ), !alias.scope !11, !noalias !13 -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[NEXT_GEP]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP20]], align 4, !alias.scope !15 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP17]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP21]], i32 4, <16 x i1> ), !alias.scope !11, !noalias !13 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <16 x i64> +; AVX512-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; AVX512-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP15]] +; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP18]], align 4, !alias.scope [[META8:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP14]], i32 4, <16 x i1> ), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] +; AVX512-NEXT: [[TMP19:%.*]] = getelementptr float, ptr [[TMP16]], i32 0 +; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP19]], align 4, !alias.scope [[META15:![0-9]+]] +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP14]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> ), !alias.scope [[META11]], !noalias [[META13]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 1024 -; AVX512-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; AVX512-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; AVX512-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; AVX512-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; AVX512: vec.epilog.iter.check: -; AVX512-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 64 -; AVX512-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP23]] -; AVX512-NEXT: [[TMP24:%.*]] = mul i64 [[N_VEC]], 4 -; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP24]] +; AVX512-NEXT: [[TMP22:%.*]] = mul i64 [[N_VEC]], 64 +; AVX512-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP22]] +; AVX512-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 4 +; AVX512-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP23]] ; AVX512-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; AVX512: vec.epilog.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; AVX512-NEXT: [[BC_RESUME_VAL10:%.*]] = phi ptr [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; AVX512-NEXT: [[N_MOD_VF11:%.*]] = urem i64 [[TMP3]], 8 -; AVX512-NEXT: [[N_VEC12:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF11]] -; AVX512-NEXT: [[TMP25:%.*]] = mul i64 [[N_VEC12]], 4 -; AVX512-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP25]] -; AVX512-NEXT: [[TMP26:%.*]] = mul i64 [[N_VEC12]], 64 -; AVX512-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP26]] +; AVX512-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[TMP3]], 8 +; AVX512-NEXT: [[N_VEC10:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF9]] +; AVX512-NEXT: [[TMP24:%.*]] = mul i64 [[N_VEC10]], 4 +; AVX512-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP24]] +; AVX512-NEXT: [[TMP25:%.*]] = mul i64 [[N_VEC10]], 64 +; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP25]] ; AVX512-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; AVX512: vec.epilog.vector.body: -; AVX512-NEXT: [[POINTER_PHI22:%.*]] = phi ptr [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; AVX512-NEXT: [[INDEX20:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; AVX512-NEXT: [[TMP27:%.*]] = add i64 [[INDEX20]], 0 -; AVX512-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4 -; AVX512-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP28]] -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI22]], <8 x i64> -; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP21]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP30]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope !17 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP29]], i32 4, <8 x i1> ), !alias.scope !20, !noalias !22 -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr float, ptr [[NEXT_GEP21]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD25:%.*]] = load <8 x float>, ptr [[TMP32]], align 4, !alias.scope !24 -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP29]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD25]], <8 x ptr> [[TMP33]], i32 4, <8 x i1> ), !alias.scope !20, !noalias !22 -; AVX512-NEXT: [[INDEX_NEXT26]] = add nuw i64 [[INDEX20]], 8 -; AVX512-NEXT: [[PTR_IND23]] = getelementptr i8, ptr [[POINTER_PHI22]], i64 512 -; AVX512-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT26]], [[N_VEC12]] -; AVX512-NEXT: br i1 [[TMP34]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; AVX512-NEXT: [[POINTER_PHI19:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; AVX512-NEXT: [[INDEX18:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[POINTER_PHI19]], <8 x i64> +; AVX512-NEXT: [[OFFSET_IDX21:%.*]] = mul i64 [[INDEX18]], 4 +; AVX512-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX21]], 0 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP27]] +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0 +; AVX512-NEXT: [[WIDE_LOAD22:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD22]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> ), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] +; AVX512-NEXT: [[TMP31:%.*]] = getelementptr float, ptr [[TMP28]], i32 0 +; AVX512-NEXT: [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] +; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> ), !alias.scope [[META20]], !noalias [[META22]] +; AVX512-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8 +; AVX512-NEXT: [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512 +; AVX512-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]] +; AVX512-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; AVX512: vec.epilog.middle.block: -; AVX512-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC12]] -; AVX512-NEXT: br i1 [[CMP_N19]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] +; AVX512-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]] +; AVX512-NEXT: br i1 [[CMP_N17]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL15:%.*]] = phi ptr [ [[IND_END13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END14]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[ITER_CHECK]] ] -; AVX512-NEXT: [[BC_RESUME_VAL18:%.*]] = phi ptr [ [[IND_END16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ], [ [[DEST]], [[ITER_CHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL13:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[ITER_CHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL16:%.*]] = phi ptr [ [[IND_END14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ], [ [[DEST]], [[ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: -; AVX512-NEXT: [[PTR_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL15]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; AVX512-NEXT: [[DEST_ADDR_011:%.*]] = phi ptr [ [[BC_RESUME_VAL18]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ] +; AVX512-NEXT: [[PTR_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL13]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] +; AVX512-NEXT: [[DEST_ADDR_011:%.*]] = phi ptr [ [[BC_RESUME_VAL16]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ] ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; AVX512-NEXT: store float [[TMP35]], ptr [[DEST_ADDR_011]], align 4 -; AVX512-NEXT: [[TMP36:%.*]] = load float, ptr [[PTR_ADDR_012]], align 4 +; AVX512-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; AVX512-NEXT: store float [[TMP34]], ptr [[DEST_ADDR_011]], align 4 +; AVX512-NEXT: [[TMP35:%.*]] = load float, ptr [[PTR_ADDR_012]], align 4 ; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 1 -; AVX512-NEXT: store float [[TMP36]], ptr [[ARRAYIDX5]], align 4 +; AVX512-NEXT: store float [[TMP35]], ptr [[ARRAYIDX5]], align 4 ; AVX512-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1 ; AVX512-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16 ; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]] @@ -774,30 +771,29 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: br label [[VECTOR_BODY:%.*]] ; FVW2: vector.body: ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FVW2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 -; FVW2-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; FVW2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP16]] -; FVW2-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0 -; FVW2-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 64 -; FVW2-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]] -; FVW2-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 1 -; FVW2-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 64 -; FVW2-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP20]] -; FVW2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP]], i64 [[IDXPROM]] +; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; FVW2-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0 +; FVW2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP15]] +; FVW2-NEXT: [[OFFSET_IDX9:%.*]] = mul i64 [[INDEX]], 64 +; FVW2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX9]], 0 +; FVW2-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX9]], 64 +; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP17]] +; FVW2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]] +; FVW2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] ; FVW2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 0 -; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP22]], align 4, !alias.scope !8 +; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP22]], align 4, !alias.scope [[META8:![0-9]+]] ; FVW2-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 -; FVW2-NEXT: store float [[TMP23]], ptr [[NEXT_GEP9]], align 4, !alias.scope !11, !noalias !13 +; FVW2-NEXT: store float [[TMP23]], ptr [[TMP19]], align 4, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] ; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 -; FVW2-NEXT: store float [[TMP24]], ptr [[NEXT_GEP10]], align 4, !alias.scope !11, !noalias !13 -; FVW2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[NEXT_GEP]], i32 0 -; FVW2-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x float>, ptr [[TMP25]], align 4, !alias.scope !15 -; FVW2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP9]], i64 1 -; FVW2-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP10]], i64 1 -; FVW2-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[WIDE_LOAD11]], i32 0 -; FVW2-NEXT: store float [[TMP28]], ptr [[TMP26]], align 4, !alias.scope !11, !noalias !13 -; FVW2-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD11]], i32 1 -; FVW2-NEXT: store float [[TMP29]], ptr [[TMP27]], align 4, !alias.scope !11, !noalias !13 +; FVW2-NEXT: store float [[TMP24]], ptr [[TMP20]], align 4, !alias.scope [[META11]], !noalias [[META13]] +; FVW2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP16]], i32 0 +; FVW2-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, ptr [[TMP25]], align 4, !alias.scope [[META15:![0-9]+]] +; FVW2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 1 +; FVW2-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 1 +; FVW2-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[WIDE_LOAD10]], i32 0 +; FVW2-NEXT: store float [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META11]], !noalias [[META13]] +; FVW2-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD10]], i32 1 +; FVW2-NEXT: store float [[TMP29]], ptr [[TMP27]], align 4, !alias.scope [[META11]], !noalias [[META13]] ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; FVW2-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FVW2-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll index 022912f3b8552..1b0118e137e73 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll @@ -25,15 +25,14 @@ define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> ; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 @@ -53,7 +52,7 @@ define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) { ; CHECK-NEXT: store ptr null, ptr [[IV]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds [[PAIR]], ptr [[IV]], i64 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index be83329d30fef..317f2f8973214 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -273,64 +273,58 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT13]], +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE20:%.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[OFFSET_IDX8:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT13]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT14]], ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16 -; CHECK-NEXT: store i32 [[TMP7]], ptr [[NEXT_GEP]], align 16 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX8]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[NEXT_GEP]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] -; CHECK: pred.store.if14: -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = or disjoint i64 [[TMP9]], 4 -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[TMP11]], 4 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16 -; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP5]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] -; CHECK: pred.store.continue15: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] -; CHECK: pred.store.if16: -; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = or disjoint i64 [[TMP15]], 8 -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = or disjoint i64 [[TMP17]], 8 -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[NEXT_GEP10]], align 16 -; CHECK-NEXT: store i32 [[TMP19]], ptr [[NEXT_GEP6]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]] -; CHECK: pred.store.continue17: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 -; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]] -; CHECK: pred.store.if18: -; CHECK-NEXT: [[TMP21:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP22:%.*]] = or disjoint i64 [[TMP21]], 12 -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = or disjoint i64 [[TMP23]], 12 -; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[NEXT_GEP11]], align 16 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[NEXT_GEP7]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]] -; CHECK: pred.store.continue19: +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]] +; CHECK: pred.store.if15: +; CHECK-NEXT: [[TMP7:%.*]] = or disjoint i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or disjoint i64 [[OFFSET_IDX8]], 4 +; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[NEXT_GEP10]], align 16 +; CHECK-NEXT: store i32 [[TMP9]], ptr [[NEXT_GEP5]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.continue16: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]] +; CHECK: pred.store.if17: +; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[OFFSET_IDX8]], 8 +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP11]], align 16 +; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP6]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] +; CHECK: pred.store.continue18: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20]] +; CHECK: pred.store.if19: +; CHECK-NEXT: [[TMP15:%.*]] = or disjoint i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = or disjoint i64 [[OFFSET_IDX8]], 12 +; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP12]], align 16 +; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP7]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] +; CHECK: pred.store.continue20: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -410,24 +404,24 @@ define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw <4 x i32> [[TMP3]], -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[NEXT_GEP4]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label [[TMP5:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[TMP6:%.*]] -; CHECK: 6: -; CHECK-NEXT: br i1 poison, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: 7: +; CHECK-NEXT: br label [[TMP4:%.*]] +; CHECK: 4: +; CHECK-NEXT: br i1 poison, label [[TMP5]], label [[TMP4]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: 5: ; CHECK-NEXT: ret void ; br label %1 @@ -457,7 +451,9 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[OFFSET_IDX7:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], @@ -465,71 +461,63 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[NEXT_GEP]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP5]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i32 [[TMP6]], 7 -; CHECK-NEXT: store i32 [[TMP7]], ptr [[NEXT_GEP7]], align 4 +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX7]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[NEXT_GEP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] -; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = or disjoint i64 [[TMP9]], 4 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[TMP11]], 2 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[NEXT_GEP4]], align 2 -; CHECK-NEXT: [[TMP14:%.*]] = zext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i32 [[TMP14]], 7 -; CHECK-NEXT: store i32 [[TMP15]], ptr [[NEXT_GEP8]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.continue12: -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] -; CHECK: pred.store.if13: -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = or disjoint i64 [[TMP17]], 8 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP20:%.*]] = or disjoint i64 [[TMP19]], 4 -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = load i16, ptr [[NEXT_GEP5]], align 2 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; CHECK: pred.store.if12: +; CHECK-NEXT: [[TMP7:%.*]] = or disjoint i64 [[OFFSET_IDX7]], 4 +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or disjoint i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[NEXT_GEP4]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[NEXT_GEP9]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE13]] +; CHECK: pred.store.continue13: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; CHECK: pred.store.if14: +; CHECK-NEXT: [[TMP13:%.*]] = or disjoint i64 [[OFFSET_IDX7]], 8 +; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = or disjoint i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[NEXT_GEP5]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7 +; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP10]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] +; CHECK: pred.store.continue15: +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]] +; CHECK: pred.store.if16: +; CHECK-NEXT: [[TMP19:%.*]] = or disjoint i64 [[OFFSET_IDX7]], 12 +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = or disjoint i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = load i16, ptr [[NEXT_GEP6]], align 2 ; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32 ; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7 -; CHECK-NEXT: store i32 [[TMP23]], ptr [[NEXT_GEP9]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] -; CHECK: pred.store.continue14: -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.if15: -; CHECK-NEXT: [[TMP25:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP26:%.*]] = or disjoint i64 [[TMP25]], 12 -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP26]] -; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP28:%.*]] = or disjoint i64 [[TMP27]], 6 -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP28]] -; CHECK-NEXT: [[TMP29:%.*]] = load i16, ptr [[NEXT_GEP6]], align 2 -; CHECK-NEXT: [[TMP30:%.*]] = zext i16 [[TMP29]] to i32 -; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 7 -; CHECK-NEXT: store i32 [[TMP31]], ptr [[NEXT_GEP10]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.continue16: +; CHECK-NEXT: store i32 [[TMP23]], ptr [[NEXT_GEP11]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]] +; CHECK: pred.store.continue17: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP34:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label [[TMP26:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[TMP33:%.*]] -; CHECK: 33: -; CHECK-NEXT: br i1 poison, label [[TMP34]], label [[TMP33]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: 34: +; CHECK-NEXT: br label [[TMP25:%.*]] +; CHECK: 25: +; CHECK-NEXT: br i1 poison, label [[TMP26]], label [[TMP25]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: 26: ; CHECK-NEXT: ret void ; br label %1 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index cc7c1d8a61887..1e23f02ee2b16 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -311,15 +311,12 @@ for.end: ; INTER: vector.body ; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; INTER: %[[I0:.+]] = shl i64 %index, 4 +; INTER: %[[I1:.+]] = or disjoint i64 %[[I0]], 16 +; INTER: %[[I2:.+]] = or disjoint i64 %[[I0]], 32 +; INTER: %[[I3:.+]] = or disjoint i64 %[[I0]], 48 ; INTER: %next.gep = getelementptr i8, ptr %a, i64 %[[I0]] -; INTER: %[[S1:.+]] = shl i64 %index, 4 -; INTER: %[[I1:.+]] = or disjoint i64 %[[S1]], 16 ; INTER: %next.gep2 = getelementptr i8, ptr %a, i64 %[[I1]] -; INTER: %[[S2:.+]] = shl i64 %index, 4 -; INTER: %[[I2:.+]] = or disjoint i64 %[[S2]], 32 ; INTER: %next.gep3 = getelementptr i8, ptr %a, i64 %[[I2]] -; INTER: %[[S3:.+]] = shl i64 %index, 4 -; INTER: %[[I3:.+]] = or disjoint i64 %[[S3]], 48 ; INTER: %next.gep4 = getelementptr i8, ptr %a, i64 %[[I3]] ; INTER: br i1 {{.*}}, label %middle.block, label %vector.body ; @@ -361,15 +358,12 @@ for.end: ; CHECK: vector.body ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: [[SHL1:%.+]] = shl i64 %index, 4 +; CHECK: %[[I1:.+]] = or disjoint i64 [[SHL1]], 16 +; CHECK: %[[I2:.+]] = or disjoint i64 [[SHL1]], 32 +; CHECK: %[[I3:.+]] = or disjoint i64 [[SHL1]], 48 ; CHECK: %next.gep = getelementptr i8, ptr %a, i64 [[SHL1]] -; CHECK: [[SHL2:%.+]] = shl i64 %index, 4 -; CHECK: %[[I1:.+]] = or disjoint i64 [[SHL2]], 16 ; CHECK: %next.gep2 = getelementptr i8, ptr %a, i64 %[[I1]] -; CHECK: [[SHL3:%.+]] = shl i64 %index, 4 -; CHECK: %[[I2:.+]] = or disjoint i64 [[SHL3]], 32 ; CHECK: %next.gep3 = getelementptr i8, ptr %a, i64 %[[I2]] -; CHECK: [[SHL4:%.+]] = shl i64 %index, 4 -; CHECK: %[[I3:.+]] = or disjoint i64 [[SHL4]], 48 ; CHECK: %next.gep4 = getelementptr i8, ptr %a, i64 %[[I3]] ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index b451d4b4e5462..0a37e5ea0ca00 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1537,92 +1537,85 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 6 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 7 -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 200 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP5]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP6]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP7]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP8]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP16]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP17]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP18]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP19]], align 8 +; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP40:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200 +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 800 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1000 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1200 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1400 +; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP5]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP6]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP7]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP8]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP9]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load double, ptr [[TMP8]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP9]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load double, ptr [[TMP10]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load double, ptr [[TMP11]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = insertelement <4 x double> poison, double [[TMP16]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = insertelement <4 x double> [[TMP20]], double [[TMP17]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x double> [[TMP21]], double [[TMP18]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP22]], double [[TMP19]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP12]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP13]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP14]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP15]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <4 x double> poison, double [[TMP24]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 3 -; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP20]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load double, ptr [[TMP21]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = load double, ptr [[TMP22]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP23]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = insertelement <4 x double> poison, double [[TMP32]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = insertelement <4 x double> [[TMP36]], double [[TMP33]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x double> [[TMP37]], double [[TMP34]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP39]] = insertelement <4 x double> [[TMP38]], double [[TMP35]], i32 3 -; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP31]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = shufflevector <4 x double> [[TMP31]], <4 x double> [[TMP39]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = fmul <4 x double> [[TMP40]], [[TMP31]] -; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = fmul <4 x double> [[TMP41]], [[TMP39]] -; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = fcmp une <4 x double> [[TMP42]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = fcmp une <4 x double> [[TMP43]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = zext <4 x i1> [[TMP44]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = zext <4 x i1> [[TMP45]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP48]] = add <4 x i32> [[VEC_PHI]], [[TMP46]] -; UNROLL-NO-IC-NEXT: [[TMP49]] = add <4 x i32> [[VEC_PHI9]], [[TMP47]] +; UNROLL-NO-IC-NEXT: [[TMP31]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP23]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> [[TMP31]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = fmul <4 x double> [[TMP32]], [[TMP23]] +; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = fmul <4 x double> [[TMP33]], [[TMP31]] +; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = fcmp une <4 x double> [[TMP34]], zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = fcmp une <4 x double> [[TMP35]], zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = zext <4 x i1> [[TMP36]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = zext <4 x i1> [[TMP37]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP40]] = add <4 x i32> [[VEC_PHI]], [[TMP38]] +; UNROLL-NO-IC-NEXT: [[TMP41]] = add <4 x i32> [[VEC_PHI2]], [[TMP39]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 +; UNROLL-NO-IC-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] -; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP39]], i32 3 +; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP41]], [[TMP40]] +; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP31]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.cond.cleanup: -; UNROLL-NO-IC-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[A_1_LCSSA]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP52:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP44:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP52]] = load double, ptr [[ARRAYIDX]], align 8 -; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP52]] +; UNROLL-NO-IC-NEXT: [[TMP44]] = load double, ptr [[ARRAYIDX]], align 8 +; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP44]] ; UNROLL-NO-IC-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00 ; UNROLL-NO-IC-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32 ; UNROLL-NO-IC-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]] @@ -1640,35 +1633,34 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 200 -; UNROLL-NO-VF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 200 -; UNROLL-NO-VF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]] -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP4]], align 8 -; UNROLL-NO-VF-NEXT: [[TMP7]] = load double, ptr [[TMP5]], align 8 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP6]] -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = fmul double [[TMP6]], [[TMP7]] -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = fcmp une double [[TMP8]], 0.000000e+00 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = fcmp une double [[TMP9]], 0.000000e+00 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = zext i1 [[TMP10]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = zext i1 [[TMP11]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP14]] = add i32 [[VEC_PHI]], [[TMP12]] -; UNROLL-NO-VF-NEXT: [[TMP15]] = add i32 [[VEC_PHI3]], [[TMP13]] +; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200 +; UNROLL-NO-VF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; UNROLL-NO-VF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]] +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]] +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP2]], align 8 +; UNROLL-NO-VF-NEXT: [[TMP5]] = load double, ptr [[TMP3]], align 8 +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP4]] +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = fmul double [[TMP4]], [[TMP5]] +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = fcmp une double [[TMP6]], 0.000000e+00 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = fcmp une double [[TMP7]], 0.000000e+00 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = zext i1 [[TMP9]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP12]] = add i32 [[VEC_PHI]], [[TMP10]] +; UNROLL-NO-VF-NEXT: [[TMP13]] = add i32 [[VEC_PHI2]], [[TMP11]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 +; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]] +; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] @@ -1680,10 +1672,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]] -; UNROLL-NO-VF-NEXT: [[TMP17]] = load double, ptr [[ARRAYIDX]], align 8 -; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP17]] +; UNROLL-NO-VF-NEXT: [[TMP15]] = load double, ptr [[ARRAYIDX]], align 8 +; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP15]] ; UNROLL-NO-VF-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00 ; UNROLL-NO-VF-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32 ; UNROLL-NO-VF-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]] @@ -1702,61 +1694,58 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 200 -; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 200 -; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 200 -; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 -; SINK-AFTER-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 200 -; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP8]], align 8 -; SINK-AFTER-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP9]], align 8 -; SINK-AFTER-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP10]], align 8 -; SINK-AFTER-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP11]], align 8 -; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x double> poison, double [[TMP12]], i32 0 -; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP13]], i32 1 -; SINK-AFTER-NEXT: [[TMP18:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i32 2 -; SINK-AFTER-NEXT: [[TMP19]] = insertelement <4 x double> [[TMP18]], double [[TMP15]], i32 3 -; SINK-AFTER-NEXT: [[TMP20:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP19]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP21:%.*]] = fmul <4 x double> [[TMP20]], [[TMP19]] -; SINK-AFTER-NEXT: [[TMP22:%.*]] = fcmp une <4 x double> [[TMP21]], zeroinitializer -; SINK-AFTER-NEXT: [[TMP23:%.*]] = zext <4 x i1> [[TMP22]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]] +; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200 +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400 +; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600 +; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +; SINK-AFTER-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 +; SINK-AFTER-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8 +; SINK-AFTER-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8 +; SINK-AFTER-NEXT: [[TMP12:%.*]] = insertelement <4 x double> poison, double [[TMP8]], i32 0 +; SINK-AFTER-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP12]], double [[TMP9]], i32 1 +; SINK-AFTER-NEXT: [[TMP14:%.*]] = insertelement <4 x double> [[TMP13]], double [[TMP10]], i32 2 +; SINK-AFTER-NEXT: [[TMP15]] = insertelement <4 x double> [[TMP14]], double [[TMP11]], i32 3 +; SINK-AFTER-NEXT: [[TMP16:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP15]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP17:%.*]] = fmul <4 x double> [[TMP16]], [[TMP15]] +; SINK-AFTER-NEXT: [[TMP18:%.*]] = fcmp une <4 x double> [[TMP17]], zeroinitializer +; SINK-AFTER-NEXT: [[TMP19:%.*]] = zext <4 x i1> [[TMP18]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP20]] = add <4 x i32> [[VEC_PHI]], [[TMP19]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 +; SINK-AFTER-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP19]], i32 3 +; SINK-AFTER-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP20]]) +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP15]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.cond.cleanup: -; SINK-AFTER-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i32 [[A_1_LCSSA]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP27]] = load double, ptr [[ARRAYIDX]], align 8 -; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP27]] +; SINK-AFTER-NEXT: [[TMP23]] = load double, ptr [[ARRAYIDX]], align 8 +; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP23]] ; SINK-AFTER-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00 ; SINK-AFTER-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32 ; SINK-AFTER-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll index 410947704fead..31d862a3438ae 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll @@ -32,35 +32,36 @@ define void @non_constant_scalar_expansion(i32 %0, ptr %call) { ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]] -; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] -; STRIDED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 -; STRIDED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], [[TMP1]] -; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]] -; STRIDED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2 -; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], [[TMP1]] -; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] -; STRIDED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 3 -; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], [[TMP1]] -; STRIDED-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; STRIDED-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]] -; STRIDED-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0 -; STRIDED-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], 1 -; STRIDED-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 2 -; STRIDED-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 3 -; STRIDED-NEXT: [[TMP16:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP12]] -; STRIDED-NEXT: [[TMP17:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP13]] -; STRIDED-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP14]] -; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP15]] -; STRIDED-NEXT: store ptr [[NEXT_GEP]], ptr [[TMP16]], align 4 -; STRIDED-NEXT: store ptr [[NEXT_GEP2]], ptr [[TMP17]], align 4 -; STRIDED-NEXT: store ptr [[NEXT_GEP3]], ptr [[TMP18]], align 4 -; STRIDED-NEXT: store ptr [[NEXT_GEP4]], ptr [[TMP19]], align 4 +; STRIDED-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 0 +; STRIDED-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 1 +; STRIDED-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 2 +; STRIDED-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 3 +; STRIDED-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], [[TMP1]] +; STRIDED-NEXT: [[TMP7:%.*]] = mul i64 0, [[TMP1]] +; STRIDED-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX2]], [[TMP7]] +; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP1]] +; STRIDED-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX2]], [[TMP9]] +; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 2, [[TMP1]] +; STRIDED-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX2]], [[TMP11]] +; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP1]] +; STRIDED-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX2]], [[TMP13]] +; STRIDED-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr null, i64 [[TMP8]] +; STRIDED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr null, i64 [[TMP10]] +; STRIDED-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr null, i64 [[TMP12]] +; STRIDED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] +; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP3]] +; STRIDED-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP4]] +; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP5]] +; STRIDED-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP6]] +; STRIDED-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4 +; STRIDED-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +; STRIDED-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +; STRIDED-NEXT: store ptr [[TMP18]], ptr [[TMP22]], align 4 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; STRIDED-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264 -; STRIDED-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; STRIDED-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264 +; STRIDED-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; STRIDED: middle.block: ; STRIDED-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; STRIDED: scalar.ph: @@ -68,13 +69,13 @@ define void @non_constant_scalar_expansion(i32 %0, ptr %call) { ; STRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY]] ] ; STRIDED-NEXT: br label [[FOR_COND:%.*]] ; STRIDED: for.cond: -; STRIDED-NEXT: [[TMP21:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] +; STRIDED-NEXT: [[TMP24:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] ; STRIDED-NEXT: [[P_0:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ] ; STRIDED-NEXT: [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]] -; STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP21]] +; STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP24]] ; STRIDED-NEXT: store ptr [[P_0]], ptr [[ARRAYIDX]], align 4 -; STRIDED-NEXT: [[INC]] = add i32 [[TMP21]], 1 -; STRIDED-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP21]], 0 +; STRIDED-NEXT: [[INC]] = add i32 [[TMP24]], 1 +; STRIDED-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP24]], 0 ; STRIDED-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] ; STRIDED: for.end: ; STRIDED-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 27f6f7b1cb4e6..35037968160c6 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -23,56 +23,53 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], -1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 +; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], -1 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1 +; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], -1 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1 +; CHECK-NEXT: store i8 95, ptr [[TMP15]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], -1 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1 +; CHECK-NEXT: store i8 95, ptr [[TMP18]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.continue10: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -86,8 +83,8 @@ define void @a(ptr readnone %b) { ; CHECK: for.body: ; CHECK-NEXT: [[C_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[C_05]], i64 -1 -; CHECK-NEXT: [[TMP24:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP24]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP20]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: store i8 95, ptr [[INCDEC_PTR]], align 1 @@ -145,22 +142,22 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP3]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store <4 x ptr> [[TMP4]], ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: store <4 x i8> [[TMP8]], ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 0c659a550b31e..92ca77bc841ca 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -92,6 +92,7 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 @@ -102,39 +103,35 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14 ; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP4]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP1]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP6]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP2]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.continue10: ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 8 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP7]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP3]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 9b9c3e704852a..89b3a6da16c1f 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -1113,8 +1113,10 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION ir<%start>, -1 -; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds ir<%ptr.iv>, ir<-1> +; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<%3> * ir<-1> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1> +; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%ptr.iv.next> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0> @@ -1127,7 +1129,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%ptr.iv.next> = getelementptr inbounds ir<%ptr.iv>, ir<-1> +; CHECK-NEXT: REPLICATE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1> ; CHECK-NEXT: REPLICATE store ir<95>, ir<%ptr.iv.next> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: