diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 6486c6745a680..41fb9a92bc547 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -877,7 +877,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenGEPSC: case VPRecipeBase::VPWidenIntrinsicSC: case VPRecipeBase::VPWidenSC: - case VPRecipeBase::VPWidenEVLSC: case VPRecipeBase::VPWidenSelectSC: case VPRecipeBase::VPBlendSC: case VPRecipeBase::VPPredInstPHISC: @@ -1063,7 +1062,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { static inline bool classof(const VPRecipeBase *R) { return R->getVPDefID() == VPRecipeBase::VPInstructionSC || R->getVPDefID() == VPRecipeBase::VPWidenSC || - R->getVPDefID() == VPRecipeBase::VPWidenEVLSC || R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || R->getVPDefID() == VPRecipeBase::VPWidenCastSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC || @@ -1431,16 +1429,11 @@ class VPIRInstruction : public VPRecipeBase { class VPWidenRecipe : public VPRecipeWithIRFlags { unsigned Opcode; -protected: - template - VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, - iterator_range Operands) - : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {} - public: template VPWidenRecipe(Instruction &I, iterator_range Operands) - : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {} + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), + Opcode(I.getOpcode()) {} ~VPWidenRecipe() override = default; @@ -1450,15 +1443,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags { return R; } - static inline bool classof(const VPRecipeBase *R) { - return R->getVPDefID() == VPRecipeBase::VPWidenSC || - R->getVPDefID() == VPRecipeBase::VPWidenEVLSC; - } - - static inline bool classof(const VPUser *U) { - auto *R = dyn_cast(U); - return R && classof(R); - } + VP_CLASSOF_IMPL(VPDef::VPWidenSC) /// Produce a widened instruction using the opcode and operands of the recipe, /// processing State.VF elements. @@ -1477,54 +1462,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags { #endif }; -/// A recipe for widening operations with vector-predication intrinsics with -/// explicit vector length (EVL). -class VPWidenEVLRecipe : public VPWidenRecipe { - using VPRecipeWithIRFlags::transferFlags; - -public: - template - VPWidenEVLRecipe(Instruction &I, iterator_range Operands, VPValue &EVL) - : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) { - addOperand(&EVL); - } - VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL) - : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) { - transferFlags(W); - } - - ~VPWidenEVLRecipe() override = default; - - VPWidenRecipe *clone() override final { - llvm_unreachable("VPWidenEVLRecipe cannot be cloned"); - return nullptr; - } - - VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC); - - VPValue *getEVL() { return getOperand(getNumOperands() - 1); } - const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); } - - /// Produce a vp-intrinsic using the opcode and operands of the recipe, - /// processing EVL elements. - void execute(VPTransformState &State) override final; - - /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - // EVL in that recipe is always the last operand, thus any use before means - // the VPValue should be vectorized. 
- return getEVL() == Op; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override final; -#endif -}; - /// VPWidenCastRecipe is a recipe to create vector cast instructions. class VPWidenCastRecipe : public VPRecipeWithIRFlags { /// Cast instruction opcode. diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 35497a7431f76..956201d7efe24 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -235,9 +235,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { [this](const VPRecipeBase *R) { return inferScalarType(R->getOperand(0)); }) - .Case( + .Case( [this](const auto *R) { return inferScalarTypeForRecipe(R); }) .Case([](const VPWidenIntrinsicRecipe *R) { return R->getResultType(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 8be2b894acd40..5a1cd3297113a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -99,7 +99,6 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPWidenLoadSC: case VPWidenPHISC: case VPWidenSC: - case VPWidenEVLSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); @@ -143,7 +142,6 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPWidenIntOrFpInductionSC: case VPWidenPHISC: case VPWidenSC: - case VPWidenEVLSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); @@ -184,7 +182,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPWidenPHISC: case VPWidenPointerInductionSC: case VPWidenSC: - case VPWidenEVLSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); @@ -1452,42 +1449,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, } } -void VPWidenEVLRecipe::execute(VPTransformState &State) { - unsigned Opcode = getOpcode(); - // TODO: Support other opcodes - if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode)) - llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute"); - - State.setDebugLocFrom(getDebugLoc()); - - assert(State.get(getOperand(0))->getType()->isVectorTy() && - "VPWidenEVLRecipe should not be used for scalars"); - - VPValue *EVL = getEVL(); - Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true); - IRBuilderBase &BuilderIR = State.Builder; - VectorBuilder Builder(BuilderIR); - Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue()); - - SmallVector Ops; - for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) { - VPValue *VPOp = getOperand(I); - Ops.push_back(State.get(VPOp)); - } - - Builder.setMask(Mask).setEVL(EVLArg); - Value *VPInst = - Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op"); - // Currently vp-intrinsics only accept FMF flags. - // TODO: Enable other flags when support is added. 
- if (isa(VPInst)) - setFlags(cast(VPInst)); - - State.set(this, VPInst); - State.addMetadata(VPInst, - dyn_cast_or_null(getUnderlyingValue())); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -1497,15 +1458,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, printFlags(O); printOperands(O, SlotTracker); } - -void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN "; - printAsOperand(O, SlotTracker); - O << " = vp." << Instruction::getOpcodeName(getOpcode()); - printFlags(O); - printOperands(O, SlotTracker); -} #endif void VPWidenCastRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 0b809c2b34df9..4307b71d5c59d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1476,36 +1476,36 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask, VPValue *NewMask = GetNewMask(S->getMask()); return new VPWidenStoreEVLRecipe(*S, EVL, NewMask); }) - .Case([&](VPWidenRecipe *W) -> VPRecipeBase * { - unsigned Opcode = W->getOpcode(); - if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode)) - return nullptr; - return new VPWidenEVLRecipe(*W, EVL); - }) .Case([&](VPReductionRecipe *Red) { VPValue *NewMask = GetNewMask(Red->getCondOp()); return new VPReductionEVLRecipe(*Red, EVL, NewMask); }) - .Case( - [&](auto *CR) -> VPRecipeBase * { - Intrinsic::ID VPID; - if (auto *CallR = dyn_cast(CR)) { + .Case( + [&](auto *R) -> VPRecipeBase * { + Intrinsic::ID VPID = Intrinsic::not_intrinsic; + if (auto *CallR = dyn_cast(R)) { VPID = VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID()); - } else { - auto *CastR = cast(CR); + } else if (auto *CastR = dyn_cast(R)) { VPID = VPIntrinsic::getForOpcode(CastR->getOpcode()); + } else if (auto *W = dyn_cast(R)) { + unsigned Opcode = W->getOpcode(); + // TODO: Support other opcodes + if (!Instruction::isBinaryOp(Opcode) && + !Instruction::isUnaryOp(Opcode)) + return nullptr; + VPID = VPIntrinsic::getForOpcode(Opcode); } assert(VPID != Intrinsic::not_intrinsic && "Expected VP intrinsic"); assert(VPIntrinsic::getMaskParamPos(VPID) && VPIntrinsic::getVectorLengthParamPos(VPID) && "Expected VP intrinsic"); - SmallVector Ops(CR->operands()); + SmallVector Ops(R->operands()); Ops.push_back(&AllOneMask); Ops.push_back(&EVL); return new VPWidenIntrinsicRecipe( - VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc()); + VPID, Ops, TypeInfo.inferScalarType(R), R->getDebugLoc()); }) .Case([&](VPWidenSelectRecipe *Sel) { SmallVector Ops(Sel->operands()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 957a602091c73..75464f9f755ab 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -344,7 +344,6 @@ class VPDef { VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, - VPWidenEVLSC, VPWidenSelectSC, VPBlendSC, VPHistogramSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index be420a873bef5..1a91264b29d95 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -143,10 +143,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { 
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); }) .Case( [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) - .Case([&](const VPWidenEVLRecipe *W) { - return VerifyEVLUse(*W, - Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2); - }) .Case( [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) .Case([&](const VPInstruction *I) { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll index 38fa8d3d9d9c5..9ff06040f4432 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll @@ -1267,7 +1267,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fadd.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fadd.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) @@ -1362,7 +1362,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fsub.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fsub.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) @@ -1457,7 +1457,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fmul.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fmul.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void 
@llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) @@ -1552,7 +1552,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fdiv.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fdiv.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) @@ -1700,7 +1700,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fneg.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fneg.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) @@ -1761,3 +1761,43 @@ loop: finish.loopexit: ret void } +;. 
+; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]} +; IF-EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]} +; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} +; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]} +; IF-EVL: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]} +; IF-EVL: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]} +; IF-EVL: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]]} +; IF-EVL: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]]} +; IF-EVL: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]]} +; IF-EVL: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]]} +; IF-EVL: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP29]] = distinct !{[[LOOP29]], [[META1]]} +; IF-EVL: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP31]] = distinct !{[[LOOP31]], [[META1]]} +; IF-EVL: [[LOOP32]] = distinct !{[[LOOP32]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP33]] = distinct !{[[LOOP33]], [[META1]]} +; IF-EVL: [[LOOP34]] = distinct !{[[LOOP34]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP35]] = distinct !{[[LOOP35]], [[META1]]} +; IF-EVL: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP37]] = distinct !{[[LOOP37]], [[META1]]} +;. 
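A minimal sketch (not part of the patch) of the conversion the VPlanTransforms.cpp hunk above now performs: a widened unary/binary recipe is mapped to its llvm.vp.* intrinsic via VPIntrinsic::getForOpcode and rebuilt as a VPWidenIntrinsicRecipe whose operand list ends with the all-ones mask and the EVL. The free function name widenToVPIntrinsic and its parameter names are illustrative; the VPlan types and VPIntrinsic calls are the ones used in the patch.

// Sketch only, not part of this patch: how createEVLRecipe now handles a
// widened unary/binary operation. Assumes the VPlan headers from
// llvm/lib/Transforms/Vectorize (VPlan.h, VPlanAnalysis.h) are available;
// the function name and parameter names are illustrative.
static VPRecipeBase *widenToVPIntrinsic(VPWidenRecipe *W, VPValue &AllOneMask,
                                         VPValue &EVL, VPTypeAnalysis &TypeInfo) {
  unsigned Opcode = W->getOpcode();
  // Same restriction as the patch: only unary and binary opcodes for now.
  if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
    return nullptr;
  Intrinsic::ID VPID = VPIntrinsic::getForOpcode(Opcode); // e.g. llvm.vp.fadd
  assert(VPID != Intrinsic::not_intrinsic && "Expected VP intrinsic");
  // VP intrinsics take the original operands followed by the mask and EVL.
  SmallVector<VPValue *> Ops(W->operands());
  Ops.push_back(&AllOneMask);
  Ops.push_back(&EVL);
  return new VPWidenIntrinsicRecipe(VPID, Ops, TypeInfo.inferScalarType(W),
                                    W->getDebugLoc());
}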
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
index 69418d9fea00e..aeab441b25d3c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
@@ -4,7 +4,7 @@
 ; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
 ; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s
-; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
+; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, vp<%{{.+}}>, ir<%rdx>, vp<%{{.+}}>)
 ; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %loop ]
 define i32 @add(ptr %a, i64 %n, i32 %start) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
index 7557c10892d6d..8f70623947a5b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
@@ -1107,8 +1107,8 @@ define float @fadd(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call reassoc <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
-; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP19:%.*]] = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP19]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
 ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index e7eb5778ffb93..71b0d15d19de7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -46,8 +46,8 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add ir<[[LD1]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>
-; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
+; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[ADD:%.+]]> = call llvm.vp.add(ir<[[LD1]]>, ir<[[RDX_PHI]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, vp<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
 ; IF-EVL-OUTLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 706b6f8882984..42c382750fc0c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -32,10 +32,10 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ADD:%.+]]> = call llvm.vp.add(ir<[[LD2]]>, ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[ADD]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index 53c9fb0c604da..877c0f39d9278 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -39,12 +39,12 @@
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
- ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = vp.sub ir<0>, ir<[[LD2]]>, vp<[[EVL]]>
- ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>)
- ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add vp<[[SELECT]]>, ir<[[LD1]]>, vp<[[EVL]]>
+ ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SUB:%.+]]> = call llvm.vp.sub(ir<0>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
+ ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, vp<[[SUB]]>, vp<[[EVL]]>)
+ ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ADD:%.+]]> = call llvm.vp.add(vp<[[SELECT]]>, ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
- ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
+ ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[ADD]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add vp<[[IV]]>, ir<[[VFUF]]>
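The VPlan-printing tests above also show why the verifier no longer needs a dedicated VPWidenEVLRecipe case: the mask and EVL are ordinary trailing operands of a VPWidenIntrinsicRecipe, and their positions can be queried from the VP intrinsic itself. A small sketch of that query follows, mirroring the asserts in createEVLRecipe; the helper name takesMaskAndEVL is illustrative and not part of the patch.

#include "llvm/IR/IntrinsicInst.h"
#include <optional>
using namespace llvm;

// Sketch only: the properties asserted on in createEVLRecipe. For the
// intrinsics created above (llvm.vp.add, llvm.vp.fsub, ...), both queries
// return a position and the EVL is the last parameter.
static bool takesMaskAndEVL(Intrinsic::ID VPID, unsigned NumOperands) {
  std::optional<unsigned> MaskPos = VPIntrinsic::getMaskParamPos(VPID);
  std::optional<unsigned> EVLPos = VPIntrinsic::getVectorLengthParamPos(VPID);
  return MaskPos && EVLPos && *EVLPos == NumOperands - 1;
}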