Skip to content

Commit 5ea8611

Browse files
committed
[VPlan] Use VPWidenIntrinsicRecipe to support binary and unary operations with EVL-vectorization
1 parent 4f7ff6b commit 5ea8611

12 files changed

+75
-150
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
877877
case VPRecipeBase::VPWidenGEPSC:
878878
case VPRecipeBase::VPWidenIntrinsicSC:
879879
case VPRecipeBase::VPWidenSC:
880-
case VPRecipeBase::VPWidenEVLSC:
881880
case VPRecipeBase::VPWidenSelectSC:
882881
case VPRecipeBase::VPBlendSC:
883882
case VPRecipeBase::VPPredInstPHISC:
@@ -1063,7 +1062,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10631062
static inline bool classof(const VPRecipeBase *R) {
10641063
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
10651064
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1066-
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
10671065
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
10681066
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
10691067
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1431,16 +1429,11 @@ class VPIRInstruction : public VPRecipeBase {
14311429
class VPWidenRecipe : public VPRecipeWithIRFlags {
14321430
unsigned Opcode;
14331431

1434-
protected:
1435-
template <typename IterT>
1436-
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1437-
iterator_range<IterT> Operands)
1438-
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1439-
14401432
public:
14411433
template <typename IterT>
14421434
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1443-
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1435+
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1436+
Opcode(I.getOpcode()) {}
14441437

14451438
~VPWidenRecipe() override = default;
14461439

@@ -1450,15 +1443,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14501443
return R;
14511444
}
14521445

1453-
static inline bool classof(const VPRecipeBase *R) {
1454-
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1455-
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1456-
}
1457-
1458-
static inline bool classof(const VPUser *U) {
1459-
auto *R = dyn_cast<VPRecipeBase>(U);
1460-
return R && classof(R);
1461-
}
1446+
VP_CLASSOF_IMPL(VPDef::VPWidenSC)
14621447

14631448
/// Produce a widened instruction using the opcode and operands of the recipe,
14641449
/// processing State.VF elements.
@@ -1477,54 +1462,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14771462
#endif
14781463
};
14791464

1480-
/// A recipe for widening operations with vector-predication intrinsics with
1481-
/// explicit vector length (EVL).
1482-
class VPWidenEVLRecipe : public VPWidenRecipe {
1483-
using VPRecipeWithIRFlags::transferFlags;
1484-
1485-
public:
1486-
template <typename IterT>
1487-
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
1488-
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1489-
addOperand(&EVL);
1490-
}
1491-
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
1492-
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1493-
transferFlags(W);
1494-
}
1495-
1496-
~VPWidenEVLRecipe() override = default;
1497-
1498-
VPWidenRecipe *clone() override final {
1499-
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1500-
return nullptr;
1501-
}
1502-
1503-
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1504-
1505-
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1506-
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1507-
1508-
/// Produce a vp-intrinsic using the opcode and operands of the recipe,
1509-
/// processing EVL elements.
1510-
void execute(VPTransformState &State) override final;
1511-
1512-
/// Returns true if the recipe only uses the first lane of operand \p Op.
1513-
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1514-
assert(is_contained(operands(), Op) &&
1515-
"Op must be an operand of the recipe");
1516-
// EVL in that recipe is always the last operand, thus any use before means
1517-
// the VPValue should be vectorized.
1518-
return getEVL() == Op;
1519-
}
1520-
1521-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522-
/// Print the recipe.
1523-
void print(raw_ostream &O, const Twine &Indent,
1524-
VPSlotTracker &SlotTracker) const override final;
1525-
#endif
1526-
};
1527-
15281465
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
15291466
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
15301467
/// Cast instruction opcode.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,9 +235,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
235235
[this](const VPRecipeBase *R) {
236236
return inferScalarType(R->getOperand(0));
237237
})
238-
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
239-
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
240-
VPWidenSelectRecipe>(
238+
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
239+
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
241240
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
242241
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
243242
return R->getResultType();

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
9999
case VPWidenLoadSC:
100100
case VPWidenPHISC:
101101
case VPWidenSC:
102-
case VPWidenEVLSC:
103102
case VPWidenSelectSC: {
104103
const Instruction *I =
105104
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -143,7 +142,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
143142
case VPWidenIntOrFpInductionSC:
144143
case VPWidenPHISC:
145144
case VPWidenSC:
146-
case VPWidenEVLSC:
147145
case VPWidenSelectSC: {
148146
const Instruction *I =
149147
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -184,7 +182,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
184182
case VPWidenPHISC:
185183
case VPWidenPointerInductionSC:
186184
case VPWidenSC:
187-
case VPWidenEVLSC:
188185
case VPWidenSelectSC: {
189186
const Instruction *I =
190187
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -1452,42 +1449,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
14521449
}
14531450
}
14541451

1455-
void VPWidenEVLRecipe::execute(VPTransformState &State) {
1456-
unsigned Opcode = getOpcode();
1457-
// TODO: Support other opcodes
1458-
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1459-
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1460-
1461-
State.setDebugLocFrom(getDebugLoc());
1462-
1463-
assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1464-
"VPWidenEVLRecipe should not be used for scalars");
1465-
1466-
VPValue *EVL = getEVL();
1467-
Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1468-
IRBuilderBase &BuilderIR = State.Builder;
1469-
VectorBuilder Builder(BuilderIR);
1470-
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1471-
1472-
SmallVector<Value *, 4> Ops;
1473-
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1474-
VPValue *VPOp = getOperand(I);
1475-
Ops.push_back(State.get(VPOp));
1476-
}
1477-
1478-
Builder.setMask(Mask).setEVL(EVLArg);
1479-
Value *VPInst =
1480-
Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1481-
// Currently vp-intrinsics only accept FMF flags.
1482-
// TODO: Enable other flags when support is added.
1483-
if (isa<FPMathOperator>(VPInst))
1484-
setFlags(cast<Instruction>(VPInst));
1485-
1486-
State.set(this, VPInst);
1487-
State.addMetadata(VPInst,
1488-
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1489-
}
1490-
14911452
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
14921453
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
14931454
VPSlotTracker &SlotTracker) const {
@@ -1497,15 +1458,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
14971458
printFlags(O);
14981459
printOperands(O, SlotTracker);
14991460
}
1500-
1501-
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1502-
VPSlotTracker &SlotTracker) const {
1503-
O << Indent << "WIDEN ";
1504-
printAsOperand(O, SlotTracker);
1505-
O << " = vp." << Instruction::getOpcodeName(getOpcode());
1506-
printFlags(O);
1507-
printOperands(O, SlotTracker);
1508-
}
15091461
#endif
15101462

15111463
void VPWidenCastRecipe::execute(VPTransformState &State) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,34 +1476,36 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
14761476
VPValue *NewMask = GetNewMask(S->getMask());
14771477
return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
14781478
})
1479-
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
1480-
unsigned Opcode = W->getOpcode();
1481-
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1482-
return nullptr;
1483-
return new VPWidenEVLRecipe(*W, EVL);
1484-
})
14851479
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
14861480
VPValue *NewMask = GetNewMask(Red->getCondOp());
14871481
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
14881482
})
1489-
.Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
1490-
[&](auto *CR) -> VPRecipeBase * {
1483+
.Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe, VPWidenRecipe>(
1484+
[&](auto *R) -> VPRecipeBase * {
14911485
Intrinsic::ID VPID = Intrinsic::not_intrinsic;
1492-
if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(CR))
1486+
if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(R)) {
14931487
VPID =
14941488
VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID());
1495-
else if (auto *CastR = dyn_cast<VPWidenCastRecipe>(CR))
1489+
} else if (auto *CastR = dyn_cast<VPWidenCastRecipe>(R)) {
14961490
VPID = VPIntrinsic::getForOpcode(CastR->getOpcode());
1491+
} else if (auto *W = dyn_cast<VPWidenRecipe>(R)) {
1492+
unsigned Opcode = W->getOpcode();
1493+
// TODO: Support other opcodes
1494+
if (!Instruction::isBinaryOp(Opcode) &&
1495+
!Instruction::isUnaryOp(Opcode))
1496+
return nullptr;
1497+
VPID = VPIntrinsic::getForOpcode(Opcode);
1498+
}
14971499
assert(VPID != Intrinsic::not_intrinsic && "Expected VP intrinsic");
14981500
assert(VPIntrinsic::getMaskParamPos(VPID) &&
14991501
VPIntrinsic::getVectorLengthParamPos(VPID) &&
15001502
"Expected VP intrinsic");
15011503

1502-
SmallVector<VPValue *> Ops(CR->operands());
1504+
SmallVector<VPValue *> Ops(R->operands());
15031505
Ops.push_back(&AllOneMask);
15041506
Ops.push_back(&EVL);
15051507
return new VPWidenIntrinsicRecipe(
1506-
VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
1508+
VPID, Ops, TypeInfo.inferScalarType(R), R->getDebugLoc());
15071509
})
15081510
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
15091511
SmallVector<VPValue *> Ops(Sel->operands());

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,6 @@ class VPDef {
344344
VPWidenStoreEVLSC,
345345
VPWidenStoreSC,
346346
VPWidenSC,
347-
VPWidenEVLSC,
348347
VPWidenSelectSC,
349348
VPBlendSC,
350349
VPHistogramSC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
143143
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
144144
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
145145
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
146-
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
147-
return VerifyEVLUse(*W,
148-
Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
149-
})
150146
.Case<VPScalarCastRecipe>(
151147
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
152148
.Case<VPInstruction>([&](const VPInstruction *I) {

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,7 +1267,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
12671267
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
12681268
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
12691269
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1270-
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1270+
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
12711271
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
12721272
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
12731273
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1362,7 +1362,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
13621362
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
13631363
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
13641364
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1365-
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1365+
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
13661366
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
13671367
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
13681368
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1457,7 +1457,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
14571457
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
14581458
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
14591459
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1460-
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1460+
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
14611461
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
14621462
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
14631463
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1552,7 +1552,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
15521552
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
15531553
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
15541554
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1555-
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1555+
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
15561556
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
15571557
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
15581558
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1700,7 +1700,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
17001700
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
17011701
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
17021702
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1703-
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
1703+
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
17041704
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
17051705
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
17061706
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1761,3 +1761,43 @@ loop:
17611761
finish.loopexit:
17621762
ret void
17631763
}
1764+
;.
1765+
; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1766+
; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1767+
; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1768+
; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
1769+
; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1770+
; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
1771+
; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1772+
; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
1773+
; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1774+
; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
1775+
; IF-EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1776+
; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
1777+
; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1778+
; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
1779+
; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1780+
; IF-EVL: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]}
1781+
; IF-EVL: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1782+
; IF-EVL: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
1783+
; IF-EVL: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
1784+
; IF-EVL: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]}
1785+
; IF-EVL: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
1786+
; IF-EVL: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]]}
1787+
; IF-EVL: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
1788+
; IF-EVL: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]]}
1789+
; IF-EVL: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
1790+
; IF-EVL: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]]}
1791+
; IF-EVL: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
1792+
; IF-EVL: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]]}
1793+
; IF-EVL: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
1794+
; IF-EVL: [[LOOP29]] = distinct !{[[LOOP29]], [[META1]]}
1795+
; IF-EVL: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
1796+
; IF-EVL: [[LOOP31]] = distinct !{[[LOOP31]], [[META1]]}
1797+
; IF-EVL: [[LOOP32]] = distinct !{[[LOOP32]], [[META1]], [[META2]]}
1798+
; IF-EVL: [[LOOP33]] = distinct !{[[LOOP33]], [[META1]]}
1799+
; IF-EVL: [[LOOP34]] = distinct !{[[LOOP34]], [[META1]], [[META2]]}
1800+
; IF-EVL: [[LOOP35]] = distinct !{[[LOOP35]], [[META1]]}
1801+
; IF-EVL: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]}
1802+
; IF-EVL: [[LOOP37]] = distinct !{[[LOOP37]], [[META1]]}
1803+
;.

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
55
; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s
66

7-
; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
7+
; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, vp<%{{.+}}>, ir<%rdx>, vp<%{{.+}}>)
88
; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %loop ]
99

1010
define i32 @add(ptr %a, i64 %n, i32 %start) {

0 commit comments

Comments
 (0)