Skip to content

Commit 2483763

Browse files
[RISCV][CostModel] Recommit VPIntrinsics have same cost as their non-vp counterparts (#68752)
This was reverted in commit 0abaf3c (#67178). This version of the patch includes a fix for the failure behind that revert, which was caused by vp-reductions having an extra start value argument that their non-vp counterparts do not have.
1 parent 84a3aad commit 2483763

File tree

3 files changed

+646
-6
lines changed

3 files changed

+646
-6
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1691,7 +1691,69 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
16911691
}
16921692
}
16931693

1694-
// Assume that we need to scalarize this intrinsic.
1694+
// VP Intrinsics should have the same cost as their non-vp counterpart.
1695+
// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1696+
// counterpart when the vector length argument is smaller than the maximum
1697+
// vector length.
1698+
// TODO: Support other kinds of VPIntrinsics
1699+
if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
1700+
std::optional<unsigned> FOp =
1701+
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
1702+
if (FOp) {
1703+
if (ICA.getID() == Intrinsic::vp_load) {
1704+
Align Alignment;
1705+
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1706+
Alignment = VPI->getPointerAlignment().valueOrOne();
1707+
unsigned AS = 0;
1708+
if (ICA.getArgs().size() > 1)
1709+
if (auto *PtrTy =
1710+
dyn_cast<PointerType>(ICA.getArgs()[0]->getType()))
1711+
AS = PtrTy->getAddressSpace();
1712+
return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
1713+
AS, CostKind);
1714+
}
1715+
if (ICA.getID() == Intrinsic::vp_store) {
1716+
Align Alignment;
1717+
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1718+
Alignment = VPI->getPointerAlignment().valueOrOne();
1719+
unsigned AS = 0;
1720+
if (ICA.getArgs().size() >= 2)
1721+
if (auto *PtrTy =
1722+
dyn_cast<PointerType>(ICA.getArgs()[1]->getType()))
1723+
AS = PtrTy->getAddressSpace();
1724+
return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment,
1725+
AS, CostKind);
1726+
}
1727+
if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
1728+
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
1729+
CostKind);
1730+
}
1731+
}
1732+
1733+
std::optional<Intrinsic::ID> FID =
1734+
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
1735+
if (FID) {
1736+
// Non-vp version will have same Args/Tys except mask and vector length.
1737+
assert(ICA.getArgs().size() >= 2 && ICA.getArgTypes().size() >= 2 &&
1738+
"Expected VPIntrinsic to have Mask and Vector Length args and "
1739+
"types");
1740+
ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);
1741+
1742+
// VPReduction intrinsics have a start value argument that their non-vp
1743+
// counterparts do not have, except for the fadd and fmul non-vp
1744+
// counterparts.
1745+
if (VPReductionIntrinsic::isVPReduction(ICA.getID()) &&
1746+
*FID != Intrinsic::vector_reduce_fadd &&
1747+
*FID != Intrinsic::vector_reduce_fmul)
1748+
NewTys = NewTys.drop_front();
1749+
1750+
IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
1751+
ICA.getFlags());
1752+
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
1753+
}
1754+
}
1755+
1756+
// Assume that we need to scalarize this intrinsic.
16951757
// Compute the scalarization overhead based on Args for a vector
16961758
// intrinsic.
16971759
InstructionCost ScalarizationCost = InstructionCost::getInvalid();

llvm/test/Analysis/CostModel/RISCV/gep.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
270270
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
271271
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
272272
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
273-
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
273+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
274274
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
275275
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
276276
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = getelementptr i8, ptr %base, i32 42
@@ -282,7 +282,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
282282
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
283283
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
284284
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
285-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
285+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
286286
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
287287
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
288288
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -340,7 +340,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
340340
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 0
341341
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
342342
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
343-
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
343+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
344344
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
345345
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
346346
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %7 = getelementptr i8, ptr %base, i32 0
@@ -352,7 +352,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
352352
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 0
353353
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
354354
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
355-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
355+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
356356
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0
357357
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
358358
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void

0 commit comments

Comments
 (0)