Skip to content

Commit b84b717

Browse files
authored
[CostModel] getTypeBasedIntrinsicInstrCost - add default cost approximations for funnel shifts (#124175)
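Previously, funnel shift (fshl/fshr) costs were only handled when argument data was available; the type-based path had no default approximation and could report an invalid cost (e.g. for scalable vector types).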
1 parent 8ef171e commit b84b717

File tree

3 files changed

+39
-10
lines changed

3 files changed

+39
-10
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2258,6 +2258,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
22582258
case Intrinsic::abs:
22592259
ISD = ISD::ABS;
22602260
break;
2261+
case Intrinsic::fshl:
2262+
ISD = ISD::FSHL;
2263+
break;
2264+
case Intrinsic::fshr:
2265+
ISD = ISD::FSHR;
2266+
break;
22612267
case Intrinsic::smax:
22622268
ISD = ISD::SMAX;
22632269
break;
@@ -2547,6 +2553,29 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
25472553
{TTI::OK_UniformConstantValue, TTI::OP_None});
25482554
return Cost;
25492555
}
2556+
case Intrinsic::fshl:
2557+
case Intrinsic::fshr: {
2558+
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
2559+
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
2560+
Type *CondTy = RetTy->getWithNewBitWidth(1);
2561+
InstructionCost Cost = 0;
2562+
Cost +=
2563+
thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
2564+
Cost +=
2565+
thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
2566+
Cost +=
2567+
thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, CostKind);
2568+
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
2569+
CostKind);
2570+
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
2571+
CostKind);
2572+
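// Shift-by-zero handling: when Z % BW == 0 the opposing shift amount
// (BW - (Z % BW)) equals BW, so cost a compare + select to guard that case.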
2573+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2574+
CmpInst::ICMP_EQ, CostKind);
2575+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2576+
CmpInst::ICMP_EQ, CostKind);
2577+
return Cost;
2578+
}
25502579
case Intrinsic::fptosi_sat:
25512580
case Intrinsic::fptoui_sat: {
25522581
if (Tys.empty())

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,10 +1025,10 @@ define void @fshr() #0 {
10251025
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10261026
;
10271027
; TYPE_BASED_ONLY-LABEL: 'fshr'
1028-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1029-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1030-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1031-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1028+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1029+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1030+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1031+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
10321032
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10331033
;
10341034
call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
@@ -1054,10 +1054,10 @@ define void @fshl() #0 {
10541054
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10551055
;
10561056
; TYPE_BASED_ONLY-LABEL: 'fshl'
1057-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1058-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1059-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1060-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1057+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1058+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1059+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1060+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
10611061
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10621062
;
10631063
call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)

llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ define void @fshr(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i3
3838
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
3939
;
4040
; TYPEBASED-LABEL: 'fshr'
41-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
41+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
4242
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
4343
;
4444
call <vscale x 1 x i32> @llvm.fshr.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
@@ -51,7 +51,7 @@ define void @fshl(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i3
5151
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
5252
;
5353
; TYPEBASED-LABEL: 'fshl'
54-
; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %1 = call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
54+
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
5555
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
5656
;
5757
call <vscale x 1 x i32> @llvm.fshl.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)

0 commit comments

Comments
 (0)