Skip to content

Commit 30a4e55

Browse files
committed
[LV] Teach the vectorizer to cost and vectorize llvm.sincos intrinsics
This teaches the loop vectorizer that `llvm.sincos` is trivially vectorizable. Additionally, this patch updates the cost model to correctly cost intrinsics that return multiple values. Previously, the cost model assumed that only intrinsics returning a `VectorType` need scalarizing, which meant that intrinsics returning multiple vectors (which also need scalarizing) were costed far too cheaply (they were given the cost of a single function call). The `llvm.sincos` intrinsic also has a custom cost when a vector function library is available, as certain VFs can be expanded (later in code-gen) to a vector function call, reducing the cost to a single call (plus the possible loads needed because the vector function returns its values via output pointers).
1 parent e0e67a6 commit 30a4e55

File tree

10 files changed

+301
-73
lines changed

10 files changed

+301
-73
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ class IntrinsicCostAttributes {
126126
// If ScalarizationCost is invalid (the default), the cost of scalarizing the
127127
// arguments and the return value will be computed based on types.
128128
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129+
TargetLibraryInfo const *LibInfo = nullptr;
129130

130131
public:
131132
IntrinsicCostAttributes(
@@ -145,7 +146,8 @@ class IntrinsicCostAttributes {
145146
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
146147
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
147148
const IntrinsicInst *I = nullptr,
148-
InstructionCost ScalarCost = InstructionCost::getInvalid());
149+
InstructionCost ScalarCost = InstructionCost::getInvalid(),
150+
TargetLibraryInfo const *LibInfo = nullptr);
149151

150152
Intrinsic::ID getID() const { return IID; }
151153
const IntrinsicInst *getInst() const { return II; }
@@ -154,6 +156,7 @@ class IntrinsicCostAttributes {
154156
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
155157
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
156158
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
159+
const TargetLibraryInfo *getLibInfo() const { return LibInfo; }
157160

158161
bool isTypeBasedOnly() const {
159162
return Arguments.empty();

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 70 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/SmallVector.h"
2323
#include "llvm/Analysis/LoopInfo.h"
2424
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25+
#include "llvm/Analysis/TargetLibraryInfo.h"
2526
#include "llvm/Analysis/TargetTransformInfo.h"
2627
#include "llvm/Analysis/TargetTransformInfoImpl.h"
2728
#include "llvm/Analysis/ValueTracking.h"
@@ -1726,9 +1727,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
17261727

17271728
Type *RetTy = ICA.getReturnType();
17281729

1729-
ElementCount RetVF =
1730-
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
1731-
: ElementCount::getFixed(1));
1730+
ElementCount RetVF = isVectorizedTy(RetTy) ? getVectorizedTypeVF(RetTy)
1731+
: ElementCount::getFixed(1);
1732+
17321733
const IntrinsicInst *I = ICA.getInst();
17331734
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
17341735
FastMathFlags FMF = ICA.getFlags();
@@ -1997,6 +1998,49 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19971998
}
19981999
case Intrinsic::experimental_vector_match:
19992000
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
2001+
case Intrinsic::sincos: {
2002+
// Vector variants of llvm.sincos can be mapped to a vector library call.
2003+
auto const *LibInfo = ICA.getLibInfo();
2004+
if (!LibInfo || !isVectorizedTy(RetTy))
2005+
break;
2006+
2007+
// Find associated libcall.
2008+
VectorType *VectorTy = cast<VectorType>(getContainedTypes(RetTy).front());
2009+
EVT VT = getTLI()->getValueType(DL, VectorTy);
2010+
RTLIB::Libcall LC = RTLIB::getSINCOS(VT.getVectorElementType());
2011+
const char *LCName = getTLI()->getLibcallName(LC);
2012+
if (!LC || !LCName)
2013+
break;
2014+
2015+
// Search for a corresponding vector variant.
2016+
LLVMContext &Ctx = RetTy->getContext();
2017+
auto VF = getVectorizedTypeVF(RetTy);
2018+
VecDesc const *VD = nullptr;
2019+
for (bool Masked : {false, true}) {
2020+
if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
2021+
break;
2022+
}
2023+
if (!VD)
2024+
break;
2025+
2026+
// Cost the call + mask.
2027+
auto Cost = thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(),
2028+
CostKind);
2029+
if (VD->isMasked())
2030+
Cost += thisT()->getShuffleCost(
2031+
TargetTransformInfo::SK_Broadcast,
2032+
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
2033+
nullptr, {});
2034+
2035+
// Lowering to a sincos library call (with output pointers) may require us
2036+
// to emit reloads for the results.
2037+
Cost +=
2038+
thisT()->getMemoryOpCost(
2039+
Instruction::Load, VectorTy,
2040+
thisT()->getDataLayout().getABITypeAlign(VectorTy), 0, CostKind) *
2041+
2;
2042+
return Cost;
2043+
}
20002044
}
20012045

20022046
// Assume that we need to scalarize this intrinsic.
@@ -2005,10 +2049,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
20052049
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
20062050
if (RetVF.isVector() && !RetVF.isScalable()) {
20072051
ScalarizationCost = 0;
2008-
if (!RetTy->isVoidTy())
2009-
ScalarizationCost += getScalarizationOverhead(
2010-
cast<VectorType>(RetTy),
2011-
/*Insert*/ true, /*Extract*/ false, CostKind);
2052+
if (!RetTy->isVoidTy()) {
2053+
for (Type *VectorTy : getContainedTypes(RetTy)) {
2054+
ScalarizationCost += getScalarizationOverhead(
2055+
cast<VectorType>(VectorTy),
2056+
/*Insert*/ true, /*Extract*/ false, CostKind);
2057+
}
2058+
}
20122059
ScalarizationCost +=
20132060
getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
20142061
}
@@ -2689,27 +2736,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
26892736
// Else, assume that we need to scalarize this intrinsic. For math builtins
26902737
// this will emit a costly libcall, adding call overhead and spills. Make it
26912738
// very expensive.
2692-
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2739+
if (isVectorizedTy(RetTy)) {
2740+
ArrayRef<Type *> RetVTys = getContainedTypes(RetTy);
2741+
26932742
// Scalable vectors cannot be scalarized, so return Invalid.
2694-
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2695-
return isa<ScalableVectorType>(Ty);
2696-
}))
2743+
if (any_of(concat<Type *const>(RetVTys, Tys),
2744+
[](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
26972745
return InstructionCost::getInvalid();
26982746

2699-
InstructionCost ScalarizationCost =
2700-
SkipScalarizationCost
2701-
? ScalarizationCostPassed
2702-
: getScalarizationOverhead(RetVTy, /*Insert*/ true,
2703-
/*Extract*/ false, CostKind);
2747+
InstructionCost ScalarizationCost = ScalarizationCostPassed;
2748+
if (!SkipScalarizationCost) {
2749+
ScalarizationCost = 0;
2750+
for (Type *RetVTy : RetVTys) {
2751+
ScalarizationCost += getScalarizationOverhead(
2752+
cast<VectorType>(RetVTy), /*Insert*/ true,
2753+
/*Extract*/ false, CostKind);
2754+
}
2755+
}
27042756

2705-
unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2757+
unsigned ScalarCalls = getVectorizedTypeVF(RetTy).getFixedValue();
27062758
SmallVector<Type *, 4> ScalarTys;
27072759
for (Type *Ty : Tys) {
27082760
if (Ty->isVectorTy())
27092761
Ty = Ty->getScalarType();
27102762
ScalarTys.push_back(Ty);
27112763
}
2712-
IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2764+
IntrinsicCostAttributes Attrs(IID, toScalarizedTy(RetTy), ScalarTys, FMF);
27132765
InstructionCost ScalarCost =
27142766
thisT()->getIntrinsicInstrCost(Attrs, CostKind);
27152767
for (Type *Ty : Tys) {

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,12 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
101101
ParamTys.push_back(Argument->getType());
102102
}
103103

104-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
105-
ArrayRef<const Value *> Args,
106-
ArrayRef<Type *> Tys,
107-
FastMathFlags Flags,
108-
const IntrinsicInst *I,
109-
InstructionCost ScalarCost)
110-
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
104+
IntrinsicCostAttributes::IntrinsicCostAttributes(
105+
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
106+
ArrayRef<Type *> Tys, FastMathFlags Flags, const IntrinsicInst *I,
107+
InstructionCost ScalarCost, TargetLibraryInfo const *LibInfo)
108+
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost),
109+
LibInfo(LibInfo) {
111110
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
112111
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
113112
}

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
7272
case Intrinsic::atan2:
7373
case Intrinsic::sin:
7474
case Intrinsic::cos:
75+
case Intrinsic::sincos:
7576
case Intrinsic::tan:
7677
case Intrinsic::sinh:
7778
case Intrinsic::cosh:
@@ -185,6 +186,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
185186
case Intrinsic::ucmp:
186187
case Intrinsic::scmp:
187188
return OpdIdx == -1 || OpdIdx == 0;
189+
case Intrinsic::sincos:
188190
case Intrinsic::is_fpclass:
189191
case Intrinsic::vp_is_fpclass:
190192
return OpdIdx == 0;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2926,7 +2926,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
29262926
[&](Type *Ty) { return maybeVectorizeType(Ty, VF); });
29272927

29282928
IntrinsicCostAttributes CostAttrs(ID, RetTy, Arguments, ParamTys, FMF,
2929-
dyn_cast<IntrinsicInst>(CI));
2929+
dyn_cast<IntrinsicInst>(CI),
2930+
InstructionCost::getInvalid(), TLI);
29302931
return TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
29312932
}
29322933

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1179,7 +1179,8 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
11791179
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
11801180
IntrinsicCostAttributes CostAttrs(
11811181
VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1182-
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1182+
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()),
1183+
InstructionCost::getInvalid(), &Ctx.TLI);
11831184
return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
11841185
}
11851186

llvm/test/Analysis/CostModel/AMDGPU/frexp.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -68,46 +68,46 @@ define void @frexp_f16_i32() {
6868
define void @frexp_f16_i16() {
6969
; GFX7-LABEL: 'frexp_f16_i16'
7070
; GFX7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef)
71-
; GFX7-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
72-
; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
73-
; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
74-
; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
75-
; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
76-
; GFX7-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
77-
; GFX7-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
71+
; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
72+
; GFX7-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
73+
; GFX7-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
74+
; GFX7-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
75+
; GFX7-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
76+
; GFX7-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
77+
; GFX7-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
7878
; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
7979
;
8080
; GFX8PLUS-LABEL: 'frexp_f16_i16'
8181
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef)
82-
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
83-
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
84-
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
85-
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
86-
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
87-
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
88-
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
82+
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
83+
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
84+
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
85+
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
86+
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
87+
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
88+
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
8989
; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
9090
;
9191
; GFX7-SIZE-LABEL: 'frexp_f16_i16'
9292
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef)
93-
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
94-
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
95-
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
96-
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
97-
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
98-
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
99-
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
93+
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
94+
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
95+
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
96+
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
97+
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
98+
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
99+
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
100100
; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
101101
;
102102
; GFX8PLUS-SIZE-LABEL: 'frexp_f16_i16'
103103
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef)
104-
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
105-
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
106-
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
107-
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
108-
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
109-
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
110-
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
104+
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> undef)
105+
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v3f16 = call { <3 x half>, <3 x i16> } @llvm.frexp.v3f16.v3i16(<3 x half> undef)
106+
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = call { <4 x half>, <4 x i16> } @llvm.frexp.v4f16.v4i16(<4 x half> undef)
107+
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v5f16 = call { <5 x half>, <5 x i16> } @llvm.frexp.v5f16.v5i16(<5 x half> undef)
108+
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f16 = call { <8 x half>, <8 x i16> } @llvm.frexp.v8f16.v8i16(<8 x half> undef)
109+
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v16f16 = call { <16 x half>, <16 x i16> } @llvm.frexp.v16f16.v16i16(<16 x half> undef)
110+
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v17f16 = call { <17 x half>, <17 x i16> } @llvm.frexp.v17f16.v17i16(<17 x half> undef)
111111
; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
112112
;
113113
%f16 = call { half, i16 } @llvm.frexp.f16.i16(half undef)

0 commit comments

Comments
 (0)