Skip to content

Commit 662c734

Browse files
committed
[LV] Teach the vectorizer to cost and vectorize llvm.sincos intrinsics
This teaches the loop vectorizer that `llvm.sincos` is trivially vectorizable. Additionally, this patch updates the cost model to correctly cost intrinsics that return multiple values. Previously, the cost model assumed that only intrinsics returning a `VectorType` need scalarizing, which meant that intrinsics returning multiple vectors (which also need scalarizing) were costed far too cheaply (they were given the cost of a single function call). The `llvm.sincos` intrinsic also has a custom cost when a vector function library is available, as certain VFs can be expanded (later in code-gen) to a vector function, reducing the cost to a single call (plus the possible loads needed because the vector function returns its values via output pointers).
1 parent c1de9b9 commit 662c734

File tree

7 files changed

+256
-28
lines changed

7 files changed

+256
-28
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ class IntrinsicCostAttributes {
126126
// If ScalarizationCost is invalid (the default), the cost of scalarizing the
127127
// arguments and the return value will be computed based on types.
128128
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129+
TargetLibraryInfo const *LibInfo = nullptr;
129130

130131
public:
131132
IntrinsicCostAttributes(
@@ -145,7 +146,8 @@ class IntrinsicCostAttributes {
145146
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
146147
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
147148
const IntrinsicInst *I = nullptr,
148-
InstructionCost ScalarCost = InstructionCost::getInvalid());
149+
InstructionCost ScalarCost = InstructionCost::getInvalid(),
150+
TargetLibraryInfo const *LibInfo = nullptr);
149151

150152
Intrinsic::ID getID() const { return IID; }
151153
const IntrinsicInst *getInst() const { return II; }
@@ -154,6 +156,7 @@ class IntrinsicCostAttributes {
154156
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
155157
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
156158
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
159+
const TargetLibraryInfo *getLibInfo() const { return LibInfo; }
157160

158161
bool isTypeBasedOnly() const {
159162
return Arguments.empty();

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+70-18
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/SmallVector.h"
2323
#include "llvm/Analysis/LoopInfo.h"
2424
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25+
#include "llvm/Analysis/TargetLibraryInfo.h"
2526
#include "llvm/Analysis/TargetTransformInfo.h"
2627
#include "llvm/Analysis/TargetTransformInfoImpl.h"
2728
#include "llvm/Analysis/ValueTracking.h"
@@ -1717,9 +1718,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
17171718

17181719
Type *RetTy = ICA.getReturnType();
17191720

1720-
ElementCount RetVF =
1721-
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
1722-
: ElementCount::getFixed(1));
1721+
ElementCount RetVF = isVectorizedTy(RetTy) ? getVectorizedTypeVF(RetTy)
1722+
: ElementCount::getFixed(1);
1723+
17231724
const IntrinsicInst *I = ICA.getInst();
17241725
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
17251726
FastMathFlags FMF = ICA.getFlags();
@@ -1972,6 +1973,49 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19721973
}
19731974
case Intrinsic::experimental_vector_match:
19741975
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
1976+
case Intrinsic::sincos: {
1977+
// Vector variants of llvm.sincos can be mapped to a vector library call.
1978+
auto const *LibInfo = ICA.getLibInfo();
1979+
if (!LibInfo || !isVectorizedTy(RetTy))
1980+
break;
1981+
1982+
// Find associated libcall.
1983+
VectorType *VectorTy = cast<VectorType>(getContainedTypes(RetTy).front());
1984+
EVT VT = getTLI()->getValueType(DL, VectorTy);
1985+
RTLIB::Libcall LC = RTLIB::getFSINCOS(VT.getVectorElementType());
1986+
const char *LCName = getTLI()->getLibcallName(LC);
1987+
if (!LC || !LCName)
1988+
break;
1989+
1990+
// Search for a corresponding vector variant.
1991+
LLVMContext &Ctx = RetTy->getContext();
1992+
auto VF = getVectorizedTypeVF(RetTy);
1993+
VecDesc const *VD = nullptr;
1994+
for (bool Masked : {false, true}) {
1995+
if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
1996+
break;
1997+
}
1998+
if (!VD)
1999+
break;
2000+
2001+
// Cost the call + mask.
2002+
auto Cost = thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(),
2003+
CostKind);
2004+
if (VD->isMasked())
2005+
Cost += thisT()->getShuffleCost(
2006+
TargetTransformInfo::SK_Broadcast,
2007+
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
2008+
nullptr, {});
2009+
2010+
// Lowering to a sincos library call (with output pointers) may require us
2011+
// to emit reloads for the results.
2012+
Cost +=
2013+
thisT()->getMemoryOpCost(
2014+
Instruction::Load, VectorTy,
2015+
thisT()->getDataLayout().getABITypeAlign(VectorTy), 0, CostKind) *
2016+
2;
2017+
return Cost;
2018+
}
19752019
}
19762020

19772021
// Assume that we need to scalarize this intrinsic.
@@ -1980,10 +2024,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19802024
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
19812025
if (RetVF.isVector() && !RetVF.isScalable()) {
19822026
ScalarizationCost = 0;
1983-
if (!RetTy->isVoidTy())
1984-
ScalarizationCost += getScalarizationOverhead(
1985-
cast<VectorType>(RetTy),
1986-
/*Insert*/ true, /*Extract*/ false, CostKind);
2027+
if (!RetTy->isVoidTy()) {
2028+
for (Type *VectorTy : getContainedTypes(RetTy)) {
2029+
ScalarizationCost += getScalarizationOverhead(
2030+
cast<VectorType>(VectorTy),
2031+
/*Insert*/ true, /*Extract*/ false, CostKind);
2032+
}
2033+
}
19872034
ScalarizationCost +=
19882035
getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
19892036
}
@@ -2609,27 +2656,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
26092656
// Else, assume that we need to scalarize this intrinsic. For math builtins
26102657
// this will emit a costly libcall, adding call overhead and spills. Make it
26112658
// very expensive.
2612-
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2659+
if (isVectorizedTy(RetTy)) {
2660+
ArrayRef<Type *> RetVTys = getContainedTypes(RetTy);
2661+
26132662
// Scalable vectors cannot be scalarized, so return Invalid.
2614-
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2615-
return isa<ScalableVectorType>(Ty);
2616-
}))
2663+
if (any_of(concat<Type *const>(RetVTys, Tys),
2664+
[](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
26172665
return InstructionCost::getInvalid();
26182666

2619-
InstructionCost ScalarizationCost =
2620-
SkipScalarizationCost
2621-
? ScalarizationCostPassed
2622-
: getScalarizationOverhead(RetVTy, /*Insert*/ true,
2623-
/*Extract*/ false, CostKind);
2667+
InstructionCost ScalarizationCost = ScalarizationCostPassed;
2668+
if (!SkipScalarizationCost) {
2669+
ScalarizationCost = 0;
2670+
for (Type *RetVTy : RetVTys) {
2671+
ScalarizationCost += getScalarizationOverhead(
2672+
cast<VectorType>(RetVTy), /*Insert*/ true,
2673+
/*Extract*/ false, CostKind);
2674+
}
2675+
}
26242676

2625-
unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2677+
unsigned ScalarCalls = getVectorizedTypeVF(RetTy).getFixedValue();
26262678
SmallVector<Type *, 4> ScalarTys;
26272679
for (Type *Ty : Tys) {
26282680
if (Ty->isVectorTy())
26292681
Ty = Ty->getScalarType();
26302682
ScalarTys.push_back(Ty);
26312683
}
2632-
IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2684+
IntrinsicCostAttributes Attrs(IID, toScalarizedTy(RetTy), ScalarTys, FMF);
26332685
InstructionCost ScalarCost =
26342686
thisT()->getIntrinsicInstrCost(Attrs, CostKind);
26352687
for (Type *Ty : Tys) {

llvm/lib/Analysis/TargetTransformInfo.cpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,12 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
101101
ParamTys.push_back(Argument->getType());
102102
}
103103

104-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
105-
ArrayRef<const Value *> Args,
106-
ArrayRef<Type *> Tys,
107-
FastMathFlags Flags,
108-
const IntrinsicInst *I,
109-
InstructionCost ScalarCost)
110-
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
104+
IntrinsicCostAttributes::IntrinsicCostAttributes(
105+
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
106+
ArrayRef<Type *> Tys, FastMathFlags Flags, const IntrinsicInst *I,
107+
InstructionCost ScalarCost, TargetLibraryInfo const *LibInfo)
108+
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost),
109+
LibInfo(LibInfo) {
111110
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
112111
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
113112
}

llvm/lib/Analysis/VectorUtils.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
7272
case Intrinsic::atan2:
7373
case Intrinsic::sin:
7474
case Intrinsic::cos:
75+
case Intrinsic::sincos:
7576
case Intrinsic::tan:
7677
case Intrinsic::sinh:
7778
case Intrinsic::cosh:
@@ -179,6 +180,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
179180
case Intrinsic::ucmp:
180181
case Intrinsic::scmp:
181182
return OpdIdx == -1 || OpdIdx == 0;
183+
case Intrinsic::sincos:
182184
case Intrinsic::is_fpclass:
183185
case Intrinsic::vp_is_fpclass:
184186
return OpdIdx == 0;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -2971,7 +2971,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
29712971
[&](Type *Ty) { return maybeVectorizeType(Ty, VF); });
29722972

29732973
IntrinsicCostAttributes CostAttrs(ID, RetTy, Arguments, ParamTys, FMF,
2974-
dyn_cast<IntrinsicInst>(CI));
2974+
dyn_cast<IntrinsicInst>(CI),
2975+
InstructionCost::getInvalid(), TLI);
29752976
return TTI.getIntrinsicInstrCost(CostAttrs,
29762977
TargetTransformInfo::TCK_RecipThroughput);
29772978
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1112,7 +1112,8 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
11121112
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
11131113
IntrinsicCostAttributes CostAttrs(
11141114
VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1115-
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1115+
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()),
1116+
InstructionCost::getInvalid(), &Ctx.TLI);
11161117
return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
11171118
}
11181119

0 commit comments

Comments
 (0)