22
22
#include " llvm/ADT/SmallVector.h"
23
23
#include " llvm/Analysis/LoopInfo.h"
24
24
#include " llvm/Analysis/OptimizationRemarkEmitter.h"
25
+ #include " llvm/Analysis/TargetLibraryInfo.h"
25
26
#include " llvm/Analysis/TargetTransformInfo.h"
26
27
#include " llvm/Analysis/TargetTransformInfoImpl.h"
27
28
#include " llvm/Analysis/ValueTracking.h"
@@ -1717,9 +1718,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1717
1718
1718
1719
Type *RetTy = ICA.getReturnType ();
1719
1720
1720
- ElementCount RetVF =
1721
- (RetTy-> isVectorTy () ? cast<VectorType>(RetTy)-> getElementCount ()
1722
- : ElementCount::getFixed ( 1 ));
1721
+ ElementCount RetVF = isVectorizedTy (RetTy) ? getVectorizedTypeVF (RetTy)
1722
+ : ElementCount::getFixed ( 1 );
1723
+
1723
1724
const IntrinsicInst *I = ICA.getInst ();
1724
1725
const SmallVectorImpl<const Value *> &Args = ICA.getArgs ();
1725
1726
FastMathFlags FMF = ICA.getFlags ();
@@ -1972,6 +1973,49 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1972
1973
}
1973
1974
case Intrinsic::experimental_vector_match:
1974
1975
return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
1976
+ case Intrinsic::sincos: {
1977
+ // Vector variants of llvm.sincos can be mapped to a vector library call.
1978
+ auto const *LibInfo = ICA.getLibInfo ();
1979
+ if (!LibInfo || !isVectorizedTy (RetTy))
1980
+ break ;
1981
+
1982
+ // Find associated libcall.
1983
+ VectorType *VectorTy = cast<VectorType>(getContainedTypes (RetTy).front ());
1984
+ EVT VT = getTLI ()->getValueType (DL, VectorTy);
1985
+ RTLIB::Libcall LC = RTLIB::getFSINCOS (VT.getVectorElementType ());
1986
+ const char *LCName = getTLI ()->getLibcallName (LC);
1987
+ if (!LC || !LCName)
1988
+ break ;
1989
+
1990
+ // Search for a corresponding vector variant.
1991
+ LLVMContext &Ctx = RetTy->getContext ();
1992
+ auto VF = getVectorizedTypeVF (RetTy);
1993
+ VecDesc const *VD = nullptr ;
1994
+ for (bool Masked : {false , true }) {
1995
+ if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
1996
+ break ;
1997
+ }
1998
+ if (!VD)
1999
+ break ;
2000
+
2001
+ // Cost the call + mask.
2002
+ auto Cost = thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (),
2003
+ CostKind);
2004
+ if (VD->isMasked ())
2005
+ Cost += thisT ()->getShuffleCost (
2006
+ TargetTransformInfo::SK_Broadcast,
2007
+ VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
2008
+ nullptr , {});
2009
+
2010
+ // Lowering to a sincos library call (with output pointers) may require us
2011
+ // to emit reloads for the results.
2012
+ Cost +=
2013
+ thisT ()->getMemoryOpCost (
2014
+ Instruction::Load, VectorTy,
2015
+ thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind) *
2016
+ 2 ;
2017
+ return Cost;
2018
+ }
1975
2019
}
1976
2020
1977
2021
// Assume that we need to scalarize this intrinsic.
@@ -1980,10 +2024,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1980
2024
InstructionCost ScalarizationCost = InstructionCost::getInvalid ();
1981
2025
if (RetVF.isVector () && !RetVF.isScalable ()) {
1982
2026
ScalarizationCost = 0 ;
1983
- if (!RetTy->isVoidTy ())
1984
- ScalarizationCost += getScalarizationOverhead (
1985
- cast<VectorType>(RetTy),
1986
- /* Insert*/ true , /* Extract*/ false , CostKind);
2027
+ if (!RetTy->isVoidTy ()) {
2028
+ for (Type *VectorTy : getContainedTypes (RetTy)) {
2029
+ ScalarizationCost += getScalarizationOverhead (
2030
+ cast<VectorType>(VectorTy),
2031
+ /* Insert*/ true , /* Extract*/ false , CostKind);
2032
+ }
2033
+ }
1987
2034
ScalarizationCost +=
1988
2035
getOperandsScalarizationOverhead (Args, ICA.getArgTypes (), CostKind);
1989
2036
}
@@ -2609,27 +2656,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2609
2656
// Else, assume that we need to scalarize this intrinsic. For math builtins
2610
2657
// this will emit a costly libcall, adding call overhead and spills. Make it
2611
2658
// very expensive.
2612
- if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2659
+ if (isVectorizedTy (RetTy)) {
2660
+ ArrayRef<Type *> RetVTys = getContainedTypes (RetTy);
2661
+
2613
2662
// Scalable vectors cannot be scalarized, so return Invalid.
2614
- if (isa<ScalableVectorType>(RetTy) || any_of (Tys, [](const Type *Ty) {
2615
- return isa<ScalableVectorType>(Ty);
2616
- }))
2663
+ if (any_of (concat<Type *const >(RetVTys, Tys),
2664
+ [](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
2617
2665
return InstructionCost::getInvalid ();
2618
2666
2619
- InstructionCost ScalarizationCost =
2620
- SkipScalarizationCost
2621
- ? ScalarizationCostPassed
2622
- : getScalarizationOverhead (RetVTy, /* Insert*/ true ,
2623
- /* Extract*/ false , CostKind);
2667
+ InstructionCost ScalarizationCost = ScalarizationCostPassed;
2668
+ if (!SkipScalarizationCost) {
2669
+ ScalarizationCost = 0 ;
2670
+ for (Type *RetVTy : RetVTys) {
2671
+ ScalarizationCost += getScalarizationOverhead (
2672
+ cast<VectorType>(RetVTy), /* Insert*/ true ,
2673
+ /* Extract*/ false , CostKind);
2674
+ }
2675
+ }
2624
2676
2625
- unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)-> getNumElements ();
2677
+ unsigned ScalarCalls = getVectorizedTypeVF (RetTy). getFixedValue ();
2626
2678
SmallVector<Type *, 4 > ScalarTys;
2627
2679
for (Type *Ty : Tys) {
2628
2680
if (Ty->isVectorTy ())
2629
2681
Ty = Ty->getScalarType ();
2630
2682
ScalarTys.push_back (Ty);
2631
2683
}
2632
- IntrinsicCostAttributes Attrs (IID, RetTy-> getScalarType ( ), ScalarTys, FMF);
2684
+ IntrinsicCostAttributes Attrs (IID, toScalarizedTy (RetTy ), ScalarTys, FMF);
2633
2685
InstructionCost ScalarCost =
2634
2686
thisT ()->getIntrinsicInstrCost (Attrs, CostKind);
2635
2687
for (Type *Ty : Tys) {
0 commit comments