22
22
#include " llvm/ADT/SmallVector.h"
23
23
#include " llvm/Analysis/LoopInfo.h"
24
24
#include " llvm/Analysis/OptimizationRemarkEmitter.h"
25
+ #include " llvm/Analysis/TargetLibraryInfo.h"
25
26
#include " llvm/Analysis/TargetTransformInfo.h"
26
27
#include " llvm/Analysis/TargetTransformInfoImpl.h"
27
28
#include " llvm/Analysis/ValueTracking.h"
@@ -1718,8 +1719,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1718
1719
Type *RetTy = ICA.getReturnType ();
1719
1720
1720
1721
ElementCount RetVF =
1721
- (RetTy-> isVectorTy ( ) ? cast<VectorType> (RetTy)-> getElementCount ()
1722
- : ElementCount::getFixed ( 1 ));
1722
+ isVectorizedTy (RetTy) ? getVectorizedTypeVF (RetTy) : ElementCount::getFixed ( 1 );
1723
+
1723
1724
const IntrinsicInst *I = ICA.getInst ();
1724
1725
const SmallVectorImpl<const Value *> &Args = ICA.getArgs ();
1725
1726
FastMathFlags FMF = ICA.getFlags ();
@@ -1972,6 +1973,47 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1972
1973
}
1973
1974
case Intrinsic::experimental_vector_match:
1974
1975
return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
1976
+ case Intrinsic::sincos: {
1977
+ // Vector variants of llvm.sincos can be mapped to a vector library call.
1978
+ auto const *LibInfo = ICA.getLibInfo ();
1979
+ if (!LibInfo || !isVectorizedTy (RetTy))
1980
+ break ;
1981
+
1982
+ // Find associated libcall.
1983
+ VectorType *VectorTy = cast<VectorType>(getContainedTypes (RetTy).front ());
1984
+ EVT VT = getTLI ()->getValueType (DL, VectorTy);
1985
+ RTLIB::Libcall LC = RTLIB::getFSINCOS (VT.getVectorElementType ());
1986
+ const char *LCName = getTLI ()->getLibcallName (LC);
1987
+ if (!LC || !LCName)
1988
+ break ;
1989
+
1990
+ // Search for a corresponding vector variant.
1991
+ LLVMContext &Ctx = RetTy->getContext ();
1992
+ auto VF = getVectorizedTypeVF (RetTy);
1993
+ VecDesc const *VD = nullptr ;
1994
+ for (bool Masked : {false , true }) {
1995
+ if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
1996
+ break ;
1997
+ }
1998
+ if (!VD)
1999
+ break ;
2000
+
2001
+ // Cost the call, plus an i1 vector broadcast for the mask if the variant is masked.
2002
+ auto Cost = thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (), CostKind);
2003
+ if (VD->isMasked ())
2004
+ Cost += thisT ()->getShuffleCost (TargetTransformInfo::SK_Broadcast,
2005
+ VectorType::get (IntegerType::getInt1Ty (Ctx), VF),
2006
+ {}, CostKind, 0 , nullptr , {});
2007
+
2008
+ // Lowering to a sincos library call (with output pointers) may require us
2009
+ // to emit reloads for the results.
2010
+ Cost +=
2011
+ thisT ()->getMemoryOpCost (Instruction::Load, VectorTy,
2012
+ thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 ,
2013
+ CostKind) *
2014
+ 2 ;
2015
+ return Cost;
2016
+ }
1975
2017
}
1976
2018
1977
2019
// Assume that we need to scalarize this intrinsic.
@@ -1980,10 +2022,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1980
2022
InstructionCost ScalarizationCost = InstructionCost::getInvalid ();
1981
2023
if (RetVF.isVector () && !RetVF.isScalable ()) {
1982
2024
ScalarizationCost = 0 ;
1983
- if (!RetTy->isVoidTy ())
1984
- ScalarizationCost += getScalarizationOverhead (
1985
- cast<VectorType>(RetTy),
1986
- /* Insert*/ true , /* Extract*/ false , CostKind);
2025
+ if (!RetTy->isVoidTy ()) {
2026
+ for (Type *VectorTy : getContainedTypes (RetTy)) {
2027
+ ScalarizationCost += getScalarizationOverhead (
2028
+ cast<VectorType>(VectorTy),
2029
+ /* Insert*/ true , /* Extract*/ false , CostKind);
2030
+ }
2031
+ }
1987
2032
ScalarizationCost +=
1988
2033
getOperandsScalarizationOverhead (Args, ICA.getArgTypes (), CostKind);
1989
2034
}
@@ -2609,27 +2654,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2609
2654
// Else, assume that we need to scalarize this intrinsic. For math builtins
2610
2655
// this will emit a costly libcall, adding call overhead and spills. Make it
2611
2656
// very expensive.
2612
- if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2657
+ if (isVectorizedTy (RetTy)) {
2658
+ ArrayRef<Type *> RetVTys = getContainedTypes (RetTy);
2659
+
2613
2660
// Scalable vectors cannot be scalarized, so return Invalid.
2614
- if (isa<ScalableVectorType>(RetTy) || any_of (Tys, [](const Type *Ty) {
2615
- return isa<ScalableVectorType>(Ty);
2616
- }))
2661
+ if (any_of (concat<Type *const >(RetVTys, Tys),
2662
+ [](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
2617
2663
return InstructionCost::getInvalid ();
2618
2664
2619
- InstructionCost ScalarizationCost =
2620
- SkipScalarizationCost
2621
- ? ScalarizationCostPassed
2622
- : getScalarizationOverhead (RetVTy, /* Insert*/ true ,
2623
- /* Extract*/ false , CostKind);
2665
+ InstructionCost ScalarizationCost = ScalarizationCostPassed;
2666
+ if (!SkipScalarizationCost) {
2667
+ ScalarizationCost = 0 ;
2668
+ for (Type *RetVTy : RetVTys) {
2669
+ ScalarizationCost += getScalarizationOverhead (
2670
+ cast<VectorType>(RetVTy), /* Insert*/ true ,
2671
+ /* Extract*/ false , CostKind);
2672
+ }
2673
+ }
2624
2674
2625
- unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)-> getNumElements ();
2675
+ unsigned ScalarCalls = getVectorizedTypeVF (RetTy). getFixedValue ();
2626
2676
SmallVector<Type *, 4 > ScalarTys;
2627
2677
for (Type *Ty : Tys) {
2628
2678
if (Ty->isVectorTy ())
2629
2679
Ty = Ty->getScalarType ();
2630
2680
ScalarTys.push_back (Ty);
2631
2681
}
2632
- IntrinsicCostAttributes Attrs (IID, RetTy-> getScalarType ( ), ScalarTys, FMF);
2682
+ IntrinsicCostAttributes Attrs (IID, toScalarizedTy (RetTy ), ScalarTys, FMF);
2633
2683
InstructionCost ScalarCost =
2634
2684
thisT ()->getIntrinsicInstrCost (Attrs, CostKind);
2635
2685
for (Type *Ty : Tys) {
0 commit comments