@@ -286,6 +286,64 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
286
286
return false ;
287
287
}
288
288
289
+ // / Several intrinsics struct-ret (including llvm.sincos[pi] and llvm.modf)
290
+ // / can be lowered to a vector library call (for certain VFs). The vector
291
+ // / library functions correspond to the scalar calls (e.g. sincos or modf),
292
+ // / which unlike the intrinsic return values via output pointers. This helper
293
+ // / checks if a vector call exists for the given intrinsic, and returns the
294
+ // / cost, which includes the cost of the mask (if required), and the loads for
295
+ // / values returned via output pointers. \p LC is the scalar libcall and
296
+ // / \p CallRetElementIndex (optional) is the struct element which is mapped to
297
+ // / the call return value. If std::nullopt is returned, the no vector library
298
+ // / call is available, so the intrinsic should be assigned the default cost
299
+ // / (e.g. scalarization).
300
+ std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost (
301
+ const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
302
+ RTLIB::Libcall LC, std::optional<unsigned > CallRetElementIndex = {}) {
303
+ Type *RetTy = ICA.getReturnType ();
304
+ // Vector variants of the intrinsic can be mapped to a vector library call.
305
+ auto const *LibInfo = ICA.getLibInfo ();
306
+ if (!LibInfo || !isa<StructType>(RetTy) ||
307
+ !isVectorizedStructTy (cast<StructType>(RetTy)))
308
+ return std::nullopt;
309
+
310
+ // Find associated libcall.
311
+ const char *LCName = getTLI ()->getLibcallName (LC);
312
+ if (!LC || !LCName)
313
+ return std::nullopt;
314
+
315
+ // Search for a corresponding vector variant.
316
+ LLVMContext &Ctx = RetTy->getContext ();
317
+ ElementCount VF = getVectorizedTypeVF (RetTy);
318
+ VecDesc const *VD = nullptr ;
319
+ for (bool Masked : {false , true }) {
320
+ if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
321
+ break ;
322
+ }
323
+ if (!VD)
324
+ return std::nullopt;
325
+
326
+ // Cost the call + mask.
327
+ auto Cost =
328
+ thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (), CostKind);
329
+ if (VD->isMasked ())
330
+ Cost += thisT ()->getShuffleCost (
331
+ TargetTransformInfo::SK_Broadcast,
332
+ VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
333
+ nullptr , {});
334
+
335
+ // Lowering to a library call (with output pointers) may require us to emit
336
+ // reloads for the results.
337
+ for (auto [Idx, VectorTy] : enumerate(getContainedTypes (RetTy))) {
338
+ if (Idx == CallRetElementIndex)
339
+ continue ;
340
+ Cost += thisT ()->getMemoryOpCost (
341
+ Instruction::Load, VectorTy,
342
+ thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind);
343
+ }
344
+ return Cost;
345
+ }
346
+
289
347
protected:
290
348
// Protected constructor: forwards \p DL to the BaseT constructor. \p TM is
// accepted but not used in this definition.
explicit BasicTTIImplBase (const TargetMachine *TM, const DataLayout &DL)
291
349
: BaseT(DL) {}
@@ -1999,47 +2057,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1999
2057
case Intrinsic::experimental_vector_match:
2000
2058
return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
2001
2059
case Intrinsic::sincos: {
2002
- // Vector variants of llvm.sincos can be mapped to a vector library call.
2003
- auto const *LibInfo = ICA.getLibInfo ();
2004
- if (!LibInfo || !isVectorizedTy (RetTy))
2005
- break ;
2006
-
2007
- // Find associated libcall.
2008
- VectorType *VectorTy = cast<VectorType>(getContainedTypes (RetTy).front ());
2009
- EVT VT = getTLI ()->getValueType (DL, VectorTy);
2010
- RTLIB::Libcall LC = RTLIB::getSINCOS (VT.getVectorElementType ());
2011
- const char *LCName = getTLI ()->getLibcallName (LC);
2012
- if (!LC || !LCName)
2013
- break ;
2014
-
2015
- // Search for a corresponding vector variant.
2016
- LLVMContext &Ctx = RetTy->getContext ();
2017
- auto VF = getVectorizedTypeVF (RetTy);
2018
- VecDesc const *VD = nullptr ;
2019
- for (bool Masked : {false , true }) {
2020
- if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
2021
- break ;
2022
- }
2023
- if (!VD)
2024
- break ;
2025
-
2026
- // Cost the call + mask.
2027
- auto Cost = thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (),
2028
- CostKind);
2029
- if (VD->isMasked ())
2030
- Cost += thisT ()->getShuffleCost (
2031
- TargetTransformInfo::SK_Broadcast,
2032
- VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
2033
- nullptr , {});
2034
-
2035
- // Lowering to a sincos library call (with output pointers) may require us
2036
- // to emit reloads for the results.
2037
- Cost +=
2038
- thisT ()->getMemoryOpCost (
2039
- Instruction::Load, VectorTy,
2040
- thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind) *
2041
- 2 ;
2042
- return Cost;
2060
+ Type *Ty = getContainedTypes (RetTy).front ();
2061
+ EVT VT = getTLI ()->getValueType (DL, Ty);
2062
+ RTLIB::Libcall LC = RTLIB::getSINCOS (VT.getScalarType ());
2063
+ if (auto Cost =
2064
+ getMultipleResultIntrinsicVectorLibCallCost (ICA, CostKind, LC))
2065
+ return *Cost;
2066
+ // Otherwise, fallback to default scalarization cost.
2067
+ break ;
2043
2068
}
2044
2069
}
2045
2070
0 commit comments