Skip to content

Commit 03e622e

Browse files
committed
[IR] Add llvm.sincos intrinsic
This adds the `llvm.sincos` intrinsic, legalization, and lowering. The `llvm.sincos` intrinsic takes a floating-point value and returns both the sine and cosine (as a struct). ``` declare { float, float } @llvm.sincos.f32(float %Val) declare { double, double } @llvm.sincos.f64(double %Val) declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val) declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val) declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val) declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val) ``` The lowering is built on top of the existing FSINCOS ISD node, with additional type legalization to allow for f16, f128, and vector values.
1 parent 0de1e3e commit 03e622e

18 files changed

+1197
-11
lines changed

llvm/docs/LangRef.rst

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15361,6 +15361,8 @@ Semantics:
1536115361
This function returns the first value raised to the second power with an
1536215362
unspecified sequence of rounding operations.
1536315363

15364+
.. _t_llvm_sin:
15365+
1536415366
'``llvm.sin.*``' Intrinsic
1536515367
^^^^^^^^^^^^^^^^^^^^^^^^^^
1536615368

@@ -15398,6 +15400,8 @@ trapping or setting ``errno``.
1539815400
When specified with the fast-math-flag 'afn', the result may be approximated
1539915401
using a less accurate calculation.
1540015402

15403+
.. _t_llvm_cos:
15404+
1540115405
'``llvm.cos.*``' Intrinsic
1540215406
^^^^^^^^^^^^^^^^^^^^^^^^^^
1540315407

@@ -15694,6 +15698,47 @@ trapping or setting ``errno``.
1569415698
When specified with the fast-math-flag 'afn', the result may be approximated
1569515699
using a less accurate calculation.
1569615700

15701+
15702+
'``llvm.sincos.*``' Intrinsic
15703+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15704+
15705+
Syntax:
15706+
"""""""
15707+
15708+
This is an overloaded intrinsic. You can use ``llvm.sincos`` on any
15709+
floating-point or vector of floating-point type. Not all targets support
15710+
all types however.
15711+
15712+
::
15713+
15714+
declare { float, float } @llvm.sincos.f32(float %Val)
15715+
declare { double, double } @llvm.sincos.f64(double %Val)
15716+
declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val)
15717+
declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val)
15718+
declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val)
15719+
declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val)
15720+
15721+
Overview:
15722+
"""""""""
15723+
15724+
The '``llvm.sincos.*``' intrinsics return the sine and cosine of the operand.
15725+
15726+
Arguments:
15727+
""""""""""
15728+
15729+
The argument is a :ref:`floating-point <t_floating>` or :ref:`vector <t_vector>`
15730+
of floating-point values. Returns two values matching the argument type in a
15731+
struct.
15732+
15733+
Semantics:
15734+
""""""""""
15735+
15736+
This intrinsic is equivalent to calling both :ref:`llvm.sin <t_llvm_sin>`
15737+
and :ref:`llvm.cos <t_llvm_cos>` on the argument.
15738+
15739+
The first result is the sine of the argument and the second result is the cosine
15740+
of the argument.
15741+
1569715742
'``llvm.pow.*``' Intrinsic
1569815743
^^^^^^^^^^^^^^^^^^^^^^^^^^
1569915744

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,6 +1978,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19781978
case Intrinsic::cos:
19791979
ISD = ISD::FCOS;
19801980
break;
1981+
case Intrinsic::sincos:
1982+
ISD = ISD::FSINCOS;
1983+
break;
19811984
case Intrinsic::tan:
19821985
ISD = ISD::FTAN;
19831986
break;

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2009,6 +2009,13 @@ class MachineIRBuilder {
20092009
return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags);
20102010
}
20112011

2012+
/// Build and insert \p Sin, \p Cos = G_FSINCOS \p Src
///
/// \p Sin and \p Cos are the two destination operands, in that order, and
/// \p Src is the single source operand. \p Flags is forwarded unchanged to
/// buildInstr.
2013+
MachineInstrBuilder
2014+
buildFSincos(const DstOp &Sin, const DstOp &Cos, const SrcOp &Src,
2015+
std::optional<unsigned> Flags = std::nullopt) {
2016+
return buildInstr(TargetOpcode::G_FSINCOS, {Sin, Cos}, {Src}, Flags);
2017+
}
2018+
20122019
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
20132020
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
20142021
const SrcOp &Src1) {

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
10401040
def int_nearbyint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
10411041
def int_round : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
10421042
def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
1043+
def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
1044+
[llvm_anyfloat_ty]>;
10431045

10441046
// Truncate a floating point number with a specific rounding mode
10451047
def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,9 @@ HANDLE_TARGET_OPCODE(G_FCOS)
809809
/// Floating point sine.
810810
HANDLE_TARGET_OPCODE(G_FSIN)
811811

812+
/// Floating point combined sine and cosine.
813+
HANDLE_TARGET_OPCODE(G_FSINCOS)
814+
812815
/// Floating point tangent.
813816
HANDLE_TARGET_OPCODE(G_FTAN)
814817

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,13 @@ def G_FSIN : GenericInstruction {
10201020
let hasSideEffects = false;
10211021
}
10221022

1023+
// Floating point combined sine and cosine.
// Defines two results ($dst1, $dst2) and one source ($src1); all three
// operands are constrained to the same type (type0).
1024+
def G_FSINCOS : GenericInstruction {
1025+
let OutOperandList = (outs type0:$dst1, type0:$dst2);
1026+
let InOperandList = (ins type0:$src1);
1027+
let hasSideEffects = false;
1028+
}
1029+
10231030
// Floating point tangent of a value.
10241031
def G_FTAN : GenericInstruction {
10251032
let OutOperandList = (outs type0:$dst);

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2340,6 +2340,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
23402340
MachineInstr::copyFlagsFromInstruction(CI));
23412341
return true;
23422342
}
2343+
case Intrinsic::sincos: {
2344+
ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
2345+
MIRBuilder.buildFSincos(VRegs[0], VRegs[1],
2346+
getOrCreateVReg(*CI.getArgOperand(0)),
2347+
MachineInstr::copyFlagsFromInstruction(CI));
2348+
return true;
2349+
}
23432350
case Intrinsic::fptosi_sat:
23442351
MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
23452352
getOrCreateVReg(*CI.getArgOperand(0)));

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5577,6 +5577,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
55775577
Results.push_back(Tmp2.getValue(1));
55785578
break;
55795579
}
5580+
case ISD::FSINCOS: {
5581+
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
5582+
Tmp2 = DAG.getNode(ISD::FSINCOS, dl, {NVT, NVT}, Tmp1);
5583+
5584+
for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++)
5585+
Results.push_back(
5586+
DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum),
5587+
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
5588+
break;
5589+
}
55805590
case ISD::FFLOOR:
55815591
case ISD::FCEIL:
55825592
case ISD::FRINT:

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
127127
case ISD::FLDEXP:
128128
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
129129
case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
130+
case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break;
130131
case ISD::STRICT_FREM:
131132
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
132133
case ISD::STRICT_FRINT:
@@ -765,6 +766,45 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
765766
return ReturnVal;
766767
}
767768

769+
// Soften both results of an FSINCOS node by emitting the sincos libcall
// selected by RTLIB::getFSINCOS for the node's value type. The libcall is
// given the softened operand plus the addresses of two stack temporaries;
// each result is then loaded back from its temporary and recorded with
// SetSoftenedFloat. Always returns an empty SDValue: either the target has
// no name for the libcall, or both results were already registered here.
SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
770+
assert(!N->isStrictFPOpcode() && "strictfp not implemented for fsincos");
771+
EVT VT = N->getValueType(0);
772+
RTLIB::Libcall LC = RTLIB::getFSINCOS(VT);
773+
774+
// Nothing is emitted when the target provides no name for this libcall.
if (!TLI.getLibcallName(LC))
775+
return SDValue();
776+
777+
// One stack temporary of the transformed type per result (sin, then cos).
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
778+
SDValue StackSlotSin = DAG.CreateStackTemporary(NVT);
779+
SDValue StackSlotCos = DAG.CreateStackTemporary(NVT);
780+
781+
SDLoc DL(N);
782+
783+
// Call operands: the softened input followed by the two output pointers.
TargetLowering::MakeLibCallOptions CallOptions;
784+
std::array Ops{GetSoftenedFloat(N->getOperand(0)), StackSlotSin,
785+
StackSlotCos};
786+
std::array OpsVT{VT, StackSlotSin.getValueType(),
787+
StackSlotCos.getValueType()};
788+
789+
// TODO: setTypeListBeforeSoften can't properly express multiple return types,
790+
// but since both returns have the same type for sincos it should be okay.
791+
CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true);
792+
793+
// ReturnVal is not used below; only Chain is needed to order the loads
// after the call.
auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL,
794+
/*Chain=*/SDValue());
795+
unsigned ResNo = 0;
796+
// Load each result from its stack temporary and register it as the softened
// value for result number ResNo of N (0 for the sin slot, 1 for the cos slot).
for (SDValue OutPtr : {StackSlotSin, StackSlotCos}) {
797+
int FrameIdx = cast<FrameIndexSDNode>(OutPtr)->getIndex();
798+
auto PtrInfo =
799+
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
800+
801+
// NOTE(review): despite its name, LoadExp holds the value loaded from the
// sin/cos slot; the name looks carried over from the FFREXP lowering.
SDValue LoadExp = DAG.getLoad(NVT, DL, Chain, OutPtr, PtrInfo);
802+
SetSoftenedFloat(SDValue(N, ResNo++), LoadExp);
803+
}
804+
805+
return SDValue();
806+
}
807+
768808
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
769809
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
770810
RTLIB::REM_F32,
@@ -2683,6 +2723,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
26832723
case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;
26842724
case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
26852725

2726+
case ISD::FSINCOS:
2727+
R = PromoteFloatRes_FSINCOS(N);
2728+
break;
2729+
26862730
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
26872731
case ISD::STRICT_FP_ROUND:
26882732
R = PromoteFloatRes_STRICT_FP_ROUND(N);
@@ -2878,6 +2922,18 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) {
28782922
return Res;
28792923
}
28802924

2925+
// Promote both results of an FSINCOS node: rebuild the node on the promoted
// operand with two results of the transformed type, then record each new
// result as the promoted value of the matching result of N. Always returns
// an empty SDValue because the results are registered here via
// SetPromotedFloat.
SDValue DAGTypeLegalizer::PromoteFloatRes_FSINCOS(SDNode *N) {
2926+
EVT VT = N->getValueType(0);
2927+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
2928+
SDValue Op = GetPromotedFloat(N->getOperand(0));
2929+
// Same opcode as N, but both result types are NVT.
SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, NVT}, Op);
2930+
2931+
for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
2932+
SetPromotedFloat(SDValue(N, ResNum), Res.getValue(ResNum));
2933+
2934+
return SDValue();
2935+
}
2936+
28812937
// Explicit operation to reduce precision. Reduce the value to half precision
28822938
// and promote it back to the legal type.
28832939
SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
@@ -3126,6 +3182,10 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
31263182

31273183
case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
31283184

3185+
case ISD::FSINCOS:
3186+
R = SoftPromoteHalfRes_FSINCOS(N);
3187+
break;
3188+
31293189
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
31303190
case ISD::ATOMIC_LOAD:
31313191
R = SoftPromoteHalfRes_ATOMIC_LOAD(N);
@@ -3282,6 +3342,26 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) {
32823342
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
32833343
}
32843344

3345+
// Soft-promote both results of an FSINCOS node: convert the soft-promoted
// operand up to the transformed type, rebuild the node there with two
// results, convert each result back to an i16 value, and record it with
// SetSoftPromotedHalf. Always returns an empty SDValue because the results
// are registered here.
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FSINCOS(SDNode *N) {
3346+
EVT OVT = N->getValueType(0);
3347+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
3348+
SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
3349+
SDLoc dl(N);
3350+
3351+
// Promote to the larger FP type.
3352+
Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
3353+
SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, NVT), Op);
3354+
3355+
// Convert back to FP16 as an integer.
3356+
// The conversion opcode is the same for both results, so it is computed once.
ISD::NodeType Truncate = GetPromotionOpcode(NVT, OVT);
3357+
for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) {
3358+
SDValue Trunc = DAG.getNode(Truncate, dl, MVT::i16, Res.getValue(ResNum));
3359+
SetSoftPromotedHalf(SDValue(N, ResNum), Trunc);
3360+
}
3361+
3362+
return SDValue();
3363+
}
3364+
32853365
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
32863366
EVT RVT = N->getValueType(0);
32873367
bool IsStrict = N->isStrictFPOpcode();

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
596596
SDValue SoftenFloatRes_FPOW(SDNode *N);
597597
SDValue SoftenFloatRes_ExpOp(SDNode *N);
598598
SDValue SoftenFloatRes_FFREXP(SDNode *N);
599+
SDValue SoftenFloatRes_FSINCOS(SDNode *N);
599600
SDValue SoftenFloatRes_FREEZE(SDNode *N);
600601
SDValue SoftenFloatRes_FREM(SDNode *N);
601602
SDValue SoftenFloatRes_FRINT(SDNode *N);
@@ -742,6 +743,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
742743
SDValue PromoteFloatRes_FMAD(SDNode *N);
743744
SDValue PromoteFloatRes_ExpOp(SDNode *N);
744745
SDValue PromoteFloatRes_FFREXP(SDNode *N);
746+
SDValue PromoteFloatRes_FSINCOS(SDNode *N);
745747
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
746748
SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N);
747749
SDValue PromoteFloatRes_LOAD(SDNode *N);
@@ -790,6 +792,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
790792
SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
791793
SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
792794
SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
795+
SDValue SoftPromoteHalfRes_FSINCOS(SDNode *N);
793796
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
794797
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
795798
SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N);
@@ -861,7 +864,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
861864
SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
862865

863866
SDValue ScalarizeVecRes_FIX(SDNode *N);
864-
SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo);
867+
SDValue ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo);
865868

866869
// Vector Operand Scalarization: <1 x ty> -> ty.
867870
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -915,7 +918,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
915918
void SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi);
916919
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
917920
void SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, SDValue &Hi);
918-
void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi);
921+
void SplitVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo, SDValue &Lo,
922+
SDValue &Hi);
919923
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
920924
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
921925
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -1066,6 +1070,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
10661070
SDValue WidenVecRes_ExpOp(SDNode *N);
10671071
SDValue WidenVecRes_Unary(SDNode *N);
10681072
SDValue WidenVecRes_InregOp(SDNode *N);
1073+
SDValue WidenVecRes_FSINCOS(SDNode *N);
10691074

10701075
// Widen Vector Operand.
10711076
bool WidenVectorOperand(SDNode *N, unsigned OpNo);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
451451
case ISD::UMULO:
452452
case ISD::FCANONICALIZE:
453453
case ISD::FFREXP:
454+
case ISD::FSINCOS:
454455
case ISD::SADDSAT:
455456
case ISD::UADDSAT:
456457
case ISD::SSUBSAT:

0 commit comments

Comments
 (0)