diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index e12eb7095b908..402189769c20c 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -49,6 +49,7 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v") @@ -142,6 +143,18 @@ TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("tan", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tan", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v") + TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4), "_ZGV_LLVM_N4vv") @@ -303,6 +316,22 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("tan", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tan", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tan", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v") + TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8), "_ZGV_LLVM_N8vv") @@ -1237,6 +1266,13 @@ TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd8_tan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd4_tan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd2_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("asin", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 2091432d4fe27..c062ebd93ec90 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1973,6 +1973,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::tan: + ISD = ISD::FTAN; + break; case Intrinsic::exp: ISD = ISD::FEXP; break; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 0f87e062e2da6..c8c86ed5eef29 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -415,6 +415,7 @@ enum NodeType { STRICT_FLDEXP, STRICT_FSIN, STRICT_FCOS, + STRICT_FTAN, STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, @@ -934,6 +935,7 @@ enum NodeType { FCBRT, FSIN, FCOS, + FTAN, FPOW, FPOWI, /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1). diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index a5a72884a6bf5..e900bcdbcdd04 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -197,6 +197,11 @@ HANDLE_LIBCALL(COS_F64, "cos") HANDLE_LIBCALL(COS_F80, "cosl") HANDLE_LIBCALL(COS_F128, "cosl") HANDLE_LIBCALL(COS_PPCF128, "cosl") +HANDLE_LIBCALL(TAN_F32, "tanf") +HANDLE_LIBCALL(TAN_F64, "tan") +HANDLE_LIBCALL(TAN_F80, "tanl") +HANDLE_LIBCALL(TAN_F128,"tanl") +HANDLE_LIBCALL(TAN_PPCF128, "tanl") HANDLE_LIBCALL(SINCOS_F32, nullptr) HANDLE_LIBCALL(SINCOS_F64, nullptr) HANDLE_LIBCALL(SINCOS_F80, nullptr) diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 8fa0e4b86d6dc..560d3b434d07d 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -148,6 +148,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 1c95a60909846..15e02eb49271d 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -509,6 +509,7 @@ def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>; def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>; def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>; def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>; +def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>; def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>; def fexp10 : SDNode<"ISD::FEXP10" , SDTFPUnaryOp>; def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>; @@ -562,6 +563,8 @@ def strict_fsin : SDNode<"ISD::STRICT_FSIN", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fcos : SDNode<"ISD::STRICT_FCOS", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_ftan : SDNode<"ISD::STRICT_FTAN", + SDTFPUnaryOp, [SDNPHasChain]>; def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fpow : SDNode<"ISD::STRICT_FPOW", @@ -1517,6 +1520,9 @@ def any_fsin : PatFrags<(ops node:$src), def any_fcos : PatFrags<(ops node:$src), [(strict_fcos node:$src), (fcos node:$src)]>; +def any_ftan : PatFrags<(ops node:$src), + [(strict_ftan node:$src), + (ftan node:$src)]>; def any_fexp2 : PatFrags<(ops node:$src), [(strict_fexp2 node:$src), (fexp2 node:$src)]>; diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 917094267d05a..30728ed587509 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -68,6 +68,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::sqrt: // Begin floating-point. case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::tan: case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::log: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 6f0cae2edab17..9830b521797c1 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -449,6 +449,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(SIN_F); case TargetOpcode::G_FCOS: RTLIBCASE(COS_F); + case TargetOpcode::G_FTAN: + RTLIBCASE(TAN_F); case TargetOpcode::G_FLOG10: RTLIBCASE(LOG10_F); case TargetOpcode::G_FLOG: @@ -1037,6 +1039,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FREM: case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: + case TargetOpcode::G_FTAN: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: @@ -2893,6 +2896,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FFLOOR: case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: + case TargetOpcode::G_FTAN: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: @@ -4659,6 +4663,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: + case G_FTAN: case G_FSQRT: case G_BSWAP: case G_BITREVERSE: diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index e8438be94b3cd..129e6963aef33 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -833,6 +833,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, case TargetOpcode::G_FREM: case TargetOpcode::G_FSIN: case TargetOpcode::G_FCOS: + case TargetOpcode::G_FTAN: case TargetOpcode::G_FMA: case TargetOpcode::G_FMAD: if (SNaN) @@ -1713,6 +1714,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) { case TargetOpcode::G_FREM: case TargetOpcode::G_FRINT: case TargetOpcode::G_FSIN: + case TargetOpcode::G_FTAN: case TargetOpcode::G_FSQRT: case TargetOpcode::G_FSUB: case TargetOpcode::G_INTRINSIC_ROUND: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8cd2bb60d81f2..27c45cab2e0d0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4514,6 +4514,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128, Results); break; + case ISD::FTAN: + case ISD::STRICT_FTAN: + ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80, + RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results); + break; case ISD::FSINCOS: // Expand into sincos libcall. ExpandSinCosLibCall(Node, Results); @@ -5468,6 +5473,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: @@ -5492,6 +5498,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FSQRT: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: + case ISD::STRICT_FTAN: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index fb1424f75e097..aa116c9de5d8c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -131,6 +131,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::STRICT_FTAN: + case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; @@ -774,6 +776,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, RTLIB::TAN_F64, + RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -1330,7 +1338,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { #endif report_fatal_error("Do not know how to expand the result of this " "operator!"); - + // clang-format off case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; @@ -1399,6 +1407,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::STRICT_FTAN: + case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; @@ -1408,6 +1418,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; case ISD::STRICT_FREM: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + // clang-format on } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1768,6 +1779,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, + RTLIB::TAN_F64, RTLIB::TAN_F80, + RTLIB::TAN_F128, RTLIB::TAN_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2479,6 +2499,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: + case ISD::FTAN: case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; // Binary FP Operations @@ -2914,6 +2935,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: + case ISD::FTAN: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; // Binary FP Operations diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bec9cb49b5864..2350b562a0343 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -586,6 +586,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTAN(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); SDValue SoftenFloatRes_LOAD(SDNode *N); SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N); @@ -635,6 +636,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, SDValue &Lo, SDValue &Hi); + // clang-format off void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -667,9 +669,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + // clang-format on // Float Operand Expansion. bool ExpandFloatOperand(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 6acbc044d6731..5c5347b7f314e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -397,6 +397,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: case ISD::FLDEXP: case ISD::FPOWI: case ISD::FPOW: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 361416edb554c..92ce3b17ed6c9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -108,6 +108,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: + case ISD::FTAN: case ISD::FTRUNC: case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: @@ -1140,6 +1141,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::VP_SQRT: + case ISD::FTAN: case ISD::FTRUNC: case ISD::VP_FROUNDTOZERO: case ISD::SINT_TO_FP: @@ -4400,6 +4402,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: + case ISD::FTAN: case ISD::FTRUNC: if (unrollExpandedOp()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b05649c6ce955..2900cefd2c806 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5349,6 +5349,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FREM: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: case ISD::FMA: case ISD::FMAD: { if (SNaN) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 85e4cc3b82e6e..2d2da3e352780 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6748,6 +6748,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::fabs: case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::tan: case Intrinsic::exp10: case Intrinsic::floor: case Intrinsic::ceil: @@ -6765,6 +6766,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::fabs: Opcode = ISD::FABS; break; case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::tan: Opcode = ISD::FTAN; break; case Intrinsic::exp10: Opcode = ISD::FEXP10; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; @@ -9147,6 +9149,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + if (visitUnaryFloatCall(I, ISD::FTAN)) + return; + break; case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 2198c2354483c..52da24b594513 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -210,6 +210,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCOS: return "fcos"; case ISD::STRICT_FCOS: return "strict_fcos"; case ISD::FSINCOS: return "fsincos"; + case ISD::FTAN: return "ftan"; + case ISD::STRICT_FTAN: return "strict_ftan"; case ISD::FTRUNC: return "ftrunc"; case ISD::STRICT_FTRUNC: return "strict_ftrunc"; case ISD::FFLOOR: return "ffloor"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 3aec7049e0cc8..8240a1fd7e2ff 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -141,6 +141,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::EXP10_F128, "exp10f128"); setLibcallName(RTLIB::SIN_F128, "sinf128"); setLibcallName(RTLIB::COS_F128, "cosf128"); + setLibcallName(RTLIB::TAN_F128, "tanf128"); setLibcallName(RTLIB::SINCOS_F128, "sincosf128"); setLibcallName(RTLIB::POW_F128, "powf128"); setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite"); @@ -1015,7 +1016,8 @@ void TargetLoweringBase::initActions() { setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND, - ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN}, + ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN, + ISD::FTAN}, {MVT::f32, MVT::f64, MVT::f128}, Expand); // Default ISD::TRAP to expand (which turns it into abort). diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 839006cbaed4c..24745640137f8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -615,6 +615,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSIN, VT, Action); setOperationAction(ISD::FCOS, VT, Action); setOperationAction(ISD::FSINCOS, VT, Action); + setOperationAction(ISD::FTAN, VT, Action); setOperationAction(ISD::FSQRT, VT, Action); setOperationAction(ISD::FPOW, VT, Action); setOperationAction(ISD::FLOG, VT, Action); @@ -833,9 +834,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Always expand sin/cos functions even though x87 has an instruction. + // clang-format off setOperationAction(ISD::FSIN , MVT::f80, Expand); setOperationAction(ISD::FCOS , MVT::f80, Expand); setOperationAction(ISD::FSINCOS, MVT::f80, Expand); + setOperationAction(ISD::FTAN , MVT::f80, Expand); + // clang-format on setOperationAction(ISD::FFLOOR, MVT::f80, Expand); setOperationAction(ISD::FCEIL, MVT::f80, Expand); @@ -888,11 +892,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FNEG, MVT::f128, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); + // clang-format off setOperationAction(ISD::FSIN, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall); setOperationAction(ISD::FCOS, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall); setOperationAction(ISD::FSINCOS, MVT::f128, LibCall); + setOperationAction(ISD::FTAN, MVT::f128, LibCall); + setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall); + // clang-format on // No STRICT_FSINCOS setOperationAction(ISD::FSQRT, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall); @@ -944,9 +952,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16, MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { + // clang-format off setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FTAN, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); @@ -956,6 +966,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FEXP10, VT, Expand); + // clang-format on } // First set operation action for all vector types to either promote @@ -2473,7 +2484,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // function casting to f64 and calling `fmod`. if (Subtarget.is32Bit() && (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium())) - for (ISD::NodeType Op : + // clang-format off + for (ISD::NodeType Op : {ISD::FCEIL, ISD::STRICT_FCEIL, ISD::FCOS, ISD::STRICT_FCOS, ISD::FEXP, ISD::STRICT_FEXP, @@ -2482,9 +2494,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::FLOG, ISD::STRICT_FLOG, ISD::FLOG10, ISD::STRICT_FLOG10, ISD::FPOW, ISD::STRICT_FPOW, - ISD::FSIN, ISD::STRICT_FSIN}) + ISD::FSIN, ISD::STRICT_FSIN, + ISD::FTAN, ISD::STRICT_FTAN}) if (isOperationExpand(Op, MVT::f32)) setOperationAction(Op, MVT::f32, Promote); + // clang-format on // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll new file mode 100644 index 0000000000000..24b30038687f2 --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.tan.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_tanf16(half %a) nounwind { +; CHECK-LABEL: use_tanf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.tan.f16(half %a) + ret half %x +} + +define float @use_tanf32(float %a) nounwind { +; CHECK-LABEL: use_tanf32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp tanf@PLT # TAILCALL + %x = call float @llvm.tan.f32(float %a) + ret float %x +} + +define double @use_tanf64(double %a) nounwind { +; CHECK-LABEL: use_tanf64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp tan@PLT # TAILCALL + %x = call double @llvm.tan.f64(double %a) + ret double %x +} + +define x86_fp80 @use_tanf80(x86_fp80 %a) nounwind { +; CHECK-LABEL: use_tanf80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt 32(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq tanl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.tan.f80(x86_fp80 %a) + ret x86_fp80 %x +} + +define fp128 @use_tanfp128(fp128 %a) nounwind { +; CHECK-LABEL: use_tanfp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp tanf128@PLT # TAILCALL + %x = call fp128 @llvm.tan.f128(fp128 %a) + ret fp128 %x +} + +define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) nounwind { +; CHECK-LABEL: use_tanppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq tanl@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a) + ret ppc_fp128 %x +} + +declare half @llvm.tan.f16(half) +declare float @llvm.tan.f32(float) +declare double @llvm.tan.f64(double) +declare x86_fp80 @llvm.tan.f80(x86_fp80) +declare fp128 @llvm.tan.f128(fp128) +declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128) diff --git a/llvm/test/CodeGen/X86/vec-libcalls.ll b/llvm/test/CodeGen/X86/vec-libcalls.ll index 3a1315446d7a2..6857101d3d75b 100644 --- a/llvm/test/CodeGen/X86/vec-libcalls.ll +++ b/llvm/test/CodeGen/X86/vec-libcalls.ll @@ -17,6 +17,14 @@ declare <5 x float> @llvm.sin.v5f32(<5 x float>) declare <6 x float> @llvm.sin.v6f32(<6 x float>) declare <3 x double> @llvm.sin.v3f64(<3 x double>) +declare <1 x float> @llvm.tan.v1f32(<1 x float>) +declare <2 x float> @llvm.tan.v2f32(<2 x float>) +declare <3 x float> @llvm.tan.v3f32(<3 x float>) +declare <4 x float> @llvm.tan.v4f32(<4 x float>) +declare <5 x float> @llvm.tan.v5f32(<5 x float>) +declare <6 x float> @llvm.tan.v6f32(<6 x float>) +declare <3 x double> @llvm.tan.v3f64(<3 x double>) + ; Verify that all of the potential libcall candidates are handled. ; Some of these have custom lowering, so those cases won't have ; libcalls. @@ -230,6 +238,200 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind { ret <3 x double> %r } +define <1 x float> @tan_v1f32(<1 x float> %x) nounwind { +; CHECK-LABEL: tan_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x) + ret <1 x float> %r +} + +define <2 x float> @tan_v2f32(<2 x float> %x) nounwind { +; CHECK-LABEL: tan_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x) + ret <2 x float> %r +} + +define <3 x float> @tan_v3f32(<3 x float> %x) nounwind { +; CHECK-LABEL: tan_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x) + ret <3 x float> %r +} + +define <4 x float> @tan_v4f32(<4 x float> %x) nounwind { +; CHECK-LABEL: tan_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[3,3,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x) + ret <4 x float> %r +} + +define <5 x float> @tan_v5f32(<5 x float> %x) nounwind { +; CHECK-LABEL: tan_v5f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[3,3,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: retq + %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x) + ret <5 x float> %r +} + +define <6 x float> @tan_v6f32(<6 x float> %x) nounwind { +; CHECK-LABEL: tan_v6f32: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[3,3,3,3] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: retq + %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x) + ret <6 x float> %r +} + +define <3 x double> @tan_v3f64(<3 x double> %x) nounwind { +; CHECK-LABEL: tan_v3f64: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: retq + %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x) + ret <3 x double> %r +} + define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind { ; CHECK-LABEL: fabs_v2f32: ; CHECK: # %bb.0: diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll index 8d2820a245d95..1627292732b6a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll @@ -15,6 +15,11 @@ declare float @cosf(float) #0 declare double @llvm.cos.f64(double) #0 declare float @llvm.cos.f32(float) #0 +declare double @tan(double) #0 +declare float @tanf(float) #0 +declare double @llvm.tan.f64(double) #0 +declare float @llvm.tan.f32(float) #0 + declare double @pow(double, double) #0 declare float @powf(float, float) #0 declare double @llvm.pow.f64(double, double) #0 @@ -264,6 +269,114 @@ for.end: ret void } +define void @tan_f64(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @tan_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @tan(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f32(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @tan_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @tanf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.tan.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.tan.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { ; CHECK-LABEL: @pow_f64( ; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll index 038852f55f455..67a2cf2b80e70 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll @@ -356,6 +356,117 @@ for.end: ; preds = %for.body !132 = !{!"llvm.loop.vectorize.width", i32 8} !133 = !{!"llvm.loop.vectorize.enable", i1 true} +define void @tan_f64(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @tan(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +!141 = distinct !{!141, !142, !143} +!142 = !{!"llvm.loop.vectorize.width", i32 2} +!143 = !{!"llvm.loop.vectorize.enable", i1 true} + + +define void @tan_f32(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @tanf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: + ret void +} + +!151 = distinct !{!151, !152, !153} +!152 = !{!"llvm.loop.vectorize.width", i32 8} +!153 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @tan_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.tan.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: + ret void +} + +!161 = distinct !{!161, !162, !163} +!162 = !{!"llvm.loop.vectorize.width", i32 2} +!163 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @tan_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.tan.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: + ret void +} + + + +!171 = distinct !{!171, !172, !173} +!172 = !{!"llvm.loop.vectorize.width", i32 8} +!173 = !{!"llvm.loop.vectorize.enable", i1 true} + attributes #0 = { nounwind readnone } declare double @sin(double) #0 @@ -366,6 +477,10 @@ declare double @cos(double) #0 declare float @cosf(float) #0 declare double @llvm.cos.f64(double) #0 declare float @llvm.cos.f32(float) #0 +declare double @tan(double) #0 +declare float @tanf(float) #0 +declare double @llvm.tan.f64(double) #0 +declare float @llvm.tan.f32(float) #0 declare float @expf(float) #0 declare float @powf(float, float) #0 declare float @llvm.exp.f32(float) #0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll index 005557d7445ca..2e78e3632feab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll @@ -230,6 +230,52 @@ for.end: ret void } +define void @tan_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_tan4(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.tan.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_tanf4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.tan.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { ; CHECK-LABEL: @pow_f64( ; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll index 2e78a96a44b74..27038f3a24b66 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll @@ -406,6 +406,31 @@ for.end: ; preds = %for.body, %entry ret void } +;CHECK-LABEL: @tan_f32_intrinsic( +;CHECK: vtanf{{.*}}<4 x float> +;CHECK: ret void +declare float @llvm.tan.f32(float) nounwind readnone +define void @tan_f32_intrinsic(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call float @llvm.tan.f32(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv + store float %call, ptr %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + ;CHECK-LABEL: @asin_f32( ;CHECK: vasinf{{.*}}<4 x float> ;CHECK: ret void