diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 402189769c20c..ffdf8b8c3bc79 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -92,6 +92,11 @@ TLI_DEFINE_VECFUNC("llvm.sin.f64", "_simd_sin_d2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("sinf", "_simd_sin_f4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.sin.f32", "_simd_sin_f4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tan", "_simd_tan_d2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_simd_tan_d2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tanf", "_simd_tan_f4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_simd_tan_f4", FIXED(4), "_ZGV_LLVM_N4v") + // Floating-Point Arithmetic and Auxiliary Functions TLI_DEFINE_VECFUNC("cbrt", "_simd_cbrt_d2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("cbrtf", "_simd_cbrt_f4", FIXED(4), "_ZGV_LLVM_N4v") @@ -584,6 +589,7 @@ TLI_DEFINE_VECFUNC("sinpi", "_ZGVnN2v_sinpi", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v") @@ -681,6 +687,7 @@ TLI_DEFINE_VECFUNC("sinpif", "_ZGVnN4v_sinpif", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v") @@ -828,6 +835,8 @@ TLI_DEFINE_VECFUNC("sqrtf", "_ZGVsMxv_sqrtf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -1087,6 +1096,11 @@ TLI_DEFINE_VECFUNC("tanf", "armpl_vtanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tan", "armpl_svtan_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("tanf", "armpl_svtan_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "armpl_vtanq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "armpl_vtanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "armpl_svtan_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "armpl_svtan_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("tanh", "armpl_vtanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("tanhf", "armpl_vtanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanh", "armpl_svtanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 62cf6a2c47acc..56fd15f23363d 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -3534,6 +3534,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } case 
Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::tan: case Intrinsic::pow: { MVT RetVT; if (!isTypeLegal(II->getType(), RetVT)) @@ -3542,11 +3543,11 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (RetVT != MVT::f32 && RetVT != MVT::f64) return false; - static const RTLIB::Libcall LibCallTable[3][2] = { - { RTLIB::SIN_F32, RTLIB::SIN_F64 }, - { RTLIB::COS_F32, RTLIB::COS_F64 }, - { RTLIB::POW_F32, RTLIB::POW_F64 } - }; + static const RTLIB::Libcall LibCallTable[4][2] = { + {RTLIB::SIN_F32, RTLIB::SIN_F64}, + {RTLIB::COS_F32, RTLIB::COS_F64}, + {RTLIB::TAN_F32, RTLIB::TAN_F64}, + {RTLIB::POW_F32, RTLIB::POW_F64}}; RTLIB::Libcall LC; bool Is64Bit = RetVT == MVT::f64; switch (II->getIntrinsicID()) { @@ -3558,9 +3559,12 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::cos: LC = LibCallTable[1][Is64Bit]; break; - case Intrinsic::pow: + case Intrinsic::tan: LC = LibCallTable[2][Is64Bit]; break; + case Intrinsic::pow: + LC = LibCallTable[3][Is64Bit]; + break; } ArgListTy Args; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 360a841bdade4..48bf648b00522 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -543,6 +543,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSINCOS, MVT::f128, Expand); setOperationAction(ISD::FSQRT, MVT::f128, Expand); setOperationAction(ISD::FSUB, MVT::f128, LibCall); + setOperationAction(ISD::FTAN, MVT::f128, Expand); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); setOperationAction(ISD::SETCC, MVT::f128, Custom); setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); @@ -727,14 +728,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote); } - for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, - ISD::FCOS, ISD::FSIN, ISD::FSINCOS, - ISD::FEXP, ISD::FEXP2, ISD::FEXP10, - ISD::FLOG, ISD::FLOG2, ISD::FLOG10, - ISD::STRICT_FREM, - ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS, - ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2, - ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) { + for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, + ISD::FCOS, ISD::FSIN, ISD::FSINCOS, + ISD::FTAN, ISD::FEXP, ISD::FEXP2, + ISD::FEXP10, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW, + ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN, + ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG, + ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) { setOperationAction(Op, MVT::f16, Promote); setOperationAction(Op, MVT::v4f16, Expand); setOperationAction(Op, MVT::v8f16, Expand); @@ -1171,26 +1172,27 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (Subtarget->isNeonAvailable()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: + // clang-format off for (auto Op : - {ISD::SELECT, ISD::SELECT_CC, - ISD::BR_CC, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FDIV, ISD::FMA, - ISD::FNEG, ISD::FABS, ISD::FCEIL, - ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT, - ISD::FSIN, ISD::FCOS, ISD::FPOW, - ISD::FLOG, ISD::FLOG2, ISD::FLOG10, - ISD::FEXP, ISD::FEXP2, ISD::FEXP10, - ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, - ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM, - ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, - 
ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, - ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT, - ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, - ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, - ISD::STRICT_FMAXIMUM}) + {ISD::SELECT, ISD::SELECT_CC, + ISD::BR_CC, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FDIV, ISD::FMA, + ISD::FNEG, ISD::FABS, ISD::FCEIL, + ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT, + ISD::FSIN, ISD::FCOS, ISD::FTAN, + ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, ISD::FEXP2, + ISD::FEXP10, ISD::FRINT, ISD::FROUND, + ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, + ISD::FMAXNUM, ISD::FMINIMUM, ISD::FMAXIMUM, + ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FCEIL, + ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT, + ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, + ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM}) setOperationAction(Op, MVT::v1f64, Expand); - + // clang-format on for (auto Op : {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL, @@ -1622,6 +1624,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FTAN, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FEXP10, VT, Expand); @@ -1803,6 +1806,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) { if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FTAN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 07a0473888ee5..42cd43c3afa37 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -267,9 +267,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .libcallFor({{s64, s128}}) .minScalarOrElt(1, MinFPScalar); - getActionDefinitionsBuilder( - {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, - G_FEXP, G_FEXP2, G_FEXP10}) + getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, + G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10}) // We need a call for these, so we always need to scalarize. .scalarize(0) // Regardless of FP16 support, widen 16-bit elements to 32-bits. 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index a61931b898aea..eb94cc5d0fb61 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -2313,6 +2313,14 @@ define float @test_sin_f32(float %x) { ret float %y } +declare float @llvm.tan.f32(float) +define float @test_tan_f32(float %x) { + ; CHECK-LABEL: name: test_tan_f32 + ; CHECK: %{{[0-9]+}}:_(s32) = G_FTAN %{{[0-9]+}} + %y = call float @llvm.tan.f32(float %x) + ret float %y +} + declare float @llvm.sqrt.f32(float) define float @test_sqrt_f32(float %x) { ; CHECK-LABEL: name: test_sqrt_f32 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-tan.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-tan.mir new file mode 100644 index 0000000000000..455aae0128364 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-tan.mir @@ -0,0 +1,227 @@ +# RUN: llc -verify-machineinstrs -mtriple aarch64--- \ +# RUN: -run-pass=legalizer -mattr=+fullfp16 -global-isel %s -o - \ +# RUN: | FileCheck %s +... +--- +name: test_v4f16.tan +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $d0 + ; CHECK-LABEL: name: test_v4f16.tan + ; CHECK: [[V1:%[0-9]+]]:_(s16), [[V2:%[0-9]+]]:_(s16), [[V3:%[0-9]+]]:_(s16), [[V4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>) + + ; CHECK-DAG: [[V1_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V1]](s16) + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32) + ; CHECK-NEXT: BL &tanf + ; CHECK-NEXT: ADJCALLSTACKUP + ; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32) + + ; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16) + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32) + ; CHECK-NEXT: BL &tanf + ; CHECK-NEXT: ADJCALLSTACKUP + ; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32) + + ; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16) + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32) + ; CHECK-NEXT: BL &tanf + ; CHECK-NEXT: ADJCALLSTACKUP + ; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32) + + ; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16) + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32) + ; CHECK-NEXT: BL &tanf + ; CHECK-NEXT: ADJCALLSTACKUP + ; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32) + + ; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16) + + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = G_FTAN %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v8f16.tan +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v8f16.tan + + ; This is big, so let's just check for the 8 calls to tanf, the + ; G_UNMERGE_VALUES, and the G_BUILD_VECTOR. The other instructions ought + ; to be covered by the other tests.
+ + ; CHECK: G_UNMERGE_VALUES + ; CHECK: BL &tanf + ; CHECK: BL &tanf + ; CHECK: BL &tanf + ; CHECK: BL &tanf + ; CHECK: BL &tanf + ; CHECK: BL &tanf + ; CHECK: BL &tanf + ; CHECK: BL &tanf + ; CHECK: G_BUILD_VECTOR + + %0:_(<8 x s16>) = COPY $q0 + %1:_(<8 x s16>) = G_FTAN %0 + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f32.tan +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_v2f32.tan + ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>) + + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-DAG: $s0 = COPY [[V1]](s32) + ; CHECK-DAG: BL &tanf + ; CHECK-DAG: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: ADJCALLSTACKDOWN + ; CHECK-DAG: $s0 = COPY [[V2]](s32) + ; CHECK-DAG: BL &tanf + ; CHECK-DAG: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: %1:_(<2 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32) + + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = G_FTAN %0 + $d0 = COPY %1(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v4f32.tan +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $q0 + ; CHECK-LABEL: name: test_v4f32.tan + ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32), [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>) + + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-DAG: $s0 = COPY [[V1]](s32) + ; CHECK-DAG: BL &tanf + ; CHECK-DAG: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: ADJCALLSTACKDOWN + ; CHECK-DAG: $s0 = COPY [[V2]](s32) + ; CHECK-DAG: BL &tanf + ; CHECK-DAG: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: ADJCALLSTACKDOWN + ; CHECK-DAG: $s0 = COPY [[V3]](s32) + ; CHECK-DAG: BL &tanf + ; CHECK-DAG: [[ELT3:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: ADJCALLSTACKDOWN + ; CHECK-DAG: $s0 = COPY [[V4]](s32) + ; CHECK-DAG: BL &tanf + ; CHECK-DAG: [[ELT4:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: %1:_(<4 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32), [[ELT3]](s32), [[ELT4]](s32) + + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = G_FTAN %0 + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f64.tan +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v2f64.tan + ; CHECK: [[V1:%[0-9]+]]:_(s64), [[V2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>) + + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-DAG: $d0 = COPY [[V1]](s64) + ; CHECK-DAG: BL &tan + ; CHECK-DAG: [[ELT1:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: ADJCALLSTACKDOWN + ; CHECK-DAG: $d0 = COPY [[V2]](s64) + ; CHECK-DAG: BL &tan + ; CHECK-DAG: [[ELT2:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-DAG: ADJCALLSTACKUP + + ; CHECK-DAG: %1:_(<2 x s64>) = G_BUILD_VECTOR [[ELT1]](s64), [[ELT2]](s64) + + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = G_FTAN %0 + $q0 = COPY %1(<2 x s64>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: test_tan_half +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $h0 + ; CHECK-LABEL: name: test_tan_half + ; CHECK: [[REG1:%[0-9]+]]:_(s32) = G_FPEXT %0(s16) + ; CHECK-NEXT: ADJCALLSTACKDOWN + ; CHECK-NEXT: $s0 = COPY [[REG1]](s32) + ; CHECK-NEXT: BL &tanf + ; CHECK-NEXT: ADJCALLSTACKUP + ; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s16) = G_FPTRUNC [[REG2]](s32) + + %0:_(s16) = COPY $h0 + %1:_(s16) = G_FTAN %0 + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index d71111b57efe5..04c9a08c50038 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -675,8 +675,9 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FTAN (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FSQRT (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index fa362ae798aa8..998f7dd38c3f5 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -759,6 +759,7 @@ declare half @llvm.sqrt.f16(half %a) #0 declare half @llvm.powi.f16.i32(half %a, i32 %b) #0 declare half @llvm.sin.f16(half %a) #0 declare half @llvm.cos.f16(half %a) #0 +declare half @llvm.tan.f16(half %a) #0 declare half @llvm.pow.f16(half %a, half %b) #0 declare half @llvm.exp.f16(half %a) #0 declare half @llvm.exp2.f16(half %a) #0 @@ -870,6 +871,31 @@ define half @test_cos(half %a) #0 { ret half %r } +; FALLBACK-NOT: remark:{{.*}}test_tan +; FALLBACK-FP16-NOT: remark:{{.*}}test_tan + +; CHECK-COMMON-LABEL: test_tan: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl {{_?}}tanf +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 +; CHECK-COMMON-NEXT: ret + +; GISEL-LABEL: test_tan: +; GISEL-NEXT: stp x29, x30, [sp, #-16]! +; GISEL-NEXT: mov x29, sp +; GISEL-NEXT: fcvt s0, h0 +; GISEL-NEXT: bl {{_?}}tanf +; GISEL-NEXT: fcvt h0, s0 +; GISEL-NEXT: ldp x29, x30, [sp], #16 +; GISEL-NEXT: ret +define half @test_tan(half %a) #0 { + %r = call half @llvm.tan.f16(half %a) + ret half %r +} + ; CHECK-COMMON-LABEL: test_pow: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
; CHECK-COMMON-NEXT: mov x29, sp diff --git a/llvm/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll b/llvm/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll index 16bab1565e7fa..7ff4dfca3f4c3 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll @@ -67,6 +67,28 @@ define double @cos_f64(double %a) { ret double %1 } +define float @tan_f32(float %a) { +; SMALL-LABEL: tan_f32 +; SMALL: bl _tanf +; LARGE-LABEL: tan_f32 +; LARGE: adrp [[REG:x[0-9]+]], _tanf@GOTPAGE +; LARGE: ldr [[REG]], [[[REG]], _tanf@GOTPAGEOFF] +; LARGE-NEXT: blr [[REG]] + %1 = call float @llvm.tan.f32(float %a) + ret float %1 +} + +define double @tan_f64(double %a) { +; SMALL-LABEL: tan_f64 +; SMALL: bl _tan +; LARGE-LABEL: tan_f64 +; LARGE: adrp [[REG:x[0-9]+]], _tan@GOTPAGE +; LARGE: ldr [[REG]], [[[REG]], _tan@GOTPAGEOFF] +; LARGE-NEXT: blr [[REG]] + %1 = call double @llvm.tan.f64(double %a) + ret double %1 +} + define float @pow_f32(float %a, float %b) { ; SMALL-LABEL: pow_f32 ; SMALL: bl _powf @@ -92,5 +114,7 @@ declare float @llvm.sin.f32(float) declare double @llvm.sin.f64(double) declare float @llvm.cos.f32(float) declare double @llvm.cos.f64(double) +declare float @llvm.tan.f32(float) +declare double @llvm.tan.f64(double) declare float @llvm.pow.f32(float, float) declare double @llvm.pow.f64(double, double) diff --git a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll index 20435e10d89ff..3281a98767795 100644 --- a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll +++ b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll @@ -159,6 +159,28 @@ define void @test_sin(float %float, double %double, fp128 %fp128) { } +declare float @llvm.tan.f32(float) +declare double @llvm.tan.f64(double) +declare fp128 @llvm.tan.f128(fp128) + +define void @test_tan(float %float, double %double, fp128 %fp128) { +; CHECK-LABEL: test_tan: + + %tanfloat = call float @llvm.tan.f32(float %float) + store float %tanfloat, ptr @varfloat +; CHECK: bl tanf + + %tandouble = call double @llvm.tan.f64(double %double) + store double %tandouble, ptr @vardouble +; CHECK: bl tan + + %tanfp128 = call fp128 @llvm.tan.f128(fp128 %fp128) + store fp128 %tanfp128, ptr @varfp128 +; CHECK: bl tanl + ret void + +} + declare float @llvm.pow.f32(float, float) declare double @llvm.pow.f64(double, double) declare fp128 @llvm.pow.f128(fp128, fp128) diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll index 758df0493cc50..ce0c5572577dc 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll @@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>) declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>) ;.
-; CHECK: @llvm.compiler.used = appending global [36 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vfmodq_f64, ptr @armpl_vfmodq_f32, ptr @armpl_svfmod_f64_x, ptr @armpl_svfmod_f32_x], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [40 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vfmodq_f64, ptr @armpl_vfmodq_f32, ptr @armpl_svfmod_f64_x, ptr @armpl_svfmod_f32_x], section "llvm.metadata" ;. 
define <2 x double> @llvm_cos_f64(<2 x double> %in) { ; CHECK-LABEL: define <2 x double> @llvm_cos_f64 @@ -424,6 +424,50 @@ define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 { ret <vscale x 4 x float> %1 } +declare <2 x double> @llvm.tan.v2f64(<2 x double>) +declare <4 x float> @llvm.tan.v4f32(<4 x float>) +declare <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double>) +declare <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float>) + +define <2 x double> @llvm_tan_f64(<2 x double> %in) { +; CHECK-LABEL: define <2 x double> @llvm_tan_f64 +; CHECK-SAME: (<2 x double> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vtanq_f64(<2 x double> [[IN]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_tan_f32(<4 x float> %in) { +; CHECK-LABEL: define <4 x float> @llvm_tan_f32 +; CHECK-SAME: (<4 x float> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vtanq_f32(<4 x float> [[IN]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <vscale x 2 x double> @llvm_tan_vscale_f64(<vscale x 2 x double> %in) #0 { +; CHECK-LABEL: define <vscale x 2 x double> @llvm_tan_vscale_f64 +; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +; + %1 = call fast <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double> %in) + ret <vscale x 2 x double> %1 +} + +define <vscale x 4 x float> @llvm_tan_vscale_f32(<vscale x 4 x float> %in) #0 { +; CHECK-LABEL: define <vscale x 4 x float> @llvm_tan_vscale_f32 +; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +; + %1 = call fast <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float> %in) + ret <vscale x 4 x float> %1 +} define <2 x double> @frem_f64(<2 x double> %in) { ; CHECK-LABEL: define <2 x double> @frem_f64 diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll index 9e214655e413a..c58a9bc8342de 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll @@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu" ;. -; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxvv_fmod, ptr @_ZGVsMxvv_fmodf], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [20 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxvv_fmod, ptr @_ZGVsMxvv_fmodf], section "llvm.metadata" ;.
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_ceil_vscale_f64( @@ -366,6 +366,25 @@ define <vscale x 4 x float> @llvm_sqrt_vscale_f32(<vscale x 4 x float> %in) { ret <vscale x 4 x float> %1 } +define <vscale x 2 x double> @llvm_tan_vscale_f64(<vscale x 2 x double> %in) { +; CHECK-LABEL: @llvm_tan_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +; + %1 = call fast <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double> %in) + ret <vscale x 2 x double> %1 +} + +define <vscale x 4 x float> @llvm_tan_vscale_f32(<vscale x 4 x float> %in) { +; CHECK-LABEL: @llvm_tan_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +; + %1 = call fast <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float> %in) + ret <vscale x 4 x float> %1 +} + + define <vscale x 2 x double> @llvm_trunc_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_trunc_vscale_f64( ; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[IN:%.*]]) @@ -442,6 +461,8 @@ declare <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double>) declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>) declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>) declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double>) +declare <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float>) declare <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double>) declare <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float>) ;. diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll index f408df570fdc0..1df2caed4244e 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll @@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu" ;. -; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2vv_fmod, ptr @_ZGVnN4vv_fmodf], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [20 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2vv_fmod, ptr @_ZGVnN4vv_fmodf], section "llvm.metadata" ;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) { ; CHECK-LABEL: @llvm_ceil_f64( @@ -366,6 +366,24 @@ define <4 x float> @llvm_sqrt_f32(<4 x float> %in) { ret <4 x float> %1 } +define <2 x double> @llvm_tan_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_tan_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tan(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_tan_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_tan_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + define <2 x double> @llvm_trunc_f64(<2 x double> %in) { ; CHECK-LABEL: @llvm_trunc_f64( ; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> [[IN:%.*]]) @@ -442,6 +460,8 @@ declare <2 x double> @llvm.sin.v2f64(<2 x double>) declare <4 x float> @llvm.sin.v4f32(<4 x float>) declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <2 x double> @llvm.tan.v2f64(<2 x double>) +declare <4 x float> @llvm.tan.v4f32(<4 x float>) declare <2 x double> @llvm.trunc.v2f64(<2 x double>) declare <4 x float> @llvm.trunc.v4f32(<4 x float>) ;. diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll index aa2cca40a92c2..9bbac5d69c28c 100644 --- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -20,6 +20,7 @@ declare <3 x double> @llvm.sin.v3f64(<3 x double>) declare <3 x float> @llvm.fabs.v3f32(<3 x float>) declare <3 x float> @llvm.ceil.v3f32(<3 x float>) declare <3 x float> @llvm.cos.v3f32(<3 x float>) +declare <3 x float> @llvm.tan.v3f32(<3 x float>) declare <3 x float> @llvm.exp.v3f32(<3 x float>) declare <3 x float> @llvm.exp2.v3f32(<3 x float>) declare <3 x float> @llvm.floor.v3f32(<3 x float>) @@ -297,6 +298,37 @@ define <3 x float> @cos_v3f32(<3 x float> %x) nounwind { ret <3 x float> %r } +define <3 x float> @tan_v3f32(<3 x float> %x) nounwind { +; CHECK-LABEL: tan_v3f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: bl tanf +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: bl tanf +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v1.s[2], v0.s[0] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret + %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x) + ret <3 x float> %r +} + define <3 x float> @exp_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: exp_v3f32: ; CHECK: // %bb.0: diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll index 2017797288c29..89a513515b205 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll @@ -291,6 +291,150 @@ for.end: ret void } +declare float @sinf(float) nounwind readnone +define void @sinf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { +; CHECK-LABEL: @sinf_v4f32( +; CHECK: call <4 x float> @_simd_sin_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 + %call = tail call float @sinf(float %lv) + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @sin(double) nounwind readnone +define void @sin_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { +; CHECK-LABEL: @sin_v2f64( +; CHECK: call <2 x double> @_simd_sin_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 + %call = tail call double @sin(double %lv) + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @tanf(float) nounwind readnone +define void @tanf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { +; CHECK-LABEL: @tanf_v4f32( +; CHECK: call <4 x float> @_simd_tan_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 + %call = tail call float @tanf(float %lv) + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @tan(double) nounwind readnone +define void @tan_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { +; CHECK-LABEL: @tan_v2f64( +; CHECK: call <2 x double> @_simd_tan_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 + %call = tail call double @tan(double %lv) + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @llvm.tan.f32(float) nounwind readnone +define void @tan_v4f32_intrinsic(i64 %n, ptr noalias %y, ptr noalias %x) { +; CHECK-LABEL: @tan_v4f32_intrinsic( +; CHECK: call <4 x float> @_simd_tan_f4(<4 x float> +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv 
+ %lv = load float, ptr %gep.y, align 4 + %call = tail call float @llvm.tan.f32(float %lv) + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @llvm.tan.f64(double) nounwind readnone +define void @tan_v2f64_intrinsic(i64 %n, ptr noalias %y, ptr noalias %x) { +; CHECK-LABEL: @tan_v2f64_intrinsic( +; CHECK: call <2 x double> @_simd_tan_d2(<2 x double> +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 + %call = tail call double @llvm.tan.f64(double %lv) + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + declare float @cbrtf(float) nounwind readnone define void @cbrtf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cbrtf_v4f32( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll index 2a552077a42b6..f0ae0093b2641 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(cos|exp|log|sin|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|trunc)" --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(cos|exp|log|sin|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|tan|trunc)" --version 2 ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE @@ -1472,6 +1472,79 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } +declare double @llvm.tan.f64(double) +declare float @llvm.tan.f32(float) + +define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @tan_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @tan_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @tan_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vtanq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; ARMPL-SVE-LABEL: define void @tan_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]]) +; +
entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @llvm.tan.f64(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tan_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @tan_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @tan_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @tan_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vtanq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; ARMPL-SVE-LABEL: define void @tan_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @llvm.tan.f32(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @llvm.trunc.f64(double) declare float @llvm.trunc.f32(float)