diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index b22bdd555cd41..5d2900a858c91 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -472,19 +472,33 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16), "_ZGV_LLVM_N16 TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cbrt", "_ZGVnN2v_cbrt", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("copysign", "_ZGVnN2vv_copysign", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cospi", "_ZGVnN2v_cospi", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("erf", "_ZGVnN2v_erf", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("erfc", "_ZGVnN2v_erfc", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") @@ -494,8 +508,24 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v" TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("expm1", "_ZGVnN2v_expm1", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("fdim", "_ZGVnN2vv_fdim", FIXED(2), "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("fma", "_ZGVnN2vvv_fma", FIXED(2), "_ZGV_LLVM_N2vvv") + +TLI_DEFINE_VECFUNC("fmax", "_ZGVnN2vv_fmax", FIXED(2), "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("fmin", "_ZGVnN2vv_fmin", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("fmod", "_ZGVnN2vv_fmod", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("hypot", "_ZGVnN2vv_hypot", FIXED(2), "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("ilogb", "_ZGVnN2v_ilogb", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("ldexp", "_ZGVnN2vv_ldexp", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") @@ -504,11 +534,15 @@ TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log1p", "_ZGVnN2v_log1p", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8") +TLI_DEFINE_VECFUNC("nextafter", "_ZGVnN2vv_nextafter", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") @@ -521,6 +555,8 @@ TLI_DEFINE_VECFUNC("sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2v TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sinpi", "_ZGVnN2v_sinpi", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") @@ -533,19 +569,33 @@ TLI_DEFINE_VECFUNC("tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN4v_cbrtf", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("copysignf", "_ZGVnN4vv_copysignf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cospif", "_ZGVnN4v_cospif", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("erff", "_ZGVnN4v_erff", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN4v_erfcf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") @@ -555,8 +605,24 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN4v_expm1f", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("fdimf", "_ZGVnN4vv_fdimf", FIXED(4), "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("fmaf", "_ZGVnN4vvv_fmaf", FIXED(4), "_ZGV_LLVM_N4vvv") + +TLI_DEFINE_VECFUNC("fmaxf", "_ZGVnN4vv_fmaxf", FIXED(4), "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("fminf", "_ZGVnN4vv_fminf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("fmodf", "_ZGVnN4vv_fmodf", FIXED(4), "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN4vv_hypotf", FIXED(4), "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("ilogbf", "_ZGVnN4v_ilogbf", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("ldexpf", "_ZGVnN4vv_ldexpf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") @@ -565,11 +631,15 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN4v_log1pf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4") +TLI_DEFINE_VECFUNC("nextafterf", "_ZGVnN4vv_nextafterf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") @@ -582,6 +652,8 @@ TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sinpif", "_ZGVnN4v_sinpif", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") @@ -595,9 +667,15 @@ TLI_DEFINE_VECFUNC("tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -607,6 +685,12 @@ TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("copysign", "_ZGVsMxvv_copysign", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("copysignf", "_ZGVsMxvv_copysignf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -615,6 +699,15 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsM TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cospi", "_ZGVsMxv_cospi", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cospif", "_ZGVsMxv_cospif", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("erf", "_ZGVsMxv_erf", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("erff", "_ZGVsMxv_erff", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("erfc", "_ZGVsMxv_erfc", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("erfcf", "_ZGVsMxv_erfcf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -630,9 +723,33 @@ TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("expm1", "_ZGVsMxv_expm1", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("expm1f", "_ZGVsMxv_expm1f", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("fdim", "_ZGVsMxvv_fdim", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("fdimf", "_ZGVsMxvv_fdimf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("fma", "_ZGVsMxvvv_fma", SCALABLE(2), MASKED, "_ZGVsMxvvv") +TLI_DEFINE_VECFUNC("fmaf", "_ZGVsMxvvv_fmaf", SCALABLE(4), MASKED, "_ZGVsMxvvv") + +TLI_DEFINE_VECFUNC("fmax", "_ZGVsMxvv_fmax", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("fmaxf", "_ZGVsMxvv_fmaxf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("fmin", "_ZGVsMxvv_fmin", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("fminf", "_ZGVsMxvv_fminf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("hypot", "_ZGVsMxvv_hypot", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("hypotf", "_ZGVsMxvv_hypotf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("ilogb", "_ZGVsMxv_ilogb", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("ilogbf", "_ZGVsMxv_ilogbf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("ldexp", "_ZGVsMxvv_ldexp", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("ldexpf", "_ZGVsMxvv_ldexpf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("lgamma", "_ZGVsMxv_lgamma", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("lgammaf", "_ZGVsMxv_lgammaf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -646,6 +763,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log1p", "_ZGVsMxv_log1p", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log1pf", "_ZGVsMxv_log1pf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -654,6 +774,9 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGV TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8") TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4") +TLI_DEFINE_VECFUNC("nextafter", "_ZGVsMxvv_nextafter", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("nextafterf", "_ZGVsMxvv_nextafterf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") @@ -673,6 +796,9 @@ TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, " TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sinpi", "_ZGVsMxv_sinpi", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sinpif", "_ZGVsMxv_sinpif", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("sqrt", "_ZGVsMxv_sqrt", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("sqrtf", "_ZGVsMxv_sqrtf", SCALABLE(4), MASKED, "_ZGVsMxv") diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll index 2eb7151ffe31a..c656099838f62 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll @@ -90,11 +90,11 @@ declare float @acoshf(float) define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @acosh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @acosh(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @acosh_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @acosh(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_acosh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @acosh_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -125,11 +125,11 @@ define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @acosh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @acoshf(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_acoshf(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @acosh_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @acoshf(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_acoshf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @acosh_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -236,11 +236,11 @@ declare float @asinhf(float) define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @asinh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @asinh(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @asinh_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @asinh(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_asinh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @asinh_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -271,11 +271,11 @@ define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @asinh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @asinhf(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_asinhf(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @asinh_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @asinhf(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_asinhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @asinh_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -528,11 +528,11 @@ declare float @cbrtf(float) define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @cbrt_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @cbrt(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @cbrt_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @cbrt(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cbrt( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cbrt_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -563,11 +563,11 @@ define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @cbrt_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @cbrtf(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cbrtf(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @cbrt_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @cbrtf(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cbrtf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cbrt_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -601,11 +601,11 @@ declare float @copysignf(float, float) define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @copysign_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_copysign(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @copysign_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_copysign( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @copysign_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -636,11 +636,11 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @copysign_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_copysignf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @copysign_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_copysignf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @copysign_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -814,17 +814,90 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } +declare double @cospi(double) +declare float @cospif(float) + +define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @cospi_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cospi(<2 x double> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @cospi_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cospi( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @cospi_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) +; +; ARMPL-SVE-LABEL: define void @cospi_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @cospi(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @cospi_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cospif(<4 x float> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @cospi_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cospif( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @cospi_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) +; +; ARMPL-SVE-LABEL: define void @cospi_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @cospif(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @erf(double) declare float @erff(float) define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @erf_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @erf(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @erf_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @erf(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_erf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erf_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -855,11 +928,11 @@ define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @erf_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @erff(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_erff(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @erf_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @erff(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_erff( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erf_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -893,11 +966,11 @@ declare float @erfcf(float) define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @erfc_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @erfc(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @erfc_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @erfc(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_erfc( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erfc_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -928,11 +1001,11 @@ define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @erfc_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @erfcf(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_erfcf(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @erfc_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @erfcf(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_erfcf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erfc_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1185,11 +1258,11 @@ declare float @expm1f(float) define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @expm1_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @expm1(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @expm1_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @expm1(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_expm1( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @expm1_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1220,11 +1293,11 @@ define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @expm1_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @expm1f(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expm1f(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @expm1_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @expm1f(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_expm1f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @expm1_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1258,11 +1331,11 @@ declare float @fdimf(float, float) define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @fdim_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fdim(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @fdim_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fdim( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @fdim_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1293,11 +1366,11 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @fdim_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fdimf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @fdim_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fdimf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @fdim_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1331,11 +1404,11 @@ declare float @fmaf(float, float, float) define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @fma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vvv_fma(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @fma_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvvv_fma( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @fma_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1366,11 +1439,11 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @fma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vvv_fmaf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @fma_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvvv_fmaf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @fma_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1398,17 +1471,90 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } +declare double @fmax(double, double) +declare float @fmaxf(float, float) + +define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fmax_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmax(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; SLEEF-SVE-LABEL: define void @fmax_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fmax( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @fmax_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) +; +; ARMPL-SVE-LABEL: define void @fmax_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @fmax(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fmax_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fmaxf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; SLEEF-SVE-LABEL: define void @fmax_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fmaxf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @fmax_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) +; +; ARMPL-SVE-LABEL: define void @fmax_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @fmaxf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @fmin(double, double) declare float @fminf(float, float) define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @fmin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmin(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @fmin_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fmin( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @fmin_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1439,11 +1585,11 @@ define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @fmin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fminf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @fmin_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fminf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @fmin_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1550,11 +1696,11 @@ declare float @hypotf(float, float) define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @hypot_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @hypot_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_hypot( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @hypot_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1585,11 +1731,11 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @hypot_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_hypotf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @hypot_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_hypotf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @hypot_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1617,6 +1763,156 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } +declare i32 @ilogb(double) +declare i32 @ilogbf(float) + +define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @ilogb_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x i32> @_ZGVnN2v_ilogb(<2 x double> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @ilogb_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_ilogb( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @ilogb_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) +; +; ARMPL-SVE-LABEL: define void @ilogb_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call i32 @ilogb(double %in) + %out.gep = getelementptr inbounds i32, ptr %out.ptr, i64 %iv + store i32 %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @ilogb_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x i32> @_ZGVnN4v_ilogbf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @ilogb_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_ilogbf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @ilogb_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) +; +; ARMPL-SVE-LABEL: define void @ilogb_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call i32 @ilogbf(float %in) + %out.gep = getelementptr inbounds i32, ptr %out.ptr, i64 %iv + store i32 %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @ldexp(double, i32) +declare float @ldexpf(float, i32) + +define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @ldexp_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_ldexp(<2 x double> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]]) +; +; SLEEF-SVE-LABEL: define void @ldexp_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP17:%.*]] = call @_ZGVsMxvv_ldexp( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD1:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @ldexp_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) +; +; ARMPL-SVE-LABEL: define void @ldexp_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in1.gep = getelementptr inbounds double, ptr %in1.ptr, i64 %iv + %in1 = load double, ptr %in1.gep, align 8 + %in2.gep = getelementptr inbounds i32, ptr %in2.ptr, i64 %iv + %in2 = load i32, ptr %in2.gep, align 8 + %call = tail call double @ldexp(double %in1, i32 %in2) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @ldexp_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_ldexpf(<4 x float> [[WIDE_LOAD:%.*]], <4 x i32> [[WIDE_LOAD1:%.*]]) +; +; SLEEF-SVE-LABEL: define void @ldexp_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP17:%.*]] = call @_ZGVsMxvv_ldexpf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD1:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @ldexp_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) +; +; ARMPL-SVE-LABEL: define void @ldexp_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in1.gep = getelementptr inbounds float, ptr %in1.ptr, i64 %iv + %in1 = load float, ptr %in1.gep, align 8 + %in2.gep = getelementptr inbounds i32, ptr %in2.ptr, i64 %iv + %in2 = load i32, ptr %in2.gep, align 8 + %call = tail call float @ldexpf(float %in1, i32 %in2) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @lgamma(double) declare float @lgammaf(float) @@ -1842,11 +2138,11 @@ declare float @log1pf(float) define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @log1p_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @log1p_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log1p( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log1p_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -1877,11 +2173,11 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @log1p_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log1pf(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @log1p_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log1pf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log1p_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -2063,11 +2359,11 @@ declare float @nextafterf(float, float) define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @nextafter_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_nextafter(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @nextafter_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_nextafter( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @nextafter_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -2098,11 +2394,11 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @nextafter_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_nextafterf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; ; SLEEF-SVE-LABEL: define void @nextafter_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_nextafterf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @nextafter_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -2349,6 +2645,79 @@ for.cond.cleanup: ret void } +declare void @sincospi(double, ptr, ptr) +declare void @sincospif(float, ptr, ptr) + +define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; SLEEF-NEON-LABEL: define void @sincospi_f64 +; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincospi(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; SLEEF-SVE-LABEL: define void @sincospi_f64 +; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincospi( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @sincospi_f64 +; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; ARMPL-SVE-LABEL: define void @sincospi_f64 +; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: call void @armpl_svsincospi_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr double, ptr %a, i64 %indvars.iv + %num = load double, ptr %gepa, align 8 + %gepb = getelementptr double, ptr %b, i64 %indvars.iv + %gepc = getelementptr double, ptr %c, i64 %indvars.iv + call void @sincospi(double %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; SLEEF-NEON-LABEL: define void @sincospi_f32 +; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincospif(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; SLEEF-SVE-LABEL: define void @sincospi_f32 +; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincospif( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @sincospi_f32 +; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; ARMPL-SVE-LABEL: define void @sincospi_f32 +; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: call void @armpl_svsincospi_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr float, ptr %a, i64 %indvars.iv + %num = load float, ptr %gepa, align 8 + %gepb = getelementptr float, ptr %b, i64 %indvars.iv + %gepc = getelementptr float, ptr %c, i64 %indvars.iv + call void @sincospif(float %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + declare double @sinh(double) declare float @sinhf(float) @@ -2428,11 +2797,11 @@ declare float @sinpif(float) define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @sinpi_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sinpi(<2 x double> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @sinpi_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinpi( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sinpi_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { @@ -2463,11 +2832,11 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; SLEEF-NEON-LABEL: define void @sinpi_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinpif(<4 x float> [[WIDE_LOAD:%.*]]) ; ; SLEEF-SVE-LABEL: define void @sinpi_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinpif( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sinpi_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {