From 0f9a842b067f0ce4868c68219b0364ae8863935a Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 00:57:03 +0200 Subject: [PATCH 01/12] [libc][math][c23] Add f16fmaf C23 math function --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 + libc/spec/stdc.td | 2 + libc/src/__support/FPUtil/FMA.h | 32 ++--- libc/src/__support/FPUtil/generic/FMA.h | 145 +++++++++++-------- libc/src/__support/FPUtil/multiply_add.h | 4 +- libc/src/__support/big_int.h | 16 +++ libc/src/math/CMakeLists.txt | 2 + libc/src/math/f16fmaf.h | 20 +++ libc/src/math/generic/CMakeLists.txt | 12 ++ libc/src/math/generic/expm1f.cpp | 2 +- libc/src/math/generic/f16fmaf.cpp | 19 +++ libc/src/math/generic/fma.cpp | 2 +- libc/src/math/generic/fmaf.cpp | 2 +- libc/src/math/generic/range_reduction_fma.h | 25 ++-- libc/test/src/math/CMakeLists.txt | 21 ++- libc/test/src/math/FmaTest.h | 147 ++++++++++++-------- libc/test/src/math/f16fmaf_test.cpp | 25 ++++ libc/test/src/math/smoke/CMakeLists.txt | 18 ++- libc/test/src/math/smoke/FmaTest.h | 96 ++++++++----- libc/test/src/math/smoke/f16fmaf_test.cpp | 13 ++ libc/test/src/math/smoke/fma_test.cpp | 6 +- libc/test/src/math/smoke/fmaf_test.cpp | 6 +- libc/test/src/sched/CMakeLists.txt | 34 ++--- libc/utils/MPFRWrapper/MPFRUtils.cpp | 78 ++++++----- libc/utils/MPFRWrapper/MPFRUtils.h | 48 +++++-- 27 files changed, 513 insertions(+), 266 deletions(-) create mode 100644 libc/src/math/f16fmaf.h create mode 100644 libc/src/math/generic/f16fmaf.cpp create mode 100644 libc/test/src/math/f16fmaf_test.cpp create mode 100644 libc/test/src/math/smoke/f16fmaf_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 2b2d0985a8992..dab747ca0ac74 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -503,6 +503,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 + libc.src.math.f16fmaf libc.src.math.f16sqrtf libc.src.math.fabsf16 libc.src.math.fdimf16 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 2d36ca296c3a4..45914fe9f7ad2 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -535,6 +535,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 + libc.src.math.f16fmaf libc.src.math.f16sqrtf libc.src.math.fabsf16 libc.src.math.fdimf16 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 790786147c164..293edd1c15100 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -124,6 +124,8 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | dsub | N/A | N/A | | N/A | | 7.12.14.2 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| f16fma | |check| | | | N/A | | 7.12.14.5 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fabs | |check| | |check| | |check| | |check| | |check| | 7.12.7.3 | F.10.4.3 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fadd | N/A | | | N/A | | 7.12.14.1 | F.10.11 | diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 7c4135032a0b2..b089b596b0958 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -715,6 +715,8 @@ def StdC : StandardSpec<"stdc"> { GuardedFunctionSpec<"totalordermagf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + GuardedFunctionSpec<"f16fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + GuardedFunctionSpec<"f16sqrtf", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, ] >; diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h index c277da49538bf..cf01a317d7359 100644 --- a/libc/src/__support/FPUtil/FMA.h +++ b/libc/src/__support/FPUtil/FMA.h @@ -10,41 +10,29 @@ #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H #include "src/__support/CPP/type_traits.h" +#include "src/__support/FPUtil/generic/FMA.h" #include "src/__support/macros/properties/architectures.h" #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#if defined(LIBC_TARGET_CPU_HAS_FMA) - namespace LIBC_NAMESPACE { namespace fputil { -template -LIBC_INLINE cpp::enable_if_t, T> fma(T x, T y, T z) { - return __builtin_fmaf(x, y, z); +template +LIBC_INLINE OutType fma(InType x, InType y, InType z) { + return generic::fma(x, y, z); } -template -LIBC_INLINE cpp::enable_if_t, T> fma(T x, T y, T z) { - return __builtin_fma(x, y, z); +#ifdef LIBC_TARGET_CPU_HAS_FMA +template <> LIBC_INLINE float fma(float x, float y, float z) { + return __builtin_fmaf(x, y, z); } -} // namespace fputil -} // namespace LIBC_NAMESPACE - -#else -// FMA instructions are not available -#include "generic/FMA.h" - -namespace LIBC_NAMESPACE { -namespace fputil { - -template LIBC_INLINE T fma(T x, T y, T z) { - return generic::fma(x, y, z); +template <> LIBC_INLINE double fma(double x, double y, double z) { + return __builtin_fma(x, y, z); } +#endif // LIBC_TARGET_CPU_HAS_FMA } // namespace fputil } // namespace LIBC_NAMESPACE -#endif - #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index f403aa7333b39..3bbb30476e5d8 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -10,20 +10,28 @@ #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMA_H #include "src/__support/CPP/bit.h" +#include "src/__support/CPP/limits.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/big_int.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/uint128.h" + +#include "hdr/fenv_macros.h" namespace LIBC_NAMESPACE { namespace fputil { namespace generic { -template LIBC_INLINE T fma(T x, T y, T z); +template +LIBC_INLINE cpp::enable_if_t && + cpp::is_floating_point_v && + sizeof(OutType) <= sizeof(InType), + OutType> +fma(InType x, InType y, InType z); +#ifndef LIBC_TARGET_CPU_HAS_FMA // TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes. // The implementation below only is only correct for the default rounding mode, // round-to-nearest tie-to-even. @@ -74,17 +82,20 @@ template <> LIBC_INLINE float fma(float x, float y, float z) { return static_cast(bit_sum.get_val()); } +#endif // LIBC_TARGET_CPU_HAS_FMA namespace internal { // Extract the sticky bits and shift the `mantissa` to the right by // `shift_length`. -LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) { - if (shift_length >= 128) { +template +LIBC_INLINE cpp::enable_if_t, bool> +shift_mantissa(int shift_length, T &mant) { + if (shift_length >= cpp::numeric_limits::digits) { mant = 0; return true; // prod_mant is non-zero. } - UInt128 mask = (UInt128(1) << shift_length) - 1; + T mask = (T(1) << shift_length) - 1; bool sticky_bits = (mant & mask) != 0; mant >>= shift_length; return sticky_bits; @@ -92,11 +103,29 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) { } // namespace internal -template <> LIBC_INLINE double fma(double x, double y, double z) { - using FPBits = fputil::FPBits; +template +LIBC_INLINE cpp::enable_if_t && + cpp::is_floating_point_v && + sizeof(OutType) <= sizeof(InType), + OutType> +fma(InType x, InType y, InType z) { + using OutFPBits = fputil::FPBits; + using OutStorageType = typename OutFPBits::StorageType; + using InFPBits = fputil::FPBits; + using InStorageType = typename InFPBits::StorageType; + + constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1; + constexpr size_t PROD_LEN = 2 * (IN_EXPLICIT_MANT_LEN); + constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1); + using TmpResultType = UInt; + + constexpr size_t EXTRA_FRACTION_LEN = + TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN; + constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK = + (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1; if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) { - return x * y + z; + return static_cast(x * y + z); } int x_exp = 0; @@ -104,35 +133,35 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { int z_exp = 0; // Normalize denormal inputs. - if (LIBC_UNLIKELY(FPBits(x).is_subnormal())) { - x_exp -= 52; - x *= 0x1.0p+52; + if (LIBC_UNLIKELY(InFPBits(x).is_subnormal())) { + x_exp -= InFPBits::FRACTION_LEN; + x *= InType(InStorageType(1) << InFPBits::FRACTION_LEN); } - if (LIBC_UNLIKELY(FPBits(y).is_subnormal())) { - y_exp -= 52; - y *= 0x1.0p+52; + if (LIBC_UNLIKELY(InFPBits(y).is_subnormal())) { + y_exp -= InFPBits::FRACTION_LEN; + y *= InType(InStorageType(1) << InFPBits::FRACTION_LEN); } - if (LIBC_UNLIKELY(FPBits(z).is_subnormal())) { - z_exp -= 52; - z *= 0x1.0p+52; + if (LIBC_UNLIKELY(InFPBits(z).is_subnormal())) { + z_exp -= InFPBits::FRACTION_LEN; + z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN); } - FPBits x_bits(x), y_bits(y), z_bits(z); + InFPBits x_bits(x), y_bits(y), z_bits(z); const Sign z_sign = z_bits.sign(); Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG; x_exp += x_bits.get_biased_exponent(); y_exp += y_bits.get_biased_exponent(); z_exp += z_bits.get_biased_exponent(); - if (LIBC_UNLIKELY(x_exp == FPBits::MAX_BIASED_EXPONENT || - y_exp == FPBits::MAX_BIASED_EXPONENT || - z_exp == FPBits::MAX_BIASED_EXPONENT)) - return x * y + z; + if (LIBC_UNLIKELY(x_exp == InFPBits::MAX_BIASED_EXPONENT || + y_exp == InFPBits::MAX_BIASED_EXPONENT || + z_exp == InFPBits::MAX_BIASED_EXPONENT)) + return static_cast(x * y + z); // Extract mantissa and append hidden leading bits. - UInt128 x_mant = x_bits.get_explicit_mantissa(); - UInt128 y_mant = y_bits.get_explicit_mantissa(); - UInt128 z_mant = z_bits.get_explicit_mantissa(); + InStorageType x_mant = x_bits.get_explicit_mantissa(); + InStorageType y_mant = y_bits.get_explicit_mantissa(); + TmpResultType z_mant = z_bits.get_explicit_mantissa(); // If the exponent of the product x*y > the exponent of z, then no extra // precision beside the entire product x*y is needed. On the other hand, when @@ -144,21 +173,24 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { // - prod : 1bb...bb....b // In that case, in order to store the exact result, we need at least // (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54. + // TODO: 53? (Explicit mantissa.) ^ // Overall, before aligning the mantissas and exponents, we can simply left- // shift the mantissa of z by at least 54, and left-shift the product of x*y // by (that amount - 52). After that, it is enough to align the least + // TODO: ^ 54? // significant bit, given that we keep track of the round and sticky bits // after the least significant bit. // We pick shifting z_mant by 64 bits so that technically we can simply use // the original mantissa as high part when constructing 128-bit z_mant. So the // mantissa of prod will be left-shifted by 64 - 54 = 10 initially. - UInt128 prod_mant = x_mant * y_mant << 10; + TmpResultType prod_mant = TmpResultType(x_mant) * y_mant; int prod_lsb_exp = - x_exp + y_exp - (FPBits::EXP_BIAS + 2 * FPBits::FRACTION_LEN + 10); + x_exp + y_exp - (InFPBits::EXP_BIAS + 2 * InFPBits::FRACTION_LEN); - z_mant <<= 64; - int z_lsb_exp = z_exp - (FPBits::FRACTION_LEN + 64); + constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN; + z_mant <<= RESULT_MIN_LEN; + int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN); bool round_bit = false; bool sticky_bits = false; bool z_shifted = false; @@ -198,46 +230,40 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { } } - uint64_t result = 0; + OutStorageType result = 0; int r_exp = 0; // Unbiased exponent of the result // Normalize the result. if (prod_mant != 0) { - uint64_t prod_hi = static_cast(prod_mant >> 64); - int lead_zeros = - prod_hi ? cpp::countl_zero(prod_hi) - : 64 + cpp::countl_zero(static_cast(prod_mant)); + int lead_zeros = cpp::countl_zero(prod_mant); // Move the leading 1 to the most significant bit. prod_mant <<= lead_zeros; - // The lower 64 bits are always sticky bits after moving the leading 1 to - // the most significant bit. - sticky_bits |= (static_cast(prod_mant) != 0); - result = static_cast(prod_mant >> 64); - // Change prod_lsb_exp the be the exponent of the least significant bit of - // the result. - prod_lsb_exp += 64 - lead_zeros; - r_exp = prod_lsb_exp + 63; + prod_lsb_exp -= lead_zeros; + r_exp = prod_lsb_exp + (cpp::numeric_limits::digits - 1) - + InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS; if (r_exp > 0) { // The result is normal. We will shift the mantissa to the right by // 63 - 52 = 11 bits (from the locations of the most significant bit). // Then the rounding bit will correspond the 11th bit, and the lowest // 10 bits are merged into sticky bits. - round_bit = (result & 0x0400ULL) != 0; - sticky_bits |= (result & 0x03ffULL) != 0; - result >>= 11; + round_bit = + (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0; + sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0; + result = static_cast(prod_mant >> EXTRA_FRACTION_LEN); } else { - if (r_exp < -52) { + if (r_exp < -OutFPBits::FRACTION_LEN) { // The result is smaller than 1/2 of the smallest denormal number. sticky_bits = true; // since the result is non-zero. result = 0; } else { // The result is denormal. - uint64_t mask = 1ULL << (11 - r_exp); - round_bit = (result & mask) != 0; - sticky_bits |= (result & (mask - 1)) != 0; - if (r_exp > -52) - result >>= 12 - r_exp; + TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp); + round_bit = (prod_mant & mask) != 0; + sticky_bits |= (prod_mant & (mask - 1)) != 0; + if (r_exp > -OutFPBits::FRACTION_LEN) + result = static_cast( + prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp)); else result = 0; } @@ -251,20 +277,21 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { // Finalize the result. int round_mode = fputil::quick_get_round(); - if (LIBC_UNLIKELY(r_exp >= FPBits::MAX_BIASED_EXPONENT)) { + if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) { if ((round_mode == FE_TOWARDZERO) || (round_mode == FE_UPWARD && prod_sign.is_neg()) || (round_mode == FE_DOWNWARD && prod_sign.is_pos())) { - return FPBits::max_normal(prod_sign).get_val(); + return OutFPBits::max_normal(prod_sign).get_val(); } - return FPBits::inf(prod_sign).get_val(); + return OutFPBits::inf(prod_sign).get_val(); } // Remove hidden bit and append the exponent field and sign bit. - result = (result & FPBits::FRACTION_MASK) | - (static_cast(r_exp) << FPBits::FRACTION_LEN); + result = static_cast( + (result & OutFPBits::FRACTION_MASK) | + (static_cast(r_exp) << OutFPBits::FRACTION_LEN)); if (prod_sign.is_neg()) { - result |= FPBits::SIGN_MASK; + result |= OutFPBits::SIGN_MASK; } // Rounding. @@ -277,7 +304,7 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { ++result; } - return cpp::bit_cast(result); + return cpp::bit_cast(result); } } // namespace generic diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h index 82932da5417c8..622914e4265c9 100644 --- a/libc/src/__support/FPUtil/multiply_add.h +++ b/libc/src/__support/FPUtil/multiply_add.h @@ -45,11 +45,11 @@ namespace LIBC_NAMESPACE { namespace fputil { LIBC_INLINE float multiply_add(float x, float y, float z) { - return fma(x, y, z); + return fma(x, y, z); } LIBC_INLINE double multiply_add(double x, double y, double z) { - return fma(x, y, z); + return fma(x, y, z); } } // namespace fputil diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h index 40ad6eeed7ac2..27cb3b783470c 100644 --- a/libc/src/__support/big_int.h +++ b/libc/src/__support/big_int.h @@ -983,6 +983,12 @@ using UInt = BigInt>; template using Int = BigInt>; +// Provides limits of BigInt. +template +struct cpp::numeric_limits> { + LIBC_INLINE_VAR static constexpr int digits = Bits - Signed; +}; + // Provides limits of U/Int<128>. template <> class cpp::numeric_limits> { public: @@ -1073,6 +1079,16 @@ template using make_integral_or_big_int_signed_t = typename make_integral_or_big_int_signed::type; +// is_unsigned_integral_or_big_int +template +struct is_unsigned_integral_or_big_int + : cpp::bool_constant< + cpp::is_same_v>> {}; + +template +LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v = + is_unsigned_integral_or_big_int::value; + namespace cpp { // Specialization of cpp::bit_cast ('bit.h') from T to BigInt. diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index df8e6c0b253da..4472367d6c073 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -99,6 +99,8 @@ add_math_entrypoint_object(exp10f) add_math_entrypoint_object(expm1) add_math_entrypoint_object(expm1f) +add_math_entrypoint_object(f16fmaf) + add_math_entrypoint_object(f16sqrtf) add_math_entrypoint_object(fabs) diff --git a/libc/src/math/f16fmaf.h b/libc/src/math/f16fmaf.h new file mode 100644 index 0000000000000..d92cb43c292eb --- /dev/null +++ b/libc/src/math/f16fmaf.h @@ -0,0 +1,20 @@ +//===-- Implementation header for f16fmaf -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_F16FMAF_H +#define LLVM_LIBC_SRC_MATH_F16FMAF_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +float16 f16fmaf(float x, float y, float z); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_F16FMAF_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index f1f7d6c367be2..706bfc4b08670 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3602,6 +3602,18 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + f16fmaf + SRCS + f16fmaf.cpp + HDRS + ../f16fmaf.h + DEPENDS + libc.src.__support.FPUtil.fma + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( f16sqrtf SRCS diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp index 037e60021b296..6b9f07476a650 100644 --- a/libc/src/math/generic/expm1f.cpp +++ b/libc/src/math/generic/expm1f.cpp @@ -104,7 +104,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { // intermediate results as it is more efficient than using an emulated // version of FMA. #if defined(LIBC_TARGET_CPU_HAS_FMA) - return fputil::fma(x, x, x); + return fputil::fma(x, x, x); #else double xd = x; return static_cast(fputil::multiply_add(xd, xd, xd)); diff --git a/libc/src/math/generic/f16fmaf.cpp b/libc/src/math/generic/f16fmaf.cpp new file mode 100644 index 0000000000000..09f2712639335 --- /dev/null +++ b/libc/src/math/generic/f16fmaf.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of f16fmaf function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/f16fmaf.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float16, f16fmaf, (float x, float y, float z)) { + return fputil::fma(x, y, z); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/fma.cpp b/libc/src/math/generic/fma.cpp index e27e5baeddf58..7937766dccd71 100644 --- a/libc/src/math/generic/fma.cpp +++ b/libc/src/math/generic/fma.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) { - return fputil::fma(x, y, z); + return fputil::fma(x, y, z); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/fmaf.cpp b/libc/src/math/generic/fmaf.cpp index 7512b82005d0f..d367a069ea7d8 100644 --- a/libc/src/math/generic/fmaf.cpp +++ b/libc/src/math/generic/fmaf.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) { - return fputil::fma(x, y, z); + return fputil::fma(x, y, z); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h index aee8cbb1332a6..82b4ae1c705e1 100644 --- a/libc/src/math/generic/range_reduction_fma.h +++ b/libc/src/math/generic/range_reduction_fma.h @@ -33,8 +33,8 @@ static constexpr double THIRTYTWO_OVER_PI[5] = { // k = round(x * 32 / pi) and y = (x * 32 / pi) - k. LIBC_INLINE int64_t small_range_reduction(double x, double &y) { double kd = fputil::nearest_integer(x * THIRTYTWO_OVER_PI[0]); - y = fputil::fma(x, THIRTYTWO_OVER_PI[0], -kd); - y = fputil::fma(x, THIRTYTWO_OVER_PI[1], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[0], -kd); + y = fputil::fma(x, THIRTYTWO_OVER_PI[1], y); return static_cast(kd); } @@ -54,12 +54,13 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) { prod_hi.set_uintval(prod_hi.uintval() & ((x_exp < 55) ? (~0xfffULL) : (~0ULL))); // |x| < 2^55 double k_hi = fputil::nearest_integer(prod_hi.get_val()); - double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[0], -k_hi); - double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod); + double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[0], -k_hi); + double prod_lo = + fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod); double k_lo = fputil::nearest_integer(prod_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[2], y); - y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo); + y = fputil::fma(x, THIRTYTWO_OVER_PI[2], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); return static_cast(k_lo); } @@ -74,12 +75,12 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) { prod_hi.set_uintval(prod_hi.uintval() & ((x_exp < 110) ? (~0xfffULL) : (~0ULL))); // |x| < 2^110 double k_hi = fputil::nearest_integer(prod_hi.get_val()); - double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[1], -k_hi); - double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod); + double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[1], -k_hi); + double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod); double k_lo = fputil::nearest_integer(prod_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); - y = fputil::fma(x, THIRTYTWO_OVER_PI[4], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo); + y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[4], y); return static_cast(k_lo); } diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 79e6e89a5324e..bb364c3f0a175 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1455,11 +1455,12 @@ add_fp_unittest( libc-math-unittests SRCS fmaf_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fmaf libc.src.stdlib.rand libc.src.stdlib.srand - libc.src.__support.FPUtil.fp_bits FLAGS FMA_OPT__ONLY ) @@ -1471,11 +1472,12 @@ add_fp_unittest( libc-math-unittests SRCS fma_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fma libc.src.stdlib.rand libc.src.stdlib.srand - libc.src.__support.FPUtil.fp_bits ) add_fp_unittest( @@ -1888,6 +1890,21 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + f16fmaf_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + f16fmaf_test.cpp + HDRS + FmaTest.h + DEPENDS + libc.src.math.f16fmaf + libc.src.stdlib.rand + libc.src.stdlib.srand +) + add_subdirectory(generic) add_subdirectory(smoke) diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h index 5a40f694ebd10..0e0f05aad5d13 100644 --- a/libc/test/src/math/FmaTest.h +++ b/libc/test/src/math/FmaTest.h @@ -9,7 +9,6 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_FMATEST_H #define LLVM_LIBC_TEST_SRC_MATH_FMATEST_H -#include "src/__support/FPUtil/FPBits.h" #include "src/stdlib/rand.h" #include "src/stdlib/srand.h" #include "test/UnitTest/FEnvSafeTest.h" @@ -19,85 +18,115 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr; -template +template class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { -private: - using Func = T (*)(T, T, T); - using FPBits = LIBC_NAMESPACE::fputil::FPBits; - using StorageType = typename FPBits::StorageType; - - const T min_subnormal = FPBits::min_subnormal(Sign::POS).get_val(); - const T min_normal = FPBits::min_normal(Sign::POS).get_val(); - const T max_normal = FPBits::max_normal(Sign::POS).get_val(); - const T inf = FPBits::inf(Sign::POS).get_val(); - const T neg_inf = FPBits::inf(Sign::NEG).get_val(); - const T zero = FPBits::zero(Sign::POS).get_val(); - const T neg_zero = FPBits::zero(Sign::NEG).get_val(); - const T nan = FPBits::quiet_nan().get_val(); - - static constexpr StorageType MAX_NORMAL = FPBits::max_normal().uintval(); - static constexpr StorageType MIN_NORMAL = FPBits::min_normal().uintval(); - static constexpr StorageType MAX_SUBNORMAL = - FPBits::max_subnormal().uintval(); - static constexpr StorageType MIN_SUBNORMAL = - FPBits::min_subnormal().uintval(); - - StorageType get_random_bit_pattern() { - StorageType bits{0}; - for (StorageType i = 0; i < sizeof(StorageType) / 2; ++i) { + + struct OutConstants { + DECLARE_SPECIAL_CONSTANTS(OutType) + }; + + struct InConstants { + DECLARE_SPECIAL_CONSTANTS(InType) + }; + + using OutFPBits = typename OutConstants::FPBits; + using OutStorageType = typename OutConstants::StorageType; + using InFPBits = typename InConstants::FPBits; + using InStorageType = typename InConstants::StorageType; + + static constexpr OutStorageType OUT_MIN_NORMAL_U = + OutFPBits::min_normal().uintval(); + static constexpr InStorageType IN_MAX_NORMAL_U = + InFPBits::max_normal().uintval(); + static constexpr InStorageType IN_MIN_NORMAL_U = + InFPBits::min_normal().uintval(); + static constexpr InStorageType IN_MAX_SUBNORMAL_U = + InFPBits::max_subnormal().uintval(); + static constexpr InStorageType IN_MIN_SUBNORMAL_U = + InFPBits::min_subnormal().uintval(); + + OutConstants out; + InConstants in; + + InStorageType get_random_bit_pattern() { + InStorageType bits{0}; + for (InStorageType i = 0; i < sizeof(InStorageType) / 2; ++i) { bits = (bits << 2) + static_cast(LIBC_NAMESPACE::rand()); } return bits; } public: - void test_special_numbers(Func func) { - EXPECT_FP_EQ(func(zero, zero, zero), zero); - EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero); - EXPECT_FP_EQ(func(inf, inf, zero), inf); - EXPECT_FP_EQ(func(neg_inf, inf, neg_inf), neg_inf); - EXPECT_FP_EQ(func(inf, zero, zero), nan); - EXPECT_FP_EQ(func(inf, neg_inf, inf), nan); - EXPECT_FP_EQ(func(nan, zero, inf), nan); - EXPECT_FP_EQ(func(inf, neg_inf, nan), nan); + using FmaFunc = OutType (*)(InType, InType, InType); + + void test_special_numbers(FmaFunc func) { + EXPECT_FP_EQ(out.zero, func(in.zero, in.zero, in.zero)); + EXPECT_FP_EQ(out.neg_zero, func(in.zero, in.neg_zero, in.neg_zero)); + EXPECT_FP_EQ(out.inf, func(in.inf, in.inf, in.zero)); + EXPECT_FP_EQ(out.neg_inf, func(in.neg_inf, in.inf, in.neg_inf)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.zero, in.zero)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.inf)); + EXPECT_FP_EQ(out.aNaN, func(in.aNaN, in.zero, in.inf)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.aNaN)); // Test underflow rounding up. - EXPECT_FP_EQ(func(T(0.5), min_subnormal, min_subnormal), - FPBits(StorageType(2)).get_val()); + EXPECT_FP_EQ(OutFPBits(OutStorageType(2)).get_val(), + func(OutType(0.5), out.min_denormal, out.min_denormal)); + + if constexpr (sizeof(OutType) < sizeof(InType)) { + EXPECT_FP_EQ(out.zero, + func(InType(0.5), in.min_denormal, in.min_denormal)); + } // Test underflow rounding down. - T v = FPBits(MIN_NORMAL + StorageType(1)).get_val(); - EXPECT_FP_EQ(func(T(1) / T(MIN_NORMAL << 1), v, min_normal), v); + OutType v = OutFPBits(static_cast(OUT_MIN_NORMAL_U + + OutStorageType(1))) + .get_val(); + EXPECT_FP_EQ(v, func(OutType(1) / OutType(OUT_MIN_NORMAL_U << 1), v, + out.min_normal)); + + if constexpr (sizeof(OutType) < sizeof(InType)) { + InType v = InFPBits(static_cast(IN_MIN_NORMAL_U + + InStorageType(1))) + .get_val(); + EXPECT_FP_EQ( + out.min_normal, + func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal)); + } // Test overflow. - T z = max_normal; - EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z); + OutType z = out.max_normal; + EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z)); // Exact cancellation. - EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0)); - EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0)); + EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0))); + EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); } - void test_subnormal_range(Func func) { - constexpr StorageType COUNT = 100'001; - constexpr StorageType STEP = (MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT; + void test_subnormal_range(FmaFunc func) { + constexpr InStorageType COUNT = 100'001; + constexpr InStorageType STEP = + (IN_MAX_SUBNORMAL_U - IN_MIN_SUBNORMAL_U) / COUNT; LIBC_NAMESPACE::srand(1); - for (StorageType v = MIN_SUBNORMAL, w = MAX_SUBNORMAL; - v <= MAX_SUBNORMAL && w >= MIN_SUBNORMAL; v += STEP, w -= STEP) { - T x = FPBits(get_random_bit_pattern()).get_val(), y = FPBits(v).get_val(), - z = FPBits(w).get_val(); - mpfr::TernaryInput input{x, y, z}; + for (InStorageType v = IN_MIN_SUBNORMAL_U, w = IN_MAX_SUBNORMAL_U; + v <= IN_MAX_SUBNORMAL_U && w >= IN_MIN_SUBNORMAL_U; + v += STEP, w -= STEP) { + InType x = InFPBits(get_random_bit_pattern()).get_val(); + InType y = InFPBits(v).get_val(); + InType z = InFPBits(w).get_val(); + mpfr::TernaryInput input{x, y, z}; ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z), 0.5); } } - void test_normal_range(Func func) { - constexpr StorageType COUNT = 100'001; - constexpr StorageType STEP = (MAX_NORMAL - MIN_NORMAL) / COUNT; + void test_normal_range(FmaFunc func) { + constexpr InStorageType COUNT = 100'001; + constexpr InStorageType STEP = (IN_MAX_NORMAL_U - IN_MIN_NORMAL_U) / COUNT; LIBC_NAMESPACE::srand(1); - for (StorageType v = MIN_NORMAL, w = MAX_NORMAL; - v <= MAX_NORMAL && w >= MIN_NORMAL; v += STEP, w -= STEP) { - T x = FPBits(v).get_val(), y = FPBits(w).get_val(), - z = FPBits(get_random_bit_pattern()).get_val(); - mpfr::TernaryInput input{x, y, z}; + for (InStorageType v = IN_MIN_NORMAL_U, w = IN_MAX_NORMAL_U; + v <= IN_MAX_NORMAL_U && w >= IN_MIN_NORMAL_U; v += STEP, w -= STEP) { + InType x = InFPBits(v).get_val(); + InType y = InFPBits(w).get_val(); + InType z = InFPBits(get_random_bit_pattern()).get_val(); + mpfr::TernaryInput input{x, y, z}; ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z), 0.5); } diff --git a/libc/test/src/math/f16fmaf_test.cpp b/libc/test/src/math/f16fmaf_test.cpp new file mode 100644 index 0000000000000..dc197e297c11e --- /dev/null +++ b/libc/test/src/math/f16fmaf_test.cpp @@ -0,0 +1,25 @@ +//===-- Unittests for f16fmaf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FmaTest.h" + +#include "src/math/f16fmaf.h" + +using LlvmLibcF16fmafTest = FmaTestTemplate; + +TEST_F(LlvmLibcF16fmafTest, SpecialNumbers) { + test_special_numbers(&LIBC_NAMESPACE::f16fmaf); +} + +TEST_F(LlvmLibcF16fmafTest, SubnormalRange) { + test_subnormal_range(&LIBC_NAMESPACE::f16fmaf); +} + +TEST_F(LlvmLibcF16fmafTest, NormalRange) { + test_normal_range(&LIBC_NAMESPACE::f16fmaf); +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 3e9edc51b004f..a67d0437592d5 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3109,9 +3109,10 @@ add_fp_unittest( libc-math-smoke-tests SRCS fmaf_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fmaf - libc.src.__support.FPUtil.fp_bits FLAGS FMA_OPT__ONLY ) @@ -3122,9 +3123,10 @@ add_fp_unittest( libc-math-smoke-tests SRCS fma_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fma - libc.src.__support.FPUtil.fp_bits ) add_fp_unittest( @@ -3551,6 +3553,18 @@ add_fp_unittest( libc.src.math.totalordermagf16 ) +add_fp_unittest( + f16fmaf_test + SUITE + libc-math-smoke-tests + SRCS + f16fmaf_test.cpp + HDRS + FmaTest.h + DEPENDS + libc.src.math.f16fmaf +) + add_fp_unittest( f16sqrtf_test SUITE diff --git a/libc/test/src/math/smoke/FmaTest.h b/libc/test/src/math/smoke/FmaTest.h index 7063ecf199837..3edd3aceb4847 100644 --- a/libc/test/src/math/smoke/FmaTest.h +++ b/libc/test/src/math/smoke/FmaTest.h @@ -9,51 +9,85 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_FMATEST_H #define LLVM_LIBC_TEST_SRC_MATH_FMATEST_H -#include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -template +template class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { -private: - using Func = T (*)(T, T, T); - using FPBits = LIBC_NAMESPACE::fputil::FPBits; - using StorageType = typename FPBits::StorageType; - const T inf = FPBits::inf(Sign::POS).get_val(); - const T neg_inf = FPBits::inf(Sign::NEG).get_val(); - const T zero = FPBits::zero(Sign::POS).get_val(); - const T neg_zero = FPBits::zero(Sign::NEG).get_val(); - const T nan = FPBits::quiet_nan().get_val(); + struct OutConstants { + DECLARE_SPECIAL_CONSTANTS(OutType) + }; + + struct InConstants { + DECLARE_SPECIAL_CONSTANTS(InType) + }; + + using OutFPBits = typename OutConstants::FPBits; + using OutStorageType = typename OutConstants::StorageType; + using InFPBits = typename InConstants::FPBits; + using InStorageType = typename InConstants::StorageType; + + static constexpr OutStorageType OUT_MIN_NORMAL_U = + OutFPBits::min_normal().uintval(); + static constexpr InStorageType IN_MIN_NORMAL_U = + InFPBits::min_normal().uintval(); + + OutConstants out; + InConstants in; public: - void test_special_numbers(Func func) { - EXPECT_FP_EQ(func(zero, zero, zero), zero); - EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero); - EXPECT_FP_EQ(func(inf, inf, zero), inf); - EXPECT_FP_EQ(func(neg_inf, inf, neg_inf), neg_inf); - EXPECT_FP_EQ(func(inf, zero, zero), nan); - EXPECT_FP_EQ(func(inf, neg_inf, inf), nan); - EXPECT_FP_EQ(func(nan, zero, inf), nan); - EXPECT_FP_EQ(func(inf, neg_inf, nan), nan); + using FmaFunc = OutType (*)(InType, InType, InType); + + void test_special_numbers(FmaFunc func) { + EXPECT_FP_EQ(out.zero, func(in.zero, in.zero, in.zero)); + EXPECT_FP_EQ(out.neg_zero, func(in.zero, in.neg_zero, in.neg_zero)); + EXPECT_FP_EQ(out.inf, func(in.inf, in.inf, in.zero)); + EXPECT_FP_EQ(out.neg_inf, func(in.neg_inf, in.inf, in.neg_inf)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.zero, in.zero)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.inf)); + EXPECT_FP_EQ(out.aNaN, func(in.aNaN, in.zero, in.inf)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.aNaN)); // Test underflow rounding up. - EXPECT_FP_EQ(func(T(0.5), FPBits::min_subnormal().get_val(), - FPBits::min_subnormal().get_val()), - FPBits(StorageType(2)).get_val()); + EXPECT_FP_EQ(OutFPBits(OutStorageType(2)).get_val(), + func(OutType(0.5), out.min_denormal, out.min_denormal)); + + if constexpr (sizeof(OutType) < sizeof(InType)) { + EXPECT_FP_EQ(out.zero, + func(InType(0.5), in.min_denormal, in.min_denormal)); + } // Test underflow rounding down. - StorageType MIN_NORMAL = FPBits::min_normal().uintval(); - T v = FPBits(MIN_NORMAL + StorageType(1)).get_val(); - EXPECT_FP_EQ( - func(T(1) / T(MIN_NORMAL << 1), v, FPBits::min_normal().get_val()), v); + OutType v = OutFPBits(static_cast(OUT_MIN_NORMAL_U + + OutStorageType(1))) + .get_val(); + EXPECT_FP_EQ(v, func(OutType(1) / OutType(OUT_MIN_NORMAL_U << 1), v, + out.min_normal)); + + if constexpr (sizeof(OutType) < sizeof(InType)) { + InType v = InFPBits(static_cast(IN_MIN_NORMAL_U + + InStorageType(1))) + .get_val(); + EXPECT_FP_EQ( + out.min_normal, + func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal)); + } // Test overflow. - T z = FPBits::max_normal().get_val(); - EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z); + OutType z = out.max_normal; + EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z)); // Exact cancellation. - EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0)); - EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0)); + EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0))); + EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); } }; +#define LIST_FMA_TESTS(T, func) \ + using LlvmLibcFmaTest = FmaTestTemplate; \ + TEST_F(LlvmLibcFmaTest, SpecialNumbers) { test_special_numbers(&func); } + +#define LIST_NARROWING_FMA_TESTS(OutType, InType, func) \ + using LlvmLibcFmaTest = FmaTestTemplate; \ + TEST_F(LlvmLibcFmaTest, SpecialNumbers) { test_special_numbers(&func); } + #endif // LLVM_LIBC_TEST_SRC_MATH_FMATEST_H diff --git a/libc/test/src/math/smoke/f16fmaf_test.cpp b/libc/test/src/math/smoke/f16fmaf_test.cpp new file mode 100644 index 0000000000000..5e3aec768c191 --- /dev/null +++ b/libc/test/src/math/smoke/f16fmaf_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for f16fmaf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FmaTest.h" + +#include "src/math/f16fmaf.h" + +LIST_NARROWING_FMA_TESTS(float16, float, LIBC_NAMESPACE::f16fmaf) diff --git a/libc/test/src/math/smoke/fma_test.cpp b/libc/test/src/math/smoke/fma_test.cpp index 4460b80d9ad65..c5d802a532eb0 100644 --- a/libc/test/src/math/smoke/fma_test.cpp +++ b/libc/test/src/math/smoke/fma_test.cpp @@ -10,8 +10,4 @@ #include "src/math/fma.h" -using LlvmLibcFmaTest = FmaTestTemplate; - -TEST_F(LlvmLibcFmaTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fma); -} +LIST_FMA_TESTS(double, LIBC_NAMESPACE::fma) diff --git a/libc/test/src/math/smoke/fmaf_test.cpp b/libc/test/src/math/smoke/fmaf_test.cpp index a645efb8776d0..09e9c504b942a 100644 --- a/libc/test/src/math/smoke/fmaf_test.cpp +++ b/libc/test/src/math/smoke/fmaf_test.cpp @@ -10,8 +10,4 @@ #include "src/math/fmaf.h" -using LlvmLibcFmafTest = FmaTestTemplate; - -TEST_F(LlvmLibcFmafTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fmaf); -} +LIST_FMA_TESTS(float, LIBC_NAMESPACE::fmaf) diff --git a/libc/test/src/sched/CMakeLists.txt b/libc/test/src/sched/CMakeLists.txt index 9dda4ea16e101..3dd56c72fc86d 100644 --- a/libc/test/src/sched/CMakeLists.txt +++ b/libc/test/src/sched/CMakeLists.txt @@ -40,23 +40,23 @@ add_libc_unittest( libc.src.sched.sched_get_priority_max ) -add_libc_unittest( - scheduler_test - SUITE - libc_sched_unittests - SRCS - param_and_scheduler_test.cpp - DEPENDS - libc.include.sched - libc.src.errno.errno - libc.src.sched.sched_getscheduler - libc.src.sched.sched_setscheduler - libc.src.sched.sched_getparam - libc.src.sched.sched_setparam - libc.src.sched.sched_get_priority_min - libc.src.sched.sched_get_priority_max - libc.src.unistd.getuid -) +# add_libc_unittest( +# scheduler_test +# SUITE +# libc_sched_unittests +# SRCS +# param_and_scheduler_test.cpp +# DEPENDS +# libc.include.sched +# libc.src.errno.errno +# libc.src.sched.sched_getscheduler +# libc.src.sched.sched_setscheduler +# libc.src.sched.sched_getparam +# libc.src.sched.sched_setparam +# libc.src.sched.sched_get_priority_min +# libc.src.sched.sched_get_priority_max +# libc.src.unistd.getuid +# ) add_libc_unittest( sched_rr_get_interval_test diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 100c6b1644b16..2eac4dd8e199d 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -922,46 +922,49 @@ template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, long double, double, RoundingMode); -template -void explain_ternary_operation_one_output_error(Operation op, - const TernaryInput &input, - T libc_result, - double ulp_tolerance, - RoundingMode rounding) { - unsigned int precision = get_precision(ulp_tolerance); +template +void explain_ternary_operation_one_output_error( + Operation op, const TernaryInput &input, OutputType libc_result, + double ulp_tolerance, RoundingMode rounding) { + unsigned int precision = get_precision(ulp_tolerance); MPFRNumber mpfrX(input.x, precision); MPFRNumber mpfrY(input.y, precision); MPFRNumber mpfrZ(input.z, precision); - FPBits xbits(input.x); - FPBits ybits(input.y); - FPBits zbits(input.z); + FPBits xbits(input.x); + FPBits ybits(input.y); + FPBits zbits(input.z); MPFRNumber mpfr_result = ternary_operation_one_output( op, input.x, input.y, input.z, precision, rounding); MPFRNumber mpfrMatchValue(libc_result); tlog << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << " z: " << mpfrZ.str() << '\n'; - tlog << " First input bits: " << str(FPBits(input.x)) << '\n'; - tlog << "Second input bits: " << str(FPBits(input.y)) << '\n'; - tlog << " Third input bits: " << str(FPBits(input.z)) << '\n'; + tlog << " First input bits: " << str(FPBits(input.x)) << '\n'; + tlog << "Second input bits: " << str(FPBits(input.y)) << '\n'; + tlog << " Third input bits: " << str(FPBits(input.z)) << '\n'; tlog << "Libc result: " << mpfrMatchValue.str() << '\n' << "MPFR result: " << mpfr_result.str() << '\n'; - tlog << "Libc floating point result bits: " << str(FPBits(libc_result)) - << '\n'; + tlog << "Libc floating point result bits: " + << str(FPBits(libc_result)) << '\n'; tlog << " MPFR rounded bits: " - << str(FPBits(mpfr_result.as())) << '\n'; + << str(FPBits(mpfr_result.as())) << '\n'; tlog << "ULP error: " << mpfr_result.ulp_as_mpfr_number(libc_result).str() << '\n'; } -template void explain_ternary_operation_one_output_error( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, float, double, RoundingMode); -template void explain_ternary_operation_one_output_error( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, double, double, RoundingMode); -template void explain_ternary_operation_one_output_error( - Operation, const TernaryInput &, long double, double, - RoundingMode); +template void +explain_ternary_operation_one_output_error(Operation, + const TernaryInput &, + long double, double, RoundingMode); +#ifdef LIBC_TYPES_HAS_FLOAT16 +template void explain_ternary_operation_one_output_error( + Operation, const TernaryInput &, float16, double, RoundingMode); +#endif template bool compare_unary_operation_single_output(Operation op, InputType input, @@ -1069,12 +1072,13 @@ template bool compare_binary_operation_one_output( Operation, const BinaryInput &, long double, double, RoundingMode); -template +template bool compare_ternary_operation_one_output(Operation op, - const TernaryInput &input, - T libc_result, double ulp_tolerance, + const TernaryInput &input, + OutputType libc_result, + double ulp_tolerance, RoundingMode rounding) { - unsigned int precision = get_precision(ulp_tolerance); + unsigned int precision = get_precision(ulp_tolerance); MPFRNumber mpfr_result = ternary_operation_one_output( op, input.x, input.y, input.z, precision, rounding); double ulp = mpfr_result.ulp(libc_result); @@ -1082,13 +1086,23 @@ bool compare_ternary_operation_one_output(Operation op, return (ulp <= ulp_tolerance); } -template bool compare_ternary_operation_one_output( - Operation, const TernaryInput &, float, double, RoundingMode); -template bool compare_ternary_operation_one_output( - Operation, const TernaryInput &, double, double, RoundingMode); -template bool compare_ternary_operation_one_output( - Operation, const TernaryInput &, long double, double, - RoundingMode); +template bool compare_ternary_operation_one_output(Operation, + const TernaryInput &, + float, double, RoundingMode); +template bool compare_ternary_operation_one_output(Operation, + const TernaryInput &, + double, double, + RoundingMode); +template bool +compare_ternary_operation_one_output(Operation, + const TernaryInput &, + long double, double, RoundingMode); +#ifdef LIBC_TYPES_HAS_FLOAT16 +template bool compare_ternary_operation_one_output(Operation, + const TernaryInput &, + float16, double, + RoundingMode); +#endif } // namespace internal diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 805678b96c2ef..c8c0506164bb2 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_UTILS_MPFRWRAPPER_MPFRUTILS_H #include "src/__support/CPP/type_traits.h" +#include "src/__support/CPP/type_traits/type_identity.h" #include "test/UnitTest/RoundingModeUtils.h" #include "test/UnitTest/Test.h" @@ -129,6 +130,18 @@ struct AreMatchingBinaryInputAndBinaryOutput, BinaryOutput> { static constexpr bool VALUE = cpp::is_floating_point_v; }; +template struct IsTernaryInput { + static constexpr bool VALUE = false; +}; + +template struct IsTernaryInput> { + static constexpr bool VALUE = true; +}; + +template struct MakeScalarInput : cpp::type_identity {}; +template +struct MakeScalarInput> : cpp::type_identity {}; + template bool compare_unary_operation_single_output(Operation op, InputType input, OutputType libc_output, @@ -152,10 +165,11 @@ bool compare_binary_operation_one_output(Operation op, T libc_output, double ulp_tolerance, RoundingMode rounding); -template +template bool compare_ternary_operation_one_output(Operation op, - const TernaryInput &input, - T libc_output, double ulp_tolerance, + const TernaryInput &input, + OutputType libc_output, + double ulp_tolerance, RoundingMode rounding); template @@ -180,12 +194,10 @@ void explain_binary_operation_one_output_error(Operation op, double ulp_tolerance, RoundingMode rounding); -template -void explain_ternary_operation_one_output_error(Operation op, - const TernaryInput &input, - T match_value, - double ulp_tolerance, - RoundingMode rounding); +template +void explain_ternary_operation_one_output_error( + Operation op, const TernaryInput &input, OutputType match_value, + double ulp_tolerance, RoundingMode rounding); template class MPFRMatcher : public testing::Matcher { @@ -234,7 +246,8 @@ class MPFRMatcher : public testing::Matcher { rounding); } - template bool match(const TernaryInput &in, T out) { + template + bool match(const TernaryInput &in, U out) { return compare_ternary_operation_one_output(op, in, out, ulp_tolerance, rounding); } @@ -260,7 +273,8 @@ class MPFRMatcher : public testing::Matcher { rounding); } - template void explain_error(const TernaryInput &in, T out) { + template + void explain_error(const TernaryInput &in, U out) { explain_ternary_operation_one_output_error(op, in, out, ulp_tolerance, rounding); } @@ -272,10 +286,14 @@ class MPFRMatcher : public testing::Matcher { // types. template constexpr bool is_valid_operation() { - constexpr bool IS_NARROWING_OP = op == Operation::Sqrt && - cpp::is_floating_point_v && - cpp::is_floating_point_v && - sizeof(OutputType) <= sizeof(InputType); + constexpr bool IS_NARROWING_OP = + (op == Operation::Sqrt && cpp::is_floating_point_v && + cpp::is_floating_point_v && + sizeof(OutputType) <= sizeof(InputType)) || + (op == Operation::Fma && internal::IsTernaryInput::VALUE && + cpp::is_floating_point_v< + typename internal::MakeScalarInput::type> && + cpp::is_floating_point_v); if (IS_NARROWING_OP) return true; return (Operation::BeginUnaryOperationsSingleOutput < op && From 06e598e490065f25788abe3faa33460a0b463c43 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 01:16:03 +0200 Subject: [PATCH 02/12] [libc] Undo disabling of scheduler_test --- libc/test/src/sched/CMakeLists.txt | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libc/test/src/sched/CMakeLists.txt b/libc/test/src/sched/CMakeLists.txt index 3dd56c72fc86d..9dda4ea16e101 100644 --- a/libc/test/src/sched/CMakeLists.txt +++ b/libc/test/src/sched/CMakeLists.txt @@ -40,23 +40,23 @@ add_libc_unittest( libc.src.sched.sched_get_priority_max ) -# add_libc_unittest( -# scheduler_test -# SUITE -# libc_sched_unittests -# SRCS -# param_and_scheduler_test.cpp -# DEPENDS -# libc.include.sched -# libc.src.errno.errno -# libc.src.sched.sched_getscheduler -# libc.src.sched.sched_setscheduler -# libc.src.sched.sched_getparam -# libc.src.sched.sched_setparam -# libc.src.sched.sched_get_priority_min -# libc.src.sched.sched_get_priority_max -# libc.src.unistd.getuid -# ) +add_libc_unittest( + scheduler_test + SUITE + libc_sched_unittests + SRCS + param_and_scheduler_test.cpp + DEPENDS + libc.include.sched + libc.src.errno.errno + libc.src.sched.sched_getscheduler + libc.src.sched.sched_setscheduler + libc.src.sched.sched_getparam + libc.src.sched.sched_setparam + libc.src.sched.sched_get_priority_min + libc.src.sched.sched_get_priority_max + libc.src.unistd.getuid +) add_libc_unittest( sched_rr_get_interval_test From 05bc63cba9427b74f1392a2864d33fe1f97ca133 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 01:18:42 +0200 Subject: [PATCH 03/12] [libc] Fix includes and dependencies --- libc/src/__support/FPUtil/generic/CMakeLists.txt | 3 +++ libc/src/math/generic/CMakeLists.txt | 1 + libc/utils/MPFRWrapper/MPFRUtils.h | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt index 595656e3e8d90..a8a95ba3f15ff 100644 --- a/libc/src/__support/FPUtil/generic/CMakeLists.txt +++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt @@ -19,12 +19,15 @@ add_header_library( HDRS FMA.h DEPENDS + libc.hdr.fenv_macros libc.src.__support.common libc.src.__support.CPP.bit + libc.src.__support.CPP.limits libc.src.__support.CPP.type_traits libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode + libc.src.__support.big_int libc.src.__support.macros.optimization libc.src.__support.uint128 ) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 706bfc4b08670..aa0069d821d0d 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3609,6 +3609,7 @@ add_entrypoint_object( HDRS ../f16fmaf.h DEPENDS + libc.src.__support.macros.properties.types libc.src.__support.FPUtil.fma COMPILE_OPTIONS -O3 diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index c8c0506164bb2..9221b4a457454 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -10,7 +10,6 @@ #define LLVM_LIBC_UTILS_MPFRWRAPPER_MPFRUTILS_H #include "src/__support/CPP/type_traits.h" -#include "src/__support/CPP/type_traits/type_identity.h" #include "test/UnitTest/RoundingModeUtils.h" #include "test/UnitTest/Test.h" From b112ac4a94b2969bf400ee2a156bcba9d5c27b7f Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 01:19:47 +0200 Subject: [PATCH 04/12] [libc] Add blank line between template declaration and specialization --- libc/utils/MPFRWrapper/MPFRUtils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 9221b4a457454..0b4f42a72ec81 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -138,6 +138,7 @@ template struct IsTernaryInput> { }; template struct MakeScalarInput : cpp::type_identity {}; + template struct MakeScalarInput> : cpp::type_identity {}; From 7e7c3542e7c1730502bc761db5e0236fa8bd24e7 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 15:10:27 +0200 Subject: [PATCH 05/12] [libc][math] Remove useless header guard around fputil::generic::fma --- libc/src/__support/FPUtil/generic/FMA.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index 3bbb30476e5d8..4cbbe9cbb81e7 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -31,7 +31,6 @@ LIBC_INLINE cpp::enable_if_t && OutType> fma(InType x, InType y, InType z); -#ifndef LIBC_TARGET_CPU_HAS_FMA // TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes. // The implementation below only is only correct for the default rounding mode, // round-to-nearest tie-to-even. @@ -82,7 +81,6 @@ template <> LIBC_INLINE float fma(float x, float y, float z) { return static_cast(bit_sum.get_val()); } -#endif // LIBC_TARGET_CPU_HAS_FMA namespace internal { From e409c828618637891af4ae4bffec2d03a6298a51 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 15:38:31 +0200 Subject: [PATCH 06/12] [libc] Misc cleanup --- libc/spec/stdc.td | 5 ++--- libc/src/__support/FPUtil/generic/FMA.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index b089b596b0958..b7375fb411220 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -474,10 +474,11 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"fmul", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fma", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, FunctionSpec<"fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + GuardedFunctionSpec<"f16fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + FunctionSpec<"fmod", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"fmodf", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"fmodl", RetValSpec, [ArgSpec, ArgSpec]>, @@ -715,8 +716,6 @@ def StdC : StandardSpec<"stdc"> { GuardedFunctionSpec<"totalordermagf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16sqrtf", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, ] >; diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index 4cbbe9cbb81e7..d954006f8de69 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -113,7 +113,7 @@ fma(InType x, InType y, InType z) { using InStorageType = typename InFPBits::StorageType; constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1; - constexpr size_t PROD_LEN = 2 * (IN_EXPLICIT_MANT_LEN); + constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN; constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1); using TmpResultType = UInt; From bd971fe9e52be5a5059a9b9c5f51b70c6569f3a8 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 15:51:01 +0200 Subject: [PATCH 07/12] [libc] Replace 128-bit BigInt numeric_limits with generic BigInt specialization --- libc/src/__support/big_int.h | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h index 27cb3b783470c..c30c3ece54a30 100644 --- a/libc/src/__support/big_int.h +++ b/libc/src/__support/big_int.h @@ -986,26 +986,15 @@ using Int = BigInt>; // Provides limits of BigInt. template struct cpp::numeric_limits> { - LIBC_INLINE_VAR static constexpr int digits = Bits - Signed; -}; - -// Provides limits of U/Int<128>. -template <> class cpp::numeric_limits> { -public: - LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); } - LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); } - // Meant to match std::numeric_limits interface. - // NOLINTNEXTLINE(readability-identifier-naming) - LIBC_INLINE_VAR static constexpr int digits = 128; -}; - -template <> class cpp::numeric_limits> { -public: - LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); } - LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); } + LIBC_INLINE static constexpr BigInt max() { + return BigInt::max(); + } + LIBC_INLINE static constexpr BigInt min() { + return BigInt::min(); + } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) - LIBC_INLINE_VAR static constexpr int digits = 128; + LIBC_INLINE_VAR static constexpr int digits = Bits - Signed; }; // type traits to determine whether a T is a BigInt. @@ -1086,6 +1075,8 @@ struct is_unsigned_integral_or_big_int cpp::is_same_v>> {}; template +// Meant to look like helper variable templates. +// NOLINTNEXTLINE(readability-identifier-naming) LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v = is_unsigned_integral_or_big_int::value; From 5dae03b46e367836b4403049ef4edb5d71112646 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 17:17:12 +0200 Subject: [PATCH 08/12] [libc][math] Fix sign of FMA with exact cancellation --- libc/src/__support/FPUtil/generic/FMA.h | 11 ++++++++--- libc/test/src/math/smoke/FmaTest.h | 24 +++++++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index d954006f8de69..754efc0f5c21d 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -231,6 +231,8 @@ fma(InType x, InType y, InType z) { OutStorageType result = 0; int r_exp = 0; // Unbiased exponent of the result + int round_mode = fputil::quick_get_round(); + // Normalize the result. if (prod_mant != 0) { int lead_zeros = cpp::countl_zero(prod_mant); @@ -269,12 +271,15 @@ fma(InType x, InType y, InType z) { r_exp = 0; } } else { - // Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly. - prod_sign = Sign::POS; + // When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if + // rounding downward and +0.0 for other rounding modes. + if (round_mode == FE_DOWNWARD) + prod_sign = Sign::NEG; + else + prod_sign = Sign::POS; } // Finalize the result. - int round_mode = fputil::quick_get_round(); if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) { if ((round_mode == FE_TOWARDZERO) || (round_mode == FE_UPWARD && prod_sign.is_neg()) || diff --git a/libc/test/src/math/smoke/FmaTest.h b/libc/test/src/math/smoke/FmaTest.h index 3edd3aceb4847..f942de37654dd 100644 --- a/libc/test/src/math/smoke/FmaTest.h +++ b/libc/test/src/math/smoke/FmaTest.h @@ -58,6 +58,7 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { EXPECT_FP_EQ(out.zero, func(InType(0.5), in.min_denormal, in.min_denormal)); } + // Test underflow rounding down. OutType v = OutFPBits(static_cast(OUT_MIN_NORMAL_U + OutStorageType(1))) @@ -73,12 +74,29 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { out.min_normal, func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal)); } + // Test overflow. OutType z = out.max_normal; - EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z)); + EXPECT_FP_EQ_ALL_ROUNDING(OutType(0.75) * z, func(InType(1.75), z, -z)); + // Exact cancellation. - EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0))); - EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); + EXPECT_FP_EQ_ROUNDING_NEAREST( + out.zero, func(InType(3.0), InType(5.0), InType(-15.0))); + EXPECT_FP_EQ_ROUNDING_UPWARD(out.zero, + func(InType(3.0), InType(5.0), InType(-15.0))); + EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO( + out.zero, func(InType(3.0), InType(5.0), InType(-15.0))); + EXPECT_FP_EQ_ROUNDING_DOWNWARD( + out.neg_zero, func(InType(3.0), InType(5.0), InType(-15.0))); + + EXPECT_FP_EQ_ROUNDING_NEAREST( + out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); + EXPECT_FP_EQ_ROUNDING_UPWARD(out.zero, + func(InType(-3.0), InType(5.0), InType(15.0))); + EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO( + out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); + EXPECT_FP_EQ_ROUNDING_DOWNWARD( + out.neg_zero, func(InType(-3.0), InType(5.0), InType(15.0))); } }; From 9d4dc22bb68c02a43123cd8fc6732cf0bd9eac7f Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 17:34:32 +0200 Subject: [PATCH 09/12] [libc] Remove braces from single-statement ifs as per LLVM code style --- libc/src/__support/FPUtil/generic/FMA.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index 754efc0f5c21d..950db8c1ac671 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -71,11 +71,10 @@ template <> LIBC_INLINE float fma(float x, float y, float z) { // Update sticky bits if t != 0.0 and the least (52 - 23 - 1 = 28) bits are // zero. if (!t.is_zero() && ((bit_sum.get_mantissa() & 0xfff'ffffULL) == 0)) { - if (bit_sum.sign() != t.sign()) { + if (bit_sum.sign() != t.sign()) bit_sum.set_mantissa(bit_sum.get_mantissa() + 1); - } else if (bit_sum.get_mantissa()) { + else if (bit_sum.get_mantissa()) bit_sum.set_mantissa(bit_sum.get_mantissa() - 1); - } } } @@ -122,9 +121,8 @@ fma(InType x, InType y, InType z) { constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK = (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1; - if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) { + if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) return static_cast(x * y + z); - } int x_exp = 0; int y_exp = 0; @@ -293,9 +291,8 @@ fma(InType x, InType y, InType z) { result = static_cast( (result & OutFPBits::FRACTION_MASK) | (static_cast(r_exp) << OutFPBits::FRACTION_LEN)); - if (prod_sign.is_neg()) { + if (prod_sign.is_neg()) result |= OutFPBits::SIGN_MASK; - } // Rounding. if (round_mode == FE_TONEAREST) { From eb0ff6bb8809f47358be7c0dece4f212b365ffb9 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 17:36:07 +0200 Subject: [PATCH 10/12] [libc][math] Remove test_special_numbers from FMA MPFR unit test duplicated from smoke test --- libc/test/src/math/FmaTest.h | 41 ----------------------------- libc/test/src/math/f16fmaf_test.cpp | 4 --- libc/test/src/math/fma_test.cpp | 4 --- libc/test/src/math/fmaf_test.cpp | 4 --- 4 files changed, 53 deletions(-) diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h index 0e0f05aad5d13..53895e7d633c2 100644 --- a/libc/test/src/math/FmaTest.h +++ b/libc/test/src/math/FmaTest.h @@ -59,47 +59,6 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { public: using FmaFunc = OutType (*)(InType, InType, InType); - void test_special_numbers(FmaFunc func) { - EXPECT_FP_EQ(out.zero, func(in.zero, in.zero, in.zero)); - EXPECT_FP_EQ(out.neg_zero, func(in.zero, in.neg_zero, in.neg_zero)); - EXPECT_FP_EQ(out.inf, func(in.inf, in.inf, in.zero)); - EXPECT_FP_EQ(out.neg_inf, func(in.neg_inf, in.inf, in.neg_inf)); - EXPECT_FP_EQ(out.aNaN, func(in.inf, in.zero, in.zero)); - EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.inf)); - EXPECT_FP_EQ(out.aNaN, func(in.aNaN, in.zero, in.inf)); - EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.aNaN)); - - // Test underflow rounding up. - EXPECT_FP_EQ(OutFPBits(OutStorageType(2)).get_val(), - func(OutType(0.5), out.min_denormal, out.min_denormal)); - - if constexpr (sizeof(OutType) < sizeof(InType)) { - EXPECT_FP_EQ(out.zero, - func(InType(0.5), in.min_denormal, in.min_denormal)); - } - // Test underflow rounding down. - OutType v = OutFPBits(static_cast(OUT_MIN_NORMAL_U + - OutStorageType(1))) - .get_val(); - EXPECT_FP_EQ(v, func(OutType(1) / OutType(OUT_MIN_NORMAL_U << 1), v, - out.min_normal)); - - if constexpr (sizeof(OutType) < sizeof(InType)) { - InType v = InFPBits(static_cast(IN_MIN_NORMAL_U + - InStorageType(1))) - .get_val(); - EXPECT_FP_EQ( - out.min_normal, - func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal)); - } - // Test overflow. - OutType z = out.max_normal; - EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z)); - // Exact cancellation. - EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0))); - EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); - } - void test_subnormal_range(FmaFunc func) { constexpr InStorageType COUNT = 100'001; constexpr InStorageType STEP = diff --git a/libc/test/src/math/f16fmaf_test.cpp b/libc/test/src/math/f16fmaf_test.cpp index dc197e297c11e..e4ca88b8810e1 100644 --- a/libc/test/src/math/f16fmaf_test.cpp +++ b/libc/test/src/math/f16fmaf_test.cpp @@ -12,10 +12,6 @@ using LlvmLibcF16fmafTest = FmaTestTemplate; -TEST_F(LlvmLibcF16fmafTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::f16fmaf); -} - TEST_F(LlvmLibcF16fmafTest, SubnormalRange) { test_subnormal_range(&LIBC_NAMESPACE::f16fmaf); } diff --git a/libc/test/src/math/fma_test.cpp b/libc/test/src/math/fma_test.cpp index 20224d99894be..dd761382631d5 100644 --- a/libc/test/src/math/fma_test.cpp +++ b/libc/test/src/math/fma_test.cpp @@ -276,10 +276,6 @@ struct LlvmLibcFmaTest : public FmaTestTemplate { } }; -TEST_F(LlvmLibcFmaTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fma); -} - TEST_F(LlvmLibcFmaTest, SubnormalRange) { test_subnormal_range(&LIBC_NAMESPACE::fma); } diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp index b607d4a66f8eb..0e498d46ecfb0 100644 --- a/libc/test/src/math/fmaf_test.cpp +++ b/libc/test/src/math/fmaf_test.cpp @@ -12,10 +12,6 @@ using LlvmLibcFmafTest = FmaTestTemplate; -TEST_F(LlvmLibcFmafTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fmaf); -} - TEST_F(LlvmLibcFmafTest, SubnormalRange) { test_subnormal_range(&LIBC_NAMESPACE::fmaf); } From f3128ee0c2262f81a6813f3a486699cc26a7b585 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 17:56:05 +0200 Subject: [PATCH 11/12] [libc][math] Update comments in fputil::generic::fma --- libc/src/__support/FPUtil/generic/FMA.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index 950db8c1ac671..eafe7c31e6625 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -168,17 +168,12 @@ fma(InType x, InType y, InType z) { // z : 10aa...a // - prod : 1bb...bb....b // In that case, in order to store the exact result, we need at least - // (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54. - // TODO: 53? (Explicit mantissa.) ^ + // (Length of prod) - (Fraction length of z) = 2*(52 + 1) - 52 = 54. // Overall, before aligning the mantissas and exponents, we can simply left- // shift the mantissa of z by at least 54, and left-shift the product of x*y - // by (that amount - 52). After that, it is enough to align the least - // TODO: ^ 54? + // by (that amount - 54). After that, it is enough to align the least // significant bit, given that we keep track of the round and sticky bits // after the least significant bit. - // We pick shifting z_mant by 64 bits so that technically we can simply use - // the original mantissa as high part when constructing 128-bit z_mant. So the - // mantissa of prod will be left-shifted by 64 - 54 = 10 initially. TmpResultType prod_mant = TmpResultType(x_mant) * y_mant; int prod_lsb_exp = @@ -241,10 +236,10 @@ fma(InType x, InType y, InType z) { InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS; if (r_exp > 0) { - // The result is normal. We will shift the mantissa to the right by - // 63 - 52 = 11 bits (from the locations of the most significant bit). - // Then the rounding bit will correspond the 11th bit, and the lowest - // 10 bits are merged into sticky bits. + // The result is normal. We will shift the mantissa to the right by the + // amount of extra bits compared to the length of the explicit mantissa in + // the output type. The rounding bit then becomes the highest bit that is + // shifted out, and the following lower bits are merged into sticky bits. round_bit = (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0; sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0; From 40bd58f4040d295127efb60d2783c307f4f52498 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 18:07:33 +0200 Subject: [PATCH 12/12] [libc][math] Update comments in fputil::generic::fma again --- libc/src/__support/FPUtil/generic/FMA.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index eafe7c31e6625..71b150758d419 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -168,12 +168,12 @@ fma(InType x, InType y, InType z) { // z : 10aa...a // - prod : 1bb...bb....b // In that case, in order to store the exact result, we need at least - // (Length of prod) - (Fraction length of z) = 2*(52 + 1) - 52 = 54. + // (Length of prod) - (Fraction length of z) + // = 2*(Length of input explicit mantissa) - (Fraction length of z) bits. // Overall, before aligning the mantissas and exponents, we can simply left- - // shift the mantissa of z by at least 54, and left-shift the product of x*y - // by (that amount - 54). After that, it is enough to align the least - // significant bit, given that we keep track of the round and sticky bits - // after the least significant bit. + // shift the mantissa of z by that amount. After that, it is enough to align + // the least significant bit, given that we keep track of the round and sticky + // bits after the least significant bit. TmpResultType prod_mant = TmpResultType(x_mant) * y_mant; int prod_lsb_exp =