Skip to content

[libc][math] Add min/max/min_denorm/max_denorm constants to FPBits and clean up its constants return types. #71298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions libc/src/__support/FPUtil/FPBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ template <typename T> struct FPBits {
cpp::enable_if_t<cpp::is_same_v<XType, UIntType>, int> = 0>
constexpr explicit FPBits(XType x) : bits(x) {}

// Default constructor: initializes the stored bit pattern to 0.
// Diff pair: the first line is the pre-change declaration, the second is its
// replacement, which only adds `constexpr`.
FPBits() : bits(0) {}
constexpr FPBits() : bits(0) {}

// Reinterprets the stored bit pattern as a value of type T via cpp::bit_cast.
LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(bits); }

Expand Down Expand Up @@ -169,30 +169,38 @@ template <typename T> struct FPBits {
return (bits & FloatProp::EXPONENT_MASK) == FloatProp::EXPONENT_MASK;
}

// Builds the value whose bit pattern is all-zero, or only
// FloatProp::SIGN_MASK when `sign` is true.
// Diff pair: the first signature/return is the pre-change version, which
// handed back an FPBits<T>; the second signature/return is its replacement,
// which converts with get_val() and returns T directly.
LIBC_INLINE static constexpr FPBits<T> zero(bool sign = false) {
return FPBits(sign ? FloatProp::SIGN_MASK : UIntType(0));
LIBC_INLINE static constexpr T zero(bool sign = false) {
return FPBits(sign ? FloatProp::SIGN_MASK : UIntType(0)).get_val();
}

// Shorthand for zero(true).
// Diff pair: the return type changes from FPBits<T> (first line, pre-change)
// to T (second line, replacement); the body is the same.
LIBC_INLINE static constexpr FPBits<T> neg_zero() { return zero(true); }
LIBC_INLINE static constexpr T neg_zero() { return zero(true); }

// Builds the infinity bit pattern, with the sign bit set when `sign` is true.
// Pre-change version (first signature plus the three lines after it): starts
// from the optional sign bit, stores MAX_EXPONENT into the exponent field via
// set_unbiased_exponent(), and returns the FPBits<T> object.
// Replacement (second signature onward): ORs FloatProp::EXPONENT_MASK into
// the optional sign bit in one expression and returns the T value through
// get_val().
LIBC_INLINE static constexpr FPBits<T> inf(bool sign = false) {
FPBits<T> bits(sign ? FloatProp::SIGN_MASK : UIntType(0));
bits.set_unbiased_exponent(MAX_EXPONENT);
return bits;
LIBC_INLINE static constexpr T inf(bool sign = false) {
return FPBits((sign ? FloatProp::SIGN_MASK : UIntType(0)) |
FloatProp::EXPONENT_MASK)
.get_val();
}

// Negative infinity.
// Pre-change version (first four lines): copies inf() into an FPBits<T>, sets
// the sign with set_sign(1), and returns the FPBits<T>; its closing brace is
// an unchanged context line further down in the hunk.
// Replacement (last line): a one-liner that forwards to inf(true) and
// returns T.
LIBC_INLINE static constexpr FPBits<T> neg_inf() {
FPBits<T> bits = inf();
bits.set_sign(1);
return bits;
LIBC_INLINE static constexpr T neg_inf() { return inf(true); }

// Added by this change: returns, as a T, the value whose bit pattern is the
// MIN_NORMAL constant.
LIBC_INLINE static constexpr T min_normal() {
return FPBits(MIN_NORMAL).get_val();
}

// Added by this change: returns, as a T, the value whose bit pattern is the
// MAX_NORMAL constant.
LIBC_INLINE static constexpr T max_normal() {
return FPBits(MAX_NORMAL).get_val();
}

// Added by this change: returns, as a T, the value whose bit pattern is the
// MIN_SUBNORMAL constant.
// NOTE(review): the accessor says "denormal" while the constant it wraps says
// "SUBNORMAL"; the two spellings refer to the same constant here.
LIBC_INLINE static constexpr T min_denormal() {
return FPBits(MIN_SUBNORMAL).get_val();
}

// Diff alignment: the first two lines are the removed, FPBits<T>-returning
// min_normal(); the next two are the added max_denormal(), which returns, as
// a T, the value whose bit pattern is the MAX_SUBNORMAL constant. The closing
// brace is an unchanged context line shared by both.
LIBC_INLINE static constexpr FPBits<T> min_normal() {
return FPBits<T>(MIN_NORMAL);
LIBC_INLINE static constexpr T max_denormal() {
return FPBits(MAX_SUBNORMAL).get_val();
}

// Builds a NaN: starts from the inf() bit pattern, stores `v` into the
// mantissa field with set_mantissa(), and converts the result to T.
// Diff pair: `FPBits<T> bits = inf();` is the pre-change line and
// `FPBits<T> bits(inf());` is its replacement.
// NOTE(review): the switch to direct-initialization is presumably needed
// because inf() now yields T and the FPBits-from-T constructor is explicit;
// that constructor is outside this hunk - confirm.
LIBC_INLINE static constexpr T build_nan(UIntType v) {
FPBits<T> bits = inf();
FPBits<T> bits(inf());
bits.set_mantissa(v);
return T(bits);
}
Expand Down
4 changes: 2 additions & 2 deletions libc/src/__support/FPUtil/except_value_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,14 @@ template <typename T, size_t N> struct ExceptValues {
// Helper functions to set results for exceptional cases.
// Returns value_rn minus FPBits<T>::min_normal(), computed through a volatile
// temporary.
// NOTE(review): the volatile is presumably there so the subtraction is really
// performed at run time (and so honours the current rounding mode) - confirm.
// Diff pair: the first MIN_NORMAL line is pre-change and needed .get_val();
// the second is its replacement and uses the T returned by min_normal()
// directly.
template <typename T> LIBC_INLINE T round_result_slightly_down(T value_rn) {
volatile T tmp = value_rn;
const T MIN_NORMAL = FPBits<T>::min_normal().get_val();
const T MIN_NORMAL = FPBits<T>::min_normal();
tmp = tmp - MIN_NORMAL;
return tmp;
}

// Returns value_rn plus FPBits<T>::min_normal(), computed through a volatile
// temporary.
// NOTE(review): the volatile is presumably there so the addition is really
// performed at run time (and so honours the current rounding mode) - confirm.
// Diff pair: the first MIN_NORMAL line is pre-change and needed .get_val();
// the second is its replacement and uses the T returned by min_normal()
// directly.
template <typename T> LIBC_INLINE T round_result_slightly_up(T value_rn) {
volatile T tmp = value_rn;
const T MIN_NORMAL = FPBits<T>::min_normal().get_val();
const T MIN_NORMAL = FPBits<T>::min_normal();
tmp = tmp + MIN_NORMAL;
return tmp;
}
Expand Down
6 changes: 3 additions & 3 deletions libc/src/__support/FPUtil/generic/FMod.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ class FMod {
if (LIBC_LIKELY(sx.uintval() <= sy.uintval())) {
if (sx.uintval() < sy.uintval())
return sx; // |x|<|y| return x
return FPB::zero(); // |x|=|y| return 0.0
return FPB(FPB::zero()); // |x|=|y| return 0.0
}

int e_x = sx.get_unbiased_exponent();
Expand All @@ -243,7 +243,7 @@ class FMod {
intU_t m_y = sy.get_explicit_mantissa();
intU_t d = (e_x == e_y) ? (m_x - m_y) : (m_x << (e_x - e_y)) % m_y;
if (d == 0)
return FPB::zero();
return FPB(FPB::zero());
// iy - 1 because of "zero power" for number with power 1
return FPB::make_value(d, e_y - 1);
}
Expand Down Expand Up @@ -291,7 +291,7 @@ class FMod {

m_x %= m_y;
if (LIBC_UNLIKELY(m_x == 0))
return FPB::zero();
return FPB(FPB::zero());

if (exp_diff == 0)
return FPB::make_value(m_x, e_y);
Expand Down
91 changes: 49 additions & 42 deletions libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,85 +56,85 @@ template <> struct FPBits<long double> {

UIntType bits;

// Replaces the mantissa field: masks mantVal with FloatProp::MANTISSA_MASK,
// clears those bits in `bits`, then ORs the masked value in.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE void set_mantissa(UIntType mantVal) {
LIBC_INLINE constexpr void set_mantissa(UIntType mantVal) {
mantVal &= (FloatProp::MANTISSA_MASK);
bits &= ~(FloatProp::MANTISSA_MASK);
bits |= mantVal;
}

// Returns the bits of `bits` selected by FloatProp::MANTISSA_MASK.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE UIntType get_mantissa() const {
LIBC_INLINE constexpr UIntType get_mantissa() const {
return bits & FloatProp::MANTISSA_MASK;
}

// Returns the mantissa bits together with the bit selected by
// FloatProp::EXPLICIT_BIT_MASK.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE UIntType get_explicit_mantissa() const {
LIBC_INLINE constexpr UIntType get_explicit_mantissa() const {
return bits & (FloatProp::MANTISSA_MASK | FloatProp::EXPLICIT_BIT_MASK);
}

// Replaces the exponent field: shifts expVal left by
// (BIT_WIDTH - 1 - EXPONENT_WIDTH), masks it with EXPONENT_MASK, clears the
// old exponent bits in `bits`, and ORs the new ones in.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE void set_unbiased_exponent(UIntType expVal) {
LIBC_INLINE constexpr void set_unbiased_exponent(UIntType expVal) {
expVal =
(expVal << (FloatProp::BIT_WIDTH - 1 - FloatProp::EXPONENT_WIDTH)) &
FloatProp::EXPONENT_MASK;
bits &= ~(FloatProp::EXPONENT_MASK);
bits |= expVal;
}

// Extracts the exponent field: masks `bits` with EXPONENT_MASK, shifts right
// by (BIT_WIDTH - 1 - EXPONENT_WIDTH), and converts the result to uint16_t.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE uint16_t get_unbiased_exponent() const {
LIBC_INLINE constexpr uint16_t get_unbiased_exponent() const {
return uint16_t((bits & FloatProp::EXPONENT_MASK) >>
(FloatProp::BIT_WIDTH - 1 - FloatProp::EXPONENT_WIDTH));
}

// Writes implicitVal into bit number FloatProp::MANTISSA_WIDTH of `bits`
// (clear first, then OR in the new value).
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE void set_implicit_bit(bool implicitVal) {
LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) {
bits &= ~(UIntType(1) << FloatProp::MANTISSA_WIDTH);
bits |= (UIntType(implicitVal) << FloatProp::MANTISSA_WIDTH);
}

// Reads bit number FloatProp::MANTISSA_WIDTH of `bits` as a bool.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE bool get_implicit_bit() const {
LIBC_INLINE constexpr bool get_implicit_bit() const {
return bool((bits & (UIntType(1) << FloatProp::MANTISSA_WIDTH)) >>
FloatProp::MANTISSA_WIDTH);
}

// Replaces the sign: clears the FloatProp::SIGN_MASK bits, then ORs in
// signVal shifted to bit (BIT_WIDTH - 1).
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE void set_sign(bool signVal) {
LIBC_INLINE constexpr void set_sign(bool signVal) {
bits &= ~(FloatProp::SIGN_MASK);
UIntType sign1 = UIntType(signVal) << (FloatProp::BIT_WIDTH - 1);
bits |= sign1;
}

// Reads the sign: masks `bits` with SIGN_MASK, shifts right by
// (BIT_WIDTH - 1), and converts to bool.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE bool get_sign() const {
LIBC_INLINE constexpr bool get_sign() const {
return bool((bits & FloatProp::SIGN_MASK) >> (FloatProp::BIT_WIDTH - 1));
}

// Default constructor: initializes the stored bit pattern to 0.
// Diff pair: the first line is the pre-change declaration, the second is its
// replacement, which only adds `constexpr`.
FPBits() : bits(0) {}
constexpr FPBits() : bits(0) {}

// Constructs from a long double (the enable_if restricts XType to exactly
// long double): bit-casts x into UIntType, then keeps only the low 80 bits.
// Diff pair: the first constructor line is pre-change; the second is its
// replacement and only adds `constexpr`.
// NOTE(review): whether this bit_cast is usable during constant evaluation
// depends on how cpp::bit_cast treats the bits above the low 80, which is not
// shown here - confirm the constexpr callers actually constant-evaluate.
template <typename XType,
cpp::enable_if_t<cpp::is_same_v<long double, XType>, int> = 0>
explicit FPBits(XType x) : bits(cpp::bit_cast<UIntType>(x)) {
constexpr explicit FPBits(XType x) : bits(cpp::bit_cast<UIntType>(x)) {
// bits starts uninitialized, and setting it to a long double only
// overwrites the first 80 bits. This clears those upper bits.
bits = bits & ((UIntType(1) << 80) - 1);
}

// Constructs from a raw bit pattern (the enable_if restricts XType to exactly
// UIntType) and stores it unchanged.
// Diff pair: the first constructor line is pre-change; the second is its
// replacement and only adds `constexpr`.
template <typename XType,
cpp::enable_if_t<cpp::is_same_v<XType, UIntType>, int> = 0>
explicit FPBits(XType x) : bits(x) {}
constexpr explicit FPBits(XType x) : bits(x) {}

// Implicit conversion to long double: bit-casts the stored pattern.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
// NOTE(review): this operator is not const-qualified (unlike get_val()), so
// it cannot be applied to a const FPBits object.
LIBC_INLINE operator long double() {
LIBC_INLINE constexpr operator long double() {
return cpp::bit_cast<long double>(bits);
}

// Returns the stored bits with everything above the low
// (sizeof(long double) * 8 - Padding<sizeof(uintptr_t)>::VALUE) bits cleared.
// Diff pairs: the signature gains `constexpr`, and the local MASK changes
// from `static constexpr` to plain `constexpr` (a static local variable is
// not allowed in a constexpr function before C++23).
// NOTE(review): this accessor is not const-qualified, unlike get_val().
LIBC_INLINE UIntType uintval() {
LIBC_INLINE constexpr UIntType uintval() {
// We zero the padding bits as they can contain garbage.
static constexpr UIntType MASK =
constexpr UIntType MASK =
(UIntType(1) << (sizeof(long double) * 8 -
Padding<sizeof(uintptr_t)>::VALUE)) -
1;
return bits & MASK;
}

// Reinterprets the stored bit pattern as a long double via cpp::bit_cast.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE long double get_val() const {
LIBC_INLINE constexpr long double get_val() const {
return cpp::bit_cast<long double>(bits);
}

// Returns the exponent field (get_unbiased_exponent()) minus EXPONENT_BIAS,
// as a signed int.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE int get_exponent() const {
LIBC_INLINE constexpr int get_exponent() const {
return int(get_unbiased_exponent()) - EXPONENT_BIAS;
}

Expand All @@ -144,7 +144,7 @@ template <> struct FPBits<long double> {
// values are calculated from the exponent, since just subtracting the bias
// will give a slightly incorrect result. Additionally, zero has an exponent
// of zero, and that should actually be treated as zero.
LIBC_INLINE int get_explicit_exponent() const {
LIBC_INLINE constexpr int get_explicit_exponent() const {
const int unbiased_exp = int(get_unbiased_exponent());
if (is_zero()) {
return 0;
Expand All @@ -155,17 +155,17 @@ template <> struct FPBits<long double> {
}
}

// True when the exponent field, the mantissa, and the implicit bit are all 0.
// The sign bit is not examined.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE bool is_zero() const {
LIBC_INLINE constexpr bool is_zero() const {
return get_unbiased_exponent() == 0 && get_mantissa() == 0 &&
get_implicit_bit() == 0;
}

// True when the exponent field equals MAX_EXPONENT, the mantissa is 0, and
// the implicit bit is set. The sign bit is not examined.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE bool is_inf() const {
LIBC_INLINE constexpr bool is_inf() const {
return get_unbiased_exponent() == MAX_EXPONENT && get_mantissa() == 0 &&
get_implicit_bit() == 1;
}

LIBC_INLINE bool is_nan() const {
LIBC_INLINE constexpr bool is_nan() const {
if (get_unbiased_exponent() == MAX_EXPONENT) {
return (get_implicit_bit() == 0) || get_mantissa() != 0;
} else if (get_unbiased_exponent() != 0) {
Expand All @@ -174,51 +174,58 @@ template <> struct FPBits<long double> {
return false;
}

// True when the exponent field equals MAX_EXPONENT, or when the exponent
// field is non-zero while the implicit bit is clear.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE bool is_inf_or_nan() const {
LIBC_INLINE constexpr bool is_inf_or_nan() const {
return (get_unbiased_exponent() == MAX_EXPONENT) ||
(get_unbiased_exponent() != 0 && get_implicit_bit() == 0);
}

// Methods below this are used by tests.

// Positive zero.
// Pre-change version (first three lines): wrapped 0.0l in an
// FPBits<long double> and returned the wrapper.
// Replacement (last line): a constexpr one-liner returning the literal 0.0l.
// NOTE(review): this takes no `sign` parameter, whereas the generic
// FPBits<T>::zero in FPBits.h (same diff) is declared zero(bool sign = false).
LIBC_INLINE static FPBits<long double> zero() {
return FPBits<long double>(0.0l);
}
LIBC_INLINE static constexpr long double zero() { return 0.0l; }

// Negative zero.
// Pre-change version (first five lines): built FPBits(0.0l), set the sign
// with set_sign(1), and returned the FPBits<long double>.
// Replacement (last line): a constexpr one-liner returning the literal
// -0.0l.
LIBC_INLINE static FPBits<long double> neg_zero() {
FPBits<long double> bits(0.0l);
bits.set_sign(1);
return bits;
}
LIBC_INLINE static constexpr long double neg_zero() { return -0.0l; }

// Builds infinity: starts from the bits of 0.0l, stores MAX_EXPONENT into the
// exponent field, and sets the implicit bit.
// Diff pairs: the first signature and the bare `return bits;` are
// pre-change (no parameter, FPBits<long double> result). The second
// signature, the `if (sign)` block, and `return bits.get_val();` are the
// replacement, which adds an optional sign, is constexpr, and returns
// long double.
LIBC_INLINE static FPBits<long double> inf() {
LIBC_INLINE static constexpr long double inf(bool sign = false) {
FPBits<long double> bits(0.0l);
bits.set_unbiased_exponent(MAX_EXPONENT);
bits.set_implicit_bit(1);
return bits;
if (sign) {
bits.set_sign(true);
}
return bits.get_val();
}

// Negative infinity.
// Pre-change version (first seven lines): repeated the inf() construction
// (MAX_EXPONENT exponent field, implicit bit set), then set the sign and
// returned the FPBits<long double>.
// Replacement (last line): a constexpr one-liner that forwards to inf(true)
// and returns long double.
LIBC_INLINE static FPBits<long double> neg_inf() {
FPBits<long double> bits(0.0l);
bits.set_unbiased_exponent(MAX_EXPONENT);
bits.set_implicit_bit(1);
bits.set_sign(1);
return bits;
}
LIBC_INLINE static constexpr long double neg_inf() { return inf(true); }

// Builds a NaN: starts from the bits of 0.0l, stores MAX_EXPONENT into the
// exponent field, sets the implicit bit, and stores `v` into the mantissa
// field (set_mantissa masks it to the mantissa width).
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
// NOTE(review): `return bits;` hands back an FPBits<long double> from a
// function declared to return long double, so it relies on the implicit
// operator long double() of this struct; the generic template writes the
// conversion explicitly.
LIBC_INLINE static long double build_nan(UIntType v) {
LIBC_INLINE static constexpr long double build_nan(UIntType v) {
FPBits<long double> bits(0.0l);
bits.set_unbiased_exponent(MAX_EXPONENT);
bits.set_implicit_bit(1);
bits.set_mantissa(v);
return bits;
}

// Builds a NaN through build_nan() with FloatProp::QUIET_NAN_MASK ORed into
// the requested mantissa bits `v`.
// Diff pair: the second signature is the replacement and only adds
// `constexpr`.
LIBC_INLINE static long double build_quiet_nan(UIntType v) {
LIBC_INLINE static constexpr long double build_quiet_nan(UIntType v) {
return build_nan(FloatProp::QUIET_NAN_MASK | v);
}

LIBC_INLINE static FPBits<long double>
// Added by this change: returns, as a long double, the value whose bit
// pattern is the MIN_NORMAL constant.
LIBC_INLINE static constexpr long double min_normal() {
return FPBits(MIN_NORMAL).get_val();
}

// Added by this change: returns, as a long double, the value whose bit
// pattern is the MAX_NORMAL constant.
LIBC_INLINE static constexpr long double max_normal() {
return FPBits(MAX_NORMAL).get_val();
}

// Added by this change: returns, as a long double, the value whose bit
// pattern is the MIN_SUBNORMAL constant.
// NOTE(review): the accessor says "denormal" while the constant it wraps says
// "SUBNORMAL"; the two spellings refer to the same constant here.
LIBC_INLINE static constexpr long double min_denormal() {
return FPBits(MIN_SUBNORMAL).get_val();
}

// Added by this change: returns, as a long double, the value whose bit
// pattern is the MAX_SUBNORMAL constant.
// NOTE(review): the accessor says "denormal" while the constant it wraps says
// "SUBNORMAL"; the two spellings refer to the same constant here.
LIBC_INLINE static constexpr long double max_denormal() {
return FPBits(MAX_SUBNORMAL).get_val();
}

LIBC_INLINE static constexpr FPBits<long double>
create_value(bool sign, UIntType unbiased_exp, UIntType mantissa) {
FPBits<long double> result;
result.set_sign(sign);
Expand Down
4 changes: 2 additions & 2 deletions libc/src/__support/str_to_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -1220,9 +1220,9 @@ LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
tolower(src[index + 2]) == inf_string[2]) {
seen_digit = true;
if (result.get_sign())
result = result.neg_inf();
result = fputil::FPBits<T>(result.neg_inf());
else
result = result.inf();
result = fputil::FPBits<T>(result.inf());
if (tolower(src[index + 3]) == inf_string[3] &&
tolower(src[index + 4]) == inf_string[4] &&
tolower(src[index + 5]) == inf_string[5] &&
Expand Down
2 changes: 1 addition & 1 deletion libc/src/math/generic/atanhf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ LLVM_LIBC_FUNCTION(float, atanhf, (float x)) {
if (x_abs == 0x3F80'0000U) {
fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_DIVBYZERO);
return FPBits::inf(sign).get_val();
return FPBits::inf(sign);
} else {
fputil::set_errno_if_required(EDOM);
fputil::raise_except_if_required(FE_INVALID);
Expand Down
4 changes: 2 additions & 2 deletions libc/src/math/generic/coshf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ LLVM_LIBC_FUNCTION(float, coshf, (float x)) {
}

if (xbits.is_inf_or_nan())
return x + FPBits::inf().get_val();
return x + FPBits::inf();

int rounding = fputil::quick_get_round();
if (LIBC_UNLIKELY(rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO))
Expand All @@ -40,7 +40,7 @@ LLVM_LIBC_FUNCTION(float, coshf, (float x)) {
fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_OVERFLOW);

return x + FPBits::inf().get_val();
return x + FPBits::inf();
}

// TODO: We should be able to reduce the latency and reciprocal throughput
Expand Down
4 changes: 2 additions & 2 deletions libc/src/math/generic/exp10f_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ LIBC_INLINE float exp10f(float x) {
if (x_u < 0x7f80'0000U) {
int rounding = fputil::quick_get_round();
if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
return static_cast<float>(FPBits(FPBits::MAX_NORMAL));
return FPBits::max_normal();

fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_OVERFLOW);
}
// x is +inf or nan
return x + static_cast<float>(FPBits::inf());
return x + FPBits::inf();
}
}

Expand Down
6 changes: 3 additions & 3 deletions libc/src/math/generic/exp2f_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ LIBC_INLINE float exp2f(float x) {
if (x_u < 0x7f80'0000U) {
int rounding = fputil::quick_get_round();
if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
return static_cast<float>(FPBits(FPBits::MAX_NORMAL));
return FPBits::max_normal();

fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_OVERFLOW);
}
// x is +inf or nan
return x + FPBits::inf().get_val();
return x + FPBits::inf();
}
// x <= -150
if (x_u >= 0xc316'0000U) {
Expand All @@ -93,7 +93,7 @@ LIBC_INLINE float exp2f(float x) {
if (xbits.is_nan())
return x;
if (fputil::fenv_is_round_up())
return FPBits(FPBits::MIN_SUBNORMAL).get_val();
return FPBits::min_denormal();
if (x != 0.0f) {
fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_UNDERFLOW);
Expand Down
Loading