From 2143a3f0c21554ad70e8c72199869afadc2c0e93 Mon Sep 17 00:00:00 2001 From: "Pascal S. de Kloe" Date: Fri, 22 Aug 2025 17:40:48 +0200 Subject: [PATCH 1/2] benchmarks for exponent fmt of integers --- library/coretests/benches/fmt.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/library/coretests/benches/fmt.rs b/library/coretests/benches/fmt.rs index f45b921b93933..17549ab0f1c2a 100644 --- a/library/coretests/benches/fmt.rs +++ b/library/coretests/benches/fmt.rs @@ -342,3 +342,27 @@ fn write_i128_hex(bh: &mut Bencher) { black_box(&mut buf).clear(); }); } + +#[bench] +fn write_i64_exp(bh: &mut Bencher) { + let mut buf = String::with_capacity(1024); + bh.iter(|| { + write!(black_box(&mut buf), "{:e}", black_box(0_i64)).unwrap(); + write!(black_box(&mut buf), "{:e}", black_box(100_i64)).unwrap(); + write!(black_box(&mut buf), "{:e}", black_box(-100_i64)).unwrap(); + write!(black_box(&mut buf), "{:e}", black_box(1_i64 << 32)).unwrap(); + black_box(&mut buf).clear(); + }); +} + +#[bench] +fn write_i128_exp(bh: &mut Bencher) { + let mut buf = String::with_capacity(1024); + bh.iter(|| { + write!(black_box(&mut buf), "{:e}", black_box(0_i128)).unwrap(); + write!(black_box(&mut buf), "{:e}", black_box(100_i128)).unwrap(); + write!(black_box(&mut buf), "{:e}", black_box(-100_i128)).unwrap(); + write!(black_box(&mut buf), "{:e}", black_box(1_i128 << 64)).unwrap(); + black_box(&mut buf).clear(); + }); +} From f135601b07a9bf623691751d139b6bf8b9a6af86 Mon Sep 17 00:00:00 2001 From: "Pascal S. de Kloe" Date: Sun, 24 Aug 2025 14:47:48 +0200 Subject: [PATCH 2/2] single buffer for exponent fmt of integers --- library/core/src/fmt/num.rs | 387 ++++++++++++++++++++---------------- 1 file changed, 211 insertions(+), 176 deletions(-) diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs index 605ba42c541fa..11c592db62313 100644 --- a/library/core/src/fmt/num.rs +++ b/library/core/src/fmt/num.rs @@ -3,7 +3,7 @@ use crate::fmt::NumBuffer; use crate::mem::MaybeUninit; use crate::num::fmt as numfmt; -use crate::{fmt, ptr, slice, str}; +use crate::{fmt, str}; /// Formatting of integers with a non-decimal radix. macro_rules! radix_integer { @@ -96,8 +96,8 @@ macro_rules! impl_Debug { }; } -// 2 digit decimal look up table -static DEC_DIGITS_LUT: &[u8; 200] = b"\ +// The string of all two-digit numbers in range 00..99 is used as a lookup table. +static DECIMAL_PAIRS: &[u8; 200] = b"\ 0001020304050607080910111213141516171819\ 2021222324252627282930313233343536373839\ 4041424344454647484950515253545556575859\ @@ -123,6 +123,9 @@ macro_rules! impl_Display { $( const _: () = { + assert!($Signed::MIN < 0, "need signed"); + assert!($Unsigned::MIN == 0, "need unsigned"); + assert!($Signed::BITS == $Unsigned::BITS, "need counterparts"); assert!($Signed::BITS <= $T::BITS, "need lossless conversion"); assert!($Unsigned::BITS <= $T::BITS, "need lossless conversion"); }; @@ -207,10 +210,10 @@ macro_rules! impl_Display { remain /= scale; let pair1 = (quad / 100) as usize; let pair2 = (quad % 100) as usize; - buf[offset + 0].write(DEC_DIGITS_LUT[pair1 * 2 + 0]); - buf[offset + 1].write(DEC_DIGITS_LUT[pair1 * 2 + 1]); - buf[offset + 2].write(DEC_DIGITS_LUT[pair2 * 2 + 0]); - buf[offset + 3].write(DEC_DIGITS_LUT[pair2 * 2 + 1]); + buf[offset + 0].write(DECIMAL_PAIRS[pair1 * 2 + 0]); + buf[offset + 1].write(DECIMAL_PAIRS[pair1 * 2 + 1]); + buf[offset + 2].write(DECIMAL_PAIRS[pair2 * 2 + 0]); + buf[offset + 3].write(DECIMAL_PAIRS[pair2 * 2 + 1]); } // Format per two digits from the lookup table. @@ -225,8 +228,8 @@ macro_rules! impl_Display { let pair = (remain % 100) as usize; remain /= 100; - buf[offset + 0].write(DEC_DIGITS_LUT[pair * 2 + 0]); - buf[offset + 1].write(DEC_DIGITS_LUT[pair * 2 + 1]); + buf[offset + 0].write(DECIMAL_PAIRS[pair * 2 + 0]); + buf[offset + 1].write(DECIMAL_PAIRS[pair * 2 + 1]); } // Format the last remaining digit, if any. @@ -242,7 +245,7 @@ macro_rules! impl_Display { // Either the compiler sees that remain < 10, or it prevents // a boundary check up next. let last = (remain & 15) as usize; - buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]); + buf[offset].write(DECIMAL_PAIRS[last * 2 + 1]); // not used: remain = 0; } @@ -335,7 +338,6 @@ macro_rules! impl_Display { } } - )* #[cfg(feature = "optimize_for_size")] @@ -374,178 +376,211 @@ macro_rules! impl_Display { macro_rules! impl_Exp { ($($Signed:ident, $Unsigned:ident),* ; as $T:ident into $fmt_fn:ident) => { - fn $fmt_fn( - mut n: $T, + const _: () = assert!($T::MIN == 0, "need unsigned"); + + fn $fmt_fn( + n: $T, is_nonnegative: bool, - upper: bool, f: &mut fmt::Formatter<'_> ) -> fmt::Result { - let (mut n, mut exponent, trailing_zeros, added_precision) = { - let mut exponent = 0; - // count and remove trailing decimal zeroes - while n % 10 == 0 && n >= 10 { - n /= 10; - exponent += 1; - } - let (added_precision, subtracted_precision) = match f.precision() { - Some(fmt_prec) => { - // number of decimal digits minus 1 - let mut tmp = n; - let mut prec = 0; - while tmp >= 10 { - tmp /= 10; - prec += 1; - } - (fmt_prec.saturating_sub(prec), prec.saturating_sub(fmt_prec)) + assert!(LETTER_E.is_ascii(), "single-byte character"); + + // Print the integer as a coefficient in range (-10, 10). + let mut exp = n.checked_ilog10().unwrap_or(0) as usize; + debug_assert!(n / (10 as $T).pow(exp as u32) < 10); + + // Precisison is counted as the number of digits in the fraction. + let mut coef_prec = exp; + // Keep the digits as an integer (paired with its coef_prec count). + let mut coef = n; + + // A Formatter may set the precision to a fixed number of decimals. + let more_prec = match f.precision() { + None => { + // Omit any and all trailing zeroes. + while coef_prec != 0 && coef % 10 == 0 { + coef /= 10; + coef_prec -= 1; } - None => (0, 0) - }; - for _ in 1..subtracted_precision { - n /= 10; - exponent += 1; - } - if subtracted_precision != 0 { - let rem = n % 10; - n /= 10; - exponent += 1; - // round up last digit, round to even on a tie - if rem > 5 || (rem == 5 && (n % 2 != 0 || subtracted_precision > 1 )) { - n += 1; - // if the digit is rounded to the next power - // instead adjust the exponent - if n.ilog10() > (n - 1).ilog10() { - n /= 10; - exponent += 1; - } + 0 + }, + + Some(fmt_prec) if fmt_prec >= coef_prec => { + // Count the number of additional zeroes needed. + fmt_prec - coef_prec + }, + + Some(fmt_prec) => { + // Count the number of digits to drop. + let less_prec = coef_prec - fmt_prec; + assert!(less_prec > 0); + + // Scale down the coefficient/precision pair. + let scale = match (10 as $T).checked_pow(less_prec as u32) { + Some(pow_of_ten) => pow_of_ten, + None => { + // SAFETY: Any precision less than coef_prec will + // cause a power of ten below the coef value. + unsafe { core::hint::unreachable_unchecked() } + }, + }; + let floor = coef / scale; + // Round half to even conform documentation. + let over = coef % scale; + let half = scale / 2; + let round_up = if over < half { + 0 + } else if over > half { + 1 + } else { + floor & 1 // round odd up to even + }; + // Adding one to a scale down of at least 10 won't overflow. + coef = floor + round_up; + coef_prec = fmt_prec; + + // The round_up may have caused the coefficient to reach 10 + // (which is not permitted). For example, anything in range + // [9.95, 10) becomes 10.0 when adjusted to precision 1. + if round_up != 0 && coef.checked_ilog10().unwrap_or(0) as usize > coef_prec { + debug_assert_eq!(coef, (10 as $T).pow(coef_prec as u32 + 1)); + coef /= 10; // drop one trailing zero + exp += 1; // one power of ten higher } - } - (n, exponent, exponent, added_precision) + 0 + }, }; - // Since `curr` always decreases by the number of digits copied, this means - // that `curr >= 0`. - let mut buf = [MaybeUninit::::uninit(); 40]; - let mut curr = buf.len(); //index for buf - let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf); - let lut_ptr = DEC_DIGITS_LUT.as_ptr(); - - // decode 2 chars at a time - while n >= 100 { - let d1 = ((n % 100) as usize) << 1; - curr -= 2; - // SAFETY: `d1 <= 198`, so we can copy from `lut_ptr[d1..d1 + 2]` since - // `DEC_DIGITS_LUT` has a length of 200. - unsafe { - ptr::copy_nonoverlapping(lut_ptr.add(d1), buf_ptr.add(curr), 2); - } - n /= 100; - exponent += 2; - } - // n is <= 99, so at most 2 chars long - let mut n = n as isize; // possibly reduce 64bit math - // decode second-to-last character - if n >= 10 { - curr -= 1; - // SAFETY: Safe since `40 > curr >= 0` (see comment) - unsafe { - *buf_ptr.add(curr) = (n as u8 % 10_u8) + b'0'; + // Allocate a text buffer with lazy initialization. + const MAX_DEC_N: usize = $T::MAX.ilog10() as usize + 1; + const MAX_COEF_LEN: usize = MAX_DEC_N + ".".len(); + const MAX_TEXT_LEN: usize = MAX_COEF_LEN + "e99".len(); + let mut buf = [MaybeUninit::::uninit(); MAX_TEXT_LEN]; + + // Encode the coefficient in buf[..coef_len]. + let (lead_dec, coef_len) = if coef_prec == 0 && more_prec == 0 { + (coef, 1_usize) // single digit; no fraction + } else { + buf[1].write(b'.'); + let fraction_range = 2..2 + coef_prec; + + // Consume the least-significant decimals from a working copy. + let mut remain = coef; + #[cfg(feature = "optimize_for_size")] { + for i in fraction_range.clone().rev() { + let digit = (remain % 10) as usize; + remain /= 10; + buf[i].write(b'0' + digit as u8); + } } - n /= 10; - exponent += 1; - } - // add decimal point iff >1 mantissa digit will be printed - if exponent != trailing_zeros || added_precision != 0 { - curr -= 1; - // SAFETY: Safe since `40 > curr >= 0` - unsafe { - *buf_ptr.add(curr) = b'.'; + #[cfg(not(feature = "optimize_for_size"))] { + // Write digits per two at a time with a lookup table. + for i in fraction_range.clone().skip(1).rev().step_by(2) { + let pair = (remain % 100) as usize; + remain /= 100; + buf[i - 1].write(DECIMAL_PAIRS[pair * 2 + 0]); + buf[i - 0].write(DECIMAL_PAIRS[pair * 2 + 1]); + } + // An odd number of digits leave one digit remaining. + if coef_prec & 1 != 0 { + let digit = (remain % 10) as usize; + remain /= 10; + buf[fraction_range.start].write(b'0' + digit as u8); + } } - } - // SAFETY: Safe since `40 > curr >= 0` - let buf_slice = unsafe { - // decode last character - curr -= 1; - *buf_ptr.add(curr) = (n as u8) + b'0'; - - let len = buf.len() - curr as usize; - slice::from_raw_parts(buf_ptr.add(curr), len) + (remain, fraction_range.end) }; - - // stores 'e' (or 'E') and the up to 2-digit exponent - let mut exp_buf = [MaybeUninit::::uninit(); 3]; - let exp_ptr = MaybeUninit::slice_as_mut_ptr(&mut exp_buf); - // SAFETY: In either case, `exp_buf` is written within bounds and `exp_ptr[..len]` - // is contained within `exp_buf` since `len <= 3`. - let exp_slice = unsafe { - *exp_ptr.add(0) = if upper { b'E' } else { b'e' }; - let len = if exponent < 10 { - *exp_ptr.add(1) = (exponent as u8) + b'0'; - 2 - } else { - let off = exponent << 1; - ptr::copy_nonoverlapping(lut_ptr.add(off), exp_ptr.add(1), 2); - 3 - }; - slice::from_raw_parts(exp_ptr, len) + debug_assert!(lead_dec < 10); + debug_assert!(lead_dec != 0 || coef == 0, "significant digits only"); + buf[0].write(b'0' + lead_dec as u8); + + // SAFETY: The number of decimals is limited, captured by MAX. + unsafe { core::hint::assert_unchecked(coef_len <= MAX_COEF_LEN) } + // Encode the scale factor in buf[coef_len..text_len]. + buf[coef_len].write(LETTER_E); + let text_len: usize = match exp { + ..10 => { + buf[coef_len + 1].write(b'0' + exp as u8); + coef_len + 2 + }, + 10..100 => { + #[cfg(feature = "optimize_for_size")] { + buf[coef_len + 1].write(b'0' + (exp / 10) as u8); + buf[coef_len + 2].write(b'0' + (exp % 10) as u8); + } + #[cfg(not(feature = "optimize_for_size"))] { + buf[coef_len + 1].write(DECIMAL_PAIRS[exp * 2 + 0]); + buf[coef_len + 2].write(DECIMAL_PAIRS[exp * 2 + 1]); + } + coef_len + 3 + }, + _ => { + // SAFETY: A `u256::MAX` would get exponent 77. + unsafe { core::hint::unreachable_unchecked() } + } }; - - let parts = &[ - numfmt::Part::Copy(buf_slice), - numfmt::Part::Zero(added_precision), - numfmt::Part::Copy(exp_slice), - ]; - let sign = if !is_nonnegative { - "-" - } else if f.sign_plus() { - "+" + // SAFETY: All bytes up until text_len have been set. + let text = unsafe { buf[..text_len].assume_init_ref() }; + + if more_prec == 0 { + // SAFETY: Text is set with ASCII exclusively: either a decimal, + // or a LETTER_E, or a dot. ASCII implies valid UTF-8. + let as_str = unsafe { str::from_utf8_unchecked(text) }; + f.pad_integral(is_nonnegative, "", as_str) } else { - "" - }; - let formatted = numfmt::Formatted { sign, parts }; - // SAFETY: `buf_slice` and `exp_slice` contain only ASCII characters. - unsafe { f.pad_formatted_parts(&formatted) } + let parts = &[ + numfmt::Part::Copy(&text[..coef_len]), + numfmt::Part::Zero(more_prec), + numfmt::Part::Copy(&text[coef_len..]), + ]; + let sign = if !is_nonnegative { + "-" + } else if f.sign_plus() { + "+" + } else { + "" + }; + // SAFETY: Text is set with ASCII exclusively: either a decimal, + // or a LETTER_E, or a dot. ASCII implies valid UTF-8. + unsafe { f.pad_formatted_parts(&numfmt::Formatted { sign, parts }) } + } } $( - #[stable(feature = "integer_exp_format", since = "1.42.0")] - impl fmt::LowerExp for $Signed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let is_nonnegative = *self >= 0; - let n = if is_nonnegative { - *self as $T - } else { - self.unsigned_abs() as $T - }; - $fmt_fn(n, is_nonnegative, false, f) - } + const _: () = { + assert!($Signed::MIN < 0, "need signed"); + assert!($Unsigned::MIN == 0, "need unsigned"); + assert!($Signed::BITS == $Unsigned::BITS, "need counterparts"); + assert!($Signed::BITS <= $T::BITS, "need lossless conversion"); + assert!($Unsigned::BITS <= $T::BITS, "need lossless conversion"); + }; + #[stable(feature = "integer_exp_format", since = "1.42.0")] + impl fmt::LowerExp for $Signed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + $fmt_fn::(self.unsigned_abs() as $T, *self >= 0, f) } - #[stable(feature = "integer_exp_format", since = "1.42.0")] - impl fmt::LowerExp for $Unsigned { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - $fmt_fn(*self as $T, true, false, f) - } - })* - - $( - #[stable(feature = "integer_exp_format", since = "1.42.0")] - impl fmt::UpperExp for $Signed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let is_nonnegative = *self >= 0; - let n = if is_nonnegative { - *self as $T - } else { - self.unsigned_abs() as $T - }; - $fmt_fn(n, is_nonnegative, true, f) - } + } + #[stable(feature = "integer_exp_format", since = "1.42.0")] + impl fmt::LowerExp for $Unsigned { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + $fmt_fn::(*self as $T, true, f) } - #[stable(feature = "integer_exp_format", since = "1.42.0")] - impl fmt::UpperExp for $Unsigned { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - $fmt_fn(*self as $T, true, true, f) - } - })* + } + #[stable(feature = "integer_exp_format", since = "1.42.0")] + impl fmt::UpperExp for $Signed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + $fmt_fn::(self.unsigned_abs() as $T, *self >= 0, f) + } + } + #[stable(feature = "integer_exp_format", since = "1.42.0")] + impl fmt::UpperExp for $Unsigned { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + $fmt_fn::(*self as $T, true, f) + } + } + )* + }; } @@ -658,10 +693,10 @@ impl u128 { remain /= 1_00_00; let pair1 = (quad / 100) as usize; let pair2 = (quad % 100) as usize; - buf[offset + 0].write(DEC_DIGITS_LUT[pair1 * 2 + 0]); - buf[offset + 1].write(DEC_DIGITS_LUT[pair1 * 2 + 1]); - buf[offset + 2].write(DEC_DIGITS_LUT[pair2 * 2 + 0]); - buf[offset + 3].write(DEC_DIGITS_LUT[pair2 * 2 + 1]); + buf[offset + 0].write(DECIMAL_PAIRS[pair1 * 2 + 0]); + buf[offset + 1].write(DECIMAL_PAIRS[pair1 * 2 + 1]); + buf[offset + 2].write(DECIMAL_PAIRS[pair2 * 2 + 0]); + buf[offset + 3].write(DECIMAL_PAIRS[pair2 * 2 + 1]); } // Format per two digits from the lookup table. @@ -676,8 +711,8 @@ impl u128 { let pair = (remain % 100) as usize; remain /= 100; - buf[offset + 0].write(DEC_DIGITS_LUT[pair * 2 + 0]); - buf[offset + 1].write(DEC_DIGITS_LUT[pair * 2 + 1]); + buf[offset + 0].write(DECIMAL_PAIRS[pair * 2 + 0]); + buf[offset + 1].write(DECIMAL_PAIRS[pair * 2 + 1]); } // Format the last remaining digit, if any. @@ -693,7 +728,7 @@ impl u128 { // Either the compiler sees that remain < 10, or it prevents // a boundary check up next. let last = (remain & 15) as usize; - buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]); + buf[offset].write(DECIMAL_PAIRS[last * 2 + 1]); // not used: remain = 0; } offset @@ -792,10 +827,10 @@ fn enc_16lsd(buf: &mut [MaybeUninit], n: u64) { remain /= 1_00_00; let pair1 = (quad / 100) as usize; let pair2 = (quad % 100) as usize; - buf[quad_index * 4 + OFFSET + 0].write(DEC_DIGITS_LUT[pair1 * 2 + 0]); - buf[quad_index * 4 + OFFSET + 1].write(DEC_DIGITS_LUT[pair1 * 2 + 1]); - buf[quad_index * 4 + OFFSET + 2].write(DEC_DIGITS_LUT[pair2 * 2 + 0]); - buf[quad_index * 4 + OFFSET + 3].write(DEC_DIGITS_LUT[pair2 * 2 + 1]); + buf[quad_index * 4 + OFFSET + 0].write(DECIMAL_PAIRS[pair1 * 2 + 0]); + buf[quad_index * 4 + OFFSET + 1].write(DECIMAL_PAIRS[pair1 * 2 + 1]); + buf[quad_index * 4 + OFFSET + 2].write(DECIMAL_PAIRS[pair2 * 2 + 0]); + buf[quad_index * 4 + OFFSET + 3].write(DECIMAL_PAIRS[pair2 * 2 + 1]); } }