@@ -21,8 +21,10 @@ use num::dec2flt::num::{self, Big};
21
21
/// Number of significand bits in Fp
22
22
const P : u32 = 64 ;
23
23
24
- // We simply store the best approximation for *all* exponents, so the variable "h" and the
25
- // associated conditions can be omitted. This trades performance for a couple kilobytes of space.
24
+ // We simply store the best approximation for *all* exponents, so the variable
25
+ // "h" and the
26
+ // associated conditions can be omitted. This trades performance for a couple
27
+ // kilobytes of space.
26
28
27
29
fn power_of_ten ( e : i16 ) -> Fp {
28
30
assert ! ( e >= table:: MIN_E ) ;
@@ -47,8 +49,10 @@ fn power_of_ten(e: i16) -> Fp {
47
49
/// FIXME: It would nevertheless be nice if we had a good way to detect and deal with x87.
48
50
pub fn fast_path < T : RawFloat > ( integral : & [ u8 ] , fractional : & [ u8 ] , e : i64 ) -> Option < T > {
49
51
let num_digits = integral. len ( ) + fractional. len ( ) ;
50
- // log_10(f64::max_sig) ~ 15.95. We compare the exact value to max_sig near the end,
51
- // this is just a quick, cheap rejection (and also frees the rest of the code from
52
+ // log_10(f64::max_sig) ~ 15.95. We compare the exact value to max_sig near the
53
+ // end,
54
+ // this is just a quick, cheap rejection (and also frees the rest of the code
55
+ // from
52
56
// worrying about underflow).
53
57
if num_digits > 16 {
54
58
return None ;
@@ -130,16 +134,20 @@ fn algorithm_r<T: RawFloat>(f: &Big, e: i16, z0: T) -> T {
130
134
let mut x = f. clone ( ) ;
131
135
let mut y = Big :: from_u64 ( m) ;
132
136
133
- // Find positive integers `x`, `y` such that `x / y` is exactly `(f * 10^e) / (m * 2^k)`.
134
- // This not only avoids dealing with the signs of `e` and `k`, we also eliminate the
137
+ // Find positive integers `x`, `y` such that `x / y` is exactly `(f * 10^e) /
138
+ // (m * 2^k)`.
139
+ // This not only avoids dealing with the signs of `e` and `k`, we also
140
+ // eliminate the
135
141
// power of two common to `10^e` and `2^k` to make the numbers smaller.
136
142
make_ratio ( & mut x, & mut y, e, k) ;
137
143
138
144
let m_digits = [ ( m & 0xFF_FF_FF_FF ) as u32 , ( m >> 32 ) as u32 ] ;
139
145
// This is written a bit awkwardly because our bignums don't support
140
146
// negative numbers, so we use the absolute value + sign information.
141
- // The multiplication with m_digits can't overflow. If `x` or `y` are large enough that
142
- // we need to worry about overflow, then they are also large enough that`make_ratio` has
147
+ // The multiplication with m_digits can't overflow. If `x` or `y` are large
148
+ // enough that
149
+ // we need to worry about overflow, then they are also large enough
150
+ // that`make_ratio` has
143
151
// reduced the fraction by a factor of 2^64 or more.
144
152
let ( d2, d_negative) = if x >= y {
145
153
// Don't need x any more, save a clone().
@@ -187,14 +195,17 @@ fn make_ratio(x: &mut Big, y: &mut Big, e: i16, k: i16) {
187
195
let ( e_abs, k_abs) = ( e. abs ( ) as usize , k. abs ( ) as usize ) ;
188
196
if e >= 0 {
189
197
if k >= 0 {
190
- // x = f * 10^e, y = m * 2^k, except that we reduce the fraction by some power of two.
198
+ // x = f * 10^e, y = m * 2^k, except that we reduce the fraction by some power
199
+ // of two.
191
200
let common = min ( e_abs, k_abs) ;
192
201
x. mul_pow5 ( e_abs) . mul_pow2 ( e_abs - common) ;
193
202
y. mul_pow2 ( k_abs - common) ;
194
203
} else {
195
204
// x = f * 10^e * 2^abs(k), y = m
196
- // This can't overflow because it requires positive `e` and negative `k`, which can
197
- // only happen for values extremely close to 1, which means that `e` and `k` will be
205
+ // This can't overflow because it requires positive `e` and negative `k`, which
206
+ // can
207
+ // only happen for values extremely close to 1, which means that `e` and `k`
208
+ // will be
198
209
// comparatively tiny.
199
210
x. mul_pow5 ( e_abs) . mul_pow2 ( e_abs + k_abs) ;
200
211
}
@@ -239,7 +250,8 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
239
250
v = Big :: from_small ( 1 ) ;
240
251
v. mul_pow5 ( e_abs) . mul_pow2 ( e_abs) ;
241
252
} else {
242
- // FIXME possible optimization: generalize big_to_fp so that we can do the equivalent of
253
+ // FIXME possible optimization: generalize big_to_fp so that we can do the
254
+ // equivalent of
243
255
// fp_to_float(big_to_fp(u)) here, only without the double rounding.
244
256
u = f. clone ( ) ;
245
257
u. mul_pow5 ( e_abs) . mul_pow2 ( e_abs) ;
@@ -253,10 +265,13 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
253
265
loop {
254
266
u. div_rem ( & v, & mut x, & mut rem) ;
255
267
if k == T :: min_exp_int ( ) {
256
- // We have to stop at the minimum exponent, if we wait until `k < T::min_exp_int()`,
257
- // then we'd be off by a factor of two. Unfortunately this means we have to special-
268
+ // We have to stop at the minimum exponent, if we wait until `k <
269
+ // T::min_exp_int()`,
270
+ // then we'd be off by a factor of two. Unfortunately this means we have to
271
+ // special-
258
272
// case normal numbers with the minimum exponent.
259
- // FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure
273
+ // FIXME find a more elegant formulation, but run the `tiny-pow10` test to make
274
+ // sure
260
275
// that it's actually correct!
261
276
if x >= min_sig && x <= max_sig {
262
277
break ;
@@ -283,13 +298,18 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
283
298
284
299
/// Skip over most AlgorithmM iterations by checking the bit length.
285
300
fn quick_start < T : RawFloat > ( u : & mut Big , v : & mut Big , k : & mut i16 ) {
286
- // The bit length is an estimate of the base two logarithm, and log(u / v) = log(u) - log(v).
287
- // The estimate is off by at most 1, but always an under-estimate, so the error on log(u)
288
- // and log(v) are of the same sign and cancel out (if both are large). Therefore the error
301
+ // The bit length is an estimate of the base two logarithm, and log(u / v) =
302
+ // log(u) - log(v).
303
+ // The estimate is off by at most 1, but always an under-estimate, so the error
304
+ // on log(u)
305
+ // and log(v) are of the same sign and cancel out (if both are large).
306
+ // Therefore the error
289
307
// for log(u / v) is at most one as well.
290
- // The target ratio is one where u/v is in an in-range significand. Thus our termination
308
+ // The target ratio is one where u/v is in an in-range significand. Thus our
309
+ // termination
291
310
// condition is log2(u / v) being the significand bits, plus/minus one.
292
- // FIXME Looking at the second bit could improve the estimate and avoid some more divisions.
311
+ // FIXME Looking at the second bit could improve the estimate and avoid some
312
+ // more divisions.
293
313
let target_ratio = f64:: sig_bits ( ) as i16 ;
294
314
let log2_u = u. bit_length ( ) as i16 ;
295
315
let log2_v = v. bit_length ( ) as i16 ;
@@ -326,8 +346,10 @@ fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
326
346
let z = rawfp:: encode_subnormal ( q) ;
327
347
return round_by_remainder ( v, rem, q, z) ;
328
348
}
329
- // Ratio isn't an in-range significand with the minimum exponent, so we need to round off
330
- // excess bits and adjust the exponent accordingly. The real value now looks like this:
349
+ // Ratio isn't an in-range significand with the minimum exponent, so we need to
350
+ // round off
351
+ // excess bits and adjust the exponent accordingly. The real value now looks
352
+ // like this:
331
353
//
332
354
// x lsb
333
355
// /--------------\/
@@ -336,8 +358,10 @@ fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
336
358
// q trunc. (represented by rem)
337
359
//
338
360
// Therefore, when the rounded-off bits are != 0.5 ULP, they decide the rounding
339
- // on their own. When they are equal and the remainder is non-zero, the value still
340
- // needs to be rounded up. Only when the rounded off bits are 1/2 and the remainer
361
+ // on their own. When they are equal and the remainder is non-zero, the value
362
+ // still
363
+ // needs to be rounded up. Only when the rounded off bits are 1/2 and the
364
+ // remainer
341
365
// is zero, we have a half-to-even situation.
342
366
let bits = x. bit_length ( ) ;
343
367
let lsb = bits - T :: sig_bits ( ) as usize ;
0 commit comments