From cd7cfc7108822c7091daf6598150a5465a1f70a2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 15 Mar 2023 01:13:34 -0500 Subject: [PATCH 1/5] Move double and triple length helper functions to the top --- Modules/mathmodule.c | 193 ++++++++++++++++++++++--------------------- 1 file changed, 97 insertions(+), 96 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 544560e8322c72..382118b4fb2ab4 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -92,6 +92,103 @@ get_math_module_state(PyObject *module) return (math_module_state *)state; } +/* +Double and triple length extended precision algorithms from: + + Accurate Sum and Dot Product + by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi + https://doi.org/10.1137/030601818 + https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf + +*/ + +typedef struct{ double hi; double lo; } DoubleLength; + +static DoubleLength +dl_sum(double a, double b) +{ + /* Algorithm 3.1 Error-free transformation of the sum */ + double x = a + b; + double z = x - a; + double y = (a - (x - z)) + (b - z); + return (DoubleLength) {x, y}; +} + +#ifndef UNRELIABLE_FMA + +static DoubleLength +dl_mul(double x, double y) +{ + /* Algorithm 3.5. Error-free transformation of a product */ + double z = x * y; + double zz = fma(x, y, -z); + return (DoubleLength) {z, zz}; +} + +#else + +/* + The default implementation of dl_mul() depends on the C math library + having an accurate fma() function as required by § 7.12.13.1 of the + C99 standard. + + The UNRELIABLE_FMA option is provided as a slower but accurate + alternative for builds where the fma() function is found wanting. + The speed penalty may be modest (17% slower on an Apple M1 Max), + so don't hesitate to enable this build option. + + The algorithms are from the T. J. Dekker paper: + A Floating-Point Technique for Extending the Available Precision + https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf +*/ + +static DoubleLength +dl_split(double x) { + // Dekker (5.5) and (5.6). + double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 + double hi = t - (t - x); + double lo = x - hi; + return (DoubleLength) {hi, lo}; +} + +static DoubleLength +dl_mul(double x, double y) +{ + // Dekker (5.12) and mul12() + DoubleLength xx = dl_split(x); + DoubleLength yy = dl_split(y); + double p = xx.hi * yy.hi; + double q = xx.hi * yy.lo + xx.lo * yy.hi; + double z = p + q; + double zz = p - z + q + xx.lo * yy.lo; + return (DoubleLength) {z, zz}; +} + +#endif + +typedef struct { double hi; double lo; double tiny; } TripleLength; + +static const TripleLength tl_zero = {0.0, 0.0, 0.0}; + +static TripleLength +tl_fma(double x, double y, TripleLength total) +{ + /* Algorithm 5.10 with SumKVert for K=3 */ + DoubleLength pr = dl_mul(x, y); + DoubleLength sm = dl_sum(total.hi, pr.hi); + DoubleLength r1 = dl_sum(total.lo, pr.lo); + DoubleLength r2 = dl_sum(r1.hi, sm.lo); + return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo}; +} + +static double +tl_to_d(TripleLength total) +{ + DoubleLength last = dl_sum(total.lo, total.hi); + return total.tiny + last.lo + last.hi; +} + + /* sin(pi*x), giving accurate results for all finite x (especially x integral or close to an integer). This is here for use in the @@ -2646,102 +2743,6 @@ long_add_would_overflow(long a, long b) return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a); } -/* -Double and triple length extended precision algorithms from: - - Accurate Sum and Dot Product - by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi - https://doi.org/10.1137/030601818 - https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf - -*/ - -typedef struct{ double hi; double lo; } DoubleLength; - -static DoubleLength -dl_sum(double a, double b) -{ - /* Algorithm 3.1 Error-free transformation of the sum */ - double x = a + b; - double z = x - a; - double y = (a - (x - z)) + (b - z); - return (DoubleLength) {x, y}; -} - -#ifndef UNRELIABLE_FMA - -static DoubleLength -dl_mul(double x, double y) -{ - /* Algorithm 3.5. Error-free transformation of a product */ - double z = x * y; - double zz = fma(x, y, -z); - return (DoubleLength) {z, zz}; -} - -#else - -/* - The default implementation of dl_mul() depends on the C math library - having an accurate fma() function as required by § 7.12.13.1 of the - C99 standard. - - The UNRELIABLE_FMA option is provided as a slower but accurate - alternative for builds where the fma() function is found wanting. - The speed penalty may be modest (17% slower on an Apple M1 Max), - so don't hesitate to enable this build option. - - The algorithms are from the T. J. Dekker paper: - A Floating-Point Technique for Extending the Available Precision - https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf -*/ - -static DoubleLength -dl_split(double x) { - // Dekker (5.5) and (5.6). - double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 - double hi = t - (t - x); - double lo = x - hi; - return (DoubleLength) {hi, lo}; -} - -static DoubleLength -dl_mul(double x, double y) -{ - // Dekker (5.12) and mul12() - DoubleLength xx = dl_split(x); - DoubleLength yy = dl_split(y); - double p = xx.hi * yy.hi; - double q = xx.hi * yy.lo + xx.lo * yy.hi; - double z = p + q; - double zz = p - z + q + xx.lo * yy.lo; - return (DoubleLength) {z, zz}; -} - -#endif - -typedef struct { double hi; double lo; double tiny; } TripleLength; - -static const TripleLength tl_zero = {0.0, 0.0, 0.0}; - -static TripleLength -tl_fma(double x, double y, TripleLength total) -{ - /* Algorithm 5.10 with SumKVert for K=3 */ - DoubleLength pr = dl_mul(x, y); - DoubleLength sm = dl_sum(total.hi, pr.hi); - DoubleLength r1 = dl_sum(total.lo, pr.lo); - DoubleLength r2 = dl_sum(r1.hi, sm.lo); - return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo}; -} - -static double -tl_to_d(TripleLength total) -{ - DoubleLength last = dl_sum(total.lo, total.hi); - return total.tiny + last.lo + last.hi; -} - /*[clinic input] math.sumprod From cd5fb29fb43dfe4ad3c508aa98e16b768f03daa6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 15 Mar 2023 01:31:29 -0500 Subject: [PATCH 2/5] Add more double length functions to support vector_norm --- Modules/mathmodule.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 382118b4fb2ab4..6ea49a1d28281f 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -104,6 +104,22 @@ Double and triple length extended precision algorithms from: typedef struct{ double hi; double lo; } DoubleLength; +static DoubleLength +d_to_dl(double x) +{ + return (DoubleLength) {x, 0.0}; +} + +static DoubleLength +dl_fast_sum(double a, double b) +{ + /* Algorithm 1.1. Compensated summation of two floating point numbers. */ + assert(fabs(a) >= fabs(b)); + double x = a + b; + double y = (a - x) + b; + return (DoubleLength) {x, y}; +} + static DoubleLength dl_sum(double a, double b) { @@ -164,6 +180,21 @@ dl_mul(double x, double y) return (DoubleLength) {z, zz}; } +static DoubleLength +dl_fma(double x, double y, DoubleLength total) +{ + /* Algorithm 5.10 with SumKVert for K=2 */ + DoubleLength pr = dl_mul(x, y); + DoubleLength sm = dl_sum(total.hi, pr.hi); + return DoubleLength {sm.hi, pr.lo + sm.lo + total.lo}; +} + +static double +dl_to_d(DoubleLength total): +{ + return total.lo + total.hi; +} + #endif typedef struct { double hi; double lo; double tiny; } TripleLength; From 8f3e73c3f30d2c762c13f9494ad47ce1a1e4b9bb Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 15 Mar 2023 01:44:20 -0500 Subject: [PATCH 3/5] Use DoubleLength functions in hypot(). --- Modules/mathmodule.c | 77 +++++++++----------------------------------- 1 file changed, 15 insertions(+), 62 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 6ea49a1d28281f..2ff10c96841951 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -104,12 +104,6 @@ Double and triple length extended precision algorithms from: typedef struct{ double hi; double lo; } DoubleLength; -static DoubleLength -d_to_dl(double x) -{ - return (DoubleLength) {x, 0.0}; -} - static DoubleLength dl_fast_sum(double a, double b) { @@ -189,12 +183,6 @@ dl_fma(double x, double y, DoubleLength total) return DoubleLength {sm.hi, pr.lo + sm.lo + total.lo}; } -static double -dl_to_d(DoubleLength total): -{ - return total.lo + total.hi; -} - #endif typedef struct { double hi; double lo; double tiny; } TripleLength; @@ -2511,9 +2499,8 @@ exactly equal) was verified for 1 billion random inputs with n=5. [7] static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { - const double T27 = 134217729.0; /* ldexp(1.0, 27) + 1.0) */ - double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0; - double t, hi, lo, h; + double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0; + DoubleLength pr, sm; int max_e; Py_ssize_t i; @@ -2538,54 +2525,20 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) x *= scale; assert(fabs(x) < 1.0); - t = x * T27; - hi = t - (t - x); - lo = x - hi; - assert(hi + lo == x); - - x = hi * hi; - assert(x <= 1.0); - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac1 += (oldcsum - csum) + x; - - x = 2.0 * hi * lo; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac2 += (oldcsum - csum) + x; - - assert(csum + lo * lo == csum); - frac3 += lo * lo; + pr = dl_mul(x, x); + assert(pr.hi <= 1.0); + sm = dl_fast_sum(csum, pr.hi); + csum = sm.hi; + frac1 += pr.lo; + frac2 += sm.lo; } - h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3)); - - x = h; - t = x * T27; - hi = t - (t - x); - lo = x - hi; - assert (hi + lo == x); - - x = -hi * hi; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac1 += (oldcsum - csum) + x; - - x = -2.0 * hi * lo; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac2 += (oldcsum - csum) + x; - - x = -lo * lo; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac3 += (oldcsum - csum) + x; - - x = csum - 1.0 + (frac1 + frac2 + frac3); + h = sqrt(csum - 1.0 + (frac1 + frac2)); + pr = dl_mul(-h, h); + sm = dl_fast_sum(csum, pr.hi); + csum = sm.hi; + frac1 += pr.lo; + frac2 += sm.lo; + x = csum - 1.0 + (frac1 + frac2); return (h + x / (2.0 * h)) / scale; } /* When max_e < -1023, ldexp(1.0, -max_e) overflows. From 58377dfba26b4dd8288e607048258b796c4fa7e5 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 15 Mar 2023 01:59:11 -0500 Subject: [PATCH 4/5] Update comments --- Modules/mathmodule.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 2ff10c96841951..101fa373394bb0 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2417,6 +2417,7 @@ that are almost always correctly rounded, four techniques are used: * lossless scaling using a power-of-two scaling factor * accurate squaring using Veltkamp-Dekker splitting [1] + or an equivalent with an fma() call * compensated summation using a variant of the Neumaier algorithm [2] * differential correction of the square root [3] @@ -2475,14 +2476,21 @@ algorithm, effectively doubling the number of accurate bits. This technique is used in Dekker's SQRT2 algorithm and again in Borges' ALGORITHM 4 and 5. -Without proof for all cases, hypot() cannot claim to be always -correctly rounded. However for n <= 1000, prior to the final addition -that rounds the overall result, the internal accuracy of "h" together -with its correction of "x / (2.0 * h)" is at least 100 bits. [6] -Also, hypot() was tested against a Decimal implementation with -prec=300. After 100 million trials, no incorrectly rounded examples -were found. In addition, perfect commutativity (all permutations are -exactly equal) was verified for 1 billion random inputs with n=5. [7] +The hypot() function is faithfully rounded (less than 1 ulp error) +and usually correctly rounded (within 1/2 ulp). The squaring +step is exact. The Neumaier summation computes as if in doubled +precision (106 bits) and has the advantage that its input squares +are non-negative so that the condition number of the sum is one. +The square root with a differential correction is likewise computed +as if in double precision. + +For n <= 1000, prior to the final addition that rounds the overall +result, the internal accuracy of "h" together with its correction of +"x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested +against a Decimal implementation with prec=300. After 100 million +trials, no incorrectly rounded examples were found. In addition, +perfect commutativity (all permutations are exactly equal) was +verified for 1 billion random inputs with n=5. [7] References: From 47e878d2f139076adfc587cbd0e86248582884c3 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 15 Mar 2023 09:59:34 -0500 Subject: [PATCH 5/5] Remove unused code --- Modules/mathmodule.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 101fa373394bb0..ae9e3211c072d8 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -174,15 +174,6 @@ dl_mul(double x, double y) return (DoubleLength) {z, zz}; } -static DoubleLength -dl_fma(double x, double y, DoubleLength total) -{ - /* Algorithm 5.10 with SumKVert for K=2 */ - DoubleLength pr = dl_mul(x, y); - DoubleLength sm = dl_sum(total.hi, pr.hi); - return DoubleLength {sm.hi, pr.lo + sm.lo + total.lo}; -} - #endif typedef struct { double hi; double lo; double tiny; } TripleLength; @@ -2535,6 +2526,7 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) pr = dl_mul(x, x); assert(pr.hi <= 1.0); + sm = dl_fast_sum(csum, pr.hi); csum = sm.hi; frac1 += pr.lo;