Skip to content

Commit 1399074

Browse files
authored
Replace straight addition with Kahan summation and move max to the end (GH-8727)
1 parent 2fc4697 commit 1399074

File tree

1 file changed

+45
-20
lines changed

1 file changed

+45
-20
lines changed

Modules/mathmodule.c

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2031,6 +2031,49 @@ math_fmod_impl(PyObject *module, double x, double y)
20312031
return PyFloat_FromDouble(r);
20322032
}
20332033

2034+
/*
2035+
Given an *n* length *vec* of non-negative, non-nan, non-inf values
2036+
where *max* is the largest value in the vector, compute:
2037+
2038+
sum((x / max) ** 2 for x in vec)
2039+
2040+
When a maximum value is found, it is swapped to the end. This
2041+
lets us skip one loop iteration and just add 1.0 at the end.
2042+
Saving the largest value for last also helps improve accuracy.
2043+
2044+
Kahan summation is used to improve accuracy. The *csum*
2045+
variable tracks the cumulative sum and *frac* tracks
2046+
fractional round-off error for the most recent addition.
2047+
2048+
*/
2049+
2050+
static inline double
2051+
scaled_vector_squared(Py_ssize_t n, double *vec, double max)
2052+
{
2053+
double x, csum = 0.0, oldcsum, frac = 0.0;
2054+
Py_ssize_t i;
2055+
2056+
if (max == 0.0) {
2057+
return 0.0;
2058+
}
2059+
assert(n > 0);
2060+
for (i=0 ; i<n-1 ; i++) {
2061+
x = vec[i];
2062+
if (x == max) {
2063+
x = vec[n-1];
2064+
vec[n-1] = max;
2065+
}
2066+
x /= max;
2067+
x = x*x - frac;
2068+
oldcsum = csum;
2069+
csum += x;
2070+
frac = (csum - oldcsum) - x;
2071+
}
2072+
assert(vec[n-1] == max);
2073+
csum += 1.0 - frac;
2074+
return csum;
2075+
}
2076+
20342077
/*[clinic input]
20352078
math.dist
20362079
@@ -2054,7 +2097,6 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
20542097
PyObject *item;
20552098
double *diffs;
20562099
double max = 0.0;
2057-
double csum = 0.0;
20582100
double x, px, qx, result;
20592101
Py_ssize_t i, m, n;
20602102
int found_nan = 0;
@@ -2099,15 +2141,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
20992141
result = Py_NAN;
21002142
goto done;
21012143
}
2102-
if (max == 0.0) {
2103-
result = 0.0;
2104-
goto done;
2105-
}
2106-
for (i=0 ; i<n ; i++) {
2107-
x = diffs[i] / max;
2108-
csum += x * x;
2109-
}
2110-
result = max * sqrt(csum);
2144+
result = max * sqrt(scaled_vector_squared(n, diffs, max));
21112145

21122146
done:
21132147
PyObject_Free(diffs);
@@ -2122,7 +2156,6 @@ math_hypot(PyObject *self, PyObject *args)
21222156
PyObject *item;
21232157
double *coordinates;
21242158
double max = 0.0;
2125-
double csum = 0.0;
21262159
double x, result;
21272160
int found_nan = 0;
21282161

@@ -2152,15 +2185,7 @@ math_hypot(PyObject *self, PyObject *args)
21522185
result = Py_NAN;
21532186
goto done;
21542187
}
2155-
if (max == 0.0) {
2156-
result = 0.0;
2157-
goto done;
2158-
}
2159-
for (i=0 ; i<n ; i++) {
2160-
x = coordinates[i] / max;
2161-
csum += x * x;
2162-
}
2163-
result = max * sqrt(csum);
2188+
result = max * sqrt(scaled_vector_squared(n, coordinates, max));
21642189

21652190
done:
21662191
PyObject_Free(coordinates);

0 commit comments

Comments
 (0)