@@ -2031,6 +2031,49 @@ math_fmod_impl(PyObject *module, double x, double y)
2031
2031
return PyFloat_FromDouble (r );
2032
2032
}
2033
2033
2034
+ /*
2035
+ Given an *n* length *vec* of non-negative, non-nan, non-inf values
2036
+ where *max* is the largest value in the vector, compute:
2037
+
2038
+ sum((x / max) ** 2 for x in vec)
2039
+
2040
+ When a maximum value is found, it is swapped to the end. This
2041
+ lets us skip one loop iteration and just add 1.0 at the end.
2042
+ Saving the largest value for last also helps improve accuracy.
2043
+
2044
+ Kahan summation is used to improve accuracy. The *csum*
2045
+ variable tracks the cumulative sum and *frac* tracks
2046
+ fractional round-off error for the most recent addition.
2047
+
2048
+ */
2049
+
2050
+ static inline double
2051
+ scaled_vector_squared (Py_ssize_t n , double * vec , double max )
2052
+ {
2053
+ double x , csum = 0.0 , oldcsum , frac = 0.0 ;
2054
+ Py_ssize_t i ;
2055
+
2056
+ if (max == 0.0 ) {
2057
+ return 0.0 ;
2058
+ }
2059
+ assert (n > 0 );
2060
+ for (i = 0 ; i < n - 1 ; i ++ ) {
2061
+ x = vec [i ];
2062
+ if (x == max ) {
2063
+ x = vec [n - 1 ];
2064
+ vec [n - 1 ] = max ;
2065
+ }
2066
+ x /= max ;
2067
+ x = x * x - frac ;
2068
+ oldcsum = csum ;
2069
+ csum += x ;
2070
+ frac = (csum - oldcsum ) - x ;
2071
+ }
2072
+ assert (vec [n - 1 ] == max );
2073
+ csum += 1.0 - frac ;
2074
+ return csum ;
2075
+ }
2076
+
2034
2077
/*[clinic input]
2035
2078
math.dist
2036
2079
@@ -2054,7 +2097,6 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2054
2097
PyObject * item ;
2055
2098
double * diffs ;
2056
2099
double max = 0.0 ;
2057
- double csum = 0.0 ;
2058
2100
double x , px , qx , result ;
2059
2101
Py_ssize_t i , m , n ;
2060
2102
int found_nan = 0 ;
@@ -2099,15 +2141,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2099
2141
result = Py_NAN ;
2100
2142
goto done ;
2101
2143
}
2102
- if (max == 0.0 ) {
2103
- result = 0.0 ;
2104
- goto done ;
2105
- }
2106
- for (i = 0 ; i < n ; i ++ ) {
2107
- x = diffs [i ] / max ;
2108
- csum += x * x ;
2109
- }
2110
- result = max * sqrt (csum );
2144
+ result = max * sqrt (scaled_vector_squared (n , diffs , max ));
2111
2145
2112
2146
done :
2113
2147
PyObject_Free (diffs );
@@ -2122,7 +2156,6 @@ math_hypot(PyObject *self, PyObject *args)
2122
2156
PyObject * item ;
2123
2157
double * coordinates ;
2124
2158
double max = 0.0 ;
2125
- double csum = 0.0 ;
2126
2159
double x , result ;
2127
2160
int found_nan = 0 ;
2128
2161
@@ -2152,15 +2185,7 @@ math_hypot(PyObject *self, PyObject *args)
2152
2185
result = Py_NAN ;
2153
2186
goto done ;
2154
2187
}
2155
- if (max == 0.0 ) {
2156
- result = 0.0 ;
2157
- goto done ;
2158
- }
2159
- for (i = 0 ; i < n ; i ++ ) {
2160
- x = coordinates [i ] / max ;
2161
- csum += x * x ;
2162
- }
2163
- result = max * sqrt (csum );
2188
+ result = max * sqrt (scaled_vector_squared (n , coordinates , max ));
2164
2189
2165
2190
done :
2166
2191
PyObject_Free (coordinates );
0 commit comments