@@ -2456,7 +2456,7 @@ Given that csum >= 1.0, we have:
2456
2456
Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2457
2457
2458
2458
To minimize loss of information during the accumulation of fractional
2459
- values, the lo**2 term has a separate accumulator.
2459
+ values, each term has a separate accumulator.
2460
2460
2461
2461
The square root differential correction is needed because a
2462
2462
correctly rounded square root of a correctly rounded sum of
@@ -2487,7 +2487,7 @@ static inline double
2487
2487
vector_norm (Py_ssize_t n , double * vec , double max , int found_nan )
2488
2488
{
2489
2489
const double T27 = 134217729.0 ; /* ldexp(1.0, 27) + 1.0 */
2490
- double x , csum = 1.0 , oldcsum , frac = 0.0 , frac_lo = 0.0 , scale ;
2490
+ double x , csum = 1.0 , oldcsum , scale , frac = 0.0 , frac_mid = 0.0 , frac_lo = 0.0 ;
2491
2491
double t , hi , lo , h ;
2492
2492
int max_e ;
2493
2493
Py_ssize_t i ;
@@ -2529,12 +2529,12 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2529
2529
assert (fabs (csum ) >= fabs (x ));
2530
2530
oldcsum = csum ;
2531
2531
csum += x ;
2532
- frac += (oldcsum - csum ) + x ;
2532
+ frac_mid += (oldcsum - csum ) + x ;
2533
2533
2534
2534
assert (csum + lo * lo == csum );
2535
2535
frac_lo += lo * lo ;
2536
2536
}
2537
- frac += frac_lo ;
2537
+ frac += frac_lo + frac_mid ;
2538
2538
h = sqrt (csum - 1.0 + frac );
2539
2539
2540
2540
x = h ;
0 commit comments