@@ -728,15 +728,19 @@ def _ss(data, c=None):
728
728
lead to garbage results.
729
729
"""
730
730
if c is not None :
731
- T , total , count = _sum ((x - c ) ** 2 for x in data )
731
+ T , total , count = _sum ((d := x - c ) * d for x in data )
732
732
return (T , total )
733
+ # Compute the mean accurate to within 1/2 ulp
733
734
c = mean (data )
734
- T , total , count = _sum ((x - c )** 2 for x in data )
735
- # The following sum should mathematically equal zero, but due to rounding
736
- # error may not.
737
- U , total2 , count2 = _sum ((x - c ) for x in data )
738
- assert T == U and count == count2
739
- total -= total2 ** 2 / len (data )
735
+ # Initial computation for the sum of square deviations
736
+ T , total , count = _sum ((d := x - c ) * d for x in data )
737
+ # Correct any remaining inaccuracy in the mean c.
738
+ # The following sum should mathematically equal zero,
739
+ # but due to the final rounding of the mean, it may not.
740
+ U , error , count2 = _sum ((x - c ) for x in data )
741
+ assert count == count2
742
+ correction = error * error / len (data )
743
+ total -= correction
740
744
assert not total < 0 , 'negative sum of square deviations: %f' % total
741
745
return (T , total )
742
746
@@ -924,8 +928,8 @@ def correlation(x, y, /):
924
928
xbar = fsum (x ) / n
925
929
ybar = fsum (y ) / n
926
930
sxy = fsum ((xi - xbar ) * (yi - ybar ) for xi , yi in zip (x , y ))
927
- sxx = fsum ((xi - xbar ) ** 2.0 for xi in x )
928
- syy = fsum ((yi - ybar ) ** 2.0 for yi in y )
931
+ sxx = fsum ((d := xi - xbar ) * d for xi in x )
932
+ syy = fsum ((d := yi - ybar ) * d for yi in y )
929
933
try :
930
934
return sxy / sqrt (sxx * syy )
931
935
except ZeroDivisionError :
@@ -968,7 +972,7 @@ def linear_regression(x, y, /):
968
972
xbar = fsum (x ) / n
969
973
ybar = fsum (y ) / n
970
974
sxy = fsum ((xi - xbar ) * (yi - ybar ) for xi , yi in zip (x , y ))
971
- sxx = fsum ((xi - xbar ) ** 2.0 for xi in x )
975
+ sxx = fsum ((d := xi - xbar ) * d for xi in x )
972
976
try :
973
977
slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x)
974
978
except ZeroDivisionError :
@@ -1094,10 +1098,11 @@ def samples(self, n, *, seed=None):
1094
1098
1095
1099
def pdf (self , x ):
1096
1100
"Probability density function. P(x <= X < x+dx) / dx"
1097
- variance = self ._sigma ** 2.0
1101
+ variance = self ._sigma * self . _sigma
1098
1102
if not variance :
1099
1103
raise StatisticsError ('pdf() not defined when sigma is zero' )
1100
- return exp ((x - self ._mu )** 2.0 / (- 2.0 * variance )) / sqrt (tau * variance )
1104
+ diff = x - self ._mu
1105
+ return exp (diff * diff / (- 2.0 * variance )) / sqrt (tau * variance )
1101
1106
1102
1107
def cdf (self , x ):
1103
1108
"Cumulative distribution function. P(X <= x)"
@@ -1161,7 +1166,7 @@ def overlap(self, other):
1161
1166
if not dv :
1162
1167
return 1.0 - erf (dm / (2.0 * X ._sigma * sqrt (2.0 )))
1163
1168
a = X ._mu * Y_var - Y ._mu * X_var
1164
- b = X ._sigma * Y ._sigma * sqrt (dm ** 2.0 + dv * log (Y_var / X_var ))
1169
+ b = X ._sigma * Y ._sigma * sqrt (dm * dm + dv * log (Y_var / X_var ))
1165
1170
x1 = (a + b ) / dv
1166
1171
x2 = (a - b ) / dv
1167
1172
return 1.0 - (fabs (Y .cdf (x1 ) - X .cdf (x1 )) + fabs (Y .cdf (x2 ) - X .cdf (x2 )))
@@ -1204,7 +1209,7 @@ def stdev(self):
1204
1209
@property
1205
1210
def variance (self ):
1206
1211
"Square of the standard deviation."
1207
- return self ._sigma ** 2.0
1212
+ return self ._sigma * self . _sigma
1208
1213
1209
1214
def __add__ (x1 , x2 ):
1210
1215
"""Add a constant or another NormalDist instance.
0 commit comments