@@ -147,21 +147,17 @@ class StatisticsError(ValueError):
147
147
148
148
# === Private utilities ===
149
149
150
- def _sum (data , start = 0 ):
151
- """_sum(data [, start] ) -> (type, sum, count)
150
+ def _sum (data ):
151
+ """_sum(data) -> (type, sum, count)
152
152
153
153
Return a high-precision sum of the given numeric data as a fraction,
154
154
together with the type to be converted to and the count of items.
155
155
156
- If optional argument ``start`` is given, it is added to the total.
157
- If ``data`` is empty, ``start`` (defaulting to 0) is returned.
158
-
159
-
160
156
Examples
161
157
--------
162
158
163
- >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75 )
164
- (<class 'float'>, Fraction(11, 1 ), 5)
159
+ >>> _sum([3, 2.25, 4.5, -0.5, 0.25] )
160
+ (<class 'float'>, Fraction(19, 2 ), 5)
165
161
166
162
Some sources of round-off error will be avoided:
167
163
@@ -184,10 +180,9 @@ def _sum(data, start=0):
184
180
allowed.
185
181
"""
186
182
count = 0
187
- n , d = _exact_ratio (start )
188
- partials = {d : n }
183
+ partials = {}
189
184
partials_get = partials .get
190
- T = _coerce ( int , type ( start ))
185
+ T = int
191
186
for typ , values in groupby (data , type ):
192
187
T = _coerce (T , typ ) # or raise TypeError
193
188
for n , d in map (_exact_ratio , values ):
@@ -200,8 +195,7 @@ def _sum(data, start=0):
200
195
assert not _isfinite (total )
201
196
else :
202
197
# Sum all the partial sums using builtin sum.
203
- # FIXME is this faster if we sum them in order of the denominator?
204
- total = sum (Fraction (n , d ) for d , n in sorted (partials .items ()))
198
+ total = sum (Fraction (n , d ) for d , n in partials .items ())
205
199
return (T , total , count )
206
200
207
201
@@ -252,27 +246,19 @@ def _exact_ratio(x):
252
246
x is expected to be an int, Fraction, Decimal or float.
253
247
"""
254
248
try :
255
- # Optimise the common case of floats. We expect that the most often
256
- # used numeric type will be builtin floats, so try to make this as
257
- # fast as possible.
258
- if type (x ) is float or type (x ) is Decimal :
259
- return x .as_integer_ratio ()
260
- try :
261
- # x may be an int, Fraction, or Integral ABC.
262
- return (x .numerator , x .denominator )
263
- except AttributeError :
264
- try :
265
- # x may be a float or Decimal subclass.
266
- return x .as_integer_ratio ()
267
- except AttributeError :
268
- # Just give up?
269
- pass
249
+ return x .as_integer_ratio ()
250
+ except AttributeError :
251
+ pass
270
252
except (OverflowError , ValueError ):
271
253
# float NAN or INF.
272
254
assert not _isfinite (x )
273
255
return (x , None )
274
- msg = "can't convert type '{}' to numerator/denominator"
275
- raise TypeError (msg .format (type (x ).__name__ ))
256
+ try :
257
+ # x may be an Integral ABC.
258
+ return (x .numerator , x .denominator )
259
+ except AttributeError :
260
+ msg = f"can't convert type '{ type (x ).__name__ } ' to numerator/denominator"
261
+ raise TypeError (msg )
276
262
277
263
278
264
def _convert (value , T ):
@@ -719,14 +705,20 @@ def _ss(data, c=None):
719
705
if c is not None :
720
706
T , total , count = _sum ((x - c )** 2 for x in data )
721
707
return (T , total )
722
- c = mean (data )
723
- T , total , count = _sum ((x - c )** 2 for x in data )
724
- # The following sum should mathematically equal zero, but due to rounding
725
- # error may not.
726
- U , total2 , count2 = _sum ((x - c ) for x in data )
727
- assert T == U and count == count2
728
- total -= total2 ** 2 / len (data )
729
- assert not total < 0 , 'negative sum of square deviations: %f' % total
708
+ T , total , count = _sum (data )
709
+ mean_n , mean_d = (total / count ).as_integer_ratio ()
710
+ partials = Counter ()
711
+ for n , d in map (_exact_ratio , data ):
712
+ diff_n = n * mean_d - d * mean_n
713
+ diff_d = d * mean_d
714
+ partials [diff_d * diff_d ] += diff_n * diff_n
715
+ if None in partials :
716
+ # The sum will be a NAN or INF. We can ignore all the finite
717
+ # partials, and just look at this special one.
718
+ total = partials [None ]
719
+ assert not _isfinite (total )
720
+ else :
721
+ total = sum (Fraction (n , d ) for d , n in partials .items ())
730
722
return (T , total )
731
723
732
724
@@ -830,6 +822,9 @@ def stdev(data, xbar=None):
830
822
1.0810874155219827
831
823
832
824
"""
825
+ # Fixme: Despite the exact sum of squared deviations, some inaccuracy
826
+ # remain because there are two rounding steps. The first occurs in
827
+ # the _convert() step for variance(), the second occurs in math.sqrt().
833
828
var = variance (data , xbar )
834
829
try :
835
830
return var .sqrt ()
@@ -846,6 +841,9 @@ def pstdev(data, mu=None):
846
841
0.986893273527251
847
842
848
843
"""
844
+ # Fixme: Despite the exact sum of squared deviations, some inaccuracy
845
+ # remain because there are two rounding steps. The first occurs in
846
+ # the _convert() step for pvariance(), the second occurs in math.sqrt().
849
847
var = pvariance (data , mu )
850
848
try :
851
849
return var .sqrt ()
0 commit comments