Skip to content

Commit a1999b4

Browse files
rhettinger authored and lisroach committed
bpo-36018: Address more reviewer feedback (pythonGH-15733)
1 parent 1546dc4 commit a1999b4

File tree

3 files changed

+69
-39
lines changed

3 files changed

+69
-39
lines changed

Doc/library/statistics.rst

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -514,15 +514,14 @@ However, for reading convenience, most of the examples show sorted sequences.
514514

515515
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set
516516
*n* to 100 for percentiles which gives the 99 cut points that separate
517-
*data* in to 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
517+
*data* into 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
518518
is not at least 1.
519519

520-
The *data* can be any iterable containing sample data or it can be an
521-
instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
520+
The *data* can be any iterable containing sample data. For meaningful
522521
results, the number of data points in *data* should be larger than *n*.
523522
Raises :exc:`StatisticsError` if there are not at least two data points.
524523

525-
For sample data, the cut points are linearly interpolated from the
524+
The cut points are linearly interpolated from the
526525
two nearest data points. For example, if a cut point falls one-third
527526
of the distance between two sample values, ``100`` and ``112``, the
528527
cut-point will evaluate to ``104``.
@@ -547,9 +546,6 @@ However, for reading convenience, most of the examples show sorted sequences.
547546
values, the method sorts them and assigns the following percentiles:
548547
0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.
549548

550-
If *data* is an instance of a class that defines an
551-
:meth:`~inv_cdf` method, setting *method* has no effect.
552-
553549
.. doctest::
554550

555551
# Decile cut points for empirically sampled data
@@ -561,11 +557,6 @@ However, for reading convenience, most of the examples show sorted sequences.
561557
>>> [round(q, 1) for q in quantiles(data, n=10)]
562558
[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]
563559

564-
>>> # Quartile cut points for the standard normal distribution
565-
>>> Z = NormalDist()
566-
>>> [round(q, 4) for q in quantiles(Z, n=4)]
567-
[-0.6745, 0.0, 0.6745]
568-
569560
.. versionadded:: 3.8
570561

571562

@@ -607,6 +598,18 @@ of applications in statistics.
607598
<https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal
608599
distribution.
609600

601+
.. attribute:: median
602+
603+
A read-only property for the `median
604+
<https://en.wikipedia.org/wiki/Median>`_ of a normal
605+
distribution.
606+
607+
.. attribute:: mode
608+
609+
A read-only property for the `mode
610+
<https://en.wikipedia.org/wiki/Mode_(statistics)>`_ of a normal
611+
distribution.
612+
610613
.. attribute:: stdev
611614

612615
A read-only property for the `standard deviation
@@ -678,6 +681,16 @@ of applications in statistics.
678681
the two probability density functions
679682
<https://www.rasch.org/rmt/rmt101r.htm>`_.
680683

684+
.. method:: NormalDist.quantiles(n=4)
685+
686+
Divide the normal distribution into *n* continuous intervals with
687+
equal probability. Returns a list of (n - 1) cut points separating
688+
the intervals.
689+
690+
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
691+
Set *n* to 100 for percentiles which gives the 99 cut points that
692+
separate the normal distribution into 100 equal sized groups.
693+
681694
Instances of :class:`NormalDist` support addition, subtraction,
682695
multiplication and division by a constant. These operations
683696
are used for translation and scaling. For example:
@@ -733,9 +746,9 @@ Find the `quartiles <https://en.wikipedia.org/wiki/Quartile>`_ and `deciles
733746

734747
.. doctest::
735748

736-
>>> list(map(round, quantiles(sat)))
749+
>>> list(map(round, sat.quantiles()))
737750
[928, 1060, 1192]
738-
>>> list(map(round, quantiles(sat, n=10)))
751+
>>> list(map(round, sat.quantiles(n=10)))
739752
[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]
740753

741754
To estimate the distribution for a model that isn't easy to solve

Lib/statistics.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -624,18 +624,15 @@ def quantiles(data, /, *, n=4, method='exclusive'):
624624
Set *n* to 100 for percentiles which gives the 99 cut points that
625625
separate *data* into 100 equal sized groups.
626626
627-
The *data* can be any iterable containing sample data or it can be
628-
an instance of a class that defines an inv_cdf() method. For sample
629-
data, the cut points are linearly interpolated between data points.
627+
The *data* can be any iterable containing sample data.
628+
The cut points are linearly interpolated between data points.
630629
631630
If *method* is set to *inclusive*, *data* is treated as population
632631
data. The minimum value is treated as the 0th percentile and the
633632
maximum value is treated as the 100th percentile.
634633
"""
635634
if n < 1:
636635
raise StatisticsError('n must be at least 1')
637-
if hasattr(data, 'inv_cdf'):
638-
return [data.inv_cdf(i / n) for i in range(1, n)]
639636
data = sorted(data)
640637
ld = len(data)
641638
if ld < 2:
@@ -955,6 +952,17 @@ def inv_cdf(self, p):
955952
raise StatisticsError('cdf() not defined when sigma at or below zero')
956953
return _normal_dist_inv_cdf(p, self._mu, self._sigma)
957954

955+
def quantiles(self, n=4):
956+
"""Divide into *n* continuous intervals with equal probability.
957+
958+
Returns a list of (n - 1) cut points separating the intervals.
959+
960+
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
961+
Set *n* to 100 for percentiles which gives the 99 cut points that
962+
separate the normal distribution into 100 equal sized groups.
963+
"""
964+
return [self.inv_cdf(i / n) for i in range(1, n)]
965+
958966
def overlap(self, other):
959967
"""Compute the overlapping coefficient (OVL) between two normal distributions.
960968
@@ -994,6 +1002,20 @@ def mean(self):
9941002
"Arithmetic mean of the normal distribution."
9951003
return self._mu
9961004

1005+
@property
1006+
def median(self):
1007+
"Return the median of the normal distribution"
1008+
return self._mu
1009+
1010+
@property
1011+
def mode(self):
1012+
"""Return the mode of the normal distribution
1013+
1014+
The mode is the value x at which the probability density
1015+
function (pdf) takes its maximum value.
1016+
"""
1017+
return self._mu
1018+
9971019
@property
9981020
def stdev(self):
9991021
"Standard deviation of the normal distribution."

Lib/test/test_statistics.py

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,16 +2198,6 @@ def f(x):
21982198
exp = list(map(f, expected))
21992199
act = quantiles(map(f, data), n=n)
22002200
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
2201-
# Quartiles of a standard normal distribution
2202-
for n, expected in [
2203-
(1, []),
2204-
(2, [0.0]),
2205-
(3, [-0.4307, 0.4307]),
2206-
(4 ,[-0.6745, 0.0, 0.6745]),
2207-
]:
2208-
actual = quantiles(statistics.NormalDist(), n=n)
2209-
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
2210-
for e, a in zip(expected, actual)))
22112201
# Q2 agrees with median()
22122202
for k in range(2, 60):
22132203
data = random.choices(range(100), k=k)
@@ -2248,16 +2238,6 @@ def f(x):
22482238
exp = list(map(f, expected))
22492239
act = quantiles(map(f, data), n=n, method="inclusive")
22502240
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
2251-
# Quartiles of a standard normal distribution
2252-
for n, expected in [
2253-
(1, []),
2254-
(2, [0.0]),
2255-
(3, [-0.4307, 0.4307]),
2256-
(4 ,[-0.6745, 0.0, 0.6745]),
2257-
]:
2258-
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
2259-
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
2260-
for e, a in zip(expected, actual)))
22612241
# Natural deciles
22622242
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
22632243
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
@@ -2546,6 +2526,19 @@ def test_inv_cdf(self):
25462526
# Special values
25472527
self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
25482528

2529+
def test_quantiles(self):
2530+
# Quartiles of a standard normal distribution
2531+
Z = self.module.NormalDist()
2532+
for n, expected in [
2533+
(1, []),
2534+
(2, [0.0]),
2535+
(3, [-0.4307, 0.4307]),
2536+
(4 ,[-0.6745, 0.0, 0.6745]),
2537+
]:
2538+
actual = Z.quantiles(n=n)
2539+
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
2540+
for e, a in zip(expected, actual)))
2541+
25492542
def test_overlap(self):
25502543
NormalDist = self.module.NormalDist
25512544

@@ -2612,6 +2605,8 @@ def overlap_numeric(X, Y, *, steps=8_192, z=5):
26122605
def test_properties(self):
26132606
X = self.module.NormalDist(100, 15)
26142607
self.assertEqual(X.mean, 100)
2608+
self.assertEqual(X.median, 100)
2609+
self.assertEqual(X.mode, 100)
26152610
self.assertEqual(X.stdev, 15)
26162611
self.assertEqual(X.variance, 225)
26172612

0 commit comments

Comments
 (0)