Skip to content

Commit db22ee2

Browse files
committed
Series.value_counts: Preserve original ordering when using sort=False
Ensure that value_counts returns the index in the same order as the input object when the values are not sorted (sort=False), regardless of whether ascending is True or False. This fixes #12679.
1 parent 5d134ec commit db22ee2

File tree

3 files changed

+41
-1
lines changed

3 files changed

+41
-1
lines changed

doc/source/whatsnew/v0.24.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1634,6 +1634,7 @@ Other
16341634

16351635
- Bug where C variables were declared with external linkage causing import errors if certain other C libraries were imported before Pandas. (:issue:`24113`)
16361636
- Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`)
1637+
- :meth:`Series.value_counts` returns the counts in the same ordering as the original series when using ``sort=False`` (:issue:`12679`)
16371638

16381639
.. _whatsnew_0.24.0.contributors:
16391640

pandas/core/algorithms.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
666666
value_counts : Series
667667
668668
"""
669-
from pandas.core.series import Series, Index
669+
from pandas import Series, Index, CategoricalIndex
670670
name = getattr(values, 'name', None)
671671

672672
if bins is not None:
@@ -708,6 +708,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
708708

709709
if sort:
710710
result = result.sort_values(ascending=ascending)
711+
else:
712+
uniq = unique(values)
713+
if not isinstance(result.index, CategoricalIndex):
714+
result = result.reindex(uniq)
711715

712716
if normalize:
713717
result = result / float(counts.sum())

pandas/tests/test_algos.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,41 @@ def test_value_counts_uint64(self):
962962
if not compat.is_platform_32bit():
963963
tm.assert_series_equal(result, expected)
964964

965+
def test_value_counts_nonsorted(self):
966+
# All items occur exactly once. No matter if sorted or not, the resulting
967+
# values should be in the same order.
968+
s = Series(list('bacdef'))
969+
970+
# Guarantee the same index if value_counts(sort=False) is used
971+
vc = s.value_counts(sort=False, ascending=False)
972+
tm.assert_series_equal(Series(vc.index), s)
973+
vc = s.value_counts(sort=False, ascending=True)
974+
tm.assert_series_equal(Series(vc.index), s)
975+
976+
# Guarantee does not hold yet for the sort=True case
977+
#vc = s.value_counts(sort=True, ascending=False)
978+
#tm.assert_series_equal(Series(vc.index), s)
979+
#vc = s.value_counts(sort=True, ascending=True)
980+
#tm.assert_series_equal(Series(vc.index), s)
981+
982+
# 'a' is there twice. Sorted, it should be there at the top, unsorted it
983+
# should stay where it is.
984+
s = Series(list('bacaef'))
985+
ref_nonsorted = Series(list('bacef'))
986+
ref_sorted = Series(list('abcef'))
987+
988+
# Guarantee the same index if value_counts(sort=False) is used
989+
vc = s.value_counts(sort=False, ascending=False)
990+
tm.assert_series_equal(Series(vc.index), ref_nonsorted)
991+
vc = s.value_counts(sort=False, ascending=True)
992+
tm.assert_series_equal(Series(vc.index), ref_nonsorted)
993+
994+
# Guarantee does not hold yet for the sort=True case
995+
#vc = s.value_counts(sort=True, ascending=False)
996+
#tm.assert_series_equal(Series(vc.index), ref_sorted)
997+
#vc = s.value_counts(sort=True, ascending=True)
998+
#tm.assert_series_equal(Series(vc.index), ref_sorted)
999+
9651000

9661001
class TestDuplicated(object):
9671002

0 commit comments

Comments
 (0)