-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Series.value_counts: Preserve original ordering #24302
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
0270839
c857119
e966aa7
b827df6
453d3df
405ac0e
355f872
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -666,7 +666,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, | |
value_counts : Series | ||
|
||
""" | ||
from pandas.core.series import Series, Index | ||
from pandas import Series, Index, CategoricalIndex | ||
name = getattr(values, 'name', None) | ||
|
||
if bins is not None: | ||
|
@@ -708,6 +708,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False, | |
|
||
if sort: | ||
result = result.sort_values(ascending=ascending) | ||
elif bins is None: | ||
uniq = unique(values) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added it above, although it is not computed twice now. Did you mean it like this? |
||
if not isinstance(result.index, CategoricalIndex): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this check needed? (or maybe it just needs to be for an ordered categorical) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue was the test
which returns
tomspur marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result = result.reindex(uniq) | ||
|
||
if normalize: | ||
result = result / float(counts.sum()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -962,6 +962,52 @@ def test_value_counts_uint64(self): | |
if not compat.is_platform_32bit(): | ||
tm.assert_series_equal(result, expected) | ||
|
||
def test_value_counts_nonsorted_single_occurance(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. paramterize on sort There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done and pushed |
||
# GH 12679 | ||
# All items occour exactly once. | ||
# No matter if sorted or not, the resulting values should be in | ||
# the same order. | ||
s = Series(list('bacdef')) | ||
|
||
# Guarantee the same index if value_counts(sort=False) is used | ||
tomspur marked this conversation as resolved.
Show resolved
Hide resolved
|
||
vc = s.value_counts(sort=False, ascending=False) | ||
tm.assert_series_equal(Series(vc.index), s) | ||
vc = s.value_counts(sort=False, ascending=True) | ||
tm.assert_series_equal(Series(vc.index), s) | ||
|
||
@pytest.mark.xfail(reason="sort=True does not guarantee the same order") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this xfail? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could not get it working for the |
||
def test_value_counts_sorted_single_occurance(self): | ||
# GH 12679 | ||
s = Series(list('bacdef')) | ||
# Guarantee does not hold yet for the sort=True case | ||
vc = s.value_counts(sort=True, ascending=False) | ||
tm.assert_series_equal(Series(vc.index), s) | ||
vc = s.value_counts(sort=True, ascending=True) | ||
tm.assert_series_equal(Series(vc.index), s) | ||
|
||
def test_value_counts_nonsorted_double_occurance(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. parametrize these. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I c, ok then, parameterize over ascending though |
||
# GH 12679 | ||
# 'a' is there twice. Sorted, it should be there at the top. | ||
s = Series(list('bacaef')) | ||
ref_nonsorted = Series(list('bacef')) | ||
|
||
# Guarantee the same index if value_counts(sort=False) is used | ||
vc = s.value_counts(sort=False, ascending=False) | ||
tm.assert_series_equal(Series(vc.index), ref_nonsorted) | ||
vc = s.value_counts(sort=False, ascending=True) | ||
tm.assert_series_equal(Series(vc.index), ref_nonsorted) | ||
|
||
@pytest.mark.xfail(reason="sort=True does not guarantee the same order") | ||
tomspur marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def test_value_counts_sorted_double_occurance(self): | ||
# GH 12679 | ||
s = Series(list('bacaef')) | ||
ref_sorted = Series(list('abcef')) | ||
# Guarantee does not hold yet for the sort=True case | ||
vc = s.value_counts(sort=True, ascending=False) | ||
tm.assert_series_equal(Series(vc.index), ref_sorted) | ||
vc = s.value_counts(sort=True, ascending=True) | ||
tm.assert_series_equal(Series(vc.index), ref_sorted) | ||
|
||
|
||
class TestDuplicated(object): | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move to api breaking changes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done and pushed