Skip to content

Commit 9a7bfe6

Browse files
authored
ENH: Add single label to value_counts (#50955)
1 parent 367d2a8 commit 9a7bfe6

File tree

3 files changed

+34
-5
lines changed

3 files changed

+34
-5
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,7 @@ Removal of prior version deprecations/changes
10891089
- Arguments after ``expr`` in :meth:`DataFrame.eval` and :meth:`DataFrame.query` are keyword-only (:issue:`47587`)
10901090
- Removed :meth:`Index._get_attributes_dict` (:issue:`50648`)
10911091
- Removed :meth:`Series.__array_wrap__` (:issue:`50648`)
1092+
- Changed behavior of :meth:`.DataFrame.value_counts` to return a :class:`Series` with :class:`MultiIndex` for any list-like (one element or not) but an :class:`Index` for a single label (:issue:`50829`)
10921093

10931094
.. ---------------------------------------------------------------------------
10941095
.. _whatsnew_200.performance:

pandas/core/frame.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6957,7 +6957,7 @@ def value_counts(
69576957
69586958
Parameters
69596959
----------
6960-
subset : list-like, optional
6960+
subset : label or list of labels, optional
69616961
Columns to use when counting unique combinations.
69626962
normalize : bool, default False
69636963
Return proportions rather than frequencies.
@@ -6981,9 +6981,10 @@ def value_counts(
69816981
Notes
69826982
-----
69836983
The returned Series will have a MultiIndex with one level per input
6984-
column. By default, rows that contain any NA values are omitted from
6985-
the result. By default, the resulting Series will be in descending
6986-
order so that the first element is the most frequently-occurring row.
6984+
column but an Index (non-multi) for a single label. By default, rows
6985+
that contain any NA values are omitted from the result. By default,
6986+
the resulting Series will be in descending order so that the first
6987+
element is the most frequently-occurring row.
69876988
69886989
Examples
69896990
--------
@@ -7049,6 +7050,13 @@ def value_counts(
70497050
John Smith 1
70507051
NaN 1
70517052
Name: count, dtype: int64
7053+
7054+
>>> df.value_counts("first_name")
7055+
first_name
7056+
John 2
7057+
Anne 1
7058+
Beth 1
7059+
Name: count, dtype: int64
70527060
"""
70537061
if subset is None:
70547062
subset = self.columns.tolist()
@@ -7063,7 +7071,7 @@ def value_counts(
70637071
counts /= counts.sum()
70647072

70657073
# Force MultiIndex for a list-like subset with a single column
7066-
if len(subset) == 1:
7074+
if is_list_like(subset) and len(subset) == 1:
70677075
counts.index = MultiIndex.from_arrays(
70687076
[counts.index], names=[counts.index.name]
70697077
)

pandas/tests/frame/methods/test_value_counts.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas._testing as tm
@@ -155,3 +156,22 @@ def test_data_frame_value_counts_dropna_false(nulls_fixture):
155156
)
156157

157158
tm.assert_series_equal(result, expected)
159+
160+
161+
@pytest.mark.parametrize("columns", (["first_name", "middle_name"], [0, 1]))
162+
def test_data_frame_value_counts_subset(nulls_fixture, columns):
163+
# GH 50829
164+
df = pd.DataFrame(
165+
{
166+
columns[0]: ["John", "Anne", "John", "Beth"],
167+
columns[1]: ["Smith", nulls_fixture, nulls_fixture, "Louise"],
168+
},
169+
)
170+
result = df.value_counts(columns[0])
171+
expected = pd.Series(
172+
data=[2, 1, 1],
173+
index=pd.Index(["John", "Anne", "Beth"], name=columns[0]),
174+
name="count",
175+
)
176+
177+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)