ENH: Add single label to value_counts (#50955)

tpackard1 · web-flow · commit 9a7bfe60f1d9 · 2023-02-19T22:27:38.000-05:00
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -1089,6 +1089,7 @@ Removal of prior version deprecations/changes
 - Arguments after ``expr`` in :meth:`DataFrame.eval` and :meth:`DataFrame.query` are keyword-only (:issue:`47587`)
 - Removed :meth:`Index._get_attributes_dict` (:issue:`50648`)
 - Removed :meth:`Series.__array_wrap__` (:issue:`50648`)
+- Changed behavior of :meth:`.DataFrame.value_counts` to return a :class:`Series` with :class:`MultiIndex` for any list-like (one element or not), but an :class:`Index` for a single label (:issue:`50829`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_200.performance:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6957,7 +6957,7 @@ def value_counts(
 
         Parameters
         ----------
-        subset : list-like, optional
+        subset : label or list of labels, optional
             Columns to use when counting unique combinations.
         normalize : bool, default False
             Return proportions rather than frequencies.
@@ -6981,9 +6981,10 @@ def value_counts(
         Notes
         -----
         The returned Series will have a MultiIndex with one level per input
-        column. By default, rows that contain any NA values are omitted from
-        the result. By default, the resulting Series will be in descending
-        order so that the first element is the most frequently-occurring row.
+        column, but a regular (non-multi) Index when ``subset`` is a single
+        label. By default, rows that contain any NA values are omitted from
+        the result, and the resulting Series is sorted in descending order
+        so that the first element is the most frequently-occurring row.
 
         Examples
         --------
@@ -7049,6 +7050,13 @@ def value_counts(
         John        Smith          1
                     NaN            1
         Name: count, dtype: int64
+
+        >>> df.value_counts("first_name")
+        first_name
+        John    2
+        Anne    1
+        Beth    1
+        Name: count, dtype: int64
         """
         if subset is None:
             subset = self.columns.tolist()
@@ -7063,7 +7071,7 @@ def value_counts(
             counts /= counts.sum()
 
         # Force MultiIndex for single column
-        if len(subset) == 1:
+        if is_list_like(subset) and len(subset) == 1:
             counts.index = MultiIndex.from_arrays(
                 [counts.index], names=[counts.index.name]
             )
diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas as pd
 import pandas._testing as tm
@@ -155,3 +156,22 @@ def test_data_frame_value_counts_dropna_false(nulls_fixture):
     )
 
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("columns", (["first_name", "middle_name"], [0, 1]))
+def test_data_frame_value_counts_subset(nulls_fixture, columns):
+    # GH 50829: a single label for subset gives a flat Index, not a MultiIndex
+    df = pd.DataFrame(
+        {
+            columns[0]: ["John", "Anne", "John", "Beth"],
+            columns[1]: ["Smith", nulls_fixture, nulls_fixture, "Louise"],
+        },
+    )
+    result = df.value_counts(columns[0])
+    expected = pd.Series(
+        data=[2, 1, 1],
+        index=pd.Index(["John", "Anne", "Beth"], name=columns[0]),
+        name="count",
+    )
+
+    tm.assert_series_equal(result, expected)