From 2711ffe828c85f3905ac29ae2447afaa56e82c72 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 12 May 2023 14:14:53 -0700 Subject: [PATCH 1/4] BUG: groupby.apply raising a TypeError when __getitem__ selects multiple columns --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/groupby/generic.py | 10 +++++++--- pandas/tests/groupby/test_apply.py | 12 ++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 52fc8512c9db3..44327c05ac794 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -416,7 +416,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) -- +- Bug in :meth:`GroupBy.apply` raising a ``TypeError`` when selecting multiple columns and providing a function that returns ``np.ndarray`` results (:issue:`18930`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d26448dffc11a..ef36156f9c0aa 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -51,6 +51,7 @@ CategoricalDtype, IntervalDtype, ) +from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import ( isna, notna, @@ -1474,9 +1475,12 @@ def _wrap_applied_output( # fall through to the outer else clause # TODO: sure this is right? we used to do this # after raising AttributeError above - return self.obj._constructor_sliced( - values, index=key_index, name=self._selection - ) + # GH 18930 + if not is_hashable(self._selection): + name = tuple(self._selection) + else: + name = self._selection + return self.obj._constructor_sliced(values, index=key_index, name=name) elif not isinstance(first_not_none, Series): # values are not series or array-like but scalars # self._selection not passed through to Series as the diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b52b708de50a6..0cdb11cfbf6e0 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1404,3 +1404,15 @@ def test_apply_inconsistent_output(group_col): ) tm.assert_series_equal(result, expected) + + +def test_apply_array_output_multi_getitem(): + # GH 18930 + df = DataFrame( + {"A": {"a": 1, "b": 2}, "B": {"a": 1, "b": 2}, "C": {"a": 1, "b": 2}} + ) + result = df.groupby("A")[["B", "C"]].apply(lambda x: np.array([0])) + expected = Series( + [np.array([0])] * 2, index=Index([1, 2], name="A"), name=("B", "C") + ) + tm.assert_series_equal(result, expected) From 805a5625b6562097b0f00b3f27cc27cf8f946fe6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 12 May 2023 14:20:16 -0700 Subject: [PATCH 2/4] Fix whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 44327c05ac794..370cad535564f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -413,10 +413,10 @@ Groupby/resample/rolling or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument, the function operated on the whole index rather than each element of the index. (:issue:`51979`) - Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`) +- Bug in :meth:`DataFrameGroupBy.apply` raising a ``TypeError`` when selecting multiple columns and providing a function that returns ``np.ndarray`` results (:issue:`18930`) - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) -- Bug in :meth:`GroupBy.apply` raising a ``TypeError`` when selecting multiple columns and providing a function that returns ``np.ndarray`` results (:issue:`18930`) Reshaping ^^^^^^^^^ From f4e255c48ab42127342349c271af7bcddd5af70c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 12 May 2023 18:44:34 -0700 Subject: [PATCH 3/4] typing --- pandas/core/groupby/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ef36156f9c0aa..3a9b81b6845c0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1477,9 +1477,13 @@ def _wrap_applied_output( # after raising AttributeError above # GH 18930 if not is_hashable(self._selection): - name = tuple(self._selection) + # error: Need type annotation for "name" + name = tuple(self._selection) # type: ignore[var-annotated] else: - name = self._selection + # error: Incompatible types in assignment + # (expression has type "Hashable", variable + # has type "Tuple[Any, ...]") + name = self._selection # type: ignore[assignment] return self.obj._constructor_sliced(values, index=key_index, name=name) elif not isinstance(first_not_none, Series): # values are not series or array-like but scalars From 6b9601bf828461762ba535ceb75d0345728c7b72 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 12 May 2023 18:45:55 -0700 Subject: [PATCH 4/4] Typing --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3a9b81b6845c0..c66aaf3c82c70 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1478,7 +1478,7 @@ def _wrap_applied_output( # GH 18930 if not is_hashable(self._selection): # error: Need type annotation for "name" - name = tuple(self._selection) # type: ignore[var-annotated] + name = tuple(self._selection) # type: ignore[var-annotated, arg-type] else: # error: Incompatible types in assignment # (expression has type "Hashable", variable