diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a672f8fc96ba0..f0408db7f4ef8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -21,6 +21,7 @@ Mapping, TypeVar, Union, + cast, ) import warnings @@ -30,7 +31,9 @@ from pandas._typing import ( ArrayLike, FrameOrSeries, + Manager, Manager2D, + SingleManager, ) from pandas.util._decorators import ( Appender, @@ -80,7 +83,6 @@ Index, MultiIndex, all_indexes_same, - default_index, ) from pandas.core.series import Series from pandas.core.util.numba_ import maybe_use_numba @@ -159,19 +161,21 @@ def pinner(cls): class SeriesGroupBy(GroupBy[Series]): _apply_allowlist = base.series_apply_allowlist - def _wrap_agged_manager(self, mgr: Manager2D) -> Series: - single = mgr.iget(0) + def _wrap_agged_manager(self, mgr: Manager) -> Series: + if mgr.ndim == 1: + mgr = cast(SingleManager, mgr) + single = mgr + else: + mgr = cast(Manager2D, mgr) + single = mgr.iget(0) ser = self.obj._constructor(single, name=self.obj.name) # NB: caller is responsible for setting ser.index return ser - def _get_data_to_aggregate(self) -> Manager2D: + def _get_data_to_aggregate(self) -> SingleManager: ser = self._obj_with_exclusions single = ser._mgr - columns = default_index(1) - # Much faster than using ser.to_frame() since we avoid inferring columns - # from scalar - return single.to_2d_mgr(columns) + return single def _iterate_slices(self) -> Iterable[Series]: yield self._selected_obj diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7ff672429afcf..b41935902b9cf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1745,6 +1745,8 @@ def count(self) -> Series | DataFrame: ids, _, ngroups = self.grouper.group_info mask = ids != -1 + is_series = data.ndim == 1 + def hfunc(bvalues: ArrayLike) -> ArrayLike: # TODO(2DEA): reshape would not be necessary with 2D EAs if bvalues.ndim == 1: @@ -1754,6 +1756,10 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: masked = mask & ~isna(bvalues) counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1) + if is_series: + assert counted.ndim == 2 + assert counted.shape[0] == 1 + return counted[0] return counted new_mgr = data.grouped_reduce(hfunc) @@ -2702,7 +2708,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: mgr = self._get_data_to_aggregate() res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) - if len(res_mgr.items) != len(mgr.items): + if not is_ser and len(res_mgr.items) != len(mgr.items): warnings.warn( "Dropping invalid columns in " f"{type(self).__name__}.quantile is deprecated. " @@ -3134,14 +3140,15 @@ def blk_func(values: ArrayLike) -> ArrayLike: obj = self._obj_with_exclusions # Operate block-wise instead of column-by-column - orig_ndim = obj.ndim + is_ser = obj.ndim == 1 mgr = self._get_data_to_aggregate() if numeric_only: mgr = mgr.get_numeric_data() res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) - if len(res_mgr.items) != len(mgr.items): + + if not is_ser and len(res_mgr.items) != len(mgr.items): howstr = how.replace("group_", "") warnings.warn( "Dropping invalid columns in " @@ -3162,7 +3169,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: # We should never get here raise TypeError("All columns were dropped in grouped_reduce") - if orig_ndim == 1: + if is_ser: out = self._wrap_agged_manager(res_mgr) out.index = self.grouper.result_index else: diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 4d3dcb9c4732e..080796e7957a3 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -10,6 +10,7 @@ ) from pandas._typing import ( + ArrayLike, DtypeObj, Shape, ) @@ -18,7 +19,10 @@ from pandas.core.dtypes.cast import find_common_type from pandas.core.base import PandasObject -from pandas.core.indexes.api import Index +from pandas.core.indexes.api import ( + Index, + default_index, +) T = TypeVar("T", bound="DataManager") @@ -171,6 +175,23 @@ def setitem_inplace(self, indexer, value) -> None: """ self.array[indexer] = value + def grouped_reduce(self, func, ignore_failures: bool = False): + """ + ignore_failures : bool, default False + Not used; for compatibility with ArrayManager/BlockManager. + """ + + arr = self.array + res = func(arr) + index = default_index(len(res)) + + mgr = type(self).from_array(res, index) + return mgr + + @classmethod + def from_array(cls, arr: ArrayLike, index: Index): + raise AbstractMethodError(cls) + def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None: """ diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 3b3bd402e4cc7..0c0ee73f48233 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -350,11 +350,10 @@ def test_agg(): expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - warn = FutureWarning if t in cases[1:3] else None - with tm.assert_produces_warning( - warn, match="Dropping invalid columns", check_stacklevel=False - ): - # .var on dt64 column raises and is dropped + with tm.assert_produces_warning(None): + # .var on dt64 column raises and is dropped, but the path in core.apply + # that it goes through will still suppress a TypeError even + # once the deprecations in the groupby code are enforced result = t.aggregate([np.mean, np.std]) tm.assert_frame_equal(result, expected)