diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d138ebb9c02a3..7fc8b16bbed9a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1010,6 +1010,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`) - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would not respect ``dropna=False`` when the input DataFrame/Series had a NaN values in a :class:`MultiIndex` (:issue:`46783`) - Bug in :meth:`DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list which misses the resample key (:issue:`47362`) +- Bug in :meth:`DataFrame.groupby` would lose index columns when the DataFrame is empty for transforms, like fillna (:issue:`47787`) - Reshaping diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 28e1b2b388035..26ae5ba91708f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1020,6 +1020,11 @@ def curried(x): return self.apply(curried) is_transform = name in base.transformation_kernels + + # Transform needs to keep the same schema, including when empty + if is_transform and self._obj_with_exclusions.empty: + return self._obj_with_exclusions + result = self._python_apply_general( curried, self._obj_with_exclusions, is_transform=is_transform ) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 920b869ef799b..df49db9ec9dad 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2348,6 +2348,42 @@ def test_groupby_duplicate_index(): tm.assert_series_equal(result, expected) +def test_group_on_empty_multiindex(transformation_func, request): + # GH 47787 + # With one row, those are transforms so the schema should be the same + if transformation_func == "tshift": + mark = pytest.mark.xfail(raises=NotImplementedError) + request.node.add_marker(mark) + df = DataFrame( + data=[[1, Timestamp("today"), 3, 4]], + columns=["col_1", "col_2", "col_3", "col_4"], + ) + df["col_3"] = df["col_3"].astype(int) + df["col_4"] = df["col_4"].astype(int) + df = df.set_index(["col_1", "col_2"]) + if transformation_func == "fillna": + args = ("ffill",) + elif transformation_func == "tshift": + args = (1, "D") + else: + args = () + result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args) + expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0] + if transformation_func in ("diff", "shift"): + expected = expected.astype(int) + tm.assert_equal(result, expected) + + result = ( + df["col_3"].iloc[:0].groupby(["col_1"]).transform(transformation_func, *args) + ) + expected = ( + df["col_3"].groupby(["col_1"]).transform(transformation_func, *args).iloc[:0] + ) + if transformation_func in ("diff", "shift"): + expected = expected.astype(int) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( "idx", [