diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8dbc6728dccfe..b166f0a360d26 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -771,6 +771,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) - Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`) +- Bug in :meth:`.DataFrameGroupBy.apply` dropped values on ``nan`` group when returning the same axes with the original frame (:issue:`38227`) - Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle with arraylike ``q`` when grouping by columns (:issue:`33795`) - Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 583fc6bf8ddb7..4b9e554aa8bc2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1216,7 +1216,7 @@ def reset_identity(values): if not not_indexed_same: result = concat(values, axis=self.axis) - ax = self._selected_obj._get_axis(self.axis) + ax = self.filter(lambda x: True).axes[self.axis] # this is a very unfortunate situation # we can't use reindex to restore the original order diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b2074dcb08c95..975cebe16dc55 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1087,3 +1087,25 @@ def test_apply_by_cols_equals_apply_by_rows_transposed(): tm.assert_frame_equal(by_cols, by_rows.T) tm.assert_frame_equal(by_cols, df) + + +def test_apply_dropna_with_indexed_same(): + # GH 38227 + + df = DataFrame( + { + "col": [1, 2, 3, 4, 5], + "group": ["a", np.nan, np.nan, "b", "b"], + }, + index=list("xxyxz"), + ) + result = df.groupby("group").apply(lambda x: x) + expected = DataFrame( + { + "col": [1, 4, 5], + "group": ["a", "b", "b"], + }, + index=list("xxz"), + ) + + tm.assert_frame_equal(result, expected)