diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index d8e9e70ec7227..dd53fd187d38e 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) +- Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`) - Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`) - Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 79ee71ddb1047..0e358e611f418 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1011,7 +1011,11 @@ def reset_identity(values): if not not_indexed_same: result = concat(values, axis=self.axis) - ax = self.filter(lambda x: True).axes[self.axis] + ax = ( + self.filter(lambda x: True).axes[self.axis] + if self.dropna + else self._selected_obj._get_axis(self.axis) + ) # this is a very unfortunate situation # we can't use reindex to restore the original order diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index f250fe1a4392d..1aa5845e20753 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1062,9 +1062,10 @@ def test_apply_by_cols_equals_apply_by_rows_transposed(): tm.assert_frame_equal(by_cols, df) -def test_apply_dropna_with_indexed_same(): +@pytest.mark.parametrize("dropna", [True, False]) +def test_apply_dropna_with_indexed_same(dropna): # GH 38227 - + # GH#43205 df = DataFrame( { "col": [1, 2, 3, 4, 5], @@ -1072,15 +1073,8 @@ def test_apply_dropna_with_indexed_same(): }, index=list("xxyxz"), ) - result = df.groupby("group").apply(lambda x: x) - expected = DataFrame( - { - "col": [1, 4, 5], - "group": ["a", "b", "b"], - }, - index=list("xxz"), - ) - + result = df.groupby("group", dropna=dropna).apply(lambda x: x) + expected = df.dropna() if dropna else df.iloc[[0, 3, 1, 2, 4]] tm.assert_frame_equal(result, expected)