# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Purpose, per the changelog entries this patch adds (issue 43386): fix groupby-rolling iteration
# and groupby-rolling cov/corr when the input groupings are not sorted.
#
# Files touched:
#   * doc/source/whatsnew/v1.3.4.rst - adds two entries under "Fixed regressions", both citing issue 43386.
#   * pandas/core/window/rolling.py
#       - __iter__: the order of two calls is swapped. Before: self._create_data(self._selected_obj),
#         then .set_axis(self._on) on the result. After: self._selected_obj.set_axis(self._on) first,
#         then self._create_data(...) on that result.
#       - _apply_pairwise (the override that drops self._grouper.names from `target`): now passes
#         `target` through self._create_data(...) before delegating to super()._apply_pairwise(...).
#       - NOTE(review): _create_data is not shown in this patch; presumably it reorders rows by
#         group, which would explain both changes - confirm against its definition.
#   * pandas/tests/window/test_groupby.py - adds test_rolling_corr_cov_unordered, parametrized over
#     "cov" and "corr", using a frame whose "a" column is ["g1", "g2", "g1", "g1"].
#   * pandas/tests/window/test_rolling.py - adds test_iter_rolling_on_dataframe_unordered, which
#     compares list(df.groupby("a").rolling(2)) against df.iloc[idx, [1]] for idx in [[0], [0, 2], [1]].
#
# NOTE(review): in this copy the hunks appear to have had their line breaks and indentation collapsed
# (several hunk lines share one physical line), so it likely cannot be applied as-is; regenerate it
# from the originating commit before use - TODO confirm.
diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst index 05667264ad9af..6f07dc3e1e2f9 100644 --- a/doc/source/whatsnew/v1.3.4.rst +++ b/doc/source/whatsnew/v1.3.4.rst @@ -24,6 +24,8 @@ Fixed regressions - Fixed regression in :meth:`pandas.read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`) - Fixed regression in :meth:`DataFrame.explode` raising ``AssertionError`` when ``column`` is any scalar which is not a string (:issue:`43314`) - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`) +- Fixed regression when iterating over a :class:`DataFrame.groupby.rolling` object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (:issue:`43386`) +- Fixed regression in :meth:`DataFrame.groupby.rolling.cov` and :meth:`DataFrame.groupby.rolling.corr` computing incorrect results if the input groupings were not sorted (:issue:`43386`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2d5f148a6437a..1c714db78fa46 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -290,8 +290,8 @@ def __repr__(self) -> str: return f"{type(self).__name__} [{attrs}]" def __iter__(self): - obj = self._create_data(self._selected_obj) - obj = obj.set_axis(self._on) + obj = self._selected_obj.set_axis(self._on) + obj = self._create_data(obj) indexer = self._get_window_indexer() start, end = indexer.get_window_bounds( @@ -649,6 +649,7 @@ def _apply_pairwise( """ # Manually drop the grouping column first target = target.drop(columns=self._grouper.names, errors="ignore") + target = self._create_data(target) result = super()._apply_pairwise(target, other, pairwise, func) # 1) Determine the levels + codes of the groupby levels if other is not None: diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 5d7fc50620ef8..1711a54975b71 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -146,6 +146,42 @@ def func(x): expected = g.apply(func) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "func, expected_values", + [("cov", [[1.0, 1.0], [1.0, 4.0]]), ("corr", [[1.0, 0.5], [0.5, 1.0]])], + ) + def test_rolling_corr_cov_unordered(self, func, expected_values): + # GH 43386 + df = DataFrame( + { + "a": ["g1", "g2", "g1", "g1"], + "b": [0, 0, 1, 2], + "c": [2, 0, 6, 4], + } + ) + rol = df.groupby("a").rolling(3) + result = getattr(rol, func)() + expected = DataFrame( + { + "b": 4 * [np.nan] + expected_values[0] + 2 * [np.nan], + "c": 4 * [np.nan] + expected_values[1] + 2 * [np.nan], + }, + index=MultiIndex.from_tuples( + [ + ("g1", 0, "b"), + ("g1", 0, "c"), + ("g1", 2, "b"), + ("g1", 2, "c"), + ("g1", 3, "b"), + ("g1", 3, "c"), + ("g2", 1, "b"), + ("g2", 1, "c"), + ], + names=["a", None, None], 
), + ) + tm.assert_frame_equal(result, expected) + def test_rolling_apply(self, raw): g = self.frame.groupby("A") r = g.rolling(window=4) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 5bf2df0208ddc..b4f9b1ef6cb55 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -792,6 +792,15 @@ def test_iter_rolling_on_dataframe(expected, window): tm.assert_frame_equal(actual, expected) +def test_iter_rolling_on_dataframe_unordered(): + # GH 43386 + df = DataFrame({"a": ["x", "y", "x"], "b": [0, 1, 2]}) + results = list(df.groupby("a").rolling(2)) + expecteds = [df.iloc[idx, [1]] for idx in [[0], [0, 2], [1]]] + for result, expected in zip(results, expecteds): + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "ser,expected,window, min_periods", [