diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2fe289a5f7c35..07d448bf811a1 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -677,8 +677,8 @@ Groupby/resample/rolling - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`) - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) -- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`#3944`) - +- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) +- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index f5f681d9de797..d91388e9722f7 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -417,6 +417,13 @@ def __init__( self.alpha, ) + def _check_window_bounds( + self, start: np.ndarray, end: np.ndarray, num_vals: int + ) -> None: + # emw algorithms are iterative with each point + # ExponentialMovingWindowIndexer "bounds" are the entire window + pass + def _get_window_indexer(self) -> BaseIndexer: """ Return an indexer class that will compute the window start and end bounds diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f7799912937b7..73bdc626554a2 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -227,6 +227,20 @@ def _validate(self) -> None: if self.method not in ["table", "single"]: raise ValueError("method must be 'table' or 'single") + def _check_window_bounds( + self, start: np.ndarray, end: np.ndarray, num_vals: int + ) -> None: + if len(start) != len(end): + raise ValueError( + f"start ({len(start)}) and end ({len(end)}) bounds must be the " + f"same length" + ) + elif len(start) != num_vals: + raise ValueError( + f"start and end bounds ({len(start)}) must be the same length " + f"as the object ({num_vals})" + ) + def _create_data(self, obj: NDFrameT) -> NDFrameT: """ Split data into blocks & return conformed data. @@ -311,10 +325,7 @@ def __iter__(self): center=self.center, closed=self.closed, ) - - assert len(start) == len( - end - ), "these should be equal in length from get_window_bounds" + self._check_window_bounds(start, end, len(obj)) for s, e in zip(start, end): result = obj.iloc[slice(s, e)] @@ -565,9 +576,7 @@ def calc(x): center=self.center, closed=self.closed, ) - assert len(start) == len( - end - ), "these should be equal in length from get_window_bounds" + self._check_window_bounds(start, end, len(x)) return func(x, start, end, min_periods, *numba_args) @@ -608,6 +617,7 @@ def _numba_apply( center=self.center, closed=self.closed, ) + self._check_window_bounds(start, end, len(values)) aggregator = executor.generate_shared_aggregator( func, engine_kwargs, numba_cache_key_str ) @@ -1544,10 +1554,7 @@ def cov_func(x, y): center=self.center, closed=self.closed, ) - - assert len(start) == len( - end - ), "these should be equal in length from get_window_bounds" + self._check_window_bounds(start, end, len(x_array)) with np.errstate(all="ignore"): mean_x_y = window_aggregations.roll_mean( @@ -1588,10 +1595,7 @@ def corr_func(x, y): center=self.center, closed=self.closed, ) - - assert len(start) == len( - end - ), "these should be equal in length from get_window_bounds" + self._check_window_bounds(start, end, len(x_array)) with np.errstate(all="ignore"): mean_x_y = window_aggregations.roll_mean( diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index df4666d16ace0..a7ad409683ec8 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -452,3 +452,46 @@ def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size): manual = manual.set_index(["a", "c"])["b"] tm.assert_series_equal(result, manual) + + +def test_unequal_start_end_bounds(): + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([1]), np.array([1, 2]) + + indexer = CustomIndexer() + roll = Series(1).rolling(indexer) + match = "start" + with pytest.raises(ValueError, match=match): + roll.mean() + + with pytest.raises(ValueError, match=match): + next(iter(roll)) + + with pytest.raises(ValueError, match=match): + roll.corr(pairwise=True) + + with pytest.raises(ValueError, match=match): + roll.cov(pairwise=True) + + +def test_unequal_bounds_to_object(): + # GH 44470 + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([1]), np.array([2]) + + indexer = CustomIndexer() + roll = Series([1, 1]).rolling(indexer) + match = "start and end" + with pytest.raises(ValueError, match=match): + roll.mean() + + with pytest.raises(ValueError, match=match): + next(iter(roll)) + + with pytest.raises(ValueError, match=match): + roll.corr(pairwise=True) + + with pytest.raises(ValueError, match=match): + roll.cov(pairwise=True)