Skip to content

ERR: Raise ValueError when BaseIndexer start & end bounds are unequal length #44497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -677,8 +677,8 @@ Groupby/resample/rolling
- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`)
- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`)
- Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`#3944`)

- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contains duplicates (:issue:`3944`)
- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` where using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned start and end arrays of unequal length would segfault instead of raising a ``ValueError`` (:issue:`44470`)

Reshaping
^^^^^^^^^
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,13 @@ def __init__(
self.alpha,
)

def _check_window_bounds(
    self, start: np.ndarray, end: np.ndarray, num_vals: int
) -> None:
    """
    Accept any window bounds without validating them.

    Parameters
    ----------
    start : np.ndarray
        Window start bounds (not inspected).
    end : np.ndarray
        Window end bounds (not inspected).
    num_vals : int
        Number of values in the windowed object (not inspected).
    """
    # ewm algorithms are iterative with each point
    # ExponentialMovingWindowIndexer "bounds" are the entire window,
    # so there is no per-row start/end length to compare.
    return None

def _get_window_indexer(self) -> BaseIndexer:
"""
Return an indexer class that will compute the window start and end bounds
Expand Down
34 changes: 19 additions & 15 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,20 @@ def _validate(self) -> None:
if self.method not in ["table", "single"]:
raise ValueError("method must be 'table' or 'single")

def _check_window_bounds(
    self, start: np.ndarray, end: np.ndarray, num_vals: int
) -> None:
    """
    Validate the lengths of the window bounds.

    Parameters
    ----------
    start : np.ndarray
        Window start bounds.
    end : np.ndarray
        Window end bounds.
    num_vals : int
        Number of values in the object the bounds were computed for.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` differ in length, or if their common
        length differs from ``num_vals``.
    """
    if len(start) != len(end):
        raise ValueError(
            f"start ({len(start)}) and end ({len(end)}) bounds must be the "
            "same length"
        )
    # start and end are now known to be the same length, so checking one
    # of them against the object is enough.
    if len(start) != num_vals:
        raise ValueError(
            f"start and end bounds ({len(start)}) must be the same length "
            f"as the object ({num_vals})"
        )

def _create_data(self, obj: NDFrameT) -> NDFrameT:
"""
Split data into blocks & return conformed data.
Expand Down Expand Up @@ -311,10 +325,7 @@ def __iter__(self):
center=self.center,
closed=self.closed,
)

assert len(start) == len(
end
), "these should be equal in length from get_window_bounds"
self._check_window_bounds(start, end, len(obj))

for s, e in zip(start, end):
result = obj.iloc[slice(s, e)]
Expand Down Expand Up @@ -565,9 +576,7 @@ def calc(x):
center=self.center,
closed=self.closed,
)
assert len(start) == len(
end
), "these should be equal in length from get_window_bounds"
self._check_window_bounds(start, end, len(x))

return func(x, start, end, min_periods, *numba_args)

Expand Down Expand Up @@ -608,6 +617,7 @@ def _numba_apply(
center=self.center,
closed=self.closed,
)
self._check_window_bounds(start, end, len(values))
aggregator = executor.generate_shared_aggregator(
func, engine_kwargs, numba_cache_key_str
)
Expand Down Expand Up @@ -1544,10 +1554,7 @@ def cov_func(x, y):
center=self.center,
closed=self.closed,
)

assert len(start) == len(
end
), "these should be equal in length from get_window_bounds"
self._check_window_bounds(start, end, len(x_array))

with np.errstate(all="ignore"):
mean_x_y = window_aggregations.roll_mean(
Expand Down Expand Up @@ -1588,10 +1595,7 @@ def corr_func(x, y):
center=self.center,
closed=self.closed,
)

assert len(start) == len(
end
), "these should be equal in length from get_window_bounds"
self._check_window_bounds(start, end, len(x_array))

with np.errstate(all="ignore"):
mean_x_y = window_aggregations.roll_mean(
Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/window/test_base_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,3 +452,46 @@ def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
manual = manual.set_index(["a", "c"])["b"]

tm.assert_series_equal(result, manual)


def test_unequal_start_end_bounds():
    # GH 44470
    # A custom indexer whose start (length 1) and end (length 2) arrays
    # differ in length must raise a ValueError from every rolling entry point.
    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed):
            return np.array([1]), np.array([1, 2])

    indexer = CustomIndexer()
    roll = Series(1).rolling(indexer)
    match = "start"
    with pytest.raises(ValueError, match=match):
        roll.mean()

    with pytest.raises(ValueError, match=match):
        next(iter(roll))

    with pytest.raises(ValueError, match=match):
        roll.corr(pairwise=True)

    with pytest.raises(ValueError, match=match):
        roll.cov(pairwise=True)


def test_unequal_bounds_to_object():
    # GH 44470
    # start and end agree with each other (length 1) but not with the
    # two-element Series, so the bounds-vs-object length check must raise.
    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed):
            return np.array([1]), np.array([2])

    indexer = CustomIndexer()
    roll = Series([1, 1]).rolling(indexer)
    match = "start and end"
    with pytest.raises(ValueError, match=match):
        roll.mean()

    with pytest.raises(ValueError, match=match):
        next(iter(roll))

    with pytest.raises(ValueError, match=match):
        roll.corr(pairwise=True)

    with pytest.raises(ValueError, match=match):
        roll.cov(pairwise=True)