From 722710da783d57ff47a7bdad1966f44db0973946 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 14:20:36 +0300 Subject: [PATCH 01/19] TST: add test --- pandas/tests/window/common.py | 16 ++++++++++++++++ pandas/tests/window/test_rolling.py | 15 ++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py index 6aeada3152dbb..d2a116e94bf6b 100644 --- a/pandas/tests/window/common.py +++ b/pandas/tests/window/common.py @@ -5,6 +5,7 @@ from pandas import DataFrame, Series, bdate_range, notna import pandas._testing as tm +from pandas.api.indexers import BaseIndexer N, K = 100, 10 @@ -384,3 +385,18 @@ def check_binary_ew_min_periods(name, min_periods, A, B): Series([1.0]), Series([1.0]), 50, name=name, min_periods=min_periods ) tm.assert_series_equal(result, Series([np.NaN])) + + +class ForwardIndexer(BaseIndexer): + # GH 32865 + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if i + min_periods <= num_values: + start[i] = i + end[i] = min(i + self.window_size, num_values) + else: + start[i] = i + end[i] = i + 1 + return start, end diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index ab2c7fcb7a0dc..27cd3edc10a29 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -10,7 +10,7 @@ from pandas import DataFrame, Index, Series import pandas._testing as tm from pandas.core.window import Rolling -from pandas.tests.window.common import Base +from pandas.tests.window.common import Base, ForwardIndexer class TestRolling(Base): @@ -465,3 +465,16 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_forward_window_min(constructor): + # GH 32865 + values = np.arange(10) + expected_min = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, np.nan] + + indexer = ForwardIndexer(window_size=3) + rolling = constructor(values).rolling(window=indexer, min_periods=2) + result = rolling.min() + expected = constructor(rolling.apply(lambda x: min(x))) + tm.assert_equal(result, expected) From c8d79f0bb4e93da2711120eebff39e28ee1b83ac Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 14:26:29 +0300 Subject: [PATCH 02/19] FIX: fix the min-max calculation algorithm --- pandas/_libs/window/aggregations.pyx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 1d1963fb04818..dcc9ceae0c561 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1051,7 +1051,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, bint is_max): cdef: numeric ai - int64_t i, close_offset, curr_win_size + int64_t i, k, close_offset, curr_win_size Py_ssize_t nobs = 0, N = len(values) deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute @@ -1088,12 +1088,14 @@ cdef _roll_min_max_variable(ndarray[numeric] values, # first window's size curr_win_size = endi[0] - starti[0] + k = 0 for i in range(endi[0], endi[N-1]): if not Q.empty() and curr_win_size > 0: - output[i-1+close_offset] = calc_mm( + output[k] = calc_mm( minp, nobs, values[Q.front()]) else: - output[i-1+close_offset] = NaN + output[k] = NaN + k += 1 ai = init_mm(values[i], &nobs, is_max) @@ -1119,9 +1121,9 @@ cdef _roll_min_max_variable(ndarray[numeric] values, W.push_back(i) if not Q.empty() and curr_win_size > 0: - output[N-1] = calc_mm(minp, nobs, values[Q.front()]) + output[k] = calc_mm(minp, nobs, values[Q.front()]) else: - output[N-1] = NaN + output[k] = NaN return output From 53947cd04811fc8ffb6dff0fa0e1265e0682b184 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 14:30:19 +0300 Subject: [PATCH 03/19] TST: remove min/max from test_notimplemented_functions --- pandas/tests/window/test_base_indexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index e9190dfde4fc4..600adcc187467 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -83,7 +83,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): @pytest.mark.parametrize( - "func", ["min", "max", "std", "var", "count", "skew", "cov", "corr"] + "func", ["std", "var", "count", "skew", "cov", "corr"] ) def test_notimplemented_functions(func): # GH 32865 From 6c7742799a5484e394ac142208be3119234868c8 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 14:32:13 +0300 Subject: [PATCH 04/19] add min/max to supported function list --- pandas/core/window/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 8abc47886d261..05f19de19f9f7 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -327,7 +327,7 @@ def func(arg, window, min_periods=None): def validate_baseindexer_support(func_name: Optional[str]) -> None: # GH 32865: These functions work correctly with a BaseIndexer subclass - BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"} + BASEINDEXER_WHITELIST = {"min", "max", "mean", "sum", "median", "kurt", "quantile"} if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: raise NotImplementedError( f"{func_name} is not supported with using a BaseIndexer " From 312e921a8f0a7f884259200154a7d509b3244e6c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 14:40:06 +0300 Subject: [PATCH 05/19] Revert "FIX: fix the min-max calculation algorithm" This reverts commit c8d79f0bb4e93da2711120eebff39e28ee1b83ac. --- pandas/_libs/window/aggregations.pyx | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index dcc9ceae0c561..1d1963fb04818 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1051,7 +1051,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, bint is_max): cdef: numeric ai - int64_t i, k, close_offset, curr_win_size + int64_t i, close_offset, curr_win_size Py_ssize_t nobs = 0, N = len(values) deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute @@ -1088,14 +1088,12 @@ cdef _roll_min_max_variable(ndarray[numeric] values, # first window's size curr_win_size = endi[0] - starti[0] - k = 0 for i in range(endi[0], endi[N-1]): if not Q.empty() and curr_win_size > 0: - output[k] = calc_mm( + output[i-1+close_offset] = calc_mm( minp, nobs, values[Q.front()]) else: - output[k] = NaN - k += 1 + output[i-1+close_offset] = NaN ai = init_mm(values[i], &nobs, is_max) @@ -1121,9 +1119,9 @@ cdef _roll_min_max_variable(ndarray[numeric] values, W.push_back(i) if not Q.empty() and curr_win_size > 0: - output[k] = calc_mm(minp, nobs, values[Q.front()]) + output[N-1] = calc_mm(minp, nobs, values[Q.front()]) else: - output[k] = NaN + output[N-1] = NaN return output From 3b2067d73add077a729bc53d6af3b6b396ad4dcc Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 15:48:16 +0300 Subject: [PATCH 06/19] FIX: adjust the algorithm --- pandas/_libs/window/aggregations.pyx | 52 +++++++++++++--------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 1d1963fb04818..6c302e700131a 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1051,7 +1051,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, bint is_max): cdef: numeric ai - int64_t i, close_offset, curr_win_size + int64_t i, k, close_offset, curr_win_size Py_ssize_t nobs = 0, N = len(values) deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute @@ -1083,41 +1083,39 @@ cdef _roll_min_max_variable(ndarray[numeric] values, Q.push_back(i) W.push_back(i) - # if right is open then the first window is empty - close_offset = 0 if endi[0] > starti[0] else 1 # first window's size curr_win_size = endi[0] - starti[0] - - for i in range(endi[0], endi[N-1]): + # GH 32865 + # Anchor output index to values index to provide custom + # BaseIndexer support + for i in range(1, N): if not Q.empty() and curr_win_size > 0: - output[i-1+close_offset] = calc_mm( - minp, nobs, values[Q.front()]) + output[i - 1] = calc_mm(minp, nobs, values[Q.front()]) else: - output[i-1+close_offset] = NaN + output[i - 1] = NaN - ai = init_mm(values[i], &nobs, is_max) - - # Discard previous entries if we find new min or max - if is_max: - while not Q.empty() and ((ai >= values[Q.back()]) or - values[Q.back()] != values[Q.back()]): - Q.pop_back() - else: - while not Q.empty() and ((ai <= values[Q.back()]) or - values[Q.back()] != values[Q.back()]): - Q.pop_back() - - # Maintain window/nobs retention - curr_win_size = endi[i + close_offset] - starti[i + close_offset] - while not Q.empty() and Q.front() <= i - curr_win_size: + curr_win_size = endi[i] - starti[i] + for k in range(endi[i - 1], endi[i]): + ai = init_mm(values[k], &nobs, is_max) + # Discard previous entries if we find new min or max + if is_max: + while not Q.empty() and ((ai >= values[Q.back()]) or + values[Q.back()] != values[Q.back()]): + Q.pop_back() + else: + while not Q.empty() and ((ai <= values[Q.back()]) or + values[Q.back()] != values[Q.back()]): + Q.pop_back() + Q.push_back(k) + W.push_back(k) + + # Discard entries outside and left of current window + while not Q.empty() and Q.front() <= starti[i] - 1: Q.pop_front() - while not W.empty() and W.front() <= i - curr_win_size: + while not W.empty() and W.front() <= starti[i] - 1: remove_mm(values[W.front()], &nobs) W.pop_front() - Q.push_back(i) - W.push_back(i) - if not Q.empty() and curr_win_size > 0: output[N-1] = calc_mm(minp, nobs, values[Q.front()]) else: From ede2da775fc2e08647a71a9b331143739ad2c257 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 16:11:24 +0300 Subject: [PATCH 07/19] REFACT: refactor the test --- pandas/tests/window/common.py | 16 ------------- pandas/tests/window/test_base_indexer.py | 4 +--- pandas/tests/window/test_rolling.py | 30 ++++++++++++++++++++---- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py index d2a116e94bf6b..6aeada3152dbb 100644 --- a/pandas/tests/window/common.py +++ b/pandas/tests/window/common.py @@ -5,7 +5,6 @@ from pandas import DataFrame, Series, bdate_range, notna import pandas._testing as tm -from pandas.api.indexers import BaseIndexer N, K = 100, 10 @@ -385,18 +384,3 @@ def check_binary_ew_min_periods(name, min_periods, A, B): Series([1.0]), Series([1.0]), 50, name=name, min_periods=min_periods ) tm.assert_series_equal(result, Series([np.NaN])) - - -class ForwardIndexer(BaseIndexer): - # GH 32865 - def get_window_bounds(self, num_values, min_periods, center, closed): - start = np.empty(num_values, dtype=np.int64) - end = np.empty(num_values, dtype=np.int64) - for i in range(num_values): - if i + min_periods <= num_values: - start[i] = i - end[i] = min(i + self.window_size, num_values) - else: - start[i] = i - end[i] = i + 1 - return start, end diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 600adcc187467..62fff0acb4d2f 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -82,9 +82,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): df.rolling(indexer, win_type="boxcar") -@pytest.mark.parametrize( - "func", ["std", "var", "count", "skew", "cov", "corr"] -) +@pytest.mark.parametrize("func", ["std", "var", "count", "skew", "cov", "corr"]) def test_notimplemented_functions(func): # GH 32865 class CustomIndexer(BaseIndexer): diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 27cd3edc10a29..026ef5c253755 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -9,8 +9,9 @@ import pandas as pd from pandas import DataFrame, Index, Series import pandas._testing as tm +from pandas.api.indexers import BaseIndexer from pandas.core.window import Rolling -from pandas.tests.window.common import Base, ForwardIndexer +from pandas.tests.window.common import Base class TestRolling(Base): @@ -468,13 +469,32 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): @pytest.mark.parametrize("constructor", [Series, DataFrame]) -def test_rolling_forward_window_min(constructor): +@pytest.mark.parametrize( + "func,expected", + [ + ("min", [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, np.nan]), + ("max", [2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0, np.nan]), + ], +) +def test_rolling_forward_window_min(constructor, func, expected): # GH 32865 + class ForwardIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if i + min_periods <= num_values: + start[i] = i + end[i] = min(i + self.window_size, num_values) + else: + start[i] = i + end[i] = i + 1 + return start, end + values = np.arange(10) - expected_min = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, np.nan] indexer = ForwardIndexer(window_size=3) rolling = constructor(values).rolling(window=indexer, min_periods=2) - result = rolling.min() - expected = constructor(rolling.apply(lambda x: min(x))) + result = getattr(rolling, func)() + expected = constructor(expected) tm.assert_equal(result, expected) From d91fb1078c525bdcccd96258d2d75f627b5a8d9e Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 16:33:22 +0300 Subject: [PATCH 08/19] DOC: add a whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 58ac2b4cba3b7..72e9b7fdd2276 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -101,8 +101,8 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``min``, ``max``, ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) -- +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``min``, ``max`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 7a5a49d439e4697709caf480f56f48319af65b96 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 19:08:54 +0300 Subject: [PATCH 09/19] REFACT: fold init and final into main loop --- pandas/_libs/window/aggregations.pyx | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 6c302e700131a..4556a60cd7b89 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1051,7 +1051,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, bint is_max): cdef: numeric ai - int64_t i, k, close_offset, curr_win_size + int64_t i, k, close_offset, curr_win_size, start Py_ssize_t nobs = 0, N = len(values) deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute @@ -1088,14 +1088,15 @@ cdef _roll_min_max_variable(ndarray[numeric] values, # GH 32865 # Anchor output index to values index to provide custom # BaseIndexer support - for i in range(1, N): - if not Q.empty() and curr_win_size > 0: - output[i - 1] = calc_mm(minp, nobs, values[Q.front()]) - else: - output[i - 1] = NaN + for i in range(N): curr_win_size = endi[i] - starti[i] - for k in range(endi[i - 1], endi[i]): + if i == 0: + start = starti[i] + else: + start = endi[i - 1] + + for k in range(start, endi[i]): ai = init_mm(values[k], &nobs, is_max) # Discard previous entries if we find new min or max if is_max: @@ -1116,10 +1117,11 @@ cdef _roll_min_max_variable(ndarray[numeric] values, remove_mm(values[W.front()], &nobs) W.pop_front() - if not Q.empty() and curr_win_size > 0: - output[N-1] = calc_mm(minp, nobs, values[Q.front()]) - else: - output[N-1] = NaN + # Save output based on index in input value array + if not Q.empty() and curr_win_size > 0: + output[i] = calc_mm(minp, nobs, values[Q.front()]) + else: + output[i] = NaN return output From 723713e162edd1042191cc2e8abc2d397bbe180c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 19:15:29 +0300 Subject: [PATCH 10/19] REFACT: finish folding initialization into main loop --- pandas/_libs/window/aggregations.pyx | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 4556a60cd7b89..c8fa7de32ca2b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1068,21 +1068,6 @@ cdef _roll_min_max_variable(ndarray[numeric] values, # The original impl didn't deal with variable window sizes # So the code was optimized for that - for i in range(starti[0], endi[0]): - ai = init_mm(values[i], &nobs, is_max) - - # Discard previous entries if we find new min or max - if is_max: - while not Q.empty() and ((ai >= values[Q.back()]) or - values[Q.back()] != values[Q.back()]): - Q.pop_back() - else: - while not Q.empty() and ((ai <= values[Q.back()]) or - values[Q.back()] != values[Q.back()]): - Q.pop_back() - Q.push_back(i) - W.push_back(i) - # first window's size curr_win_size = endi[0] - starti[0] # GH 32865 From 560f7df22822f5bcd3cc327ef71160dbac592504 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 19:16:48 +0300 Subject: [PATCH 11/19] CLN: remove unnecessary close_offset variable --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index c8fa7de32ca2b..f3889039c095e 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1051,7 +1051,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, bint is_max): cdef: numeric ai - int64_t i, k, close_offset, curr_win_size, start + int64_t i, k, curr_win_size, start Py_ssize_t nobs = 0, N = len(values) deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute From 1c394e797f3d7838e9b8cab7f03e2fc7757b5b0e Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 20:13:04 +0300 Subject: [PATCH 12/19] TST: generalize the test --- pandas/tests/window/test_rolling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 026ef5c253755..5729c23b2cc09 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -472,11 +472,11 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): @pytest.mark.parametrize( "func,expected", [ - ("min", [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, np.nan]), - ("max", [2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0, np.nan]), + ("min", [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan]), + ("max", [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan]), ], ) -def test_rolling_forward_window_min(constructor, func, expected): +def test_rolling_forward_window(constructor, func, expected): # GH 32865 class ForwardIndexer(BaseIndexer): def get_window_bounds(self, num_values, min_periods, center, closed): @@ -492,6 +492,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): return start, end values = np.arange(10) + values[5] = 100.0 indexer = ForwardIndexer(window_size=3) rolling = constructor(values).rolling(window=indexer, min_periods=2) From 4e9772d17fa9ac553187443ea8bf9292c07e4c94 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 20:42:23 +0300 Subject: [PATCH 13/19] TST: move test to test_base_indexer --- pandas/tests/window/test_base_indexer.py | 33 +++++++++++++++++++++++ pandas/tests/window/test_rolling.py | 34 ------------------------ 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 62fff0acb4d2f..14979b37af799 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -93,3 +93,36 @@ def get_window_bounds(self, num_values, min_periods, center, closed): indexer = CustomIndexer() with pytest.raises(NotImplementedError, match=f"{func} is not supported"): getattr(df.rolling(indexer), func)() + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +@pytest.mark.parametrize( + "func,expected", + [ + ("min", [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan]), + ("max", [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan]), + ], +) +def test_rolling_forward_window(constructor, func, expected): + # GH 32865 + class ForwardIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if i + min_periods <= num_values: + start[i] = i + end[i] = min(i + self.window_size, num_values) + else: + start[i] = i + end[i] = i + 1 + return start, end + + values = np.arange(10) + values[5] = 100.0 + + indexer = ForwardIndexer(window_size=3) + rolling = constructor(values).rolling(window=indexer, min_periods=2) + result = getattr(rolling, func)() + expected = constructor(expected) + tm.assert_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 5729c23b2cc09..ab2c7fcb7a0dc 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -9,7 +9,6 @@ import pandas as pd from pandas import DataFrame, Index, Series import pandas._testing as tm -from pandas.api.indexers import BaseIndexer from pandas.core.window import Rolling from pandas.tests.window.common import Base @@ -466,36 +465,3 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) - - -@pytest.mark.parametrize("constructor", [Series, DataFrame]) -@pytest.mark.parametrize( - "func,expected", - [ - ("min", [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan]), - ("max", [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan]), - ], -) -def test_rolling_forward_window(constructor, func, expected): - # GH 32865 - class ForwardIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): - start = np.empty(num_values, dtype=np.int64) - end = np.empty(num_values, dtype=np.int64) - for i in range(num_values): - if i + min_periods <= num_values: - start[i] = i - end[i] = min(i + self.window_size, num_values) - else: - start[i] = i - end[i] = i + 1 - return start, end - - values = np.arange(10) - values[5] = 100.0 - - indexer = ForwardIndexer(window_size=3) - rolling = constructor(values).rolling(window=indexer, min_periods=2) - result = getattr(rolling, func)() - expected = constructor(expected) - tm.assert_equal(result, expected) From cc0b56e708597ebb5f8c02e2262918f4295295a0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 31 Mar 2020 20:45:33 +0300 Subject: [PATCH 14/19] TST: add testing vs. apply --- pandas/tests/window/test_base_indexer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 14979b37af799..36db889191724 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -97,13 +97,13 @@ def get_window_bounds(self, num_values, min_periods, center, closed): @pytest.mark.parametrize("constructor", [Series, DataFrame]) @pytest.mark.parametrize( - "func,expected", + "func,alt_func,expected", [ - ("min", [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan]), - ("max", [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan]), + ("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan]), + ("max", np.max, [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan]), ], ) -def test_rolling_forward_window(constructor, func, expected): +def test_rolling_forward_window(constructor, func, alt_func, expected): # GH 32865 class ForwardIndexer(BaseIndexer): def get_window_bounds(self, num_values, min_periods, center, closed): @@ -126,3 +126,5 @@ def get_window_bounds(self, num_values, min_periods, center, closed): result = getattr(rolling, func)() expected = constructor(expected) tm.assert_equal(result, expected) + expected2 = constructor(rolling.apply(lambda x: alt_func(x))) + tm.assert_equal(result, expected2) From c5ab8bcec4ad6a17581ca9c30488801e0ac22d2a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 2 Apr 2020 13:54:23 +0300 Subject: [PATCH 15/19] TST: update ForwardIndexer implementation --- pandas/tests/window/test_base_indexer.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 36db889191724..b468e3c670320 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -107,15 +107,11 @@ def test_rolling_forward_window(constructor, func, alt_func, expected): # GH 32865 class ForwardIndexer(BaseIndexer): def get_window_bounds(self, num_values, min_periods, center, closed): - start = np.empty(num_values, dtype=np.int64) - end = np.empty(num_values, dtype=np.int64) - for i in range(num_values): - if i + min_periods <= num_values: - start[i] = i - end[i] = min(i + self.window_size, num_values) - else: - start[i] = i - end[i] = i + 1 + start = np.arange(num_values, dtype="int64") + end_s = start[:-self.window_size] + self.window_size + end_e = np.full(self.window_size, num_values, dtype="int64") + end = np.concatenate([end_s, end_e]) + return start, end values = np.arange(10) From 5204a6c595f8c2c3357727b46ace0120c178c58c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 2 Apr 2020 14:49:22 +0300 Subject: [PATCH 16/19] CLN: black the test --- pandas/tests/window/test_base_indexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index b468e3c670320..25d575e0ad0b6 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -108,7 +108,7 @@ def test_rolling_forward_window(constructor, func, alt_func, expected): class ForwardIndexer(BaseIndexer): def get_window_bounds(self, num_values, min_periods, center, closed): start = np.arange(num_values, dtype="int64") - end_s = start[:-self.window_size] + self.window_size + end_s = start[: -self.window_size] + self.window_size end_e = np.full(self.window_size, num_values, dtype="int64") end = np.concatenate([end_s, end_e]) From 72970b7793fcb128bea8d52b12aac71850d9d44e Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 2 Apr 2020 14:57:16 +0300 Subject: [PATCH 17/19] DOC: add dash for next entry to whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5cd6f5ed975d1..34b23477e5e8e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -103,6 +103,7 @@ Other API changes - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) - Using a :func:`pandas.api.indexers.BaseIndexer` with ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) - Using a :func:`pandas.api.indexers.BaseIndexer` with ``min``, ``max`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) +- Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From b9ddfc1bf615e92d67fa9a6902783e6d057a69ca Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 2 Apr 2020 16:06:51 +0300 Subject: [PATCH 18/19] restart tests From 9cfea824425e418f8da57d671291c9357c5de909 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 3 Apr 2020 08:56:46 +0300 Subject: [PATCH 19/19] restart tests yet again