From e5bef9c78cd92fe78013f36194927a1fac650bdf Mon Sep 17 00:00:00 2001 From: Jan Skoda Date: Sun, 1 Mar 2020 16:33:02 +0100 Subject: [PATCH 1/5] BUG: fix `calculate_variable_window_bounds` with monotonic decreasing index and update tests CLN: black reformatting --- pandas/_libs/window/indexers.pyx | 10 +++++++--- pandas/tests/window/test_timeseries_window.py | 20 +++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 2d01d1964c043..b038c9aaec349 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -44,6 +44,7 @@ def calculate_variable_window_bounds( cdef: bint left_closed = False bint right_closed = False + int index_growth_sign = +1 ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound Py_ssize_t i, j @@ -58,6 +59,9 @@ def calculate_variable_window_bounds( if closed in ['left', 'both']: left_closed = True + if index[num_values-1] < index[0]: + index_growth_sign = -1 + start = np.empty(num_values, dtype='int64') start.fill(-1) end = np.empty(num_values, dtype='int64') @@ -78,7 +82,7 @@ def calculate_variable_window_bounds( # end is end of slice interval (not including) for i in range(1, num_values): end_bound = index[i] - start_bound = index[i] - window_size + start_bound = index[i] - index_growth_sign * window_size # left endpoint is closed if left_closed: @@ -88,13 +92,13 @@ def calculate_variable_window_bounds( # within the constraint start[i] = i for j in range(start[i - 1], i): - if index[j] > start_bound: + if (index[j] - start_bound) * index_growth_sign > 0: start[i] = j break # end bound is previous end # or current index - if index[end[i - 1]] <= end_bound: + if (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: end[i] = i + 1 else: end[i] = end[i - 1] diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 5f5e10b5dd497..dea2b548abb46 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -709,20 +709,18 @@ def test_rolling_cov_offset(self): tm.assert_series_equal(result, expected2) def test_rolling_on_decreasing_index(self): - # GH-19248 + # GH-19248, GH-32385 index = [ - Timestamp("20190101 09:00:00"), - Timestamp("20190101 09:00:02"), - Timestamp("20190101 09:00:03"), - Timestamp("20190101 09:00:05"), - Timestamp("20190101 09:00:06"), + Timestamp("20190101 09:00:30"), + Timestamp("20190101 09:00:27"), + Timestamp("20190101 09:00:20"), + Timestamp("20190101 09:00:18"), + Timestamp("20190101 09:00:10"), ] - df = DataFrame({"column": [3, 4, 4, 2, 1]}, index=reversed(index)) - result = df.rolling("2s").min() - expected = DataFrame( - {"column": [3.0, 3.0, 3.0, 2.0, 1.0]}, index=reversed(index) - ) + df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index) + result = df.rolling("5s").min() + expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index) tm.assert_frame_equal(result, expected) def test_rolling_on_multi_index_level(self): From 5bddb9b54bbc8c20130bb64bab4c44fa21f30b51 Mon Sep 17 00:00:00 2001 From: Jan Skoda Date: Sun, 1 Mar 2020 16:33:15 +0100 Subject: [PATCH 2/5] DOC: docstrings fix --- pandas/_libs/window/aggregations.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 80b9144042041..a90d2f77e44d1 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1013,7 +1013,7 @@ def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int64_t win): """ - Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. Parameters ---------- @@ -1030,7 +1030,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): """ - Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. Parameters ---------- From ff24fab62a1a8a62f258fe8f81bbd25856415aff Mon Sep 17 00:00:00 2001 From: Jan Skoda Date: Sun, 1 Mar 2020 16:51:12 +0100 Subject: [PATCH 3/5] DOC: whatsnew update --- doc/source/whatsnew/v1.0.2.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 1b6098e6b6ac1..045df00943fe3 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -83,6 +83,10 @@ Bug fixes - Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +**Rolling** + +- Fix bug in :meth:`calculate_variable_window_bounds` that couldn't work on decreasing index (:issue:`32385`). + .. --------------------------------------------------------------------------- .. _whatsnew_102.contributors: From b22b3edfcd1cd84b263a07eaac2612955c631dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20=C5=A0koda?= Date: Tue, 3 Mar 2020 09:37:40 +0100 Subject: [PATCH 4/5] CLN: style fixes --- doc/source/whatsnew/v1.0.2.rst | 2 +- pandas/_libs/window/indexers.pyx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 045df00943fe3..fae41953a6371 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -85,7 +85,7 @@ Bug fixes **Rolling** -- Fix bug in :meth:`calculate_variable_window_bounds` that couldn't work on decreasing index (:issue:`32385`). +- Fixed rolling operations with variable window (defined by time duration) on decreasing time index (:issue:`32385`). .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index b038c9aaec349..8a1e7feb57ace 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -44,7 +44,7 @@ def calculate_variable_window_bounds( cdef: bint left_closed = False bint right_closed = False - int index_growth_sign = +1 + int index_growth_sign = 1 ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound Py_ssize_t i, j @@ -59,7 +59,7 @@ def calculate_variable_window_bounds( if closed in ['left', 'both']: left_closed = True - if index[num_values-1] < index[0]: + if index[num_values - 1] < index[0]: index_growth_sign = -1 start = np.empty(num_values, dtype='int64') From 281152a512e46f22fd2c4d1e021664d12c359436 Mon Sep 17 00:00:00 2001 From: Jan Skoda Date: Mon, 9 Mar 2020 19:42:06 +0100 Subject: [PATCH 5/5] TST: test rolling on empty dataframe --- pandas/tests/window/test_timeseries_window.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index dea2b548abb46..0c5289cd78fed 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -723,6 +723,13 @@ def test_rolling_on_decreasing_index(self): expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index) tm.assert_frame_equal(result, expected) + def test_rolling_on_empty(self): + # GH-32385 + df = DataFrame({"column": []}, index=[]) + result = df.rolling("5s").min() + expected = DataFrame({"column": []}, index=[]) + tm.assert_frame_equal(result, expected) + def test_rolling_on_multi_index_level(self): # GH-15584 df = DataFrame(