Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ Bug fixes
- Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`)
- Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`)

**Rolling**

- Fix bug in :meth:`calculate_variable_window_bounds` that couldn't work on decreasing index (:issue:`32385`).

.. ---------------------------------------------------------------------------

.. _whatsnew_102.contributors:
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,7 @@ def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start,
def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp, int64_t win):
"""
Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.
Moving min of 1d array of any numeric type along axis=0 ignoring NaNs.

Parameters
----------
Expand All @@ -1030,7 +1030,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp):
"""
Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.
Moving min of 1d array of any numeric type along axis=0 ignoring NaNs.

Parameters
----------
Expand Down
10 changes: 7 additions & 3 deletions pandas/_libs/window/indexers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def calculate_variable_window_bounds(
cdef:
bint left_closed = False
bint right_closed = False
int index_growth_sign = +1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 -> 1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

ndarray[int64_t, ndim=1] start, end
int64_t start_bound, end_bound
Py_ssize_t i, j
Expand All @@ -58,6 +59,9 @@ def calculate_variable_window_bounds(
if closed in ['left', 'both']:
left_closed = True

if index[num_values-1] < index[0]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

index[num_values-1] -> index[num_values - 1]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

index_growth_sign = -1

start = np.empty(num_values, dtype='int64')
start.fill(-1)
end = np.empty(num_values, dtype='int64')
Expand All @@ -78,7 +82,7 @@ def calculate_variable_window_bounds(
# end is end of slice interval (not including)
for i in range(1, num_values):
end_bound = index[i]
start_bound = index[i] - window_size
start_bound = index[i] - index_growth_sign * window_size

# left endpoint is closed
if left_closed:
Expand All @@ -88,13 +92,13 @@ def calculate_variable_window_bounds(
# within the constraint
start[i] = i
for j in range(start[i - 1], i):
if index[j] > start_bound:
if (index[j] - start_bound) * index_growth_sign > 0:
start[i] = j
break

# end bound is previous end
# or current index
if index[end[i - 1]] <= end_bound:
if (index[end[i - 1]] - end_bound) * index_growth_sign <= 0:
end[i] = i + 1
else:
end[i] = end[i - 1]
Expand Down
20 changes: 9 additions & 11 deletions pandas/tests/window/test_timeseries_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,20 +709,18 @@ def test_rolling_cov_offset(self):
tm.assert_series_equal(result, expected2)

def test_rolling_on_decreasing_index(self):
# GH-19248
# GH-19248, GH-32385
index = [
Timestamp("20190101 09:00:00"),
Timestamp("20190101 09:00:02"),
Timestamp("20190101 09:00:03"),
Timestamp("20190101 09:00:05"),
Timestamp("20190101 09:00:06"),
Timestamp("20190101 09:00:30"),
Timestamp("20190101 09:00:27"),
Timestamp("20190101 09:00:20"),
Timestamp("20190101 09:00:18"),
Timestamp("20190101 09:00:10"),
]

df = DataFrame({"column": [3, 4, 4, 2, 1]}, index=reversed(index))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this have been working previously with a decreasing index?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not fully sure what the question is, but: on 0.25 we raised an error for decreasing index. For 1.0, rolling with a decreasing index was enabled, but gave wrong output (see eg #32385 for what the output of this should actually be)

Copy link
Contributor Author

@leftys leftys Mar 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v1.0 would return [3,3,3,2,1] for this case. The rolling window covered the whole series.

result = df.rolling("2s").min()
expected = DataFrame(
{"column": [3.0, 3.0, 3.0, 2.0, 1.0]}, index=reversed(index)
)
df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index)
result = df.rolling("5s").min()
expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index)
tm.assert_frame_equal(result, expected)

def test_rolling_on_multi_index_level(self):
Expand Down