-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Allow users to define their own window bound calculations in rolling #29878
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
1c8c24a
1188edd
6b5e894
3a310f6
d1d0775
218395e
46d4a52
c10854d
c237090
1ddc828
a861982
8f482f7
38691c7
9d740d3
d18e954
f06e8e6
7ccbcd0
4e2fd30
c3153d8
89100c4
2704c59
87768ea
6a6d896
2864e95
b16e711
ed08ca3
9eb3022
f358466
0d8cc1f
25a05fe
5d8819f
9194557
e7e1061
09afec4
10c4994
9089f7b
87e391f
7ce1967
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,140 +1,78 @@ | ||
# cython: boundscheck=False, wraparound=False, cdivision=True | ||
|
||
from typing import Tuple | ||
|
||
import numpy as np | ||
from numpy cimport ndarray, int64_t | ||
|
||
# ---------------------------------------------------------------------- | ||
# The indexer objects for rolling | ||
# These define start/end indexers to compute offsets | ||
|
||
|
||
class FixedWindowIndexer:
    """
    Fixed-length window indexer.

    Precomputes, for every position of ``values``, the start and end
    offsets of its window; both arrays are derived from ``win`` and the
    length of ``values`` only.

    Parameters
    ----------
    values: ndarray
        values data array; only its length is used
    win: int64_t
        window size
    closed: string
        closed behavior; accepted but not read by this indexer
    index: object
        index of the values; accepted but not read by this indexer
    """
    def __init__(self, ndarray values, int64_t win, object closed, object index=None):
        cdef:
            ndarray[int64_t, ndim=1] head_start, tail_start, head_end, tail_end
            int64_t n_obs = len(values)

        # Head: the first `win` windows all begin at offset 0 and end one
        # past their own position.
        head_start = np.zeros(win, dtype='int64')
        head_end = np.arange(win, dtype='int64') + 1

        # Tail: from position `win` onwards each window spans exactly
        # `win` positions.
        tail_start = np.arange(win, n_obs, dtype='int64') - win + 1
        tail_end = tail_start + win

        # When win > n_obs the head alone is longer than the data, so
        # both arrays are trimmed to n_obs entries.
        self.start = np.concatenate([head_start, tail_start])[:n_obs]
        self.end = np.concatenate([head_end, tail_end])[:n_obs]

    def get_window_bounds(self) -> Tuple[np.ndarray, np.ndarray]:
        # Hand back the arrays computed in __init__.
        return self.start, self.end
|
||
|
||
class VariableWindowIndexer:
    """
    Variable-length window indexer.

    Precomputes start and end offsets for each position of ``index``;
    window ``i`` covers every position whose index value lies within
    ``win`` of ``index[i]``, with endpoint inclusion chosen by ``closed``.

    Parameters
    ----------
    values: ndarray
        values data array; not read by this indexer (the number of
        windows is taken from ``len(index)``)
    win: int64_t
        window size, in the same units as the entries of ``index``
    closed: string
        closed behavior: 'right', 'left' or 'both'; any other value
        leaves both endpoints open; None is treated as 'right'
    index: ndarray
        index of the values
    """
    def __init__(self, ndarray values, int64_t win, object closed, ndarray index):
        cdef:
            bint left_closed = False
            bint right_closed = False
            int64_t N = len(index)

        # if windows is variable, default is 'right', otherwise default is 'both'
        # (len(index) above has already raised for a None index, so in
        # practice this always resolves to 'right')
        if closed is None:
            closed = 'right' if index is not None else 'both'

        if closed in ('right', 'both'):
            right_closed = True

        if closed in ('left', 'both'):
            left_closed = True

        self.start, self.end = self.build(index, win, left_closed, right_closed, N)

    @staticmethod
    def build(const int64_t[:] index, int64_t win, bint left_closed,
              bint right_closed, int64_t N) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute the (start, end) offset arrays for ``N`` windows.

        Returns two int64 arrays of length ``N``; ``start[i]`` is the first
        position included in window ``i`` and ``end[i]`` is one past the
        last. Two empty arrays are returned when ``N`` is 0.

        NOTE(review): the forward-only scan from ``start[i - 1]`` presumably
        relies on ``index`` being sorted ascending -- confirm with callers.
        """
        cdef:
            ndarray[int64_t] start, end
            int64_t start_bound, end_bound
            Py_ssize_t i, j

        start = np.empty(N, dtype='int64')
        start.fill(-1)
        end = np.empty(N, dtype='int64')
        end.fill(-1)

        # Nothing to seed for an empty index: with bounds checking disabled
        # for this module, writing start[0] / end[0] below would touch
        # memory outside the zero-length buffers.
        if N == 0:
            return start, end

        start[0] = 0

        # right endpoint is closed
        if right_closed:
            end[0] = 1
        # right endpoint is open
        else:
            end[0] = 0

        with nogil:

            # start is start of slice interval (including)
            # end is end of slice interval (not including)
            for i in range(1, N):
                end_bound = index[i]
                start_bound = index[i] - win

                # left endpoint is closed: widen the bound by one so the
                # strict '>' test below also admits index[i] - win itself
                if left_closed:
                    start_bound -= 1

                # advance the start bound until we are
                # within the constraint; default to the current
                # position when no earlier entry qualifies
                start[i] = i
                for j in range(start[i - 1], i):
                    if index[j] > start_bound:
                        start[i] = j
                        break

                # end bound is previous end
                # or current index
                if index[end[i - 1]] <= end_bound:
                    end[i] = i + 1
                else:
                    end[i] = end[i - 1]

                # right endpoint is open
                if not right_closed:
                    end[i] -= 1
        return start, end

    def get_window_bounds(self) -> Tuple[np.ndarray, np.ndarray]:
        # Hand back the arrays computed in __init__.
        return self.start, self.end
# Cython routines for window indexers | ||
|
||
def calculate_variable_window_bounds(
    int64_t num_values,
    int64_t window_size,
    object min_periods,
    object center,
    object closed,
    object win_type,
    const int64_t[:] index
):
    """
    Calculate window boundaries for rolling windows from an index array.

    Parameters
    ----------
    num_values : int64
        number of windows to compute; one per leading entry of ``index``
    window_size : int64
        window size, in the same units as the entries of ``index``
    min_periods : object
        ignored, needed for compatibility
    center : object
        ignored, needed for compatibility
    closed : str or None
        'right', 'left' or 'both'; any other value leaves both endpoints
        open; None is treated as 'right' when ``index`` is not None and
        as 'both' otherwise
    win_type : object
        ignored, needed for compatibility
    index : int64 memoryview
        index values the window limits are measured against

    Returns
    -------
    (ndarray[int64], ndarray[int64])
        ``start`` and ``end``, each of length ``num_values``; ``start[i]``
        is the first position included in window ``i`` and ``end[i]`` is
        one past the last. Both are empty when ``num_values`` is 0.

    Notes
    -----
    NOTE(review): the forward-only scan from ``start[i - 1]`` presumably
    relies on ``index`` being sorted ascending -- confirm with callers.
    """
    cdef:
        bint left_closed = False
        bint right_closed = False
        ndarray[int64_t, ndim=1] start, end
        int64_t start_bound, end_bound
        Py_ssize_t i, j

    # if windows is variable, default is 'right', otherwise default is 'both'
    if closed is None:
        closed = 'right' if index is not None else 'both'

    if closed in ('right', 'both'):
        right_closed = True

    if closed in ('left', 'both'):
        left_closed = True

    start = np.empty(num_values, dtype='int64')
    start.fill(-1)
    end = np.empty(num_values, dtype='int64')
    end.fill(-1)

    # Nothing to seed when there are no values: with bounds checking
    # disabled for this module, writing start[0] / end[0] below would
    # touch memory outside the zero-length buffers.
    if num_values == 0:
        return start, end

    start[0] = 0

    # right endpoint is closed
    if right_closed:
        end[0] = 1
    # right endpoint is open
    else:
        end[0] = 0

    with nogil:

        # start is start of slice interval (including)
        # end is end of slice interval (not including)
        for i in range(1, num_values):
            end_bound = index[i]
            start_bound = index[i] - window_size

            # left endpoint is closed: widen the bound by one so the
            # strict '>' test below also admits index[i] - window_size
            if left_closed:
                start_bound -= 1

            # advance the start bound until we are
            # within the constraint; default to the current
            # position when no earlier entry qualifies
            start[i] = i
            for j in range(start[i - 1], i):
                if index[j] > start_bound:
                    start[i] = j
                    break

            # end bound is previous end
            # or current index
            if index[end[i - 1]] <= end_bound:
                end[i] = i + 1
            else:
                end[i] = end[i - 1]

            # right endpoint is open
            if not right_closed:
                end[i] -= 1
    return start, end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
""" public toolkit API """ | ||
from . import extensions, types # noqa | ||
from . import extensions, indexers, types # noqa |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
"""Public API for Rolling Window Indexers""" | ||
from pandas.core.window.indexers import BaseIndexer # noqa: F401 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
"""Indexer objects for computing start/end window bounds for rolling operations""" | ||
from typing import Optional, Tuple | ||
|
||
import numpy as np | ||
|
||
from pandas._libs.window.indexers import calculate_variable_window_bounds | ||
from pandas.util._decorators import Appender | ||
|
||
get_window_bounds_doc = """ | ||
Computes the bounds of a window. | ||
|
||
Parameters | ||
---------- | ||
num_values : int, default 0 | ||
number of values that will be aggregated over | ||
window_size : int, default 0 | ||
the number of rows in a window | ||
min_periods : int, default None | ||
min_periods passed from the top level rolling API | ||
center : bool, default None | ||
center passed from the top level rolling API | ||
closed : str, default None | ||
closed passed from the top level rolling API | ||
win_type : str, default None | ||
win_type passed from the top level rolling API | ||
|
||
Returns | ||
------- | ||
A tuple of ndarray[int64]s, indicating the boundaries of each | ||
window | ||
""" | ||
|
||
|
||
class BaseIndexer:
    """Base class for window bounds calculations"""

    def __init__(
        self, index: Optional[np.ndarray] = None, **kwargs,
    ):
        """
        Parameters
        ----------
        index : np.ndarray, optional
            stored unchanged on the instance as ``self.index``
        **kwargs :
            keyword arguments; each one becomes an instance attribute of
            the same name so that it is available when get_window_bounds
            is called
        """
        self.index = index
        # Copy the extra keyword arguments straight into the instance
        # namespace.
        vars(self).update(kwargs)

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        window_size: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
        win_type: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # The base class computes no bounds itself; the indexer classes
        # override this method.
        raise NotImplementedError
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. AbstractMethodError? |
||
|
||
|
||
class FixedWindowIndexer(BaseIndexer):
    """Creates window boundaries that are of fixed length."""

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        window_size: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
        win_type: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # Head: the first `window_size` windows all begin at offset 0 and
        # end one past their own position.
        head_start = np.zeros(window_size, dtype=np.int64)
        head_end = np.arange(window_size, dtype=np.int64) + 1

        # Tail: from position `window_size` onwards every window spans
        # exactly `window_size` rows.
        tail_start = np.arange(window_size, num_values, dtype=np.int64)
        tail_start = tail_start - window_size + 1
        tail_end = tail_start + window_size

        # When window_size > num_values the head alone is longer than the
        # data, so both arrays are trimmed to num_values entries.
        start = np.concatenate([head_start, tail_start])[:num_values]
        end = np.concatenate([head_end, tail_end])[:num_values]
        return start, end
|
||
|
||
class VariableWindowIndexer(BaseIndexer):
    """Creates window boundaries that are of variable length, namely for time series."""

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        window_size: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
        win_type: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # All arguments are forwarded positionally, followed by the index
        # stored on the instance. The Cython routine takes min_periods,
        # center and win_type in its signature but never reads them; they
        # are passed along only to match that signature.
        bounds = calculate_variable_window_bounds(
            num_values, window_size, min_periods, center, closed, win_type, self.index
        )
        return bounds
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
looks like min_periods is not used. is this a dummy to make the signature match something else? if so, pls comment/docstring