Skip to content

ENH: Allow users to define their own window bound calculations in rolling #29878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
Dec 5, 2019
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
1c8c24a
Add BaseIndexer class
Nov 25, 2019
1188edd
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Nov 26, 2019
6b5e894
Reformat Indexers
Nov 26, 2019
3a310f6
Remove init
Nov 26, 2019
d1d0775
Add BaseIndexer to api and allow rolling to accept BaseIndexer subcla…
Nov 26, 2019
218395e
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Nov 27, 2019
46d4a52
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Nov 27, 2019
c10854d
Lint cython files
Nov 27, 2019
c237090
Move indexers to pandas/core/window/indexers
Nov 27, 2019
1ddc828
Share get_window_bounds docstring
Nov 27, 2019
a861982
isort
Nov 27, 2019
8f482f7
Validate signature of get_window_bounds
Nov 27, 2019
38691c7
Validate signature of get_window_bounds
Nov 27, 2019
9d740d3
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Nov 28, 2019
d18e954
Lint
Nov 28, 2019
f06e8e6
Comment on unused variables in calculate_variable_window_bounds
Nov 28, 2019
7ccbcd0
Type annotate _get_window_indexer & black
Nov 28, 2019
4e2fd30
self.index -> self.index_array
Nov 28, 2019
c3153d8
Add test for ExpandingIndexer
Nov 28, 2019
89100c4
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Nov 29, 2019
2704c59
Add doc example in computation.rst with test + handle start, end boun…
Dec 1, 2019
87768ea
Add back win_type (for now)
Dec 1, 2019
6a6d896
Add 1.0.0 whatsnew note
Dec 1, 2019
2864e95
Remove BaseIndexers accepting win_type (weighted rolling)
Dec 1, 2019
b16e711
Lint
Dec 1, 2019
ed08ca3
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Dec 1, 2019
9eb3022
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Dec 2, 2019
f358466
Try changing import
Dec 2, 2019
0d8cc1f
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Dec 3, 2019
25a05fe
Make doc example a code block, add docstring
Dec 3, 2019
5d8819f
Change self.__dict__(kwargs) to more explicit setattr
Dec 3, 2019
9194557
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Dec 3, 2019
e7e1061
Fix docstring
Dec 3, 2019
09afec4
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Dec 3, 2019
10c4994
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Dec 4, 2019
9089f7b
Add BaseIndexer in doc/source/reference/window/rst
Dec 4, 2019
87e391f
Add typing for _validate_get_window_bounds_signature
Dec 4, 2019
7ce1967
Merge remote-tracking branch 'upstream/master' into feature/baseindexer
Dec 4, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 73 additions & 135 deletions pandas/_libs/window/indexers.pyx
Original file line number Diff line number Diff line change
@@ -1,140 +1,78 @@
# cython: boundscheck=False, wraparound=False, cdivision=True

from typing import Tuple

import numpy as np
from numpy cimport ndarray, int64_t

# ----------------------------------------------------------------------
# The indexer objects for rolling
# These define start/end indexers to compute offsets


class FixedWindowIndexer:
    """
    Fixed-length window indexer.

    Builds two int64 arrays, ``start`` and ``end``, holding for every
    position the half-open slice ``[start, end)`` of the window ending at
    that position. The first ``win`` windows are truncated at offset 0.

    Parameters
    ----------
    values: ndarray
        values data array; only its length is used
    win: int64_t
        window size
    closed: string
        closed behavior (accepted but not read by this indexer)
    index: object
        index of the values (accepted but not read by this indexer)
    """
    def __init__(self, ndarray values, int64_t win, object closed, object index=None):
        cdef:
            ndarray[int64_t, ndim=1] head_start, tail_start, head_end, tail_end
            int64_t n_obs = len(values)

        # Leading windows: anchored at 0 and growing by one element each step.
        head_start = np.zeros(win, dtype='int64')
        head_end = np.arange(win, dtype='int64') + 1

        # Remaining windows: full width, sliding one position at a time.
        tail_start = np.arange(win, n_obs, dtype='int64') - win + 1
        tail_end = tail_start + win

        # Trim to n_obs so the result is still correct when win > n_obs.
        self.start = np.concatenate([head_start, tail_start])[:n_obs]
        self.end = np.concatenate([head_end, tail_end])[:n_obs]

    def get_window_bounds(self) -> Tuple[np.ndarray, np.ndarray]:
        """Return the precomputed ``(start, end)`` bound arrays."""
        return self.start, self.end


class VariableWindowIndexer:
    """
    create a variable length window indexer object
    that has start & end, that point to offsets in
    the index object; these are defined based on the win
    arguments

    Parameters
    ----------
    values: ndarray
        values data array (accepted but not read; the length is taken
        from ``index``)
    win: int64_t
        window size, compared directly against differences of ``index``
    index: ndarray
        index of the values
    closed: string
        closed behavior; one of 'right', 'left', 'both' or None
        (None is resolved to 'right' when an index is given)
    """
    def __init__(self, ndarray values, int64_t win, object closed, ndarray index):
        cdef:
            bint left_closed = False
            bint right_closed = False
            int64_t N = len(index)

        # if windows is variable, default is 'right', otherwise default is 'both'
        if closed is None:
            closed = 'right' if index is not None else 'both'

        if closed in ['right', 'both']:
            right_closed = True

        if closed in ['left', 'both']:
            left_closed = True

        self.start, self.end = self.build(index, win, left_closed, right_closed, N)

    @staticmethod
    def build(const int64_t[:] index, int64_t win, bint left_closed,
              bint right_closed, int64_t N) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute the ``[start, end)`` slice bounds for each of the N positions.

        Returns two int64 arrays of length N. For N == 0 two empty arrays
        are returned.
        """

        cdef:
            ndarray[int64_t] start, end
            int64_t start_bound, end_bound
            Py_ssize_t i, j

        # Nothing to compute for an empty index. Returning early is required
        # because bounds checking is disabled for this module and the
        # unconditional writes to start[0] / end[0] below would otherwise
        # touch memory outside the (empty) buffers.
        if N == 0:
            return np.empty(0, dtype='int64'), np.empty(0, dtype='int64')

        start = np.empty(N, dtype='int64')
        start.fill(-1)
        end = np.empty(N, dtype='int64')
        end.fill(-1)

        start[0] = 0

        # right endpoint is closed
        if right_closed:
            end[0] = 1
        # right endpoint is open
        else:
            end[0] = 0

        with nogil:

            # start is start of slice interval (including)
            # end is end of slice interval (not including)
            for i in range(1, N):
                end_bound = index[i]
                start_bound = index[i] - win

                # left endpoint is closed
                if left_closed:
                    start_bound -= 1

                # advance the start bound until we are
                # within the constraint
                # (the scan resumes from the previous window's start)
                start[i] = i
                for j in range(start[i - 1], i):
                    if index[j] > start_bound:
                        start[i] = j
                        break

                # end bound is previous end
                # or current index
                if index[end[i - 1]] <= end_bound:
                    end[i] = i + 1
                else:
                    end[i] = end[i - 1]

                # right endpoint is open
                if not right_closed:
                    end[i] -= 1
        return start, end

    def get_window_bounds(self) -> Tuple[np.ndarray, np.ndarray]:
        """Return the precomputed ``(start, end)`` bound arrays."""
        return self.start, self.end
# Cython routines for window indexers

def calculate_variable_window_bounds(
    int64_t num_values,
    int64_t window_size,
    object min_periods,  # unused; kept so the signature matches get_window_bounds
    object center,  # unused; kept so the signature matches get_window_bounds
    object closed,
    object win_type,  # unused; kept so the signature matches get_window_bounds
    const int64_t[:] index
):
    """
    Calculate window boundaries for rolling windows of variable length.

    Parameters
    ----------
    num_values : int64
        number of positions to compute bounds for
    window_size : int64
        width of the window, compared directly against differences of
        ``index`` values
    min_periods : object
        ignored, present only for signature compatibility
    center : object
        ignored, present only for signature compatibility
    closed : str or None
        which side(s) of the window are closed: 'right', 'left' or 'both';
        None is resolved to 'right' when an index is given
    win_type : object
        ignored, present only for signature compatibility
    index : int64 memoryview
        index values to roll over
        (presumably sorted ascending; the single forward scan relies on it)

    Returns
    -------
    (ndarray[int64], ndarray[int64])
        ``start`` and ``end`` arrays; window ``i`` is the half-open slice
        ``[start[i], end[i])``. Two empty arrays when ``num_values`` is 0.
    """
    cdef:
        bint left_closed = False
        bint right_closed = False
        ndarray[int64_t, ndim=1] start, end
        int64_t start_bound, end_bound
        Py_ssize_t i, j

    # Nothing to compute for empty input. Returning early is required because
    # bounds checking is disabled for this module and the unconditional
    # writes to start[0] / end[0] below would otherwise touch memory outside
    # the (empty) buffers.
    if num_values == 0:
        return np.empty(0, dtype='int64'), np.empty(0, dtype='int64')

    # if windows is variable, default is 'right', otherwise default is 'both'
    if closed is None:
        closed = 'right' if index is not None else 'both'

    if closed in ['right', 'both']:
        right_closed = True

    if closed in ['left', 'both']:
        left_closed = True

    start = np.empty(num_values, dtype='int64')
    start.fill(-1)
    end = np.empty(num_values, dtype='int64')
    end.fill(-1)

    start[0] = 0

    # right endpoint is closed
    if right_closed:
        end[0] = 1
    # right endpoint is open
    else:
        end[0] = 0

    with nogil:

        # start is start of slice interval (including)
        # end is end of slice interval (not including)
        for i in range(1, num_values):
            end_bound = index[i]
            start_bound = index[i] - window_size

            # left endpoint is closed
            if left_closed:
                start_bound -= 1

            # advance the start bound until we are
            # within the constraint
            # (the scan resumes from the previous window's start)
            start[i] = i
            for j in range(start[i - 1], i):
                if index[j] > start_bound:
                    start[i] = j
                    break

            # end bound is previous end
            # or current index
            if index[end[i - 1]] <= end_bound:
                end[i] = i + 1
            else:
                end[i] = end[i - 1]

            # right endpoint is open
            if not right_closed:
                end[i] -= 1
    return start, end
2 changes: 1 addition & 1 deletion pandas/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
""" public toolkit API """
from . import extensions, types # noqa
from . import extensions, indexers, types # noqa
2 changes: 2 additions & 0 deletions pandas/api/indexers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"""Public API for Rolling Window Indexers"""
from pandas.core.window.indexers import BaseIndexer # noqa: F401
103 changes: 103 additions & 0 deletions pandas/core/window/indexers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""Indexer objects for computing start/end window bounds for rolling operations"""
from typing import Optional, Tuple

import numpy as np

from pandas._libs.window.indexers import calculate_variable_window_bounds
from pandas.util._decorators import Appender

# Shared docstring text for get_window_bounds; it is attached to each
# implementation in this module through the @Appender decorator.
get_window_bounds_doc = """
Computes the bounds of a window.

Parameters
----------
num_values : int, default 0
number of values that will be aggregated over
window_size : int, default 0
the number of rows in a window
min_periods : int, default None
min_periods passed from the top level rolling API
center : bool, default None
center passed from the top level rolling API
closed : str, default None
closed passed from the top level rolling API
win_type : str, default None
win_type passed from the top level rolling API

Returns
-------
A tuple of ndarray[int64]s, indicating the boundaries of each
window
"""


class BaseIndexer:
    """Base class for window bounds calculations.

    Subclasses implement ``get_window_bounds``; the base implementation
    raises ``NotImplementedError``.
    """

    def __init__(
        self,
        index: Optional[np.ndarray] = None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        index : np.ndarray, optional
            stored unchanged on the instance as ``self.index``
        **kwargs :
            keyword argument that will be available when get_window_bounds is called
        """
        # NOTE(review): a reviewer found the name ``index`` ambiguous because
        # it suggests a pd.Index, while the annotation says ndarray.
        self.index = index
        # Every extra keyword becomes an instance attribute of the same name.
        vars(self).update(kwargs)

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        window_size: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
        win_type: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # Subclasses must supply the actual bound computation.
        raise NotImplementedError
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AbstractMethodError?



class FixedWindowIndexer(BaseIndexer):
    """Window indexer producing bounds of a fixed length."""

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        window_size: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
        win_type: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # Leading windows are anchored at offset 0 and grow by one each step.
        leading_start = np.zeros(window_size, dtype="int64")
        leading_end = np.arange(window_size, dtype="int64") + 1

        # Once a full window fits, start and end slide forward together.
        sliding_start = (
            np.arange(window_size, num_values, dtype="int64") - window_size + 1
        )
        sliding_end = sliding_start + window_size

        # Trim to num_values so the result holds when window_size > num_values.
        start = np.concatenate([leading_start, sliding_start])[:num_values]
        end = np.concatenate([leading_end, sliding_end])[:num_values]
        return start, end


class VariableWindowIndexer(BaseIndexer):
    """Window indexer producing bounds of variable length, namely for time series."""

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        window_size: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
        win_type: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # Delegate to the compiled routine; arguments are passed positionally
        # with the instance's stored index last.
        routine_args = (
            num_values,
            window_size,
            min_periods,
            center,
            closed,
            win_type,
            self.index,
        )
        return calculate_variable_window_bounds(*routine_args)
Loading