Skip to content

Commit 35dbf6c

Browse files
committed
BUG: rolling/expanding_* treatment of center
1 parent f400014 commit 35dbf6c

File tree

3 files changed

+77
-62
lines changed

3 files changed

+77
-62
lines changed

doc/source/v0.15.0.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,39 @@ API changes
9393
Previously the first ``min_periods`` entries of the result were set to ``NaN``.
9494
The new behavior accords with the existing documentation. (:issue:`7884`)
9595

96+
- :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`, :func:`rolling_median`,
97+
:func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`,
98+
:func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`,
99+
:func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same
100+
structure as the input ``arg`` with ``NaN`` values in the final ``(window-1)/2`` entries.
101+
Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed
102+
by ``(window-1)/2`` ``NaN`` values. (:issue:`7925`)
103+
104+
Prior behavior (note final value is ``NaN``):
105+
106+
.. code-block:: python
107+
108+
In [7]: rolling_sum(Series(range(5)), window=3, min_periods=0, center=True)
109+
Out[7]:
110+
0 1
111+
1 3
112+
2 6
113+
3 9
114+
4 NaN
115+
dtype: float64
116+
117+
New behavior (note final value is ``7 = sum([3, 4, NaN])``):
118+
119+
.. ipython:: python
120+
121+
rolling_sum(Series(range(5)), window=3, min_periods=0, center=True)
122+
123+
- Removed ``center`` argument from :func:`expanding_max`, :func:`expanding_min`, :func:`expanding_sum`,
124+
:func:`expanding_mean`, :func:`expanding_median`, :func:`expanding_std`, :func:`expanding_var`,
125+
:func:`expanding_skew`, :func:`expanding_kurt`, :func:`expanding_quantile`, :func:`expanding_count`,
126+
:func:`expanding_cov`, :func:`expanding_corr`, :func:`expanding_corr_pairwise`, and :func:`expanding_apply`,
127+
as the results produced when ``center=True`` did not make much sense. (:issue:`7925`)
128+
96129
- Bug where the timezone of a ``DatetimeIndex`` was not retained when constructing a ``DataFrame`` from a dict (:issue:`7822`)
97130

98131
In prior versions this would drop the timezone.

pandas/stats/moments.py

Lines changed: 35 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,10 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
372372
y : type of input
373373
"""
374374
arg = _conv_timerule(arg, freq, how)
375-
calc = lambda x: func(x, window, minp=minp, args=args, kwargs=kwargs,
375+
offset = int((window - 1) / 2.) if center else 0
376+
additional_nans = np.array([np.NaN] * offset)
377+
calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
378+
window, minp=minp, args=args, kwargs=kwargs,
376379
**kwds)
377380
return_hook, values = _process_data_structure(arg)
378381
# actually calculate the moment. Faster way to do this?
@@ -381,10 +384,10 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
381384
else:
382385
result = calc(values)
383386

384-
rs = return_hook(result)
385387
if center:
386-
rs = _center_window(rs, window, axis)
387-
return rs
388+
result = _center_window(result, window, axis)
389+
390+
return return_hook(result)
388391

389392

390393
def _center_window(rs, window, axis):
@@ -393,20 +396,13 @@ def _center_window(rs, window, axis):
393396
"dimensions")
394397

395398
offset = int((window - 1) / 2.)
396-
if isinstance(rs, (Series, DataFrame, Panel)):
397-
rs = rs.shift(-offset, axis=axis)
398-
else:
399-
rs_indexer = [slice(None)] * rs.ndim
400-
rs_indexer[axis] = slice(None, -offset)
401-
402-
lead_indexer = [slice(None)] * rs.ndim
403-
lead_indexer[axis] = slice(offset, None)
404-
405-
na_indexer = [slice(None)] * rs.ndim
406-
na_indexer[axis] = slice(-offset, None)
407-
408-
rs[tuple(rs_indexer)] = np.copy(rs[tuple(lead_indexer)])
409-
rs[tuple(na_indexer)] = np.nan
399+
if offset > 0:
400+
if isinstance(rs, (Series, DataFrame, Panel)):
401+
rs = rs.slice_shift(-offset, axis=axis)
402+
else:
403+
lead_indexer = [slice(None)] * rs.ndim
404+
lead_indexer[axis] = slice(offset, None)
405+
rs = np.copy(rs[tuple(lead_indexer)])
410406
return rs
411407

412408

@@ -821,13 +817,16 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
821817
arg = _conv_timerule(arg, freq, how)
822818
return_hook, values = _process_data_structure(arg)
823819

824-
f = lambda x: algos.roll_window(x, window, minp, avg=mean)
820+
offset = int((len(window) - 1) / 2.) if center else 0
821+
additional_nans = np.array([np.NaN] * offset)
822+
f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x,
823+
window, minp, avg=mean)
825824
result = np.apply_along_axis(f, axis, values)
826825

827-
rs = return_hook(result)
828826
if center:
829-
rs = _center_window(rs, len(window), axis)
830-
return rs
827+
result = _center_window(result, len(window), axis)
828+
829+
return return_hook(result)
831830

832831

833832
def _validate_win_type(win_type, kwargs):
@@ -856,14 +855,14 @@ def _expanding_func(func, desc, check_minp=_use_window):
856855
@Substitution(desc, _unary_arg, _expanding_kw, _type_of_input_retval, "")
857856
@Appender(_doc_template)
858857
@wraps(func)
859-
def f(arg, min_periods=1, freq=None, center=False, **kwargs):
858+
def f(arg, min_periods=1, freq=None, **kwargs):
860859
window = len(arg)
861860

862861
def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
863862
minp = check_minp(minp, window)
864863
return func(arg, window, minp, **kwds)
865864
return _rolling_moment(arg, window, call_cython, min_periods, freq=freq,
866-
center=center, **kwargs)
865+
**kwargs)
867866

868867
return f
869868

@@ -887,7 +886,7 @@ def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
887886
check_minp=_require_min_periods(4))
888887

889888

890-
def expanding_count(arg, freq=None, center=False):
889+
def expanding_count(arg, freq=None):
891890
"""
892891
Expanding count of number of non-NaN observations.
893892
@@ -897,8 +896,6 @@ def expanding_count(arg, freq=None, center=False):
897896
freq : string or DateOffset object, optional (default None)
898897
Frequency to conform the data to before computing the statistic. Specified
899898
as a frequency string or DateOffset object.
900-
center : boolean, default False
901-
Whether the label should correspond with center of window.
902899
903900
Returns
904901
-------
@@ -910,11 +907,10 @@ def expanding_count(arg, freq=None, center=False):
910907
frequency by resampling the data. This is done with the default parameters
911908
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
912909
"""
913-
return rolling_count(arg, len(arg), freq=freq, center=center)
910+
return rolling_count(arg, len(arg), freq=freq)
914911

915912

916-
def expanding_quantile(arg, quantile, min_periods=1, freq=None,
917-
center=False):
913+
def expanding_quantile(arg, quantile, min_periods=1, freq=None):
918914
"""Expanding quantile.
919915
920916
Parameters
@@ -928,8 +924,6 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None,
928924
freq : string or DateOffset object, optional (default None)
929925
Frequency to conform the data to before computing the statistic. Specified
930926
as a frequency string or DateOffset object.
931-
center : boolean, default False
932-
Whether the label should correspond with center of window.
933927
934928
Returns
935929
-------
@@ -942,14 +936,13 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None,
942936
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
943937
"""
944938
return rolling_quantile(arg, len(arg), quantile, min_periods=min_periods,
945-
freq=freq, center=center)
939+
freq=freq)
946940

947941

948942
@Substitution("Unbiased expanding covariance.", _binary_arg_flex,
949943
_expanding_kw+_pairwise_kw, _flex_retval, "")
950944
@Appender(_doc_template)
951-
def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False,
952-
pairwise=None):
945+
def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None):
953946
if arg2 is None:
954947
arg2 = arg1
955948
pairwise = True if pairwise is None else pairwise
@@ -960,14 +953,13 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False,
960953
window = len(arg1) + len(arg2)
961954
return rolling_cov(arg1, arg2, window,
962955
min_periods=min_periods, freq=freq,
963-
center=center, pairwise=pairwise)
956+
pairwise=pairwise)
964957

965958

966959
@Substitution("Expanding sample correlation.", _binary_arg_flex,
967960
_expanding_kw+_pairwise_kw, _flex_retval, "")
968961
@Appender(_doc_template)
969-
def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False,
970-
pairwise=None):
962+
def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None):
971963
if arg2 is None:
972964
arg2 = arg1
973965
pairwise = True if pairwise is None else pairwise
@@ -978,22 +970,21 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False,
978970
window = len(arg1) + len(arg2)
979971
return rolling_corr(arg1, arg2, window,
980972
min_periods=min_periods,
981-
freq=freq, center=center, pairwise=pairwise)
973+
freq=freq, pairwise=pairwise)
982974

983975

984976
@Substitution("Deprecated. Use expanding_corr(..., pairwise=True) instead.\n\n"
985977
"Pairwise expanding sample correlation", _pairwise_arg,
986978
_expanding_kw, _pairwise_retval, "")
987979
@Appender(_doc_template)
988-
def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None,
989-
center=False):
980+
def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None):
990981
import warnings
991982
warnings.warn("expanding_corr_pairwise is deprecated, use expanding_corr(..., pairwise=True)", FutureWarning)
992983
return expanding_corr(df1, df2, min_periods=min_periods,
993-
freq=freq, center=center, pairwise=True)
984+
freq=freq, pairwise=True)
994985

995986

996-
def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
987+
def expanding_apply(arg, func, min_periods=1, freq=None,
997988
args=(), kwargs={}):
998989
"""Generic expanding function application.
999990
@@ -1008,8 +999,6 @@ def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
1008999
freq : string or DateOffset object, optional (default None)
10091000
Frequency to conform the data to before computing the statistic. Specified
10101001
as a frequency string or DateOffset object.
1011-
center : boolean, default False
1012-
Whether the label should correspond with center of window.
10131002
args : tuple
10141003
Passed on to func
10151004
kwargs : dict
@@ -1027,4 +1016,4 @@ def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
10271016
"""
10281017
window = len(arg)
10291018
return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq,
1030-
center=center, args=args, kwargs=kwargs)
1019+
args=args, kwargs=kwargs)

pandas/stats/tests/test_moments.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -406,24 +406,16 @@ def _check_ndarray(self, func, static_comp, window=50,
406406
result = func(arr, 50)
407407
assert_almost_equal(result[-1], static_comp(arr[10:-10]))
408408

409+
# GH 7925
409410
if has_center:
410411
if has_min_periods:
411412
result = func(arr, 20, min_periods=15, center=True)
412-
expected = func(arr, 20, min_periods=15)
413+
expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:]
413414
else:
414415
result = func(arr, 20, center=True)
415-
expected = func(arr, 20)
416+
expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:]
416417

417-
assert_almost_equal(result[1], expected[10])
418-
if fill_value is None:
419-
self.assertTrue(np.isnan(result[-9:]).all())
420-
else:
421-
self.assertTrue((result[-9:] == 0).all())
422-
if has_min_periods:
423-
self.assertTrue(np.isnan(expected[23]))
424-
self.assertTrue(np.isnan(result[14]))
425-
self.assertTrue(np.isnan(expected[-5]))
426-
self.assertTrue(np.isnan(result[-14]))
418+
self.assert_numpy_array_equivalent(result, expected)
427419

428420
if test_stable:
429421
result = func(self.arr + 1e9, window)
@@ -488,20 +480,21 @@ def _check_structures(self, func, static_comp,
488480
assert_almost_equal(frame_result.xs(last_date),
489481
trunc_frame.apply(static_comp))
490482

483+
# GH 7925
491484
if has_center:
492485
if has_min_periods:
493486
minp = 10
494-
series_xp = func(self.series, 25, min_periods=minp).shift(-12)
495-
frame_xp = func(self.frame, 25, min_periods=minp).shift(-12)
487+
series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.series.index)
488+
frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.frame.index)
496489

497490
series_rs = func(self.series, 25, min_periods=minp,
498491
center=True)
499492
frame_rs = func(self.frame, 25, min_periods=minp,
500493
center=True)
501494

502495
else:
503-
series_xp = func(self.series, 25).shift(-12)
504-
frame_xp = func(self.frame, 25).shift(-12)
496+
series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.series.index)
497+
frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.frame.index)
505498

506499
series_rs = func(self.series, 25, center=True)
507500
frame_rs = func(self.frame, 25, center=True)

0 commit comments

Comments
 (0)