diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index c2d234b5a06c1..8c51519cf2065 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -58,6 +58,18 @@ API changes rolling_min(s, window=10, min_periods=5) +- :func:`ewma`, :func:`ewmstd`, :func:`ewmvar`, :func:`ewmcorr`, and :func:`ewmcov` + now have an optional ``ignore_na`` argument. + When ``ignore_na = False`` (the default), missing values are taken into account in the weights calculation. + When ``ignore_na = True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation. + (:issue:`7603`) + + .. ipython:: python + + ewma(Series([None, 1., 100.]), com=2.5) + ewma(Series([1., None, 100.]), com=2.5, ignore_na=True) # pre-0.15.0 behavior + ewma(Series([1., None, 100.]), com=2.5, ignore_na=False) # default + - Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`) In prior versions this would drop the timezone. diff --git a/pandas/algos.pyx b/pandas/algos.pyx index d993447fc7408..54d71d79ae7c3 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -979,7 +979,7 @@ def roll_mean(ndarray[double_t] input, #------------------------------------------------------------------------------- # Exponentially weighted moving average -def ewma(ndarray[double_t] input, double_t com, int adjust): +def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na): ''' Compute exponentially-weighted moving average using center-of-mass. @@ -987,6 +987,8 @@ def ewma(ndarray[double_t] input, double_t com, int adjust): ---------- input : ndarray (float64 type) com : float64 + adjust: int + ignore_na: int Returns ------- @@ -1002,37 +1004,27 @@ def ewma(ndarray[double_t] input, double_t com, int adjust): if N == 0: return output - neww = 1. / (1. + com) - oldw = 1. - neww - adj = oldw + alpha = 1. / (1. + com) + old_wt_factor = 1. 
- alpha + new_wt = 1.0 if adjust else alpha - if adjust: - output[0] = neww * input[0] - else: - output[0] = input[0] + output[0] = input[0] + weighted_avg = output[0] + old_wt = 1. for i from 1 <= i < N: cur = input[i] - prev = output[i - 1] - - if cur == cur: - if prev == prev: - output[i] = oldw * prev + neww * cur - else: - output[i] = neww * cur + if weighted_avg == weighted_avg: + if cur == cur: + old_wt *= old_wt_factor + weighted_avg = ((old_wt * weighted_avg) + (new_wt * cur)) / (old_wt + new_wt) + old_wt += new_wt + elif not ignore_na: + old_wt *= old_wt_factor else: - output[i] = prev - - if adjust: - for i from 0 <= i < N: - cur = input[i] + weighted_avg = cur - if cur == cur: - output[i] = output[i] / (1. - adj) - adj *= oldw - else: - if i >= 1: - output[i] = output[i - 1] + output[i] = weighted_avg return output diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 5a405a5b74f7b..6f06255c7262d 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -89,6 +89,9 @@ imbalance in relative weightings (viewing EWMA as a moving average) how : string, default 'mean' Method for down- or re-sampling +ignore_na : boolean, default False + Ignore missing values when calculating weights; + specify True to reproduce pre-0.15.0 behavior """ _ewm_notes = r""" @@ -420,12 +423,12 @@ def _get_center_of_mass(com, span, halflife): _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, - adjust=True, how=None): + adjust=True, how=None, ignore_na=False): com = _get_center_of_mass(com, span, halflife) arg = _conv_timerule(arg, freq, how) def _ewma(v): - result = algos.ewma(v, com, int(adjust)) + result = algos.ewma(v, com, int(adjust), int(ignore_na)) first_index = _first_valid_index(v) result[first_index: first_index + min_periods] = NaN return result @@ -444,11 +447,11 @@ def _first_valid_index(arr): _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) 
@Appender(_doc_template) def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - freq=None, how=None): + freq=None, how=None, ignore_na=False): com = _get_center_of_mass(com, span, halflife) arg = _conv_timerule(arg, freq, how) - moment2nd = ewma(arg * arg, com=com, min_periods=min_periods) - moment1st = ewma(arg, com=com, min_periods=min_periods) + moment2nd = ewma(arg * arg, com=com, min_periods=min_periods, ignore_na=ignore_na) + moment1st = ewma(arg, com=com, min_periods=min_periods, ignore_na=ignore_na) result = moment2nd - moment1st ** 2 if not bias: @@ -460,9 +463,10 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, @Substitution("Exponentially-weighted moving std", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) -def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False): +def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, + ignore_na=False): result = ewmvar(arg, com=com, span=span, halflife=halflife, - min_periods=min_periods, bias=bias) + min_periods=min_periods, bias=bias, ignore_na=ignore_na) return _zsqrt(result) ewmvol = ewmstd @@ -472,7 +476,7 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False): _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, - bias=False, freq=None, pairwise=None, how=None): + bias=False, freq=None, pairwise=None, how=None, ignore_na=False): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -484,7 +488,8 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, arg2 = _conv_timerule(arg2, freq, how) def _get_ewmcov(X, Y): - mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods) + mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, 
min_periods=min_periods, + ignore_na=ignore_na) return (mean(X * Y) - mean(X) * mean(Y)) result = _flex_binary_moment(arg1, arg2, _get_ewmcov, pairwise=bool(pairwise)) @@ -499,7 +504,7 @@ def _get_ewmcov(X, Y): _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, - freq=None, pairwise=None, how=None): + freq=None, pairwise=None, how=None, ignore_na=False): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -511,9 +516,10 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, arg2 = _conv_timerule(arg2, freq, how) def _get_ewmcorr(X, Y): - mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods) + mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods, + ignore_na=ignore_na) var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods, - bias=True) + bias=True, ignore_na=ignore_na) return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y)) result = _flex_binary_moment(arg1, arg2, _get_ewmcorr, pairwise=bool(pairwise)) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 0c840a3803194..7124eaf6fb797 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -520,11 +520,64 @@ def test_ewma(self): result = mom.ewma(arr, span=100, adjust=False).sum() self.assertTrue(np.abs(result - 1) < 1e-2) + s = Series([1.0, 2.0, 4.0, 8.0]) + + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + for f in [lambda s: mom.ewma(s, com=2.0, adjust=True), + lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False), + lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=True), + ]: + result = f(s) + assert_series_equal(result, expected) + + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) + for f in [lambda s: mom.ewma(s, com=2.0, adjust=False), 
+ lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=False), + lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=True), + ]: + result = f(s) + assert_series_equal(result, expected) + def test_ewma_nan_handling(self): s = Series([1.] + [np.nan] * 5 + [1.]) + result = mom.ewma(s, com=5) + assert_almost_equal(result, [1.] * len(s)) + s = Series([np.nan] * 2 + [1.] + [np.nan] * 2 + [1.]) result = mom.ewma(s, com=5) - assert_almost_equal(result, [1] * len(s)) + assert_almost_equal(result, [np.nan] * 2 + [1.] * 4) + + # GH 7603 + s0 = Series([np.nan, 1., 101.]) + s1 = Series([1., np.nan, 101.]) + s2 = Series([np.nan, 1., np.nan, np.nan, 101., np.nan]) + com = 2. + alpha = 1. / (1. + com) + + def simple_wma(s, w): + return (s.multiply(w).cumsum() / w.cumsum()).fillna(method='ffill') + + for (s, adjust, ignore_na, w) in [ + (s0, True, False, [np.nan, (1.0 - alpha), 1.]), + (s0, True, True, [np.nan, (1.0 - alpha), 1.]), + (s0, False, False, [np.nan, (1.0 - alpha), alpha]), + (s0, False, True, [np.nan, (1.0 - alpha), alpha]), + (s1, True, False, [(1.0 - alpha)**2, np.nan, 1.]), + (s1, True, True, [(1.0 - alpha), np.nan, 1.]), + (s1, False, False, [(1.0 - alpha)**2, np.nan, alpha]), + (s1, False, True, [(1.0 - alpha), np.nan, alpha]), + (s2, True, False, [np.nan, (1.0 - alpha)**3, np.nan, np.nan, 1., np.nan]), + (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1., np.nan]), + (s2, False, False, [np.nan, (1.0 - alpha)**3, np.nan, np.nan, alpha, np.nan]), + (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), + ]: + expected = simple_wma(s, Series(w)) + result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na) + assert_series_equal(result, expected) + if ignore_na is False: + # check that ignore_na defaults to False + result = mom.ewma(s, com=com, adjust=adjust) + assert_series_equal(result, expected) def test_ewmvar(self): self._check_ew(mom.ewmvar)