# Review summary for this patch. These lines sit ahead of the first "diff --git"
# header and belong to no hunk.
#
# Files touched:
#   doc/source/release.rst
#       Adds a "Bug Fixes" entry for rolling skew/kurtosis (issue 5749).
#   pandas/algos.pyx
#       roll_skew: stores NaN in output[i] when B == 0 or nobs < 3; otherwise
#       evaluates the same skew expression as before.
#       roll_kurt: stores NaN in output[i] when B == 0 or nobs < 4; otherwise
#       computes K exactly as before and stores it.
#   pandas/stats/tests/test_moments.py
#       Adds test_rolling_skew_edge_cases and test_rolling_kurt_edge_cases. Each
#       checks three cases: a constant Series with window=5 (expects all NaN),
#       a window below the new nobs threshold (window=2 for skew, window=3 for
#       kurt; expects all NaN), and a fixed five-value Series with window=4
#       compared against literal expected values.
#
# NOTE(review): the diff text below appears to have had its line breaks collapsed
# into spaces, so hunk boundaries, blank context lines and indentation are not
# recoverable from this copy. Regenerate it from the originating commit before
# trying to apply it.
# NOTE(review): both guards compare B to 0 exactly. Whether floating-point
# round-off can leave a tiny non-zero B for constant input is not established
# here - confirm, and consider `B <= 0` or a tolerance check.
diff --git a/doc/source/release.rst b/doc/source/release.rst index 8de8929c5fa7a..0666eb7f88675 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -111,6 +111,7 @@ Bug Fixes - Bug in ``pd.read_msgpack`` with inferring a ``DateTimeIndex`` frequencey incorrectly (:issue:`5947`) - Fixed ``to_datetime`` for array with both Tz-aware datetimes and ``NaT``s (:issue:`5961`) + - Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`) pandas 0.13.0 ------------- diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 08ec707b0d96d..d916de32b7cd3 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -1167,8 +1167,11 @@ def roll_skew(ndarray[double_t] input, int win, int minp): R = sqrt(B) - output[i] = ((sqrt(nobs * (nobs - 1.)) * C) / - ((nobs-2) * R * R * R)) + if B == 0 or nobs < 3: + output[i] = NaN + else: + output[i] = ((sqrt(nobs * (nobs - 1.)) * C) / + ((nobs-2) * R * R * R)) else: output[i] = NaN @@ -1236,10 +1239,15 @@ def roll_kurt(ndarray[double_t] input, R = R * A D = xxxx / nobs - R - 6*B*A*A - 4*C*A - K = (nobs * nobs - 1.)*D/(B*B) - 3*((nobs-1.)**2) - K = K / ((nobs - 2.)*(nobs-3.)) + if B == 0 or nobs < 4: + output[i] = NaN + + else: + K = (nobs * nobs - 1.)*D/(B*B) - 3*((nobs-1.)**2) + K = K / ((nobs - 2.)*(nobs-3.)) + + output[i] = K - output[i] = K else: output[i] = NaN diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 7381d4c1ae0b4..970adeace1e0f 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -741,6 +741,50 @@ def test_expanding_corr_pairwise(self): for i in result.items: assert_almost_equal(result[i], rolling_result[i]) + def test_rolling_skew_edge_cases(self): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = mom.rolling_skew(d, window=5) + assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = mom.rolling_skew(d, window=2) + 
assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 0.177994, 1.548824] + d = Series([-1.50837035, -0.1297039 , 0.19501095, + 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, + 0.177994, 1.548824]) + x = mom.rolling_skew(d, window=4) + assert_series_equal(expected, x) + + def test_rolling_kurt_edge_cases(self): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = mom.rolling_kurt(d, window=5) + assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = mom.rolling_kurt(d, window=3) + assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 1.224307, 2.671499] + d = Series([-1.50837035, -0.1297039 , 0.19501095, + 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, + 1.224307, 2.671499]) + x = mom.rolling_kurt(d, window=4) + assert_series_equal(expected, x) + def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, has_time_rule=True, preserve_nan=True): result = func(self.arr)