From b123ece6f84e907b9db289b688ed48f694783853 Mon Sep 17 00:00:00 2001 From: Elliot S Date: Fri, 20 Dec 2013 14:26:15 -0500 Subject: [PATCH] Update rolling skew & kurtosis to handle cases where they aren't defined The rolling skewness and kurtosis in algos.pyx were modified to match the testing logic in pandas/core/nanops.py. They now both return NaN where they are not defined, which occurs when there are too few observations or when the variance is zero. A set of tests was added to verify that NaN is returned in these cases and that the computations continue to work correctly when the values are defined. --- doc/source/release.rst | 1 + pandas/algos.pyx | 18 ++++++++---- pandas/stats/tests/test_moments.py | 44 ++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 8de8929c5fa7a..0666eb7f88675 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -111,6 +111,7 @@ Bug Fixes - Bug in ``pd.read_msgpack`` with inferring a ``DateTimeIndex`` frequencey incorrectly (:issue:`5947`) - Fixed ``to_datetime`` for array with both Tz-aware datetimes and ``NaT``s (:issue:`5961`) + - Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`) pandas 0.13.0 ------------- diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 08ec707b0d96d..d916de32b7cd3 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -1167,8 +1167,11 @@ def roll_skew(ndarray[double_t] input, int win, int minp): R = sqrt(B) - output[i] = ((sqrt(nobs * (nobs - 1.)) * C) / - ((nobs-2) * R * R * R)) + if B == 0 or nobs < 3: + output[i] = NaN + else: + output[i] = ((sqrt(nobs * (nobs - 1.)) * C) / + ((nobs-2) * R * R * R)) else: output[i] = NaN @@ -1236,10 +1239,15 @@ def roll_kurt(ndarray[double_t] input, R = R * A D = xxxx / nobs - R - 6*B*A*A - 4*C*A - K = (nobs * nobs - 1.)*D/(B*B) - 3*((nobs-1.)**2) - K = K / ((nobs - 2.)*(nobs-3.)) + if B == 0 or nobs < 4: + output[i] = 
NaN + + else: + K = (nobs * nobs - 1.)*D/(B*B) - 3*((nobs-1.)**2) + K = K / ((nobs - 2.)*(nobs-3.)) + + output[i] = K - output[i] = K else: output[i] = NaN diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 7381d4c1ae0b4..970adeace1e0f 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -741,6 +741,50 @@ def test_expanding_corr_pairwise(self): for i in result.items: assert_almost_equal(result[i], rolling_result[i]) + def test_rolling_skew_edge_cases(self): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = mom.rolling_skew(d, window=5) + assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = mom.rolling_skew(d, window=2) + assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 0.177994, 1.548824] + d = Series([-1.50837035, -0.1297039 , 0.19501095, + 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, + 0.177994, 1.548824]) + x = mom.rolling_skew(d, window=4) + assert_series_equal(expected, x) + + def test_rolling_kurt_edge_cases(self): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = mom.rolling_kurt(d, window=5) + assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = mom.rolling_kurt(d, window=3) + assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 1.224307, 2.671499] + d = Series([-1.50837035, -0.1297039 , 0.19501095, + 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, + 1.224307, 2.671499]) + x = mom.rolling_kurt(d, window=4) + assert_series_equal(expected, x) + def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, has_time_rule=True, preserve_nan=True): result = func(self.arr)