From 9a669f608918f61fb25b38f4ed808d733ac12b52 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 23 Apr 2020 09:28:13 +0300 Subject: [PATCH 1/6] whitelist rolling skewness --- pandas/core/window/common.py | 1 + pandas/tests/window/test_base_indexer.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 8707893dc20cf..082c2f533f3de 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -337,6 +337,7 @@ def validate_baseindexer_support(func_name: Optional[str]) -> None: "median", "std", "var", + "skew", "kurt", "quantile", } diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index aee47a085eb9c..563684c9cecbc 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -82,7 +82,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): df.rolling(indexer, win_type="boxcar") -@pytest.mark.parametrize("func", ["skew", "cov", "corr"]) +@pytest.mark.parametrize("func", ["cov", "corr"]) def test_notimplemented_functions(func): # GH 32865 class CustomIndexer(BaseIndexer): From 21c58bd7edd1d5c7e5aded0363ff0b6a96465186 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 23 Apr 2020 09:50:33 +0300 Subject: [PATCH 2/6] DOC: clarify that all sample metrics need careful treatment Clarify that any method that has Sample before its name needs to be used carefully with windows as this is almost never what the user wants. --- doc/source/user_guide/computation.rst | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index d7d025981f2f4..f9c07df956341 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -318,8 +318,8 @@ We provide a number of common statistical functions: :meth:`~Rolling.kurt`, Sample kurtosis (4th moment) :meth:`~Rolling.quantile`, Sample quantile (value at %) :meth:`~Rolling.apply`, Generic apply - :meth:`~Rolling.cov`, Unbiased covariance (binary) - :meth:`~Rolling.corr`, Correlation (binary) + :meth:`~Rolling.cov`, Sample covariance (binary) + :meth:`~Rolling.corr`, Sample correlation (binary) .. _computation.window_variance.caveats: @@ -341,6 +341,8 @@ We provide a number of common statistical functions: sample variance under the circumstances would result in a biased estimator of the variable we are trying to determine. + The same caveats apply to using any supported statistical sample methods. + .. _stats.rolling_apply: Rolling apply @@ -870,12 +872,12 @@ Method summary :meth:`~Expanding.max`, Maximum :meth:`~Expanding.std`, Sample standard deviation :meth:`~Expanding.var`, Sample variance - :meth:`~Expanding.skew`, Unbiased skewness (3rd moment) - :meth:`~Expanding.kurt`, Unbiased kurtosis (4th moment) + :meth:`~Expanding.skew`, Sample skewness (3rd moment) + :meth:`~Expanding.kurt`, Sample kurtosis (4th moment) :meth:`~Expanding.quantile`, Sample quantile (value at %) :meth:`~Expanding.apply`, Generic apply - :meth:`~Expanding.cov`, Unbiased covariance (binary) - :meth:`~Expanding.corr`, Correlation (binary) + :meth:`~Expanding.cov`, Sample covariance (binary) + :meth:`~Expanding.corr`, Sample correlation (binary) .. note:: @@ -884,6 +886,8 @@ Method summary windows. See :ref:`this section ` for more information. + The same caveats apply to using any supported statistical sample methods. + .. currentmodule:: pandas Aside from not having a ``window`` parameter, these functions have the same From 29d9945d9dbfc6c37928eac50f5cc464dc6bb04b Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 23 Apr 2020 10:28:48 +0300 Subject: [PATCH 3/6] BUG: fix self.window type check Also fix the value passed to calculate_min_periods for custom BaseIndexer implementations. --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3b14921528890..cdd61edba57ce 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -471,13 +471,13 @@ def _apply( def calc(x): x = np.concatenate((x, additional_nans)) - if not isinstance(window, BaseIndexer): + if not isinstance(self.window, BaseIndexer): min_periods = calculate_min_periods( window, self.min_periods, len(x), require_min_periods, floor ) else: min_periods = calculate_min_periods( - self.min_periods or 1, + window_indexer.window_size, self.min_periods, len(x), require_min_periods, From 401cac9689078744ab410219d4b295eecfd50761 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 23 Apr 2020 10:58:55 +0300 Subject: [PATCH 4/6] TST: add test --- pandas/tests/window/test_base_indexer.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 563684c9cecbc..0c942d55f26df 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -184,3 +184,16 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) result3 = getattr(rolling3, func)() expected3 = constructor(rolling3.apply(lambda x: np_func(x, **np_kwargs))) tm.assert_equal(result3, expected3) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_forward_skewness(constructor): + values = np.arange(10) + values[5] = 100.0 + + indexer = FixedForwardWindowIndexer(window_size=5) + rolling = constructor(values).rolling(window=indexer, min_periods=3) + result = rolling.skew() + + expected = constructor([0.0, 2.232396, 2.229508, 2.228340, 2.229091, 2.231989, 0.0, 0.0, np.nan, np.nan]) + tm.assert_equal(result, expected) From b6816191e5702649cc89834144d56c838355df5f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 23 Apr 2020 10:59:54 +0300 Subject: [PATCH 5/6] DOC: add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 07849702c646d..f7b21ceeb1fd8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -174,8 +174,8 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) - Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations. - From ef7042ace13d8298fdede6d4b4f9e715be843d52 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 23 Apr 2020 11:02:13 +0300 Subject: [PATCH 6/6] CLN: run black pandas --- pandas/tests/window/test_base_indexer.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 0c942d55f26df..15e6a904dd11a 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -195,5 +195,18 @@ def test_rolling_forward_skewness(constructor): rolling = constructor(values).rolling(window=indexer, min_periods=3) result = rolling.skew() - expected = constructor([0.0, 2.232396, 2.229508, 2.228340, 2.229091, 2.231989, 0.0, 0.0, np.nan, np.nan]) + expected = constructor( + [ + 0.0, + 2.232396, + 2.229508, + 2.228340, + 2.229091, + 2.231989, + 0.0, + 0.0, + np.nan, + np.nan, + ] + ) tm.assert_equal(result, expected)