diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a6ba7770dadcc..c1a5c452e1df4 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -765,6 +765,7 @@ Numeric - Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) - Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`) - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) - Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) Conversion diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b896721469f1f..55dcc81139276 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9901,11 +9901,11 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar data = self.fillna(method=fill_method, limit=limit, axis=axis) rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 - rs = rs.loc[~rs.index.duplicated()] - rs = rs.reindex_like(data) - if freq is None: - mask = isna(com.values_from_object(data)) - np.putmask(rs.values, mask, np.nan) + if freq is not None: + # Shift method is implemented differently when freq is not None + # We want to restore the original index + rs = rs.loc[~rs.index.duplicated()] + rs = rs.reindex_like(data) return rs def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index 0c15533c37f01..ac13a5e146043 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -76,3 +76,21 @@ def test_pct_change_periods_freq( rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) tm.assert_frame_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + data = DataFrame( + {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 + ) + result = data.pct_change(fill_method=fill_method) + if fill_method is None: + second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] + else: + second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0] + expected = DataFrame( + {0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column}, + index=["a", "b"] * 3, + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index abc5c498813ef..aa01543132841 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -68,3 +68,12 @@ def test_pct_change_periods_freq( rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) tm.assert_series_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3) + result = s.pct_change(fill_method=fill_method) + expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3) + tm.assert_series_equal(result, expected)