From a62d90fb77a61cb527e67fd6041112b472e8b5cf Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 30 Jun 2021 08:30:33 +0100 Subject: [PATCH] Revert "Revert "REF: move shift logic from BlockManager to DataFrame (#40536)" (#42317)" This reverts commit 027d37406cb6616f43fc5e1fb65a5fd90dcec545. --- pandas/core/frame.py | 51 ++++++++++++++-------- pandas/core/internals/managers.py | 19 -------- pandas/tests/apply/test_frame_transform.py | 9 +--- 3 files changed, 35 insertions(+), 44 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8ccde3093cf70..6f621699aa5ae 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5279,28 +5279,45 @@ def shift( axis = self._get_axis_number(axis) ncols = len(self.columns) - if axis == 1 and periods != 0 and fill_value is lib.no_default and ncols > 0: - # We will infer fill_value to match the closest column - # Use a column that we know is valid for our column's dtype GH#38434 - label = self.columns[0] + if ( + axis == 1 + and periods != 0 + and ncols > 0 + and (fill_value is lib.no_default or len(self._mgr.arrays) > 1) + ): + # Exclude single-array-with-fill_value case so we issue a FutureWarning + # if an integer is passed with datetimelike dtype GH#31971 + from pandas import concat + # tail: the data that is still in our shifted DataFrame if periods > 0: - result = self.iloc[:, :-periods] - for col in range(min(ncols, abs(periods))): - # TODO(EA2D): doing this in a loop unnecessary with 2D EAs - # Define filler inside loop so we get a copy - filler = self.iloc[:, 0].shift(len(self)) - result.insert(0, label, filler, allow_duplicates=True) + tail = self.iloc[:, :-periods] else: - result = self.iloc[:, -periods:] - for col in range(min(ncols, abs(periods))): - # Define filler inside loop so we get a copy - filler = self.iloc[:, -1].shift(len(self)) - result.insert( - len(result.columns), label, filler, allow_duplicates=True - ) + tail = self.iloc[:, -periods:] + # pin a simple Index to avoid costly casting + tail.columns = range(len(tail.columns)) + + if fill_value is not lib.no_default: + # GH#35488 + # TODO(EA2D): with 2D EAs we could construct other directly + ser = Series(fill_value, index=self.index) + else: + # We infer fill_value to match the closest column + if periods > 0: + ser = self.iloc[:, 0].shift(len(self)) + else: + ser = self.iloc[:, -1].shift(len(self)) + + width = min(abs(periods), ncols) + other = concat([ser] * width, axis=1) + + if periods > 0: + result = concat([other, tail], axis=1) + else: + result = concat([tail, other], axis=1) + result = cast(DataFrame, result) result.columns = self.columns.copy() return result diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dca6ddf703446..1578c6fb2261a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -381,25 +381,6 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: if fill_value is lib.no_default: fill_value = None - if axis == 0 and self.ndim == 2 and self.nblocks > 1: - # GH#35488 we need to watch out for multi-block cases - # We only get here with fill_value not-lib.no_default - ncols = self.shape[0] - if periods > 0: - indexer = [-1] * periods + list(range(ncols - periods)) - else: - nper = abs(periods) - indexer = list(range(nper, ncols)) + [-1] * nper - result = self.reindex_indexer( - self.items, - indexer, - axis=0, - fill_value=fill_value, - allow_dups=True, - consolidate=False, - ) - return result - return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) def fillna(self: T, value, limit, inplace: bool, downcast) -> T: diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 0d3d4eecf92aa..9050fab702881 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -39,15 +39,8 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): @pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel(axis, float_frame, op, using_array_manager, request): +def test_transform_groupby_kernel(axis, float_frame, op, request): # GH 35964 - if using_array_manager and op == "pct_change" and axis in (1, "columns"): - # TODO(ArrayManager) shift with axis=1 - request.node.add_marker( - pytest.mark.xfail( - reason="shift axis=1 not yet implemented for ArrayManager" - ) - ) args = [0.0] if op == "fillna" else [] if axis == 0 or axis == "index":