From d89bd6e47c8830d9e492909049196d254840fc03 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 08:06:41 -0700 Subject: [PATCH 1/4] REF: move shift logic from BlockManager to DataFrame --- pandas/core/frame.py | 50 ++++++++++++++++++++----------- pandas/core/internals/managers.py | 19 ------------ 2 files changed, 33 insertions(+), 36 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b1f0ad8eda2aa..7881d1da5a33c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5080,27 +5080,43 @@ def shift( axis = self._get_axis_number(axis) ncols = len(self.columns) - if axis == 1 and periods != 0 and fill_value is lib.no_default and ncols > 0: - # We will infer fill_value to match the closest column - # Use a column that we know is valid for our column's dtype GH#38434 - label = self.columns[0] + if ( + axis == 1 + and periods != 0 + and ncols > 0 + and (fill_value is lib.no_default or len(self._mgr.arrays) > 1) + ): + # Exclude single-array-with-fill_value case so we issue a FutureWarning + # if an integer is passed with datetimelike dtype GH#31971 + from pandas import concat + # tail: the data that is still in our shifted DataFrame if periods > 0: - result = self.iloc[:, :-periods] - for col in range(min(ncols, abs(periods))): - # TODO(EA2D): doing this in a loop unnecessary with 2D EAs - # Define filler inside loop so we get a copy - filler = self.iloc[:, 0].shift(len(self)) - result.insert(0, label, filler, allow_duplicates=True) + tail = self.iloc[:, :-periods] else: - result = self.iloc[:, -periods:] - for col in range(min(ncols, abs(periods))): - # Define filler inside loop so we get a copy - filler = self.iloc[:, -1].shift(len(self)) - result.insert( - len(result.columns), label, filler, allow_duplicates=True - ) + tail = self.iloc[:, -periods:] + # pin a simple Index to avoid costly casting + tail.columns = range(len(tail.columns)) + + if fill_value is not lib.no_default: + # GH#35488 + # TODO(EA2D): with 2D EAs we could construct other directly + ser = Series(fill_value, index=self.index) + else: + # We infer fill_value to match the closest column + if periods > 0: + ser = self.iloc[:, 0].shift(len(self)) + else: + ser = self.iloc[:, -1].shift(len(self)) + + width = min(abs(periods), ncols) + other = concat([ser] * width, axis=1) + + if periods > 0: + result = concat([other, tail], axis=1) + else: + result = concat([tail, other], axis=1) result.columns = self.columns.copy() return result diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ea264da4c7b5f..d043e2fbd5e79 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -622,25 +622,6 @@ def shift(self, periods: int, axis: int, fill_value) -> BlockManager: if fill_value is lib.no_default: fill_value = None - if axis == 0 and self.ndim == 2 and self.nblocks > 1: - # GH#35488 we need to watch out for multi-block cases - # We only get here with fill_value not-lib.no_default - ncols = self.shape[0] - if periods > 0: - indexer = [-1] * periods + list(range(ncols - periods)) - else: - nper = abs(periods) - indexer = list(range(nper, ncols)) + [-1] * nper - result = self.reindex_indexer( - self.items, - indexer, - axis=0, - fill_value=fill_value, - allow_dups=True, - consolidate=False, - ) - return result - return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) def fillna(self, value, limit, inplace: bool, downcast) -> BlockManager: From 647a3939c89f236033c06afe120c8e879bf1f853 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 10:26:16 -0700 Subject: [PATCH 2/4] DOC: suppress warnings from CategoricalBlock deprecation --- doc/source/user_guide/io.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index cf153ddd2cbbd..3b7a6037a9715 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5240,6 +5240,7 @@ Write to a feather file. Read from a feather file. .. ipython:: python + :okwarning: result = pd.read_feather("example.feather") result @@ -5323,6 +5324,7 @@ Write to a parquet file. Read from a parquet file. .. ipython:: python + :okwarning: result = pd.read_parquet("example_fp.parquet", engine="fastparquet") result = pd.read_parquet("example_pa.parquet", engine="pyarrow") From 81f87a3e9970a23fdf67f2069df329fab31d4788 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 20:55:07 -0700 Subject: [PATCH 3/4] mypy fixup --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7881d1da5a33c..7d5c56b63ff86 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5118,6 +5118,7 @@ def shift( else: result = concat([tail, other], axis=1) + result = cast(DataFrame, result) result.columns = self.columns.copy() return result From 1d3804bbbac53efbd9e4ab6f6683f68cdf5d2837 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Mar 2021 09:27:13 -0700 Subject: [PATCH 4/4] un-xfail ArrayManager --- pandas/tests/apply/test_frame_transform.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 1a12cbff47092..bf53e41442182 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -39,15 +39,8 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): @pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel(axis, float_frame, op, using_array_manager, request): +def test_transform_groupby_kernel(axis, float_frame, op, request): # GH 35964 - if using_array_manager and op == "pct_change" and axis in (1, "columns"): - # TODO(ArrayManager) shift with axis=1 - request.node.add_marker( - pytest.mark.xfail( - reason="shift axis=1 not yet implemented for ArrayManager" - ) - ) args = [0.0] if op == "fillna" else [] if axis == 0 or axis == "index":