From 9a7dfa2612ee02f3a6d80289785ed795dcb7f45d Mon Sep 17 00:00:00 2001 From: frreiss Date: Wed, 28 Jul 2021 15:52:18 -0700 Subject: [PATCH 1/5] Add test for GH42430 --- pandas/tests/arrays/test_array.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 958ccec930f0e..bba8223de466e 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -317,6 +317,25 @@ def test_bounds_check(): pd.array([-1, 2, 3], dtype="UInt16") +def test_ellipsis_index(): + # GH42430 + class CapturingStringArray(pd.arrays.StringArray): + """Extend StringArray to capture arguments to __getitem__""" + + def __getitem__(self, item): + self.last_item_arg = item + return super().__getitem__(item) + + df = pd.DataFrame( + {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))}) + _ = df.iloc[:1] + + # String comparison because there's no native way to compare slices. + # Before the fix for GH42430, last_item_arg would get set to the 2D slice + # (Ellipsis, slice(None, 1, None)) + tm.assert_equal(str(df["col1"].array.last_item_arg), "slice(None, 1, None)") + + # --------------------------------------------------------------------------- # A couple dummy classes to ensure that Series and Indexes are unboxed before # getting to the EA classes. 
From 6b2f1b2a2763fb4b239c02d77dba043bc75a1a3f Mon Sep 17 00:00:00 2001 From: frreiss Date: Wed, 28 Jul 2021 15:52:43 -0700 Subject: [PATCH 2/5] Fix for GH42430 --- pandas/core/internals/blocks.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5d63fcdf7b0dc..8d5ec874a67f4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1552,12 +1552,8 @@ def _slice(self, slicer): def getitem_block_index(self, slicer: slice) -> ExtensionBlock: """ Perform __getitem__-like specialized to slicing along index. - - Assumes self.ndim == 2 """ - # error: Invalid index type "Tuple[ellipsis, slice]" for - # "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]" - new_values = self.values[..., slicer] # type: ignore[index] + new_values = self.array_values[slicer] return type(self)(new_values, self._mgr_locs, ndim=self.ndim) def fillna( From 6b09216f1baf4f5669922b666c820d31a5338128 Mon Sep 17 00:00:00 2001 From: frreiss Date: Wed, 28 Jul 2021 15:57:22 -0700 Subject: [PATCH 3/5] Run file through black --- pandas/tests/arrays/test_array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index bba8223de466e..2b82c2a8f4daf 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -327,7 +327,8 @@ def __getitem__(self, item): return super().__getitem__(item) df = pd.DataFrame( - {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))}) + {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))} + ) _ = df.iloc[:1] # String comparison because there's no native way to compare slices. 
From e41b22be1cbae93bca090617cee4f5d87585c893 Mon Sep 17 00:00:00 2001 From: frreiss Date: Wed, 28 Jul 2021 16:01:50 -0700 Subject: [PATCH 4/5] Add whatsnew entry --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index f4804215db8c1..afd8ab2422c92 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -26,7 +26,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`) - .. --------------------------------------------------------------------------- From 23f66f2c4d8d988c1022a9cd3d364c8fd638a2ca Mon Sep 17 00:00:00 2001 From: frreiss Date: Thu, 29 Jul 2021 15:55:12 -0700 Subject: [PATCH 5/5] Address review comments --- pandas/core/internals/blocks.py | 4 +++- pandas/tests/arrays/test_array.py | 20 -------------------- pandas/tests/extension/base/getitem.py | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 20729921ad768..953ccedaa5222 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1553,7 +1553,9 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock: """ Perform __getitem__-like specialized to slicing along index. """ - new_values = self.array_values[slicer] + # GH#42787 in principle this is equivalent to values[..., slicer], but we don't + # require subclasses of ExtensionArray to support that form (for now). 
+ new_values = self.values[slicer] return type(self)(new_values, self._mgr_locs, ndim=self.ndim) def fillna( diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 2b82c2a8f4daf..958ccec930f0e 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -317,26 +317,6 @@ def test_bounds_check(): pd.array([-1, 2, 3], dtype="UInt16") -def test_ellipsis_index(): - # GH42430 - class CapturingStringArray(pd.arrays.StringArray): - """Extend StringArray to capture arguments to __getitem__""" - - def __getitem__(self, item): - self.last_item_arg = item - return super().__getitem__(item) - - df = pd.DataFrame( - {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))} - ) - _ = df.iloc[:1] - - # String comparison because there's no native way to compare slices. - # Before the fix for GH42430, last_item_arg would get set to the 2D slice - # (Ellipsis, slice(None, 1, None)) - tm.assert_equal(str(df["col1"].array.last_item_arg), "slice(None, 1, None)") - - # --------------------------------------------------------------------------- # A couple dummy classes to ensure that Series and Indexes are unboxed before # getting to the EA classes. 
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 96833a2e49fa1..ac181af7875b5 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -425,3 +425,23 @@ def test_item(self, data): with pytest.raises(ValueError, match=msg): s.item() + + def test_ellipsis_index(self): + # GH42430 1D slices over extension types turn into N-dimensional slices over + # ExtensionArrays + class CapturingStringArray(pd.arrays.StringArray): + """Extend StringArray to capture arguments to __getitem__""" + + def __getitem__(self, item): + self.last_item_arg = item + return super().__getitem__(item) + + df = pd.DataFrame( + {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))} + ) + _ = df.iloc[:1] + + # String comparison because there's no native way to compare slices. + # Before the fix for GH42430, last_item_arg would get set to the 2D slice + # (Ellipsis, slice(None, 1, None)) + self.assert_equal(str(df["col1"].array.last_item_arg), "slice(None, 1, None)")