From 5f8946c15e80ca2cdbf67f4eca3a3f6f1fc908a9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Mar 2021 10:10:04 +0100 Subject: [PATCH 1/5] [ArrayManager] TST: Enable extension tests --- .github/workflows/ci.yml | 1 + pandas/core/internals/array_manager.py | 5 ++++- pandas/tests/extension/base/casting.py | 13 ++++++++++--- pandas/tests/extension/base/constructors.py | 17 ++++++++++++----- pandas/tests/extension/base/getitem.py | 3 ++- pandas/tests/extension/base/interface.py | 3 ++- pandas/tests/extension/base/reshaping.py | 8 +++++++- pandas/tests/extension/test_external_block.py | 4 ++++ pandas/tests/extension/test_numpy.py | 5 +++++ pandas/tests/extension/test_sparse.py | 12 +++++++----- 10 files changed, 54 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6c60522092739..7b916b6d0225e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -182,6 +182,7 @@ jobs: pytest pandas/tests/computation/ pytest pandas/tests/config/ pytest pandas/tests/dtypes/ + pytest pandas/tests/extension/ pytest pandas/tests/generic/ pytest pandas/tests/indexes/ pytest pandas/tests/libs/ diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 906c95c825cab..eb47bcd4ac819 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -683,7 +683,10 @@ def get_numeric_data(self, copy: bool = False) -> ArrayManager: copy : bool, default False Whether to copy the blocks """ - return self._get_data_subset(lambda arr: is_numeric_dtype(arr.dtype)) + return self._get_data_subset( + lambda arr: is_numeric_dtype(arr.dtype) + or getattr(arr.dtype, "_is_numeric", False) + ) def copy(self: T, deep=True) -> T: """ diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 0b79a5368a542..7c5ef5b3b27d3 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -12,14 +12,21 @@ class BaseCastingTests(BaseExtensionTests): def test_astype_object_series(self, all_data): ser = pd.Series(all_data, name="A") result = ser.astype(object) - assert isinstance(result._mgr.blocks[0], ObjectBlock) + assert result.dtype == np.dtype(object) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], ObjectBlock) + assert isinstance(result._mgr.array, np.ndarray) + assert result._mgr.array.dtype == np.dtype(object) def test_astype_object_frame(self, all_data): df = pd.DataFrame({"A": all_data}) result = df.astype(object) - blk = result._data.blocks[0] - assert isinstance(blk, ObjectBlock), type(blk) + if hasattr(result._mgr, "blocks"): + blk = result._data.blocks[0] + assert isinstance(blk, ObjectBlock), type(blk) + assert isinstance(result._mgr.arrays[0], np.ndarray) + assert result._mgr.arrays[0].dtype == np.dtype(object) # FIXME: these currently fail; dont leave commented-out # check that we can compare the dtypes diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 6f0d8d16a0224..e2323620daa0e 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -2,6 +2,7 @@ import pytest import pandas as pd +from pandas.api.extensions import ExtensionArray from pandas.core.internals import ExtensionBlock from pandas.tests.extension.base.base import BaseExtensionTests @@ -24,13 +25,15 @@ def test_series_constructor(self, data): result = pd.Series(data) assert result.dtype == data.dtype assert len(result) == len(data) - assert isinstance(result._mgr.blocks[0], ExtensionBlock) - assert result._mgr.blocks[0].values is data + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], ExtensionBlock) + assert result._mgr.array is data # Series[EA] is unboxed / boxed correctly result2 = pd.Series(result) assert result2.dtype == data.dtype - assert isinstance(result2._mgr.blocks[0], ExtensionBlock) + if hasattr(result._mgr, "blocks"): + assert isinstance(result2._mgr.blocks[0], ExtensionBlock) def test_series_constructor_no_data_with_index(self, dtype, na_value): result = pd.Series(index=[1, 2, 3], dtype=dtype) @@ -64,13 +67,17 @@ def test_dataframe_constructor_from_dict(self, data, from_series): result = pd.DataFrame({"A": data}) assert result.dtypes["A"] == data.dtype assert result.shape == (len(data), 1) - assert isinstance(result._mgr.blocks[0], ExtensionBlock) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], ExtensionBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) def test_dataframe_from_series(self, data): result = pd.DataFrame(pd.Series(data)) assert result.dtypes[0] == data.dtype assert result.shape == (len(data), 1) - assert isinstance(result._mgr.blocks[0], ExtensionBlock) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], ExtensionBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) def test_series_given_mismatched_index_raises(self, data): msg = r"Length of values \(3\) does not match length of index \(5\)" diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 286ed9c736f31..f49db31aaccff 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -388,7 +388,8 @@ def test_loc_len1(self, data): # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim df = pd.DataFrame({"A": data}) res = df.loc[[0], "A"] - assert res._mgr._block.ndim == 1 + if hasattr(res._mgr, "blocks"): + assert res._mgr._block.ndim == 1 def test_item(self, data): # https://github.com/pandas-dev/pandas/pull/30175 diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 5bf26e2ca476e..f51f9f732bace 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -81,7 +81,8 @@ def test_no_values_attribute(self, data): def test_is_numeric_honored(self, data): result = pd.Series(data) - assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric + if hasattr(result._mgr, "blocks"): + assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric def test_isna_extension_array(self, data_missing): # If your `isna` returns an ExtensionArray, you must also implement diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 18f6084f989dc..5a2d928eea744 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -3,7 +3,10 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd +from pandas.api.extensions import ExtensionArray from pandas.core.internals import ExtensionBlock from pandas.tests.extension.base.base import BaseExtensionTests @@ -26,7 +29,9 @@ def test_concat(self, data, in_frame): dtype = result.dtype assert dtype == data.dtype - assert isinstance(result._mgr.blocks[0], ExtensionBlock) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], ExtensionBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) @pytest.mark.parametrize("in_frame", [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): @@ -106,6 +111,7 @@ def test_concat_extension_arrays_copy_false(self, data, na_value): result = pd.concat([df1, df2], axis=1, copy=False) self.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat reindex def test_concat_with_reindex(self, data): # GH-33027 a = pd.DataFrame({"a": data[:5]}) diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 693d0645c9519..5266333d467c9 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -1,10 +1,14 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas.core.internals import BlockManager from pandas.core.internals.blocks import ExtensionBlock +pytestmark = td.skip_array_manager_invalid_test + class CustomBlock(ExtensionBlock): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 718ef087e47d3..56a34b06e6ccd 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -16,6 +16,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.dtypes import ( ExtensionDtype, PandasDtype, @@ -28,6 +30,9 @@ from pandas.core.internals import managers from pandas.tests.extension import base +# TODO(ArrayManager) PandasArray +pytestmark = td.skip_array_manager_not_yet_implemented + def _extract_array_patched(obj): if isinstance(obj, (pd.Index, pd.Series)): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index b8e042f0599f7..0613c727dec98 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -290,7 +290,8 @@ def test_fillna_copy_frame(self, data_missing): filled_val = df.iloc[0, 0] result = df.fillna(filled_val) - assert df.values.base is not result.values.base + if hasattr(df._mgr, "blocks"): + assert df.values.base is not result.values.base assert df.A._values.to_dense() is arr.to_dense() def test_fillna_copy_series(self, data_missing): @@ -362,18 +363,19 @@ def test_equals(self, data, na_value, as_series, box): class TestCasting(BaseSparseTests, base.BaseCastingTests): def test_astype_object_series(self, all_data): # Unlike the base class, we do not expect the resulting Block - # to be ObjectBlock + # to be ObjectBlock / resulting array to be np.dtype("object") ser = pd.Series(all_data, name="A") result = ser.astype(object) - assert is_object_dtype(result._data.blocks[0].dtype) + assert is_object_dtype(result.dtype) + assert is_object_dtype(result._mgr.array.dtype) def test_astype_object_frame(self, all_data): # Unlike the base class, we do not expect the resulting Block - # to be ObjectBlock + # to be ObjectBlock / resulting array to be np.dtype("object") df = pd.DataFrame({"A": all_data}) result = df.astype(object) - assert is_object_dtype(result._data.blocks[0].dtype) + assert is_object_dtype(result._mgr.arrays[0].dtype) # FIXME: these currently fail; dont leave commented-out # check that we can compare the dtypes From d5e0ec85d03faa054480ff5b9a763e18f29d7763 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Mar 2021 10:20:42 +0100 Subject: [PATCH 2/5] array property --- pandas/core/internals/managers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8b08a5fd70537..728f6f4597b07 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1617,6 +1617,14 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: def index(self) -> Index: return self.axes[0] + @property + def array(self) -> ArrayLike: + """ + Quick access to the backing array of the Block. + Only for compatibility with ArrayManager for testing convenience. + """ + return self._block.values + @property def dtype(self) -> DtypeObj: return self._block.dtype From 3c0b848c7b441ffb9fa5e8dce9a83f08e9c291d6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Mar 2021 10:51:29 +0100 Subject: [PATCH 3/5] move array property --- pandas/core/internals/array_manager.py | 4 ---- pandas/core/internals/base.py | 8 ++++++++ pandas/core/internals/managers.py | 8 -------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index eb47bcd4ac819..5c4aea9af3023 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1181,10 +1181,6 @@ def axes(self): def index(self) -> Index: return self._axes[0] - @property - def array(self): - return self.arrays[0] - @property def dtype(self): return self.array.dtype diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 0e4b5ce2e7452..e942a12ac9dd2 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -7,6 +7,7 @@ TypeVar, ) +from pandas._typing import ArrayLike from pandas.errors import AbstractMethodError from pandas.core.base import PandasObject @@ -102,3 +103,10 @@ def equals(self, other: object) -> bool: class SingleDataManager(DataManager): ndim = 1 + + @property + def array(self) -> ArrayLike: + """ + Quick access to the backing array of the Block or SingleArrayManager. + """ + return self.arrays[0] # type: ignore[attr-defined] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 728f6f4597b07..8b08a5fd70537 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1617,14 +1617,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: def index(self) -> Index: return self.axes[0] - @property - def array(self) -> ArrayLike: - """ - Quick access to the backing array of the Block. - Only for compatibility with ArrayManager for testing convenience. - """ - return self._block.values - @property def dtype(self) -> DtypeObj: return self._block.dtype From f138f45af93c260fdc113488ba21cb9fe80580a6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Mar 2021 15:23:54 +0100 Subject: [PATCH 4/5] fixup property --- pandas/core/internals/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index e942a12ac9dd2..5d56f44b7122d 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -7,7 +7,6 @@ TypeVar, ) -from pandas._typing import ArrayLike from pandas.errors import AbstractMethodError from pandas.core.base import PandasObject @@ -105,7 +104,7 @@ class SingleDataManager(DataManager): ndim = 1 @property - def array(self) -> ArrayLike: + def array(self): """ Quick access to the backing array of the Block or SingleArrayManager. """ From 6f45917079e599b94fea177d89493fe598508d9d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 16 Mar 2021 09:14:00 +0100 Subject: [PATCH 5/5] update test --- pandas/tests/extension/base/getitem.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index fd423938a6f35..200a736771421 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -388,6 +388,8 @@ def test_loc_len1(self, data): # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim df = pd.DataFrame({"A": data}) res = df.loc[[0], "A"] + assert res.ndim == 1 + assert res._mgr.arrays[0].ndim == 1 if hasattr(res._mgr, "blocks"): assert res._mgr._block.ndim == 1