diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 0797e62de7a9f..284f8ef135d99 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -11,8 +11,6 @@ Block, DatetimeTZBlock, ExtensionBlock, - NumericBlock, - ObjectBlock, ) from pandas.core.internals.concat import concatenate_managers from pandas.core.internals.managers import ( @@ -23,10 +21,8 @@ __all__ = [ "Block", - "NumericBlock", "DatetimeTZBlock", "ExtensionBlock", - "ObjectBlock", "make_block", "DataManager", "ArrayManager", @@ -38,3 +34,27 @@ # this is preserved here for downstream compatibility (GH-33892) "create_block_manager_from_blocks", ] + + +def __getattr__(name: str): + import warnings + + from pandas.util._exceptions import find_stack_level + + if name in ["NumericBlock", "ObjectBlock"]: + warnings.warn( + f"{name} is deprecated and will be removed in a future version. " + "Use public APIs instead.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + if name == "NumericBlock": + from pandas.core.internals.blocks import NumericBlock + + return NumericBlock + else: + from pandas.core.internals.blocks import ObjectBlock + + return ObjectBlock + + raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a08a44a11866a..7c5d686d96939 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -469,13 +469,36 @@ def convert( using_cow: bool = False, ) -> list[Block]: """ - attempt to coerce any object types to better types return a copy - of the block (if copy = True) by definition we are not an ObjectBlock - here! + Attempt to coerce any object types to better types. Return a copy + of the block (if copy = True). """ - if not copy and using_cow: - return [self.copy(deep=False)] - return [self.copy()] if copy else [self] + if not self.is_object: + if not copy and using_cow: + return [self.copy(deep=False)] + return [self.copy()] if copy else [self] + + if self.ndim != 1 and self.shape[0] != 1: + return self.split_and_operate(Block.convert, copy=copy, using_cow=using_cow) + + values = self.values + if values.ndim == 2: + # the check above ensures we only get here with values.shape[0] == 1, + # avoid doing .ravel as that might make a copy + values = values[0] + + res_values = lib.maybe_convert_objects( + values, # type: ignore[arg-type] + convert_non_numeric=True, + ) + refs = None + if copy and res_values is values: + res_values = values.copy() + elif res_values is values and using_cow: + refs = self.refs + + res_values = ensure_block_shape(res_values, self.ndim) + res_values = maybe_coerce_values(res_values) + return [self.make_block(res_values, refs=refs)] # --------------------------------------------------------------------- # Array-Like Methods @@ -680,7 +703,7 @@ def _replace_regex( List[Block] """ if not self._can_hold_element(to_replace): - # i.e. only ObjectBlock, but could in principle include a + # i.e. only if self.is_object is True, but could in principle include a # String ExtensionBlock if using_cow: return [self.copy(deep=False)] @@ -1273,7 +1296,7 @@ def fillna( ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to - ObjectBlock and try again + block to hold objects instead and try again """ # Caller is responsible for validating limit; if int it is strictly positive inplace = validate_bool_kwarg(inplace, "inplace") @@ -2064,7 +2087,7 @@ def _unstack( needs_masking: npt.NDArray[np.bool_], ): # ExtensionArray-safe unstack. - # We override ObjectBlock._unstack, which unstacks directly on the + # We override Block._unstack, which unstacks directly on the # values of the array. For EA-backed blocks, this would require # converting to a 2-D ndarray of objects. # Instead, we unstack an ndarray of integer positions, followed by @@ -2100,6 +2123,7 @@ def _unstack( class NumpyBlock(libinternals.NumpyBlock, Block): values: np.ndarray + __slots__ = () @property def is_view(self) -> bool: @@ -2118,10 +2142,28 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: def values_for_json(self) -> np.ndarray: return self.values + @cache_readonly + def is_numeric(self) -> bool: # type: ignore[override] + dtype = self.values.dtype + kind = dtype.kind + + return kind in "fciub" + + @cache_readonly + def is_object(self) -> bool: # type: ignore[override] + return self.values.dtype.kind == "O" + class NumericBlock(NumpyBlock): + # this Block type is kept for backwards-compatibility + # TODO(3.0): delete and remove deprecation in __init__.py. + __slots__ = () + + +class ObjectBlock(NumpyBlock): + # this Block type is kept for backwards-compatibility + # TODO(3.0): delete and remove deprecation in __init__.py. __slots__ = () - is_numeric = True class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock): @@ -2257,49 +2299,6 @@ class DatetimeTZBlock(DatetimeLikeBlock): values_for_json = NDArrayBackedExtensionBlock.values_for_json -class ObjectBlock(NumpyBlock): - __slots__ = () - is_object = True - - @maybe_split - def convert( - self, - *, - copy: bool = True, - using_cow: bool = False, - ) -> list[Block]: - """ - attempt to cast any object types to better types return a copy of - the block (if copy = True) by definition we ARE an ObjectBlock!!!!! - """ - if self.dtype != _dtype_obj: - # GH#50067 this should be impossible in ObjectBlock, but until - # that is fixed, we short-circuit here. - if using_cow: - return [self.copy(deep=False)] - return [self] - - values = self.values - if values.ndim == 2: - # maybe_split ensures we only get here with values.shape[0] == 1, - # avoid doing .ravel as that might make a copy - values = values[0] - - res_values = lib.maybe_convert_objects( - values, - convert_non_numeric=True, - ) - refs = None - if copy and res_values is values: - res_values = values.copy() - elif res_values is values and using_cow: - refs = self.refs - - res_values = ensure_block_shape(res_values, self.ndim) - res_values = maybe_coerce_values(res_values) - return [self.make_block(res_values, refs=refs)] - - # ----------------------------------------------------------------- # Constructor Helpers @@ -2358,10 +2357,8 @@ def get_block_type(dtype: DtypeObj) -> type[Block]: kind = dtype.kind if kind in "Mm": return DatetimeLikeBlock - elif kind in "fciub": - return NumericBlock - return ObjectBlock + return NumpyBlock def new_block_2d( diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index e24c3b4569e3e..5a6b0d38e5055 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -4,7 +4,7 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas.core.internals import ObjectBlock +from pandas.core.internals.blocks import NumpyBlock from pandas.tests.extension.base.base import BaseExtensionTests @@ -16,7 +16,9 @@ def test_astype_object_series(self, all_data): result = ser.astype(object) assert result.dtype == np.dtype(object) if hasattr(result._mgr, "blocks"): - assert isinstance(result._mgr.blocks[0], ObjectBlock) + blk = result._mgr.blocks[0] + assert isinstance(blk, NumpyBlock) + assert blk.is_object assert isinstance(result._mgr.array, np.ndarray) assert result._mgr.array.dtype == np.dtype(object) @@ -26,7 +28,8 @@ def test_astype_object_frame(self, all_data): result = df.astype(object) if hasattr(result._mgr, "blocks"): blk = result._mgr.blocks[0] - assert isinstance(blk, ObjectBlock), type(blk) + assert isinstance(blk, NumpyBlock), type(blk) + assert blk.is_object assert isinstance(result._mgr.arrays[0], np.ndarray) assert result._mgr.arrays[0].dtype == np.dtype(object) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 3ad5c304d9a30..6b6c1f6f64ff7 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -20,10 +20,7 @@ option_context, ) import pandas._testing as tm -from pandas.core.internals import ( - NumericBlock, - ObjectBlock, -) +from pandas.core.internals.blocks import NumpyBlock # Segregated collection of methods that require the BlockManager internal data # structure @@ -387,7 +384,8 @@ def test_constructor_no_pandas_array(self): result = DataFrame({"A": arr}) expected = DataFrame({"A": [1, 2, 3]}) tm.assert_frame_equal(result, expected) - assert isinstance(result._mgr.blocks[0], NumericBlock) + assert isinstance(result._mgr.blocks[0], NumpyBlock) + assert result._mgr.blocks[0].is_numeric def test_add_column_with_pandas_array(self): # GH 26390 @@ -400,8 +398,10 @@ def test_add_column_with_pandas_array(self): "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)), } ) - assert type(df["c"]._mgr.blocks[0]) == ObjectBlock - assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock + assert type(df["c"]._mgr.blocks[0]) == NumpyBlock + assert df["c"]._mgr.blocks[0].is_object + assert type(df2["c"]._mgr.blocks[0]) == NumpyBlock + assert df2["c"]._mgr.blocks[0].is_object tm.assert_frame_equal(df, df2) diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index c759cc163106d..5cd6c718260ea 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -27,10 +27,8 @@ def test_namespace(): ] expected = [ "Block", - "NumericBlock", "DatetimeTZBlock", "ExtensionBlock", - "ObjectBlock", "make_block", "DataManager", "ArrayManager", diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 9750e8d32c844..4bf16b6d20d1f 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -46,7 +46,7 @@ IntervalArray, period_array, ) -from pandas.core.internals.blocks import NumericBlock +from pandas.core.internals.blocks import NumpyBlock class TestSeriesConstructors: @@ -2098,7 +2098,8 @@ def test_constructor_no_pandas_array(self, using_array_manager): result = Series(ser.array) tm.assert_series_equal(ser, result) if not using_array_manager: - assert isinstance(result._mgr.blocks[0], NumericBlock) + assert isinstance(result._mgr.blocks[0], NumpyBlock) + assert result._mgr.blocks[0].is_numeric @td.skip_array_manager_invalid_test def test_from_array(self):