Skip to content

Commit b8c3fa0

Browse files
committed
CLN: unify NumpyBlock, ObjectBlock, and NumericBlock
1 parent bcd5d25 commit b8c3fa0

File tree

6 files changed

+75
-76
lines changed

6 files changed

+75
-76
lines changed

pandas/core/internals/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111
Block,
1212
DatetimeTZBlock,
1313
ExtensionBlock,
14-
NumericBlock,
15-
ObjectBlock,
14+
NumpyBlock,
1615
)
1716
from pandas.core.internals.concat import concatenate_managers
1817
from pandas.core.internals.managers import (
@@ -23,10 +22,9 @@
2322

2423
__all__ = [
2524
"Block",
26-
"NumericBlock",
2725
"DatetimeTZBlock",
2826
"ExtensionBlock",
29-
"ObjectBlock",
27+
"NumpyBlock",
3028
"make_block",
3129
"DataManager",
3230
"ArrayManager",

pandas/core/internals/blocks.py

Lines changed: 59 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -467,9 +467,8 @@ def convert(
467467
using_cow: bool = False,
468468
) -> list[Block]:
469469
"""
470-
attempt to coerce any object types to better types return a copy
471-
of the block (if copy = True) by definition we are not an ObjectBlock
472-
here!
470+
Attempt to coerce any object types to better types. Return a copy
471+
of the block (if copy = True).
473472
"""
474473
if not copy and using_cow:
475474
return [self.copy(deep=False)]
@@ -678,7 +677,7 @@ def _replace_regex(
678677
List[Block]
679678
"""
680679
if not self._can_hold_element(to_replace):
681-
# i.e. only ObjectBlock, but could in principle include a
680+
# i.e. only if self.is_object is True, but could in principle include a
682681
# String ExtensionBlock
683682
if using_cow:
684683
return [self.copy(deep=False)]
@@ -1269,7 +1268,7 @@ def fillna(
12691268
) -> list[Block]:
12701269
"""
12711270
fillna on the block with the value. If we fail, then convert to
1272-
ObjectBlock and try again
1271+
a block that can hold objects and try again
12731272
"""
12741273
# Caller is responsible for validating limit; if int it is strictly positive
12751274
inplace = validate_bool_kwarg(inplace, "inplace")
@@ -2060,7 +2059,7 @@ def _unstack(
20602059
needs_masking: npt.NDArray[np.bool_],
20612060
):
20622061
# ExtensionArray-safe unstack.
2063-
# We override ObjectBlock._unstack, which unstacks directly on the
2062+
# We override Block._unstack, which unstacks directly on the
20642063
# values of the array. For EA-backed blocks, this would require
20652064
# converting to a 2-D ndarray of objects.
20662065
# Instead, we unstack an ndarray of integer positions, followed by
@@ -2096,6 +2095,7 @@ def _unstack(
20962095

20972096
class NumpyBlock(libinternals.NumpyBlock, Block):
20982097
values: np.ndarray
2098+
__slots__ = ()
20992099

21002100
@property
21012101
def is_view(self) -> bool:
@@ -2114,10 +2114,59 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
21142114
def values_for_json(self) -> np.ndarray:
21152115
return self.values
21162116

2117+
@cache_readonly
2118+
def is_numeric(self) -> bool:
2119+
dtype = self.values.dtype
2120+
kind = dtype.kind
21172121

2118-
class NumericBlock(NumpyBlock):
2119-
__slots__ = ()
2120-
is_numeric = True
2122+
if kind in "fciub":
2123+
return True
2124+
else:
2125+
return False
2126+
2127+
@cache_readonly
2128+
def is_object(self) -> bool:
2129+
if self.values.dtype.kind == "O":
2130+
return True
2131+
else:
2132+
return False
2133+
2134+
@maybe_split
2135+
def convert(
2136+
self,
2137+
*,
2138+
copy: bool = True,
2139+
using_cow: bool = False,
2140+
) -> list[Block]:
2141+
"""
2142+
Attempt to coerce any object types to better types. Return a copy
2143+
of the block (if copy = True).
2144+
"""
2145+
if not self.is_object:
2146+
return super().convert(copy=copy, using_cow=using_cow)
2147+
2148+
values = self.values
2149+
if values.ndim == 2:
2150+
# maybe_split ensures we only get here with values.shape[0] == 1,
2151+
# avoid doing .ravel as that might make a copy
2152+
values = values[0]
2153+
2154+
res_values = lib.maybe_convert_objects(
2155+
values,
2156+
convert_datetime=True,
2157+
convert_timedelta=True,
2158+
convert_period=True,
2159+
convert_interval=True,
2160+
)
2161+
refs = None
2162+
if copy and res_values is values:
2163+
res_values = values.copy()
2164+
elif res_values is values and using_cow:
2165+
refs = self.refs
2166+
2167+
res_values = ensure_block_shape(res_values, self.ndim)
2168+
res_values = maybe_coerce_values(res_values)
2169+
return [self.make_block(res_values, refs=refs)]
21212170

21222171

21232172
class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
@@ -2253,52 +2302,6 @@ class DatetimeTZBlock(DatetimeLikeBlock):
22532302
values_for_json = NDArrayBackedExtensionBlock.values_for_json
22542303

22552304

2256-
class ObjectBlock(NumpyBlock):
2257-
__slots__ = ()
2258-
is_object = True
2259-
2260-
@maybe_split
2261-
def convert(
2262-
self,
2263-
*,
2264-
copy: bool = True,
2265-
using_cow: bool = False,
2266-
) -> list[Block]:
2267-
"""
2268-
attempt to cast any object types to better types return a copy of
2269-
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
2270-
"""
2271-
if self.dtype != _dtype_obj:
2272-
# GH#50067 this should be impossible in ObjectBlock, but until
2273-
# that is fixed, we short-circuit here.
2274-
if using_cow:
2275-
return [self.copy(deep=False)]
2276-
return [self]
2277-
2278-
values = self.values
2279-
if values.ndim == 2:
2280-
# maybe_split ensures we only get here with values.shape[0] == 1,
2281-
# avoid doing .ravel as that might make a copy
2282-
values = values[0]
2283-
2284-
res_values = lib.maybe_convert_objects(
2285-
values,
2286-
convert_datetime=True,
2287-
convert_timedelta=True,
2288-
convert_period=True,
2289-
convert_interval=True,
2290-
)
2291-
refs = None
2292-
if copy and res_values is values:
2293-
res_values = values.copy()
2294-
elif res_values is values and using_cow:
2295-
refs = self.refs
2296-
2297-
res_values = ensure_block_shape(res_values, self.ndim)
2298-
res_values = maybe_coerce_values(res_values)
2299-
return [self.make_block(res_values, refs=refs)]
2300-
2301-
23022305
# -----------------------------------------------------------------
23032306
# Constructor Helpers
23042307

@@ -2357,10 +2360,8 @@ def get_block_type(dtype: DtypeObj) -> type[Block]:
23572360
kind = dtype.kind
23582361
if kind in "Mm":
23592362
return DatetimeLikeBlock
2360-
elif kind in "fciub":
2361-
return NumericBlock
23622363

2363-
return ObjectBlock
2364+
return NumpyBlock
23642365

23652366

23662367
def new_block_2d(

pandas/tests/extension/base/casting.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pandas.util._test_decorators as td
55

66
import pandas as pd
7-
from pandas.core.internals import ObjectBlock
7+
from pandas.core.internals import NumpyBlock
88
from pandas.tests.extension.base.base import BaseExtensionTests
99

1010

@@ -16,7 +16,8 @@ def test_astype_object_series(self, all_data):
1616
result = ser.astype(object)
1717
assert result.dtype == np.dtype(object)
1818
if hasattr(result._mgr, "blocks"):
19-
assert isinstance(result._mgr.blocks[0], ObjectBlock)
19+
assert isinstance(result._mgr.blocks[0], NumpyBlock)
20+
assert result._mgr.blocks[0].is_object
2021
assert isinstance(result._mgr.array, np.ndarray)
2122
assert result._mgr.array.dtype == np.dtype(object)
2223

@@ -26,7 +27,8 @@ def test_astype_object_frame(self, all_data):
2627
result = df.astype(object)
2728
if hasattr(result._mgr, "blocks"):
2829
blk = result._mgr.blocks[0]
29-
assert isinstance(blk, ObjectBlock), type(blk)
30+
assert isinstance(blk, NumpyBlock), type(blk)
31+
assert blk.is_object
3032
assert isinstance(result._mgr.arrays[0], np.ndarray)
3133
assert result._mgr.arrays[0].dtype == np.dtype(object)
3234

pandas/tests/frame/test_block_internals.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,7 @@
2020
option_context,
2121
)
2222
import pandas._testing as tm
23-
from pandas.core.internals import (
24-
NumericBlock,
25-
ObjectBlock,
26-
)
23+
from pandas.core.internals import NumpyBlock
2724

2825
# Segregated collection of methods that require the BlockManager internal data
2926
# structure
@@ -387,7 +384,7 @@ def test_constructor_no_pandas_array(self):
387384
result = DataFrame({"A": arr})
388385
expected = DataFrame({"A": [1, 2, 3]})
389386
tm.assert_frame_equal(result, expected)
390-
assert isinstance(result._mgr.blocks[0], NumericBlock)
387+
assert isinstance(result._mgr.blocks[0], NumpyBlock)
391388

392389
def test_add_column_with_pandas_array(self):
393390
# GH 26390
@@ -400,8 +397,10 @@ def test_add_column_with_pandas_array(self):
400397
"c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)),
401398
}
402399
)
403-
assert type(df["c"]._mgr.blocks[0]) == ObjectBlock
404-
assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock
400+
assert type(df["c"]._mgr.blocks[0]) == NumpyBlock
401+
assert df["c"]._mgr.blocks[0].is_object
402+
assert type(df2["c"]._mgr.blocks[0]) == NumpyBlock
403+
assert df2["c"]._mgr.blocks[0].is_object
405404
tm.assert_frame_equal(df, df2)
406405

407406

pandas/tests/internals/test_api.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,9 @@ def test_namespace():
2727
]
2828
expected = [
2929
"Block",
30-
"NumericBlock",
3130
"DatetimeTZBlock",
3231
"ExtensionBlock",
33-
"ObjectBlock",
32+
"NumpyBlock",
3433
"make_block",
3534
"DataManager",
3635
"ArrayManager",

pandas/tests/series/test_constructors.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
IntervalArray,
4747
period_array,
4848
)
49-
from pandas.core.internals.blocks import NumericBlock
49+
from pandas.core.internals.blocks import NumpyBlock
5050

5151

5252
class TestSeriesConstructors:
@@ -2092,7 +2092,7 @@ def test_constructor_no_pandas_array(self, using_array_manager):
20922092
result = Series(ser.array)
20932093
tm.assert_series_equal(ser, result)
20942094
if not using_array_manager:
2095-
assert isinstance(result._mgr.blocks[0], NumericBlock)
2095+
assert isinstance(result._mgr.blocks[0], NumpyBlock)
20962096

20972097
@td.skip_array_manager_invalid_test
20982098
def test_from_array(self):

0 commit comments

Comments
 (0)