Skip to content

Commit 8349b02

Browse files
authored
PERF/REF: require BlockPlacement in Block.__init__ (#40361)
1 parent 126bb92 commit 8349b02

File tree

6 files changed

+56
-33
lines changed

6 files changed

+56
-33
lines changed

pandas/core/arrays/timedeltas.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
from pandas.core.dtypes.common import (
4343
DT64NS_DTYPE,
4444
TD64NS_DTYPE,
45-
is_categorical_dtype,
4645
is_dtype_equal,
4746
is_float_dtype,
4847
is_integer_dtype,
@@ -53,7 +52,10 @@
5352
pandas_dtype,
5453
)
5554
from pandas.core.dtypes.dtypes import DatetimeTZDtype
56-
from pandas.core.dtypes.generic import ABCMultiIndex
55+
from pandas.core.dtypes.generic import (
56+
ABCCategorical,
57+
ABCMultiIndex,
58+
)
5759
from pandas.core.dtypes.missing import isna
5860

5961
from pandas.core import nanops
@@ -970,7 +972,7 @@ def sequence_to_td64ns(
970972
elif not isinstance(data, (np.ndarray, ExtensionArray)):
971973
# GH#24539 e.g. xarray, dask object
972974
data = np.asarray(data)
973-
elif is_categorical_dtype(data.dtype):
975+
elif isinstance(data, ABCCategorical):
974976
data = data.categories.take(data.codes, fill_value=NaT)._values
975977
copy = False
976978

pandas/core/internals/blocks.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def _simple_new(
173173
obj._mgr_locs = placement
174174
return obj
175175

176-
def __init__(self, values, placement, ndim: int):
176+
def __init__(self, values, placement: BlockPlacement, ndim: int):
177177
"""
178178
Parameters
179179
----------
@@ -183,8 +183,10 @@ def __init__(self, values, placement, ndim: int):
183183
ndim : int
184184
1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
185185
"""
186+
assert isinstance(ndim, int)
187+
assert isinstance(placement, BlockPlacement)
186188
self.ndim = ndim
187-
self.mgr_locs = placement
189+
self._mgr_locs = placement
188190
self.values = values
189191

190192
@property
@@ -263,14 +265,12 @@ def fill_value(self):
263265
return np.nan
264266

265267
@property
266-
def mgr_locs(self):
268+
def mgr_locs(self) -> BlockPlacement:
267269
return self._mgr_locs
268270

269271
@mgr_locs.setter
270-
def mgr_locs(self, new_mgr_locs):
271-
if not isinstance(new_mgr_locs, libinternals.BlockPlacement):
272-
new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs)
273-
272+
def mgr_locs(self, new_mgr_locs: BlockPlacement):
273+
assert isinstance(new_mgr_locs, BlockPlacement)
274274
self._mgr_locs = new_mgr_locs
275275

276276
@final
@@ -289,7 +289,9 @@ def make_block(self, values, placement=None) -> Block:
289289
return new_block(values, placement=placement, ndim=self.ndim)
290290

291291
@final
292-
def make_block_same_class(self, values, placement=None) -> Block:
292+
def make_block_same_class(
293+
self, values, placement: Optional[BlockPlacement] = None
294+
) -> Block:
293295
""" Wrap given values in a block of same type as self. """
294296
if placement is None:
295297
placement = self._mgr_locs
@@ -1221,7 +1223,11 @@ def func(yvalues: np.ndarray) -> np.ndarray:
12211223
return self._maybe_downcast(blocks, downcast)
12221224

12231225
def take_nd(
1224-
self, indexer, axis: int, new_mgr_locs=None, fill_value=lib.no_default
1226+
self,
1227+
indexer,
1228+
axis: int,
1229+
new_mgr_locs: Optional[BlockPlacement] = None,
1230+
fill_value=lib.no_default,
12251231
) -> Block:
12261232
"""
12271233
Take values according to indexer and return them as a block.
@@ -1569,7 +1575,11 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
15691575
return self.make_block(new_values)
15701576

15711577
def take_nd(
1572-
self, indexer, axis: int = 0, new_mgr_locs=None, fill_value=lib.no_default
1578+
self,
1579+
indexer,
1580+
axis: int = 0,
1581+
new_mgr_locs: Optional[BlockPlacement] = None,
1582+
fill_value=lib.no_default,
15731583
) -> Block:
15741584
"""
15751585
Take values according to indexer and return them as a block.
@@ -2258,8 +2268,8 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):
22582268

22592269

22602270
def extract_pandas_array(
2261-
values: ArrayLike, dtype: Optional[DtypeObj], ndim: int
2262-
) -> Tuple[ArrayLike, Optional[DtypeObj]]:
2271+
values: Union[np.ndarray, ExtensionArray], dtype: Optional[DtypeObj], ndim: int
2272+
) -> Tuple[Union[np.ndarray, ExtensionArray], Optional[DtypeObj]]:
22632273
"""
22642274
Ensure that we don't allow PandasArray / PandasDtype in internals.
22652275
"""

pandas/core/internals/managers.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ def make_empty(self: T, axes=None) -> T:
240240
assert isinstance(self, SingleBlockManager) # for mypy
241241
blk = self.blocks[0]
242242
arr = blk.values[:0]
243-
nb = blk.make_block_same_class(arr, placement=slice(0, 0))
243+
bp = BlockPlacement(slice(0, 0))
244+
nb = blk.make_block_same_class(arr, placement=bp)
244245
blocks = [nb]
245246
else:
246247
blocks = []
@@ -786,7 +787,7 @@ def _combine(
786787
new_blocks: List[Block] = []
787788
for b in blocks:
788789
b = b.copy(deep=copy)
789-
b.mgr_locs = inv_indexer[b.mgr_locs.indexer]
790+
b.mgr_locs = BlockPlacement(inv_indexer[b.mgr_locs.indexer])
790791
new_blocks.append(b)
791792

792793
axes = list(self.axes)
@@ -1053,8 +1054,9 @@ def iget(self, i: int) -> SingleBlockManager:
10531054
values = block.iget(self.blklocs[i])
10541055

10551056
# shortcut for select a single-dim from a 2-dim BM
1057+
bp = BlockPlacement(slice(0, len(values)))
10561058
values = maybe_coerce_values(values)
1057-
nb = type(block)(values, placement=slice(0, len(values)), ndim=1)
1059+
nb = type(block)(values, placement=bp, ndim=1)
10581060
return SingleBlockManager(nb, self.axes[1])
10591061

10601062
def iget_values(self, i: int) -> ArrayLike:
@@ -1266,7 +1268,7 @@ def insert(
12661268
else:
12671269
new_mgr_locs = blk.mgr_locs.as_array.copy()
12681270
new_mgr_locs[new_mgr_locs >= loc] += 1
1269-
blk.mgr_locs = new_mgr_locs
1271+
blk.mgr_locs = BlockPlacement(new_mgr_locs)
12701272

12711273
# Accessing public blklocs ensures the public versions are initialized
12721274
if loc == self.blklocs.shape[0]:
@@ -1415,11 +1417,12 @@ def _slice_take_blocks_ax0(
14151417
# all(np.shares_memory(nb.values, blk.values) for nb in blocks)
14161418
return blocks
14171419
else:
1420+
bp = BlockPlacement(slice(0, sllen))
14181421
return [
14191422
blk.take_nd(
14201423
slobj,
14211424
axis=0,
1422-
new_mgr_locs=slice(0, sllen),
1425+
new_mgr_locs=bp,
14231426
fill_value=fill_value,
14241427
)
14251428
]
@@ -1456,7 +1459,7 @@ def _slice_take_blocks_ax0(
14561459
# item.
14571460
for mgr_loc in mgr_locs:
14581461
newblk = blk.copy(deep=False)
1459-
newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1)
1462+
newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1))
14601463
blocks.append(newblk)
14611464

14621465
else:
@@ -1655,12 +1658,15 @@ def getitem_mgr(self, indexer) -> SingleBlockManager:
16551658
# similar to get_slice, but not restricted to slice indexer
16561659
blk = self._block
16571660
array = blk._slice(indexer)
1658-
if array.ndim > blk.values.ndim:
1661+
if array.ndim > 1:
16591662
# This will be caught by Series._get_values
16601663
raise ValueError("dimension-expanding indexing not allowed")
16611664

1662-
block = blk.make_block_same_class(array, placement=slice(0, len(array)))
1663-
return type(self)(block, self.index[indexer])
1665+
bp = BlockPlacement(slice(0, len(array)))
1666+
block = blk.make_block_same_class(array, placement=bp)
1667+
1668+
new_idx = self.index[indexer]
1669+
return type(self)(block, new_idx)
16641670

16651671
def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
16661672
assert isinstance(slobj, slice), type(slobj)
@@ -1669,7 +1675,8 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
16691675

16701676
blk = self._block
16711677
array = blk._slice(slobj)
1672-
block = blk.make_block_same_class(array, placement=slice(0, len(array)))
1678+
bp = BlockPlacement(slice(0, len(array)))
1679+
block = blk.make_block_same_class(array, placement=bp)
16731680
new_index = self.index._getitem_slice(slobj)
16741681
return type(self)(block, new_index)
16751682

@@ -1733,7 +1740,7 @@ def set_values(self, values: ArrayLike):
17331740
valid for the current Block/SingleBlockManager (length, dtype, etc).
17341741
"""
17351742
self.blocks[0].values = values
1736-
self.blocks[0]._mgr_locs = libinternals.BlockPlacement(slice(len(values)))
1743+
self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values)))
17371744

17381745

17391746
# --------------------------------------------------------------------
@@ -1985,7 +1992,8 @@ def _merge_blocks(
19851992
new_values = new_values[argsort]
19861993
new_mgr_locs = new_mgr_locs[argsort]
19871994

1988-
return [new_block(new_values, placement=new_mgr_locs, ndim=2)]
1995+
bp = BlockPlacement(new_mgr_locs)
1996+
return [new_block(new_values, placement=bp, ndim=2)]
19891997

19901998
# can't consolidate --> no merge
19911999
return blocks

pandas/core/internals/ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def _reset_block_mgr_locs(nbs: List[Block], locs):
8787
Reset mgr_locs to correspond to our original DataFrame.
8888
"""
8989
for nb in nbs:
90-
nblocs = locs.as_array[nb.mgr_locs.indexer]
90+
nblocs = locs[nb.mgr_locs.indexer]
9191
nb.mgr_locs = nblocs
9292
# Assertions are disabled for performance, but should hold:
9393
# assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)

pandas/tests/extension/test_external_block.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._libs.internals import BlockPlacement
5+
46
import pandas as pd
57
from pandas.core.internals import BlockManager
68
from pandas.core.internals.blocks import ExtensionBlock
@@ -17,7 +19,8 @@ def df():
1719
df1 = pd.DataFrame({"a": [1, 2, 3]})
1820
blocks = df1._mgr.blocks
1921
values = np.arange(3, dtype="int64")
20-
custom_block = CustomBlock(values, placement=slice(1, 2), ndim=2)
22+
bp = BlockPlacement(slice(1, 2))
23+
custom_block = CustomBlock(values, placement=bp, ndim=2)
2124
blocks = blocks + (custom_block,)
2225
block_manager = BlockManager(blocks, [pd.Index(["a", "b"]), df1.index])
2326
return pd.DataFrame(block_manager)

pandas/tests/internals/test_internals.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,8 @@ def test_duplicate_ref_loc_failure(self):
327327

328328
axes, blocks = tmp_mgr.axes, tmp_mgr.blocks
329329

330-
blocks[0].mgr_locs = np.array([0])
331-
blocks[1].mgr_locs = np.array([0])
330+
blocks[0].mgr_locs = BlockPlacement(np.array([0]))
331+
blocks[1].mgr_locs = BlockPlacement(np.array([0]))
332332

333333
# test trying to create block manager with overlapping ref locs
334334

@@ -338,8 +338,8 @@ def test_duplicate_ref_loc_failure(self):
338338
mgr = BlockManager(blocks, axes)
339339
mgr._rebuild_blknos_and_blklocs()
340340

341-
blocks[0].mgr_locs = np.array([0])
342-
blocks[1].mgr_locs = np.array([1])
341+
blocks[0].mgr_locs = BlockPlacement(np.array([0]))
342+
blocks[1].mgr_locs = BlockPlacement(np.array([1]))
343343
mgr = BlockManager(blocks, axes)
344344
mgr.iget(1)
345345

0 commit comments

Comments
 (0)