5757import pandas .core .algorithms as algos
5858from pandas .core .arrays ._mixins import NDArrayBackedExtensionArray
5959from pandas .core .arrays .sparse import SparseDtype
60+ import pandas .core .common as com
6061from pandas .core .construction import (
6162 ensure_wrapped_if_datetimelike ,
6263 extract_array ,
@@ -148,6 +149,7 @@ class BaseBlockManager(DataManager):
148149 blocks : tuple [Block , ...]
149150 axes : list [Index ]
150151 refs : list [weakref .ref | None ] | None
152+ parent : object
151153
152154 @property
153155 def ndim (self ) -> int :
@@ -165,6 +167,7 @@ def from_blocks(
165167 blocks : list [Block ],
166168 axes : list [Index ],
167169 refs : list [weakref .ref | None ] | None = None ,
170+ parent : object = None ,
168171 ) -> T :
169172 raise NotImplementedError
170173
@@ -264,6 +267,8 @@ def _clear_reference_block(self, blkno: int) -> None:
264267 """
265268 if self .refs is not None :
266269 self .refs [blkno ] = None
270+ if com .all_none (* self .refs ):
271+ self .parent = None
267272
268273 def get_dtypes (self ):
269274 dtypes = np .array ([blk .dtype for blk in self .blocks ])
@@ -605,7 +610,9 @@ def _combine(
605610 axes [- 1 ] = index
606611 axes [0 ] = self .items .take (indexer )
607612
608- return type (self ).from_blocks (new_blocks , axes , new_refs )
613+ return type (self ).from_blocks (
614+ new_blocks , axes , new_refs , parent = None if copy else self
615+ )
609616
610617 @property
611618 def nblocks (self ) -> int :
@@ -648,11 +655,14 @@ def copy_func(ax):
648655 new_refs : list [weakref .ref | None ] | None
649656 if deep :
650657 new_refs = None
658+ parent = None
651659 else :
652660 new_refs = [weakref .ref (blk ) for blk in self .blocks ]
661+ parent = self
653662
654663 res .axes = new_axes
655664 res .refs = new_refs
665+ res .parent = parent
656666
657667 if self .ndim > 1 :
658668 # Avoid needing to re-compute these
@@ -744,6 +754,7 @@ def reindex_indexer(
744754 only_slice = only_slice ,
745755 use_na_proxy = use_na_proxy ,
746756 )
757+ parent = None if com .all_none (* new_refs ) else self
747758 else :
748759 new_blocks = [
749760 blk .take_nd (
@@ -756,11 +767,12 @@ def reindex_indexer(
756767 for blk in self .blocks
757768 ]
758769 new_refs = None
770+ parent = None
759771
760772 new_axes = list (self .axes )
761773 new_axes [axis ] = new_axis
762774
763- new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs )
775+ new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs , parent = parent )
764776 if axis == 1 :
765777 # We can avoid the need to rebuild these
766778 new_mgr ._blknos = self .blknos .copy ()
@@ -995,6 +1007,7 @@ def __init__(
9951007 blocks : Sequence [Block ],
9961008 axes : Sequence [Index ],
9971009 refs : list [weakref .ref | None ] | None = None ,
1010+ parent : object = None ,
9981011 verify_integrity : bool = True ,
9991012 ) -> None :
10001013
@@ -1059,11 +1072,13 @@ def from_blocks(
10591072 blocks : list [Block ],
10601073 axes : list [Index ],
10611074 refs : list [weakref .ref | None ] | None = None ,
1075+ parent : object = None ,
10621076 ) -> BlockManager :
10631077 """
10641078 Constructor for BlockManager and SingleBlockManager with same signature.
10651079 """
1066- return cls (blocks , axes , refs , verify_integrity = False )
1080+ parent = parent if _using_copy_on_write () else None
1081+ return cls (blocks , axes , refs , parent , verify_integrity = False )
10671082
10681083 # ----------------------------------------------------------------
10691084 # Indexing
@@ -1085,7 +1100,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
10851100 block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
10861101 # in the case of a single block, the new block is a view
10871102 ref = weakref .ref (self .blocks [0 ])
1088- return SingleBlockManager (block , self .axes [0 ], [ref ])
1103+ return SingleBlockManager (block , self .axes [0 ], [ref ], parent = self )
10891104
10901105 dtype = interleaved_dtype ([blk .dtype for blk in self .blocks ])
10911106
@@ -1119,7 +1134,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
11191134 block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
11201135 return SingleBlockManager (block , self .axes [0 ])
11211136
1122- def iget (self , i : int ) -> SingleBlockManager :
1137+ def iget (self , i : int , track_ref : bool = True ) -> SingleBlockManager :
11231138 """
11241139 Return the data as a SingleBlockManager.
11251140 """
@@ -1129,7 +1144,9 @@ def iget(self, i: int) -> SingleBlockManager:
11291144 # shortcut for select a single-dim from a 2-dim BM
11301145 bp = BlockPlacement (slice (0 , len (values )))
11311146 nb = type (block )(values , placement = bp , ndim = 1 )
1132- return SingleBlockManager (nb , self .axes [1 ], [weakref .ref (block )])
1147+ ref = weakref .ref (block ) if track_ref else None
1148+ parent = self if track_ref else None
1149+ return SingleBlockManager (nb , self .axes [1 ], [ref ], parent )
11331150
11341151 def iget_values (self , i : int ) -> ArrayLike :
11351152 """
@@ -1371,7 +1388,9 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None
13711388 self .blocks = tuple (blocks )
13721389 self ._clear_reference_block (blkno )
13731390
1374- col_mgr = self .iget (loc )
1391+ # this manager is only created temporarily to mutate the values in place
1392+ # so don't track references, otherwise the `setitem` would perform CoW again
1393+ col_mgr = self .iget (loc , track_ref = False )
13751394 new_mgr = col_mgr .setitem ((idx ,), value )
13761395 self .iset (loc , new_mgr ._block .values , inplace = True )
13771396
@@ -1469,7 +1488,9 @@ def idelete(self, indexer) -> BlockManager:
14691488 nbs , new_refs = self ._slice_take_blocks_ax0 (taker , only_slice = True )
14701489 new_columns = self .items [~ is_deleted ]
14711490 axes = [new_columns , self .axes [1 ]]
1472- return type (self )(tuple (nbs ), axes , new_refs , verify_integrity = False )
1491+ # TODO this might not be needed (can a delete ever be done in chained manner?)
1492+ parent = None if com .all_none (* new_refs ) else self
1493+ return type (self )(tuple (nbs ), axes , new_refs , parent , verify_integrity = False )
14731494
14741495 # ----------------------------------------------------------------
14751496 # Block-wise Operation
@@ -1875,6 +1896,7 @@ def __init__(
18751896 block : Block ,
18761897 axis : Index ,
18771898 refs : list [weakref .ref | None ] | None = None ,
1899+ parent : object = None ,
18781900 verify_integrity : bool = False ,
18791901 fastpath = lib .no_default ,
18801902 ) -> None :
@@ -1893,13 +1915,15 @@ def __init__(
18931915 self .axes = [axis ]
18941916 self .blocks = (block ,)
18951917 self .refs = refs
1918+ self .parent = parent if _using_copy_on_write () else None
18961919
18971920 @classmethod
18981921 def from_blocks (
18991922 cls ,
19001923 blocks : list [Block ],
19011924 axes : list [Index ],
19021925 refs : list [weakref .ref | None ] | None = None ,
1926+ parent : object = None ,
19031927 ) -> SingleBlockManager :
19041928 """
19051929 Constructor for BlockManager and SingleBlockManager with same signature.
@@ -1908,7 +1932,7 @@ def from_blocks(
19081932 assert len (axes ) == 1
19091933 if refs is not None :
19101934 assert len (refs ) == 1
1911- return cls (blocks [0 ], axes [0 ], refs , verify_integrity = False )
1935+ return cls (blocks [0 ], axes [0 ], refs , parent , verify_integrity = False )
19121936
19131937 @classmethod
19141938 def from_array (cls , array : ArrayLike , index : Index ) -> SingleBlockManager :
@@ -1928,7 +1952,10 @@ def to_2d_mgr(self, columns: Index) -> BlockManager:
19281952 new_blk = type (blk )(arr , placement = bp , ndim = 2 )
19291953 axes = [columns , self .axes [0 ]]
19301954 refs : list [weakref .ref | None ] = [weakref .ref (blk )]
1931- return BlockManager ([new_blk ], axes = axes , refs = refs , verify_integrity = False )
1955+ parent = self if _using_copy_on_write () else None
1956+ return BlockManager (
1957+ [new_blk ], axes = axes , refs = refs , parent = parent , verify_integrity = False
1958+ )
19321959
19331960 def _has_no_reference (self , i : int = 0 ) -> bool :
19341961 """
@@ -2010,7 +2037,7 @@ def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockMana
20102037 new_idx = self .index [indexer ]
20112038 # TODO(CoW) in theory only need to track reference if new_array is a view
20122039 ref = weakref .ref (blk )
2013- return type (self )(block , new_idx , [ref ])
2040+ return type (self )(block , new_idx , [ref ], parent = self )
20142041
20152042 def get_slice (self , slobj : slice , axis : AxisInt = 0 ) -> SingleBlockManager :
20162043 # Assertion disabled for performance
@@ -2023,7 +2050,9 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
20232050 bp = BlockPlacement (slice (0 , len (array )))
20242051 block = type (blk )(array , placement = bp , ndim = 1 )
20252052 new_index = self .index ._getitem_slice (slobj )
2026- return type (self )(block , new_index , [weakref .ref (blk )])
2053+ # TODO this method is only used in groupby SeriesSplitter at the moment,
2054+ # so passing refs / parent is not yet covered by the tests
2055+ return type (self )(block , new_index , [weakref .ref (blk )], parent = self )
20272056
20282057 @property
20292058 def index (self ) -> Index :
@@ -2070,6 +2099,7 @@ def setitem_inplace(self, indexer, value) -> None:
20702099 if _using_copy_on_write () and not self ._has_no_reference (0 ):
20712100 self .blocks = (self ._block .copy (),)
20722101 self .refs = None
2102+ self .parent = None
20732103 self ._cache .clear ()
20742104
20752105 super ().setitem_inplace (indexer , value )
@@ -2086,6 +2116,7 @@ def idelete(self, indexer) -> SingleBlockManager:
20862116 self ._cache .clear ()
20872117 # clear reference since delete always results in a new array
20882118 self .refs = None
2119+ self .parent = None
20892120 return self
20902121
20912122 def fast_xs (self , loc ):
0 commit comments