5555import pandas .core .algorithms as algos
5656from pandas .core .arrays ._mixins import NDArrayBackedExtensionArray
5757from pandas .core .arrays .sparse import SparseDtype
58+ import pandas .core .common as com
5859from pandas .core .construction import (
5960 ensure_wrapped_if_datetimelike ,
6061 extract_array ,
@@ -146,6 +147,7 @@ class BaseBlockManager(DataManager):
146147 blocks : tuple [Block , ...]
147148 axes : list [Index ]
148149 refs : list [weakref .ref | None ] | None
150+ parent : object
149151
150152 @property
151153 def ndim (self ) -> int :
@@ -163,6 +165,7 @@ def from_blocks(
163165 blocks : list [Block ],
164166 axes : list [Index ],
165167 refs : list [weakref .ref | None ] | None = None ,
168+ parent : object = None ,
166169 ) -> T :
167170 raise NotImplementedError
168171
@@ -262,6 +265,8 @@ def _clear_reference_block(self, blkno: int) -> None:
262265 """
263266 if self .refs is not None :
264267 self .refs [blkno ] = None
268+ if com .all_none (* self .refs ):
269+ self .parent = None
265270
266271 def get_dtypes (self ):
267272 dtypes = np .array ([blk .dtype for blk in self .blocks ])
@@ -602,7 +607,9 @@ def _combine(
602607 axes [- 1 ] = index
603608 axes [0 ] = self .items .take (indexer )
604609
605- return type (self ).from_blocks (new_blocks , axes , new_refs )
610+ return type (self ).from_blocks (
611+ new_blocks , axes , new_refs , parent = None if copy else self
612+ )
606613
607614 @property
608615 def nblocks (self ) -> int :
@@ -645,11 +652,14 @@ def copy_func(ax):
645652 new_refs : list [weakref .ref | None ] | None
646653 if deep :
647654 new_refs = None
655+ parent = None
648656 else :
649657 new_refs = [weakref .ref (blk ) for blk in self .blocks ]
658+ parent = self
650659
651660 res .axes = new_axes
652661 res .refs = new_refs
662+ res .parent = parent
653663
654664 if self .ndim > 1 :
655665 # Avoid needing to re-compute these
@@ -738,6 +748,7 @@ def reindex_indexer(
738748 only_slice = only_slice ,
739749 use_na_proxy = use_na_proxy ,
740750 )
751+ parent = None if com .all_none (* new_refs ) else self
741752 else :
742753 new_blocks = [
743754 blk .take_nd (
@@ -750,11 +761,12 @@ def reindex_indexer(
750761 for blk in self .blocks
751762 ]
752763 new_refs = None
764+ parent = None
753765
754766 new_axes = list (self .axes )
755767 new_axes [axis ] = new_axis
756768
757- new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs )
769+ new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs , parent = parent )
758770 if axis == 1 :
759771 # We can avoid the need to rebuild these
760772 new_mgr ._blknos = self .blknos .copy ()
@@ -989,6 +1001,7 @@ def __init__(
9891001 blocks : Sequence [Block ],
9901002 axes : Sequence [Index ],
9911003 refs : list [weakref .ref | None ] | None = None ,
1004+ parent : object = None ,
9921005 verify_integrity : bool = True ,
9931006 ) -> None :
9941007
@@ -1053,11 +1066,13 @@ def from_blocks(
10531066 blocks : list [Block ],
10541067 axes : list [Index ],
10551068 refs : list [weakref .ref | None ] | None = None ,
1069+ parent : object = None ,
10561070 ) -> BlockManager :
10571071 """
10581072 Constructor for BlockManager and SingleBlockManager with same signature.
10591073 """
1060- return cls (blocks , axes , refs , verify_integrity = False )
1074+ parent = parent if _using_copy_on_write () else None
1075+ return cls (blocks , axes , refs , parent , verify_integrity = False )
10611076
10621077 # ----------------------------------------------------------------
10631078 # Indexing
@@ -1079,7 +1094,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
10791094 block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
10801095 # in the case of a single block, the new block is a view
10811096 ref = weakref .ref (self .blocks [0 ])
1082- return SingleBlockManager (block , self .axes [0 ], [ref ])
1097+ return SingleBlockManager (block , self .axes [0 ], [ref ], parent = self )
10831098
10841099 dtype = interleaved_dtype ([blk .dtype for blk in self .blocks ])
10851100
@@ -1113,7 +1128,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
11131128 block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
11141129 return SingleBlockManager (block , self .axes [0 ])
11151130
1116- def iget (self , i : int ) -> SingleBlockManager :
1131+ def iget (self , i : int , track_ref : bool = True ) -> SingleBlockManager :
11171132 """
11181133 Return the data as a SingleBlockManager.
11191134 """
@@ -1123,7 +1138,9 @@ def iget(self, i: int) -> SingleBlockManager:
11231138 # shortcut for select a single-dim from a 2-dim BM
11241139 bp = BlockPlacement (slice (0 , len (values )))
11251140 nb = type (block )(values , placement = bp , ndim = 1 )
1126- return SingleBlockManager (nb , self .axes [1 ], [weakref .ref (block )])
1141+ ref = weakref .ref (block ) if track_ref else None
1142+ parent = self if track_ref else None
1143+ return SingleBlockManager (nb , self .axes [1 ], [ref ], parent )
11271144
11281145 def iget_values (self , i : int ) -> ArrayLike :
11291146 """
@@ -1365,7 +1382,9 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None
13651382 self .blocks = tuple (blocks )
13661383 self ._clear_reference_block (blkno )
13671384
1368- col_mgr = self .iget (loc )
1385+ # this manager is only created temporarily to mutate the values in place
1386+ # so don't track references, otherwise the `setitem` would perform CoW again
1387+ col_mgr = self .iget (loc , track_ref = False )
13691388 new_mgr = col_mgr .setitem ((idx ,), value )
13701389 self .iset (loc , new_mgr ._block .values , inplace = True )
13711390
@@ -1463,7 +1482,9 @@ def idelete(self, indexer) -> BlockManager:
14631482 nbs , new_refs = self ._slice_take_blocks_ax0 (taker , only_slice = True )
14641483 new_columns = self .items [~ is_deleted ]
14651484 axes = [new_columns , self .axes [1 ]]
1466- return type (self )(tuple (nbs ), axes , new_refs , verify_integrity = False )
1485+ # TODO this might not be needed (can a delete ever be done in chained manner?)
1486+ parent = None if com .all_none (* new_refs ) else self
1487+ return type (self )(tuple (nbs ), axes , new_refs , parent , verify_integrity = False )
14671488
14681489 # ----------------------------------------------------------------
14691490 # Block-wise Operation
@@ -1869,6 +1890,7 @@ def __init__(
18691890 block : Block ,
18701891 axis : Index ,
18711892 refs : list [weakref .ref | None ] | None = None ,
1893+ parent : object = None ,
18721894 verify_integrity : bool = False ,
18731895 fastpath = lib .no_default ,
18741896 ) -> None :
@@ -1887,13 +1909,15 @@ def __init__(
18871909 self .axes = [axis ]
18881910 self .blocks = (block ,)
18891911 self .refs = refs
1912+ self .parent = parent if _using_copy_on_write () else None
18901913
18911914 @classmethod
18921915 def from_blocks (
18931916 cls ,
18941917 blocks : list [Block ],
18951918 axes : list [Index ],
18961919 refs : list [weakref .ref | None ] | None = None ,
1920+ parent : object = None ,
18971921 ) -> SingleBlockManager :
18981922 """
18991923 Constructor for BlockManager and SingleBlockManager with same signature.
@@ -1902,7 +1926,7 @@ def from_blocks(
19021926 assert len (axes ) == 1
19031927 if refs is not None :
19041928 assert len (refs ) == 1
1905- return cls (blocks [0 ], axes [0 ], refs , verify_integrity = False )
1929+ return cls (blocks [0 ], axes [0 ], refs , parent , verify_integrity = False )
19061930
19071931 @classmethod
19081932 def from_array (cls , array : ArrayLike , index : Index ) -> SingleBlockManager :
@@ -1922,7 +1946,10 @@ def to_2d_mgr(self, columns: Index) -> BlockManager:
19221946 new_blk = type (blk )(arr , placement = bp , ndim = 2 )
19231947 axes = [columns , self .axes [0 ]]
19241948 refs : list [weakref .ref | None ] = [weakref .ref (blk )]
1925- return BlockManager ([new_blk ], axes = axes , refs = refs , verify_integrity = False )
1949+ parent = self if _using_copy_on_write () else None
1950+ return BlockManager (
1951+ [new_blk ], axes = axes , refs = refs , parent = parent , verify_integrity = False
1952+ )
19261953
19271954 def _has_no_reference (self , i : int = 0 ) -> bool :
19281955 """
@@ -2004,7 +2031,7 @@ def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockMana
20042031 new_idx = self .index [indexer ]
20052032 # TODO(CoW) in theory only need to track reference if new_array is a view
20062033 ref = weakref .ref (blk )
2007- return type (self )(block , new_idx , [ref ])
2034+ return type (self )(block , new_idx , [ref ], parent = self )
20082035
20092036 def get_slice (self , slobj : slice , axis : int = 0 ) -> SingleBlockManager :
20102037 # Assertion disabled for performance
@@ -2017,7 +2044,9 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
20172044 bp = BlockPlacement (slice (0 , len (array )))
20182045 block = type (blk )(array , placement = bp , ndim = 1 )
20192046 new_index = self .index ._getitem_slice (slobj )
2020- return type (self )(block , new_index , [weakref .ref (blk )])
2047+ # TODO this method is only used in groupby SeriesSplitter at the moment,
2048+ # so passing refs / parent is not yet covered by the tests
2049+ return type (self )(block , new_index , [weakref .ref (blk )], parent = self )
20212050
20222051 @property
20232052 def index (self ) -> Index :
@@ -2064,6 +2093,7 @@ def setitem_inplace(self, indexer, value) -> None:
20642093 if _using_copy_on_write () and not self ._has_no_reference (0 ):
20652094 self .blocks = (self ._block .copy (),)
20662095 self .refs = None
2096+ self .parent = None
20672097 self ._cache .clear ()
20682098
20692099 super ().setitem_inplace (indexer , value )
@@ -2080,6 +2110,7 @@ def idelete(self, indexer) -> SingleBlockManager:
20802110 self ._cache .clear ()
20812111 # clear reference since delete always results in a new array
20822112 self .refs = None
2113+ self .parent = None
20832114 return self
20842115
20852116 def fast_xs (self , loc ):
0 commit comments