From e1a66216235c90cfa6dc13cb9718afdb4fea3444 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Nov 2022 14:57:15 -0700 Subject: [PATCH 1/4] API: avoid silent consolidation --- pandas/core/frame.py | 9 +++------ pandas/core/generic.py | 9 --------- pandas/tests/frame/test_block_internals.py | 6 ------ 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2809aa9eaa37d..4c5880f52e08f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -169,6 +169,7 @@ ) from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, extract_array, sanitize_array, sanitize_masked_array, @@ -977,8 +978,6 @@ def _values( # type: ignore[override] """ Analogue to ._values that may return a 2D ExtensionArray. """ - self._consolidate_inplace() - mgr = self._mgr if isinstance(mgr, ArrayManager): @@ -986,11 +985,11 @@ def _values( # type: ignore[override] # error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]" # has no attribute "reshape" return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr] - return self.values + return ensure_wrapped_if_datetimelike(self.values) blocks = mgr.blocks if len(blocks) != 1: - return self.values + return ensure_wrapped_if_datetimelike(self.values) arr = blocks[0].values if arr.ndim == 1: @@ -1821,7 +1820,6 @@ def to_numpy( array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) """ - self._consolidate_inplace() if dtype is not None: dtype = np.dtype(dtype) result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value) @@ -11679,7 +11677,6 @@ def values(self) -> np.ndarray: ['lion', 80.5, 1], ['monkey', nan, None]], dtype=object) """ - self._consolidate_inplace() return self._mgr.as_array() @overload diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 05494e37256df..939c8c154d56c 100644 --- a/pandas/core/generic.py +++ 
b/pandas/core/generic.py @@ -3858,7 +3858,6 @@ def _take( See the docstring of `take` for full explanation of the parameters. """ - self._consolidate_inplace() new_data = self._mgr.take( indices, @@ -4020,8 +4019,6 @@ class animal locomotion else: index = self.index - self._consolidate_inplace() - if isinstance(index, MultiIndex): loc, new_index = index._get_loc_level(key, level=0) if not drop_level: @@ -5275,8 +5272,6 @@ def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT: f'argument "{list(kwargs.keys())[0]}"' ) - self._consolidate_inplace() - # if all axes that are requested to reindex are equal, then only copy # if indicated must have index names equal here as well as values if all( @@ -6812,8 +6807,6 @@ def fillna( inplace = validate_bool_kwarg(inplace, "inplace") value, method = validate_fillna_kwargs(value, method) - self._consolidate_inplace() - # set the default here, so functions examining the signaure # can detect if something was set (e.g. in groupby) (GH9221) if axis is None: @@ -7131,8 +7124,6 @@ def replace( if not is_bool(regex) and to_replace is not None: raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") - self._consolidate_inplace() - if value is lib.no_default or method is not lib.no_default: # GH#36984 if the user explicitly passes value=None we want to # respect that. 
We have the corner case where the user explicitly diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index ed9d7bced9253..2baf1aeba7ed0 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -86,12 +86,6 @@ def test_consolidate_inplace(self, float_frame): for letter in range(ord("A"), ord("Z")): float_frame[chr(letter)] = chr(letter) - def test_values_consolidate(self, float_frame): - float_frame["E"] = 7.0 - assert not float_frame._mgr.is_consolidated() - _ = float_frame.values - assert float_frame._mgr.is_consolidated() - def test_modify_values(self, float_frame): float_frame.values[5] = 5 assert (float_frame.values[5] == 5).all() From b27e330e5745ec8861625f530b5fd620247ffacc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 2 Nov 2022 08:56:49 -0700 Subject: [PATCH 2/4] update test --- pandas/tests/frame/test_block_internals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 2baf1aeba7ed0..83af0262bf64f 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -94,10 +94,10 @@ def test_modify_values(self, float_frame): float_frame["E"] = 7.0 col = float_frame["E"] float_frame.values[6] = 6 - assert (float_frame.values[6] == 6).all() + # as of 2.0 .values does not consolidate, so subsequent calls to .values + # do not share data + assert not (float_frame.values[6] == 6).all() - # check that item_cache was cleared - assert float_frame["E"] is not col assert (col == 7).all() def test_boolean_set_uncons(self, float_frame): From d22e4b417b50da8179695b6e1b93f1f85785edc7 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Nov 2022 14:29:06 -0800 Subject: [PATCH 3/4] fix ArrayManager xfail --- pandas/tests/frame/methods/test_values.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index f755b0addfd6d..1f134af68be6b 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -256,11 +256,7 @@ def test_private_values_dt64tz_multicol(self): df2 = df - df tm.assert_equal(df2._values, tda) - def test_private_values_dt64_multiblock(self, using_array_manager, request): - if using_array_manager: - mark = pytest.mark.xfail(reason="returns ndarray") - request.node.add_marker(mark) - + def test_private_values_dt64_multiblock(self): dta = date_range("2000", periods=8)._data df = DataFrame({"A": dta[:4]}, copy=False) From 046e910613ff771041f7dc642531f9d7d39c2d63 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 21 Nov 2022 18:59:42 -0800 Subject: [PATCH 4/4] whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 20e99d007c798..8aeb577747354 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -348,6 +348,7 @@ Other API changes - Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) +- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer 
silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) - .. ---------------------------------------------------------------------------