diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 20e99d007c798..8aeb577747354 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -348,6 +348,7 @@ Other API changes - Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) +- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b181a3e8e391..376281717219e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -165,6 +165,7 @@ ) from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, extract_array, sanitize_array, sanitize_masked_array, @@ -960,8 +961,6 @@ def _values( # type: ignore[override] """ Analogue to ._values that may return a 2D ExtensionArray. """ - self._consolidate_inplace() - mgr = self._mgr if isinstance(mgr, ArrayManager): @@ -969,11 +968,11 @@ def _values( # type: ignore[override] # error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]" # has no attribute "reshape" return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr] - return self.values + return ensure_wrapped_if_datetimelike(self.values) blocks = mgr.blocks if len(blocks) != 1: - return self.values + return ensure_wrapped_if_datetimelike(self.values) arr = blocks[0].values if arr.ndim == 1: @@ -1804,7 +1803,6 @@ def to_numpy( array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) """ - self._consolidate_inplace() if dtype is not None: dtype = np.dtype(dtype) result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value) @@ -11291,7 +11289,6 @@ def values(self) -> np.ndarray: ['lion', 80.5, 1], ['monkey', nan, None]], dtype=object) """ - self._consolidate_inplace() return self._mgr.as_array() @overload diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fa35060c8aff8..1b17deb7def90 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3777,7 +3777,6 @@ def _take( See the docstring of `take` for full explanation of the parameters. """ - self._consolidate_inplace() new_data = self._mgr.take( indices, @@ -3934,8 +3933,6 @@ class animal locomotion else: index = self.index - self._consolidate_inplace() - if isinstance(index, MultiIndex): loc, new_index = index._get_loc_level(key, level=0) if not drop_level: @@ -5190,8 +5187,6 @@ def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT: f'argument "{list(kwargs.keys())[0]}"' ) - self._consolidate_inplace() - # if all axes that are requested to reindex are equal, then only copy # if indicated must have index names equal here as well as values if all( @@ -6730,8 +6725,6 @@ def fillna( inplace = validate_bool_kwarg(inplace, "inplace") value, method = validate_fillna_kwargs(value, method) - self._consolidate_inplace() - # set the default here, so functions examining the signaure # can detect if something was set (e.g. in groupby) (GH9221) if axis is None: @@ -7049,8 +7042,6 @@ def replace( if not is_bool(regex) and to_replace is not None: raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") - self._consolidate_inplace() - if value is lib.no_default or method is not lib.no_default: # GH#36984 if the user explicitly passes value=None we want to # respect that. We have the corner case where the user explicitly diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index f755b0addfd6d..1f134af68be6b 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -256,11 +256,7 @@ def test_private_values_dt64tz_multicol(self): df2 = df - df tm.assert_equal(df2._values, tda) - def test_private_values_dt64_multiblock(self, using_array_manager, request): - if using_array_manager: - mark = pytest.mark.xfail(reason="returns ndarray") - request.node.add_marker(mark) - + def test_private_values_dt64_multiblock(self): dta = date_range("2000", periods=8)._data df = DataFrame({"A": dta[:4]}, copy=False) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 57e498defccc1..a7f9b14f44674 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -85,12 +85,6 @@ def test_consolidate_inplace(self, float_frame): for letter in range(ord("A"), ord("Z")): float_frame[chr(letter)] = chr(letter) - def test_values_consolidate(self, float_frame): - float_frame["E"] = 7.0 - assert not float_frame._mgr.is_consolidated() - _ = float_frame.values - assert float_frame._mgr.is_consolidated() - def test_modify_values(self, float_frame): float_frame.values[5] = 5 assert (float_frame.values[5] == 5).all() @@ -99,10 +93,10 @@ def test_modify_values(self, float_frame): float_frame["E"] = 7.0 col = float_frame["E"] float_frame.values[6] = 6 - assert (float_frame.values[6] == 6).all() + # as of 2.0 .values does not consolidate, so subsequent calls to .values + # does not share data + assert not (float_frame.values[6] == 6).all() - # check that item_cache was cleared - assert float_frame["E"] is not col assert (col == 7).all() def test_boolean_set_uncons(self, float_frame):