From 963be53cd35d029c81ab5b2fbc1002ae0ecc08bf Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Nov 2022 09:43:28 -0800 Subject: [PATCH 1/8] DEPR: enforce inplaceness for df.loc[:, foo]=bar --- pandas/core/dtypes/cast.py | 4 + pandas/core/indexing.py | 98 +++++++------------ pandas/core/internals/array_manager.py | 12 ++- pandas/core/internals/managers.py | 10 +- pandas/tests/copy_view/test_indexing.py | 27 ++--- pandas/tests/extension/base/setitem.py | 27 +---- pandas/tests/frame/indexing/test_indexing.py | 37 ++++--- pandas/tests/frame/methods/test_diff.py | 5 +- pandas/tests/frame/methods/test_dropna.py | 4 +- pandas/tests/frame/test_constructors.py | 4 +- pandas/tests/indexing/multiindex/test_loc.py | 7 +- .../indexing/test_chaining_and_caching.py | 5 +- pandas/tests/indexing/test_iloc.py | 48 ++++----- pandas/tests/indexing/test_indexing.py | 36 ++++--- pandas/tests/indexing/test_loc.py | 91 ++++++++--------- pandas/tests/indexing/test_partial.py | 8 +- 16 files changed, 191 insertions(+), 232 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 215b6c1021fd8..ae32b91731745 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1899,6 +1899,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element raise LossySetitemError + if dtype.kind == "V": + # i.e. np.void, which cannot hold _anything_ + raise LossySetitemError + raise NotImplementedError(dtype) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b6fd298a2d41a..87cde9cb3ede4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -9,7 +9,6 @@ cast, final, ) -import warnings import numpy as np @@ -23,9 +22,9 @@ AbstractMethodError, IndexingError, InvalidIndexError, + LossySetitemError, ) from pandas.util._decorators import doc -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( can_hold_element, @@ -793,6 +792,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: if self.ndim != 2: return + orig_key = key if isinstance(key, tuple) and len(key) > 1: # key may be a tuple if we are .loc # if length of key is > 1 set key to column part @@ -808,6 +808,25 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: ): # GH#38148 keys = self.obj.columns.union(key, sort=False) + diff = Index(key).difference(self.obj.columns, sort=False) + + if len(diff) and com.is_null_slice(orig_key[0]): + # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B" + # is a new column, add the new columns with dtype=np.void + # so that later when we go through setitem_single_column + # we will use isetitem. Without this, the reindex_axis + # below would create float64 columns in this example, which + # would successfully hold 7, so we would end up with the wrong + # dtype. + arr = np.empty(len(self.obj), dtype=np.void) + df = type(self.obj)({i: arr for i in range(len(diff))}) + df.columns = diff + df.index = self.obj.index + from pandas import concat + + new_obj = concat([self.obj, df], axis=1) + self.obj._mgr = new_obj._mgr + return self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True) @@ -1971,72 +1990,25 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: """ pi = plane_indexer - orig_values = self.obj._get_column_array(loc) - - # perform the equivalent of a setitem on the info axis - # as we have a null slice or a slice with full bounds - # which means essentially reassign to the columns of a - # multi-dim object - # GH#6149 (null slice), GH#10408 (full bounds) - if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)): - pass - elif ( - is_array_like(value) - and len(value.shape) > 0 - and self.obj.shape[0] == value.shape[0] - and not is_empty_indexer(pi) - ): - if is_list_like(pi) and not is_bool_dtype(pi): - value = value[np.argsort(pi)] - else: - # in case of slice - value = value[pi] + is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)) + + if is_full_setter: + + try: + self.obj._mgr.column_setitem( + loc, plane_indexer, value, inplace_only=True + ) + except (ValueError, TypeError, LossySetitemError): + # If we're setting an entire column and we can't do it inplace, + # then we can use value's dtype (or inferred dtype) + # instead of object + self.obj.isetitem(loc, value) else: # set value into the column (first attempting to operate inplace, then # falling back to casting if necessary) self.obj._mgr.column_setitem(loc, plane_indexer, value) - self.obj._clear_item_cache() - return - - self.obj._iset_item(loc, value) - # We will not operate in-place, but will attempt to in the future. - # To determine whether we need to issue a FutureWarning, see if the - # setting in-place would work, i.e. behavior will change. - - new_values = self.obj._get_column_array(loc) - - if can_hold_element(orig_values, new_values) and not len(new_values) == 0: - # Don't issue the warning yet, as we can still trim a few cases where - # behavior will not change. - - if ( - isinstance(new_values, np.ndarray) - and isinstance(orig_values, np.ndarray) - and ( - np.shares_memory(new_values, orig_values) - or new_values.shape != orig_values.shape - ) - ): - # TODO: get something like tm.shares_memory working? - # The values were set inplace after all, no need to warn, - # e.g. test_rename_nocopy - # In case of enlarging we can not set inplace, so need to - # warn either - pass - else: - warnings.warn( - "In a future version, `df.iloc[:, i] = newvals` will attempt " - "to set the values inplace instead of always setting a new " - "array. To retain the old behavior, use either " - "`df[df.columns[i]] = newvals` or, if columns are non-unique, " - "`df.isetitem(i, newvals)`", - FutureWarning, - stacklevel=find_stack_level(), - ) - # TODO: how to get future behavior? - # TODO: what if we got here indirectly via loc? - return + self.obj._clear_item_cache() def _setitem_single_block(self, indexer, value, name: str) -> None: """ diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 37ae9d103c8b5..06bccf66f9d0c 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -859,18 +859,26 @@ def iset( self.arrays[mgr_idx] = value_arr return - def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: + def column_setitem( + self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False + ) -> None: """ Set values ("setitem") into a single column (not setting the full column). This is a method on the ArrayManager level, to avoid creating an intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) + + """ if not is_integer(loc): raise TypeError("The column index should be an integer") arr = self.arrays[loc] mgr = SingleArrayManager([arr], [self._axes[0]]) - new_mgr = mgr.setitem((idx,), value) + if inplace_only: + mgr.setitem_inplace((idx,), value) + new_mgr = mgr + else: + new_mgr = mgr.setitem((idx,), value) # update existing ArrayManager in-place self.arrays[loc] = new_mgr.arrays[0] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3eca3756e1678..06a30d24964a9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1344,7 +1344,9 @@ def _iset_single( self._clear_reference_block(blkno) return - def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: + def column_setitem( + self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False + ) -> None: """ Set values ("setitem") into a single column (not setting the full column). @@ -1362,7 +1364,11 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None # this manager is only created temporarily to mutate the values in place # so don't track references, otherwise the `setitem` would perform CoW again col_mgr = self.iget(loc, track_ref=False) - new_mgr = col_mgr.setitem((idx,), value) + if inplace_only: + col_mgr.setitem_inplace((idx,), value) + new_mgr = col_mgr + else: + new_mgr = col_mgr.setitem((idx,), value) self.iset(loc, new_mgr._block.values, inplace=True) def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index b8028fd28f8f8..89bd821839640 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -340,7 +340,7 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those # warnings and only assert the SettingWithCopyWarning with tm.assert_produces_warning( - SettingWithCopyWarning, + None, raise_on_extra_warnings=not using_array_manager, ): subset.loc[:, "a"] = np.array([10, 11], dtype="int64") @@ -353,6 +353,8 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt tm.assert_frame_equal(subset, expected) if using_copy_on_write or using_array_manager: # original parent dataframe is not modified (CoW) + # with the enforcement of GH#45333 in 2.0, the original is modified + df_orig.loc[1:3, "a"] = np.array([10, 11], dtype="int64") tm.assert_frame_equal(df, df_orig) else: # original parent dataframe is actually updated @@ -376,7 +378,7 @@ def test_subset_set_column_with_loc2(using_copy_on_write, using_array_manager): # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those # warnings and only assert the SettingWithCopyWarning with tm.assert_produces_warning( - SettingWithCopyWarning, + None, raise_on_extra_warnings=not using_array_manager, ): subset.loc[:, "a"] = 0 @@ -386,6 +388,8 @@ def test_subset_set_column_with_loc2(using_copy_on_write, using_array_manager): tm.assert_frame_equal(subset, expected) if using_copy_on_write or using_array_manager: # original parent dataframe is not modified (CoW) + # with the enforcement of GH#45333 in 2.0, the original is modified + df_orig.loc[1:3, "a"] = 0 tm.assert_frame_equal(df, df_orig) else: # original parent dataframe is actually updated @@ -441,22 +445,23 @@ def test_subset_set_with_column_indexer( with pd.option_context("chained_assignment", "warn"): # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those # warnings and only assert the SettingWithCopyWarning - with tm.assert_produces_warning( - SettingWithCopyWarning, raise_on_extra_warnings=False - ): - subset.loc[:, indexer] = 0 + # As of 2.0, this setitem attempts (successfully) to set values + # inplace, so the assignment is not chained. + subset.loc[:, indexer] = 0 subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)) - # TODO full row slice .loc[:, idx] update inplace instead of overwrite? - expected["b"] = expected["b"].astype("int64") tm.assert_frame_equal(subset, expected) if using_copy_on_write or using_array_manager: + # pre-2.0, the original would not be mutated at all. this changed with the + # enforcement of GH#45333 + df_orig.loc[1:2, ["a", "b"]] = 0 tm.assert_frame_equal(df, df_orig) else: - # In the mixed case with BlockManager, only one of the two columns is - # mutated in the parent frame .. - df_orig.loc[1:2, ["a"]] = 0 + # pre-2.0, in the mixed case with BlockManager, only column "a" + # would be mutated in the parent frame. this changed with the + # enforcement of GH#45333 + df_orig.loc[1:2, ["a", "b"]] = 0 tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 8dbf7d47374a6..f52abb9349578 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -1,13 +1,6 @@ import numpy as np import pytest -from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, - IntervalDtype, - PandasDtype, - PeriodDtype, -) - import pandas as pd import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -382,11 +375,6 @@ def test_setitem_frame_2d_values(self, data): # GH#44514 df = pd.DataFrame({"A": data}) - # These dtypes have non-broken implementations of _can_hold_element - has_can_hold_element = isinstance( - data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype) - ) - # Avoiding using_array_manager fixture # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) @@ -396,24 +384,13 @@ def test_setitem_frame_2d_values(self, data): orig = df.copy() - msg = "will attempt to set the values inplace instead" - warn = None - if has_can_hold_element and not isinstance(data.dtype, PandasDtype): - # PandasDtype excluded because it isn't *really* supported. - warn = FutureWarning - - with tm.assert_produces_warning(warn, match=msg): - df.iloc[:] = df + df.iloc[:] = df self.assert_frame_equal(df, orig) df.iloc[:-1] = df.iloc[:-1] self.assert_frame_equal(df, orig) - if isinstance(data.dtype, DatetimeTZDtype): - # no warning bc df.values casts to object dtype - warn = None - with tm.assert_produces_warning(warn, match=msg): - df.iloc[:] = df.values + df.iloc[:] = df.values self.assert_frame_equal(df, orig) if not using_array_manager and not using_copy_on_write: # GH#33457 Check that this setting occurred in-place diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4e4d0590830de..8605e101cd0a7 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -555,12 +555,13 @@ def test_fancy_getitem_slice_mixed( assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" if not using_copy_on_write: - with pytest.raises(SettingWithCopyError, match=msg): - sliced.loc[:, "C"] = 4.0 + sliced.loc[:, "C"] = 4.0 assert (float_frame["C"] == 4).all() + + # with the enforcement of GH#45333 in 2.0, this remains a view + np.shares_memory(sliced["C"]._values, float_frame["C"]._values) else: sliced.loc[:, "C"] = 4.0 tm.assert_frame_equal(float_frame, original) @@ -785,10 +786,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): assert len(result) == 5 cp = df.copy() - warn = FutureWarning if using_array_manager else None - msg = "will attempt to set the values inplace" - with tm.assert_produces_warning(warn, match=msg): - cp.loc[1.0:5.0] = 0 + cp.loc[1.0:5.0] = 0 result = cp.loc[1.0:5.0] assert (result == 0).values.all() @@ -1006,7 +1004,15 @@ def test_iloc_row(self): expected = df.reindex(df.index[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_row_slice_view(self, using_array_manager, using_copy_on_write): + def test_iloc_row_slice_view( + self, using_array_manager, using_copy_on_write, request + ): + if using_array_manager: + mark = pytest.mark.xfail( + reason="Enforcement of GH#45333 causing the last assertion to fail" + ) + request.node.add_marker(mark) + df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) original = df.copy() @@ -1017,16 +1023,17 @@ def test_iloc_row_slice_view(self, using_array_manager, using_copy_on_write): assert np.shares_memory(df[2], subset[2]) exp_col = original[2].copy() - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" if using_copy_on_write: subset.loc[:, 2] = 0.0 else: - with pytest.raises(SettingWithCopyError, match=msg): - subset.loc[:, 2] = 0.0 + subset.loc[:, 2] = 0.0 # TODO(ArrayManager) verify it is expected that the original didn't change if not using_array_manager: exp_col._values[4:8] = 0.0 + + # With the enforcement of GH#45333 in 2.0, this remains a view + assert np.shares_memory(df[2], subset[2]) tm.assert_series_equal(df[2], exp_col) def test_iloc_col(self): @@ -1060,12 +1067,12 @@ def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write): # verify slice is view assert np.shares_memory(df[8]._values, subset[8]._values) - # and that we are setting a copy - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - subset.loc[:, 8] = 0.0 + subset.loc[:, 8] = 0.0 assert (df[8] == 0).all() + + # with the enforcement of GH#45333 in 2.0, this remains a view + assert np.shares_memory(df[8]._values, subset[8]._values) else: if using_copy_on_write: # verify slice is view diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 9a9fea3462752..a9454d73d5429 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -82,7 +82,7 @@ def test_diff_datetime_axis0_with_nat(self, tz): expected = Series(ex_index).to_frame() tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("tz", [pytest.param(None, marks=pytest.mark.xfail), "UTC"]) + @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_with_nat_zero_periods(self, tz): # diff on NaT values should give NaT, not timedelta64(0) dti = date_range("2016-01-01", periods=4, tz=tz) @@ -91,8 +91,7 @@ def test_diff_datetime_with_nat_zero_periods(self, tz): df[1] = ser.copy() - with tm.assert_produces_warning(None): - df.iloc[:, 0] = pd.NaT + df.iloc[:, 0] = pd.NaT expected = df - df assert expected[0].isna().all() diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 8c4d9499e3676..f072cd04f49e1 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -220,9 +220,7 @@ def test_dropna_with_duplicate_columns(self): df.iloc[2, [0, 1, 2]] = np.nan df.iloc[0, 0] = np.nan df.iloc[1, 1] = np.nan - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.iloc[:, 3] = np.nan + df.iloc[:, 3] = np.nan expected = df.dropna(subset=["A", "B", "C"], how="all") expected.columns = ["A", "A", "B", "C"] diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c70268bd0aef2..baaaef9c7a0ff 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2541,8 +2541,8 @@ def check_views(c_only: bool = False): # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve # view, so we have to check in the other direction - with tm.assert_produces_warning(FutureWarning, match="will attempt to set"): - df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) + # with tm.assert_produces_warning(FutureWarning, match="will attempt to set"): + df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) assert df.dtypes.iloc[2] == c.dtype if not copy and not using_copy_on_write: check_views(True) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index ac4bb1093d84a..97fb1b577412c 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -539,11 +539,8 @@ def test_loc_setitem_single_column_slice(): columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]), ) expected = df.copy() - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, "B"] = np.arange(4) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected.iloc[:, 2] = np.arange(4) + df.loc[:, "B"] = np.arange(4) + expected.iloc[:, 2] = np.arange(4) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 81914e1b8052f..78727917ba66a 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -360,8 +360,9 @@ def test_detect_chained_assignment_implicit_take2(self, using_copy_on_write): assert df._is_copy is not None df.loc[:, "letters"] = df["letters"].apply(str.lower) - # Should be ok even though it's a copy! - assert df._is_copy is None + # with the enforcement of #45333 in 2.0, the .loc[:, letters] setting + # is inplace, so df._is_copy remains non-None. + assert df._is_copy is not None df["letters"] = df["letters"].apply(str.lower) assert df._is_copy is None diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f7e6665aad253..1d980ebfc0815 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -87,22 +87,11 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage df = frame.copy() orig_vals = df.values - overwrite = isinstance(key, slice) and key == slice(None) - warn = None - if overwrite: - warn = FutureWarning - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(warn, match=msg): - indexer(df)[key, 0] = cat - - if overwrite: - # TODO: GH#39986 this probably shouldn't behave differently - expected = DataFrame({0: cat}) - assert not np.shares_memory(df.values, orig_vals) - else: - expected = DataFrame({0: cat}).astype(object) - if not using_array_manager: - assert np.shares_memory(df[0].values, orig_vals) + indexer(df)[key, 0] = cat + + expected = DataFrame({0: cat}).astype(object) + if not using_array_manager: + assert np.shares_memory(df[0].values, orig_vals) tm.assert_frame_equal(df, expected) @@ -110,13 +99,14 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage df.iloc[0, 0] = "gamma" assert cat[0] != "gamma" - # TODO with mixed dataframe ("split" path), we always overwrite the column + # pre-2.0 with mixed dataframe ("split" path) we always overwrote the + # column. as of 2.0 we correctly write "into" the column, so + # we retain the object dtype. frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) df = frame.copy() orig_vals = df.values - with tm.assert_produces_warning(FutureWarning, match=msg): - indexer(df)[key, 0] = cat - expected = DataFrame({0: cat, 1: range(3)}) + indexer(df)[key, 0] = cat + expected = DataFrame({0: cat.astype(object), 1: range(3)}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("box", [array, Series]) @@ -908,10 +898,9 @@ def test_iloc_setitem_categorical_updates_inplace(self, using_copy_on_write): assert tm.shares_memory(df[1], cat) - # This should modify our original values in-place - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.iloc[:, 0] = cat[::-1] + # With the enforcement of GH#45333 in 2.0, this modifies original + # values inplace + df.iloc[:, 0] = cat[::-1] if not using_copy_on_write: assert tm.shares_memory(df[1], cat) @@ -1320,12 +1309,15 @@ def test_iloc_setitem_dtypes_duplicate_columns( ): # GH#22035 df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"]) - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.iloc[:, 0] = df.iloc[:, 0].astype(dtypes) + + # with the enforcement of GH#45333 in 2.0, this sets values inplace, + # so we retain object dtype + df.iloc[:, 0] = df.iloc[:, 0].astype(dtypes) expected_df = DataFrame( - [[expected_value, "str", "str2"]], columns=["a", "b", "b"] + [[expected_value, "str", "str2"]], + columns=["a", "b", "b"], + dtype=object, ) tm.assert_frame_equal(df, expected_df) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b3e59da4b0130..727927b8f7d74 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -549,52 +549,56 @@ def test_astype_assignment(self): ) df = df_orig.copy() - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) + + # with the enforcement of GH#45333 in 2.0, this setting is attempted inplace, + # so object dtype is retained + df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + expected["A"] = expected["A"].astype(object) + expected["B"] = expected["B"].astype(object) tm.assert_frame_equal(df, expected) df = df_orig.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) + df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + expected["A"] = expected["A"].astype(object) + expected["B"] = expected["B"].astype(object) tm.assert_frame_equal(df, expected) # GH5702 (loc) df = df_orig.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) + df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) expected = DataFrame( [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + expected["A"] = expected["A"].astype(object) tm.assert_frame_equal(df, expected) df = df_orig.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) expected = DataFrame( [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + expected["B"] = expected["B"].astype(object) + expected["C"] = expected["C"].astype(object) tm.assert_frame_equal(df, expected) def test_astype_assignment_full_replacements(self): # full replacements / no nans df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.iloc[:, 0] = df["A"].astype(np.int64) - expected = DataFrame({"A": [1, 2, 3, 4]}) + + # With the enforcement of GH#45333 in 2.0, this assignment occurs inplace, + # so float64 is retained + df.iloc[:, 0] = df["A"].astype(np.int64) + expected = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) tm.assert_frame_equal(df, expected) df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, "A"] = df["A"].astype(np.int64) - expected = DataFrame({"A": [1, 2, 3, 4]}) + df.loc[:, "A"] = df["A"].astype(np.int64) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 26eb7532adfa4..41beafbd78b0d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -390,16 +390,16 @@ def test_loc_setitem_dtype(self): # GH31340 df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}) cols = ["a", "b", "c"] - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, cols] = df.loc[:, cols].astype("float32") + df.loc[:, cols] = df.loc[:, cols].astype("float32") + # pre-2.0 this setting would swap in new arrays, in 2.0 it is correctly + # in-place, consistent with non-split-path expected = DataFrame( { "id": ["A"], - "a": np.array([1.2], dtype="float32"), - "b": np.array([0.0], dtype="float32"), - "c": np.array([-2.5], dtype="float32"), + "a": np.array([1.2], dtype="float64"), + "b": np.array([0.0], dtype="float64"), + "c": np.array([-2.5], dtype="float64"), } ) # id is inferred as object @@ -620,15 +620,18 @@ def test_loc_setitem_consistency_single_row(self): def test_loc_setitem_consistency_empty(self): # empty (essentially noops) + # before the enforcement of #45333 in 2.0, the loc.setitem here would + # change the dtype of df.x to int64 expected = DataFrame(columns=["x", "y"]) - expected["x"] = expected["x"].astype(np.int64) df = DataFrame(columns=["x", "y"]) with tm.assert_produces_warning(None): df.loc[:, "x"] = 1 tm.assert_frame_equal(df, expected) + # setting with setitem swaps in a new array, so changes the dtype df = DataFrame(columns=["x", "y"]) df["x"] = 1 + expected["x"] = expected["x"].astype(np.int64) tm.assert_frame_equal(df, expected) def test_loc_setitem_consistency_slice_column_len(self): @@ -655,28 +658,24 @@ def test_loc_setitem_consistency_slice_column_len(self): ] df = DataFrame(values, index=mi, columns=cols) - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, ("Respondent", "StartDate")] = to_datetime( - df.loc[:, ("Respondent", "StartDate")] - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, ("Respondent", "EndDate")] = to_datetime( - df.loc[:, ("Respondent", "EndDate")] - ) - with tm.assert_produces_warning(None, match=msg): - # Adding a new key -> no warning - df.loc[:, ("Respondent", "Duration")] = ( - df.loc[:, ("Respondent", "EndDate")] - - df.loc[:, ("Respondent", "StartDate")] - ) + df.loc[:, ("Respondent", "StartDate")] = to_datetime( + df.loc[:, ("Respondent", "StartDate")] + ) + df.loc[:, ("Respondent", "EndDate")] = to_datetime( + df.loc[:, ("Respondent", "EndDate")] + ) + + # Adding a new key + df.loc[:, ("Respondent", "Duration")] = ( + df.loc[:, ("Respondent", "EndDate")] + - df.loc[:, ("Respondent", "StartDate")] + ) - with tm.assert_produces_warning(None, match=msg): - # timedelta64[m] -> float, so this cannot be done inplace, so - # no warning - df.loc[:, ("Respondent", "Duration")] = df.loc[ - :, ("Respondent", "Duration") - ] / Timedelta(60_000_000_000) + # timedelta64[m] -> float, so this cannot be done inplace, so + # no warning + df.loc[:, ("Respondent", "Duration")] = df.loc[ + :, ("Respondent", "Duration") + ] / Timedelta(60_000_000_000) expected = Series( [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration") @@ -742,11 +741,11 @@ def test_loc_setitem_frame_with_reindex_mixed(self): # GH#40480 df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) df["B"] = "string" - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") - expected = DataFrame({"A": ser}) + # pre-2.0 this setting swapped in a new array, now it is inplace + # consistent with non-split-path + expected = DataFrame({"A": ser.astype(float)}) expected["B"] = "string" tm.assert_frame_equal(df, expected) @@ -754,10 +753,10 @@ def test_loc_setitem_frame_with_inverted_slice(self): # GH#40480 df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float) df["B"] = "string" - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") - expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3]) + df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") + # pre-2.0 this setting swapped in a new array, now it is inplace + # consistent with non-split-path + expected = DataFrame({"A": [3.0, 2.0, 1.0], "B": "string"}, index=[1, 2, 3]) tm.assert_frame_equal(df, expected) def test_loc_setitem_empty_frame(self): @@ -930,13 +929,7 @@ def test_loc_setitem_missing_columns(self, index, box, expected): # GH 29334 df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) - warn = None - if isinstance(index[0], slice) and index[0] == slice(None): - warn = FutureWarning - - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(warn, match=msg): - df.loc[index] = box + df.loc[index] = box tm.assert_frame_equal(df, expected) def test_loc_coercion(self): @@ -1174,8 +1167,6 @@ def test_loc_uint64_disallow_negative(self): # don't wrap around ser.loc[[-1]] - # FIXME: warning issued here is false-positive - @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") def test_loc_setitem_empty_append_expands_rows(self): # GH6173, various appends to an empty dataframe @@ -1187,8 +1178,6 @@ def test_loc_setitem_empty_append_expands_rows(self): df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) - # FIXME: warning issued here is false-positive - @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): # GH#37932 same as test_loc_setitem_empty_append_expands_rows # but with mixed dtype so we go through take_split_path @@ -1439,12 +1428,12 @@ def test_loc_setitem_single_row_categorical(self): df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, "Alpha"] = categories + # pre-2.0 this swapped in a new array, in 2.0 it operates inplace, + # consistent with non-split-path + df.loc[:, "Alpha"] = categories result = df["Alpha"] - expected = Series(categories, index=df.index, name="Alpha") + expected = Series(categories, index=df.index, name="Alpha").astype(object) tm.assert_series_equal(result, expected) def test_loc_setitem_datetime_coercion(self): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 938056902e745..a62474ed14fa1 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -308,12 +308,12 @@ def test_partial_setting_frame(self, using_array_manager): tm.assert_frame_equal(df, expected) # mixed dtype frame, overwrite - expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) + expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])})) df = df_orig.copy() df["B"] = df["B"].astype(np.float64) - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.loc[:, "B"] = df.loc[:, "A"] + # as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at + # setting inplace + df.loc[:, "B"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # single dtype frame, partial setting From 9a65009a53ac3de3ffce34445ba2261995cb609e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Nov 2022 13:08:37 -0800 Subject: [PATCH 2/8] Fix ArrayManager tests --- pandas/tests/computation/test_eval.py | 1 - pandas/tests/frame/indexing/test_setitem.py | 25 +++++--------------- pandas/tests/frame/indexing/test_where.py | 16 ++++--------- pandas/tests/frame/methods/test_fillna.py | 1 - pandas/tests/frame/methods/test_rename.py | 3 --- pandas/tests/frame/methods/test_shift.py | 10 ++------ pandas/tests/frame/test_nonunique_indexes.py | 6 +---- pandas/tests/frame/test_stack_unstack.py | 8 ++----- pandas/tests/indexing/test_iloc.py | 15 +++--------- pandas/tests/indexing/test_loc.py | 9 +------ 10 files changed, 20 insertions(+), 74 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index b0a182ffe4933..04248aafeec6c 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1874,7 +1874,6 @@ def test_eval_no_support_column_name(request, column): tm.assert_frame_equal(result, expected) -@td.skip_array_manager_not_yet_implemented def test_set_inplace(using_copy_on_write): # https://github.com/pandas-dev/pandas/issues/47449 # Ensure we don't only update the DataFrame inplace, but also the actual diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 8331bed881ce1..af83d4af7717d 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -409,16 +409,12 @@ def test_setitem_frame_length_0_str_key(self, indexer): expected["A"] = expected["A"].astype("object") tm.assert_frame_equal(df, expected) - def test_setitem_frame_duplicate_columns(self, using_array_manager): + def test_setitem_frame_duplicate_columns(self): # GH#15695 - warn = FutureWarning if using_array_manager else None - msg = "will attempt to set the values inplace" - cols = ["A", "B", "C"] * 2 df = DataFrame(index=range(3), columns=cols) df.loc[0, "A"] = (0, 3) - with tm.assert_produces_warning(warn, match=msg): - df.loc[:, "B"] = (1, 4) + df.loc[:, "B"] = (1, 4) df["C"] = (2, 5) expected = DataFrame( [ @@ -429,19 +425,10 @@ def test_setitem_frame_duplicate_columns(self, using_array_manager): dtype="object", ) - if using_array_manager: - # setitem replaces column so changes dtype - - expected.columns = cols - expected["C"] = expected["C"].astype("int64") - # TODO(ArrayManager) .loc still overwrites - expected["B"] = expected["B"].astype("int64") - - else: - # set these with unique columns to be extra-unambiguous - expected[2] = expected[2].astype(np.int64) - expected[5] = expected[5].astype(np.int64) - expected.columns = cols + # set these with unique columns to be extra-unambiguous + expected[2] = expected[2].astype(np.int64) + expected[5] = expected[5].astype(np.int64) + expected.columns = cols tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 501822f856a63..3f95dbd576a3b 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -363,7 +363,7 @@ def test_where_bug_transposition(self): result = a.where(do_not_replace, b) tm.assert_frame_equal(result, expected) - def test_where_datetime(self, using_array_manager): + def test_where_datetime(self): # GH 3311 df = DataFrame( @@ -384,10 +384,7 @@ def test_where_datetime(self, using_array_manager): expected = df.copy() expected.loc[[0, 1], "A"] = np.nan - warn = FutureWarning if using_array_manager else None - msg = "will attempt to set the values inplace" - with tm.assert_produces_warning(warn, match=msg): - expected.loc[:, "C"] = np.nan + expected.loc[:, "C"] = np.nan tm.assert_frame_equal(result, expected) def test_where_none(self): @@ -461,7 +458,7 @@ def test_where_complex(self): df[df.abs() >= 5] = np.nan tm.assert_frame_equal(df, expected) - def test_where_axis(self, using_array_manager): + def test_where_axis(self): # GH 9736 df = DataFrame(np.random.randn(2, 2)) mask = DataFrame([[False, False], [False, False]]) @@ -515,7 +512,7 @@ def test_where_axis_with_upcast(self): assert return_value is None tm.assert_frame_equal(result, expected) - def test_where_axis_multiple_dtypes(self, using_array_manager): + def test_where_axis_multiple_dtypes(self): # Multiple dtypes (=> multiple Blocks) df = pd.concat( [ @@ -571,10 +568,7 @@ def test_where_axis_multiple_dtypes(self, using_array_manager): d2 = df.copy().drop(1, axis=1) expected = df.copy() - warn = FutureWarning if using_array_manager else None - msg = "will attempt to set the values inplace" - with tm.assert_produces_warning(warn, match=msg): - expected.loc[:, 1] = np.nan + expected.loc[:, 1] = np.nan result = df.where(mask, d2) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index a3424f09f334c..0645afd861029 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -20,7 +20,6 @@ class TestFillNA: - @td.skip_array_manager_not_yet_implemented def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): df = DataFrame( {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]} diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 239c31b2f9521..7b4a040e679eb 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -4,8 +4,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas import ( DataFrame, Index, @@ -168,7 +166,6 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem copy/view def test_rename_nocopy(self, float_frame, using_copy_on_write): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index c44c0bd566585..9db93379852b7 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -364,23 +364,17 @@ def test_shift_empty(self): tm.assert_frame_equal(df, rs) - def test_shift_duplicate_columns(self, using_array_manager): + def test_shift_duplicate_columns(self): # GH#9092; verify that position-based shifting works # in the presence of duplicate columns column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] data = np.random.randn(20, 5) - warn = None - if using_array_manager: - warn = FutureWarning - shifted = [] for columns in column_lists: df = DataFrame(data.copy(), columns=columns) for s in range(5): - msg = "will attempt to set the values inplace" - with tm.assert_produces_warning(warn, match=msg): - df.iloc[:, s] = df.iloc[:, s].shift(s + 1) + df.iloc[:, s] = df.iloc[:, s].shift(s + 1) df.columns = range(5) shifted.append(df) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 2c28800fb181f..1a90cc364263f 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.compat import is_platform_windows - import pandas as pd from pandas import ( DataFrame, @@ -322,9 +320,7 @@ def test_dup_columns_across_dtype(self): def test_set_value_by_index(self, using_array_manager): # See gh-12344 - warn = ( - FutureWarning if using_array_manager and not is_platform_windows() else None - ) + warn = None msg = "will attempt to set the values inplace" df = DataFrame(np.arange(9).reshape(3, 3).T) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 22075a30bdb65..77913067def1c 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -22,13 +22,9 @@ class TestDataFrameReshape: - def test_stack_unstack(self, float_frame, using_array_manager): - warn = FutureWarning if using_array_manager else None - msg = "will attempt to set the values inplace" - + def test_stack_unstack(self, float_frame): df = float_frame.copy() - with tm.assert_produces_warning(warn, match=msg): - df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) + df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) stacked = df.stack() stacked_df = DataFrame({"foo": stacked, "bar": stacked}) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 1d980ebfc0815..e42402e76234d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -839,16 +839,8 @@ def test_iloc_empty_list_indexer_is_ok(self): df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object( - self, using_array_manager, using_copy_on_write, request - ): + def test_identity_slice_returns_new_object(self, using_copy_on_write): # GH13873 - if using_array_manager: - mark = pytest.mark.xfail( - reason="setting with .loc[:, 'a'] does not alter inplace" - ) - request.node.add_marker(mark) - original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.iloc[:] assert sliced_df is not original_df @@ -889,7 +881,6 @@ def test_series_indexing_zerodim_np_array(self): result = s.iloc[np.array(0)] assert result == 1 - @td.skip_array_manager_not_yet_implemented def test_iloc_setitem_categorical_updates_inplace(self, using_copy_on_write): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer cat = Categorical(["A", "B", "C"]) @@ -1208,7 +1199,7 @@ def test_iloc_getitem_int_single_ea_block_view(self): arr[2] = arr[-1] assert ser[0] == arr[-1] - def test_iloc_setitem_multicolumn_to_datetime(self, using_array_manager): + def test_iloc_setitem_multicolumn_to_datetime(self): # GH#20511 df = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]}) @@ -1223,7 +1214,7 @@ def test_iloc_setitem_multicolumn_to_datetime(self, using_array_manager): "B": ["2021", "2022"], } ) - tm.assert_frame_equal(df, expected, check_dtype=using_array_manager) + tm.assert_frame_equal(df, expected, check_dtype=False) class TestILocErrors: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 41beafbd78b0d..bbffa8a1e61d8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1077,15 +1077,8 @@ def test_loc_empty_list_indexer_is_ok(self): df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object( - self, using_array_manager, request, using_copy_on_write - ): + def test_identity_slice_returns_new_object(self, using_copy_on_write): # GH13873 - if using_array_manager: - mark = pytest.mark.xfail( - reason="setting with .loc[:, 'a'] does not alter inplace" - ) - request.node.add_marker(mark) original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.loc[:] From bc515ea6e9835f068179c2b9d3a0b7dde7ae18b7 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Nov 2022 14:10:46 -0800 Subject: [PATCH 3/8] suggested edits to AM tets --- pandas/tests/copy_view/test_indexing.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 89bd821839640..6df8fd9480d09 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -351,10 +351,8 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt index=range(1, 3), ) tm.assert_frame_equal(subset, expected) - if using_copy_on_write or using_array_manager: + if using_copy_on_write: # original parent dataframe is not modified (CoW) - # with the enforcement of GH#45333 in 2.0, the original is modified - df_orig.loc[1:3, "a"] = np.array([10, 11], dtype="int64") tm.assert_frame_equal(df, df_orig) else: # original parent dataframe is actually updated @@ -386,10 +384,8 @@ def test_subset_set_column_with_loc2(using_copy_on_write, using_array_manager): subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 0]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write or using_array_manager: + if using_copy_on_write: # original parent dataframe is not modified (CoW) - # with the enforcement of GH#45333 in 2.0, the original is modified - df_orig.loc[1:3, "a"] = 0 tm.assert_frame_equal(df, df_orig) else: # original parent dataframe is actually updated @@ -452,10 +448,7 @@ def test_subset_set_with_column_indexer( subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write or using_array_manager: - # pre-2.0, the original would not be mutated at all. this changed with the - # enforcement of GH#45333 - df_orig.loc[1:2, ["a", "b"]] = 0 + if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: # pre-2.0, in the mixed case with BlockManager, only column "a" From d40230cdbb7913234c2504e3a98b557d6bb82bba Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Nov 2022 15:29:22 -0800 Subject: [PATCH 4/8] update doctest --- pandas/core/frame.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b181a3e8e391..3f1f0375b45c2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8038,10 +8038,10 @@ def update( >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]}) >>> df.update(new_df) >>> df - A B - 0 1 4.0 - 1 2 500.0 - 2 3 6.0 + A B + 0 1 4 + 1 2 500 + 2 3 6 """ from pandas.core.computation import expressions @@ -8079,9 +8079,7 @@ def update( if mask.all(): continue - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "In a future version, `df.iloc") - self.loc[:, col] = expressions.where(mask, this, that) + self.loc[:, col] = expressions.where(mask, this, that) # ---------------------------------------------------------------------- # Data reshaping From e8d1bb4d8af461e6fc2d864dd51a4d0e529b93c5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Nov 2022 18:09:26 -0800 Subject: [PATCH 5/8] CoW test --- pandas/tests/frame/methods/test_rename.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 7b4a040e679eb..80e9ee8d488a2 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -171,12 +171,7 @@ def test_rename_nocopy(self, float_frame, using_copy_on_write): assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) - # TODO(CoW) this also shouldn't warn in case of CoW, but the heuristic - # checking if the array shares memory doesn't work if CoW happened - with tm.assert_produces_warning(FutureWarning if using_copy_on_write else None): - # This loc setitem already happens inplace, so no warning - # that this will change in the future - renamed.loc[:, "foo"] = 1.0 + renamed.loc[:, "foo"] = 1.0 if using_copy_on_write: assert not (float_frame["C"] == 1.0).all() else: From 0917853a72f26c650e3978207d3af507d46d758e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Nov 2022 10:07:21 -0800 Subject: [PATCH 6/8] whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 20e99d007c798..6edbc16f1091b 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -570,6 +570,7 @@ Removal of prior version deprecations/changes - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`) - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`) - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) +- Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``, these now always attempt to set values inplace before falling back to casting (:issue:`45333`) - .. --------------------------------------------------------------------------- From 77e091c6703fd7edd3ddf6ca2e0915cbb832d717 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 12 Dec 2022 15:15:59 -0800 Subject: [PATCH 7/8] Use reindex_indexer --- pandas/core/frame.py | 2 +- pandas/core/indexing.py | 14 ++++++-------- pandas/tests/frame/test_nonunique_indexes.py | 2 +- pandas/tests/indexing/test_loc.py | 1 + 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 87b04f7451fb5..d48e3aebe415e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4217,7 +4217,7 @@ def _set_value( else: icol = self.columns.get_loc(col) iindex = self.index.get_loc(index) - self._mgr.column_setitem(icol, iindex, value, inplace=True) + self._mgr.column_setitem(icol, iindex, value, inplace_only=True) self._clear_item_cache() except (KeyError, TypeError, ValueError, LossySetitemError): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ee4e6b1f447f8..86dd1cda78a1f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -818,14 +818,12 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: # below would create float64 columns in this example, which # would successfully hold 7, so we would end up with the wrong # dtype. - arr = np.empty(len(self.obj), dtype=np.void) - df = type(self.obj)({i: arr for i in range(len(diff))}) - df.columns = diff - df.index = self.obj.index - from pandas import concat - - new_obj = concat([self.obj, df], axis=1) - self.obj._mgr = new_obj._mgr + indexer = np.arange(len(keys), dtype=np.intp) + indexer[len(self.obj.columns) :] = -1 + new_mgr = self.obj._mgr.reindex_indexer( + keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True + ) + self.obj._mgr = new_mgr return self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 1a90cc364263f..4c21446cab375 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -318,7 +318,7 @@ def test_dup_columns_across_dtype(self): xp.columns = ["A", "A", "B"] tm.assert_frame_equal(rs, xp) - def test_set_value_by_index(self, using_array_manager): + def test_set_value_by_index(self): # See gh-12344 warn = None msg = "will attempt to set the values inplace" diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d8a8558376cea..ad04ab5ef37b0 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -663,6 +663,7 @@ def test_loc_setitem_consistency_slice_column_len(self): df.loc[:, ("Respondent", "EndDate")] = to_datetime( df.loc[:, ("Respondent", "EndDate")] ) + df = df.infer_objects(copy=False) # Adding a new key df.loc[:, ("Respondent", "Duration")] = ( From 34a6f5ad3b28ae8ded2ae659dce17bcba2512733 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 12 Dec 2022 15:23:30 -0800 Subject: [PATCH 8/8] suggested test edits --- pandas/tests/copy_view/test_indexing.py | 6 ------ pandas/tests/frame/indexing/test_indexing.py | 22 +++++--------------- pandas/tests/frame/test_constructors.py | 1 - pandas/tests/indexing/test_loc.py | 4 ++++ 4 files changed, 9 insertions(+), 24 deletions(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 08c9fed73845e..184799cd1efa8 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -337,8 +337,6 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt subset.loc[:, "a"] = np.array([10, 11], dtype="int64") else: with pd.option_context("chained_assignment", "warn"): - # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those - # warnings and only assert the SettingWithCopyWarning with tm.assert_produces_warning( None, raise_on_extra_warnings=not using_array_manager, @@ -373,8 +371,6 @@ def test_subset_set_column_with_loc2(using_copy_on_write, using_array_manager): subset.loc[:, "a"] = 0 else: with pd.option_context("chained_assignment", "warn"): - # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those - # warnings and only assert the SettingWithCopyWarning with tm.assert_produces_warning( None, raise_on_extra_warnings=not using_array_manager, @@ -439,8 +435,6 @@ def test_subset_set_with_column_indexer( subset.loc[:, indexer] = 0 else: with pd.option_context("chained_assignment", "warn"): - # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those - # warnings and only assert the SettingWithCopyWarning # As of 2.0, this setitem attempts (successfully) to set values # inplace, so the assignment is not chained. subset.loc[:, indexer] = 0 diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 61ed65e58cf72..65daf748e4e9d 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -556,15 +556,14 @@ def test_fancy_getitem_slice_mixed( assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) + sliced.loc[:, "C"] = 4.0 if not using_copy_on_write: - sliced.loc[:, "C"] = 4.0 assert (float_frame["C"] == 4).all() # with the enforcement of GH#45333 in 2.0, this remains a view np.shares_memory(sliced["C"]._values, float_frame["C"]._values) else: - sliced.loc[:, "C"] = 4.0 tm.assert_frame_equal(float_frame, original) def test_getitem_setitem_non_ix_labels(self): @@ -1005,14 +1004,7 @@ def test_iloc_row(self): expected = df.reindex(df.index[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_row_slice_view( - self, using_array_manager, using_copy_on_write, request - ): - if using_array_manager: - mark = pytest.mark.xfail( - reason="Enforcement of GH#45333 causing the last assertion to fail" - ) - request.node.add_marker(mark) + def test_iloc_row_slice_view(self, using_copy_on_write, request): df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) original = df.copy() @@ -1024,14 +1016,10 @@ def test_iloc_row_slice_view( assert np.shares_memory(df[2], subset[2]) exp_col = original[2].copy() - if using_copy_on_write: - subset.loc[:, 2] = 0.0 - else: + subset.loc[:, 2] = 0.0 + if not using_copy_on_write: subset.loc[:, 2] = 0.0 - - # TODO(ArrayManager) verify it is expected that the original didn't change - if not using_array_manager: - exp_col._values[4:8] = 0.0 + exp_col._values[4:8] = 0.0 # With the enforcement of GH#45333 in 2.0, this remains a view assert np.shares_memory(df[2], subset[2]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2e54c67cf61e1..f6529e4a0f82d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2544,7 +2544,6 @@ def check_views(c_only: bool = False): # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve # view, so we have to check in the other direction - # with tm.assert_produces_warning(FutureWarning, match="will attempt to set"): df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) assert df.dtypes.iloc[2] == c.dtype if not copy and not using_copy_on_write: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ad04ab5ef37b0..cb65ecf411118 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1429,6 +1429,10 @@ def test_loc_setitem_single_row_categorical(self): expected = Series(categories, index=df.index, name="Alpha").astype(object) tm.assert_series_equal(result, expected) + # double-check that the non-loc setting retains categoricalness + df["Alpha"] = categories + tm.assert_series_equal(df["Alpha"], Series(categories, name="Alpha")) + def test_loc_setitem_datetime_coercion(self): # GH#1048 df = DataFrame({"c": [Timestamp("2010-10-01")] * 3})