pandas-dev · mroeschke · Dec 21, 2022 · Nov 18, 2022 · Nov 18, 2022 · Nov 18, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -636,6 +636,7 @@ Removal of prior version deprecations/changes
 - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
 - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
 - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
+- Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``, these now always attempt to set values inplace before falling back to casting (:issue:`45333`)
 - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
 - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
 - Using the method :meth:`DataFrameGroupBy.transform` with a callable that returns DataFrames will align to the input's index (:issue:`47244`)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1789,6 +1789,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
             return element
         raise LossySetitemError
 
+    if dtype.kind == "V":
+        # i.e. np.void, which cannot hold _anything_
+        raise LossySetitemError
+
     raise NotImplementedError(dtype)
 
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4217,7 +4217,7 @@ def _set_value(
             else:
                 icol = self.columns.get_loc(col)
                 iindex = self.index.get_loc(index)
-            self._mgr.column_setitem(icol, iindex, value, inplace=True)
+            self._mgr.column_setitem(icol, iindex, value, inplace_only=True)
             self._clear_item_cache()
 
         except (KeyError, TypeError, ValueError, LossySetitemError):
@@ -8111,10 +8111,10 @@ def update(
         >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
         >>> df.update(new_df)
         >>> df
-           A      B
-        0  1    4.0
-        1  2  500.0
-        2  3    6.0
+           A    B
+        0  1    4
+        1  2  500
+        2  3    6
         """
         from pandas.core.computation import expressions
 
@@ -8152,9 +8152,7 @@ def update(
             if mask.all():
                 continue
 
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", "In a future version, `df.iloc")
-                self.loc[:, col] = expressions.where(mask, this, that)
+            self.loc[:, col] = expressions.where(mask, this, that)
 
     # ----------------------------------------------------------------------
     # Data reshaping

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -9,7 +9,6 @@
     cast,
     final,
 )
-import warnings
 
 import numpy as np
 
@@ -23,9 +22,9 @@
     AbstractMethodError,
     IndexingError,
     InvalidIndexError,
+    LossySetitemError,
 )
 from pandas.util._decorators import doc
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import (
     can_hold_element,
@@ -793,6 +792,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
         if self.ndim != 2:
             return
 
+        orig_key = key
         if isinstance(key, tuple) and len(key) > 1:
             # key may be a tuple if we are .loc
             # if length of key is > 1 set key to column part
@@ -808,6 +808,23 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
         ):
             # GH#38148
             keys = self.obj.columns.union(key, sort=False)
+            diff = Index(key).difference(self.obj.columns, sort=False)
+
+            if len(diff) and com.is_null_slice(orig_key[0]):
+                # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
+                #  is a new column, add the new columns with dtype=np.void
+                #  so that later when we go through setitem_single_column
+                #  we will use isetitem. Without this, the reindex_axis
+                #  below would create float64 columns in this example, which
+                #  would successfully hold 7, so we would end up with the wrong
+                #  dtype.
+                indexer = np.arange(len(keys), dtype=np.intp)
+                indexer[len(self.obj.columns) :] = -1
+                new_mgr = self.obj._mgr.reindex_indexer(
+                    keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
+                )
+                self.obj._mgr = new_mgr
+                return
 
             self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
 
@@ -1979,72 +1996,25 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
         """
         pi = plane_indexer
 
-        orig_values = self.obj._get_column_array(loc)
-
-        # perform the equivalent of a setitem on the info axis
-        # as we have a null slice or a slice with full bounds
-        # which means essentially reassign to the columns of a
-        # multi-dim object
-        # GH#6149 (null slice), GH#10408 (full bounds)
-        if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
-            pass
-        elif (
-            is_array_like(value)
-            and len(value.shape) > 0
-            and self.obj.shape[0] == value.shape[0]
-            and not is_empty_indexer(pi)
-        ):
-            if is_list_like(pi) and not is_bool_dtype(pi):
-                value = value[np.argsort(pi)]
-            else:
-                # in case of slice
-                value = value[pi]
+        is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))
+
+        if is_full_setter:
+
+            try:
+                self.obj._mgr.column_setitem(
+                    loc, plane_indexer, value, inplace_only=True
+                )
+            except (ValueError, TypeError, LossySetitemError):
+                # If we're setting an entire column and we can't do it inplace,
+                #  then we can use value's dtype (or inferred dtype)
+                #  instead of object
+                self.obj.isetitem(loc, value)
         else:
             # set value into the column (first attempting to operate inplace, then
             #  falling back to casting if necessary)
             self.obj._mgr.column_setitem(loc, plane_indexer, value)
-            self.obj._clear_item_cache()
-            return
-
-        self.obj._iset_item(loc, value)
 
-        # We will not operate in-place, but will attempt to in the future.
-        #  To determine whether we need to issue a FutureWarning, see if the
-        #  setting in-place would work, i.e. behavior will change.
-
-        new_values = self.obj._get_column_array(loc)
-
-        if can_hold_element(orig_values, new_values) and not len(new_values) == 0:
-            # Don't issue the warning yet, as we can still trim a few cases where
-            #  behavior will not change.
-
-            if (
-                isinstance(new_values, np.ndarray)
-                and isinstance(orig_values, np.ndarray)
-                and (
-                    np.shares_memory(new_values, orig_values)
-                    or new_values.shape != orig_values.shape
-                )
-            ):
-                # TODO: get something like tm.shares_memory working?
-                # The values were set inplace after all, no need to warn,
-                #  e.g. test_rename_nocopy
-                # In case of enlarging we can not set inplace, so need to
-                # warn either
-                pass
-            else:
-                warnings.warn(
-                    "In a future version, `df.iloc[:, i] = newvals` will attempt "
-                    "to set the values inplace instead of always setting a new "
-                    "array. To retain the old behavior, use either "
-                    "`df[df.columns[i]] = newvals` or, if columns are non-unique, "
-                    "`df.isetitem(i, newvals)`",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                # TODO: how to get future behavior?
-                # TODO: what if we got here indirectly via loc?
-        return
+        self.obj._clear_item_cache()
 
     def _setitem_single_block(self, indexer, value, name: str) -> None:
         """

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -856,19 +856,21 @@ def iset(
         return
 
     def column_setitem(
-        self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False
+        self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False
     ) -> None:
         """
         Set values ("setitem") into a single column (not setting the full column).
 
         This is a method on the ArrayManager level, to avoid creating an
         intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
+
+
         """
         if not is_integer(loc):
             raise TypeError("The column index should be an integer")
         arr = self.arrays[loc]
         mgr = SingleArrayManager([arr], [self._axes[0]])
-        if inplace:
+        if inplace_only:
             mgr.setitem_inplace(idx, value)
         else:
             new_mgr = mgr.setitem((idx,), value)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1336,7 +1336,7 @@ def _iset_single(
         return
 
     def column_setitem(
-        self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False
+        self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False
     ) -> None:
         """
         Set values ("setitem") into a single column (not setting the full column).
@@ -1355,7 +1355,7 @@ def column_setitem(
         # this manager is only created temporarily to mutate the values in place
         # so don't track references, otherwise the `setitem` would perform CoW again
         col_mgr = self.iget(loc, track_ref=False)
-        if inplace:
+        if inplace_only:
             col_mgr.setitem_inplace(idx, value)
         else:
             new_mgr = col_mgr.setitem((idx,), value)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -1874,7 +1874,6 @@ def test_eval_no_support_column_name(request, column):
     tm.assert_frame_equal(result, expected)
 
 
-@td.skip_array_manager_not_yet_implemented
 def test_set_inplace(using_copy_on_write):
     # https://github.com/pandas-dev/pandas/issues/47449
     # Ensure we don't only update the DataFrame inplace, but also the actual

diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
@@ -337,10 +337,8 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt
         subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
     else:
         with pd.option_context("chained_assignment", "warn"):
-            # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those
-            # warnings and only assert the SettingWithCopyWarning
             with tm.assert_produces_warning(
-                SettingWithCopyWarning,
+                None,
                 raise_on_extra_warnings=not using_array_manager,
             ):
                 subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
@@ -351,7 +349,7 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt
         index=range(1, 3),
     )
     tm.assert_frame_equal(subset, expected)
-    if using_copy_on_write or using_array_manager:
+    if using_copy_on_write:
         # original parent dataframe is not modified (CoW)
         tm.assert_frame_equal(df, df_orig)
     else:
@@ -373,18 +371,16 @@ def test_subset_set_column_with_loc2(using_copy_on_write, using_array_manager):
         subset.loc[:, "a"] = 0
     else:
         with pd.option_context("chained_assignment", "warn"):
-            # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those
-            # warnings and only assert the SettingWithCopyWarning
             with tm.assert_produces_warning(
-                SettingWithCopyWarning,
+                None,
                 raise_on_extra_warnings=not using_array_manager,
             ):
                 subset.loc[:, "a"] = 0
 
     subset._mgr._verify_integrity()
     expected = DataFrame({"a": [0, 0]}, index=range(1, 3))
     tm.assert_frame_equal(subset, expected)
-    if using_copy_on_write or using_array_manager:
+    if using_copy_on_write:
         # original parent dataframe is not modified (CoW)
         tm.assert_frame_equal(df, df_orig)
     else:
@@ -439,24 +435,20 @@ def test_subset_set_with_column_indexer(
         subset.loc[:, indexer] = 0
     else:
         with pd.option_context("chained_assignment", "warn"):
-            # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those
-            # warnings and only assert the SettingWithCopyWarning
-            with tm.assert_produces_warning(
-                SettingWithCopyWarning, raise_on_extra_warnings=False
-            ):
-                subset.loc[:, indexer] = 0
+            # As of 2.0, this setitem attempts (successfully) to set values
+            #  inplace, so the assignment is not chained.
+            subset.loc[:, indexer] = 0
 
     subset._mgr._verify_integrity()
     expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3))
-    # TODO full row slice .loc[:, idx] update inplace instead of overwrite?
-    expected["b"] = expected["b"].astype("int64")
     tm.assert_frame_equal(subset, expected)
-    if using_copy_on_write or using_array_manager:
+    if using_copy_on_write:
         tm.assert_frame_equal(df, df_orig)
     else:
-        # In the mixed case with BlockManager, only one of the two columns is
-        # mutated in the parent frame ..
-        df_orig.loc[1:2, ["a"]] = 0
+        # pre-2.0, in the mixed case with BlockManager, only column "a"
+        #  would be mutated in the parent frame. this changed with the
+        #  enforcement of GH#45333
+        df_orig.loc[1:2, ["a", "b"]] = 0
         tm.assert_frame_equal(df, df_orig)
 
 

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -1,13 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas.core.dtypes.dtypes import (
-    DatetimeTZDtype,
-    IntervalDtype,
-    PandasDtype,
-    PeriodDtype,
-)
-
 import pandas as pd
 import pandas._testing as tm
 from pandas.tests.extension.base.base import BaseExtensionTests
@@ -382,11 +375,6 @@ def test_setitem_frame_2d_values(self, data):
         # GH#44514
         df = pd.DataFrame({"A": data})
 
-        # These dtypes have non-broken implementations of _can_hold_element
-        has_can_hold_element = isinstance(
-            data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
-        )
-
         # Avoiding using_array_manager fixture
         #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
         using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
@@ -396,24 +384,13 @@ def test_setitem_frame_2d_values(self, data):
 
         orig = df.copy()
 
-        msg = "will attempt to set the values inplace instead"
-        warn = None
-        if has_can_hold_element and not isinstance(data.dtype, PandasDtype):
-            # PandasDtype excluded because it isn't *really* supported.
-            warn = FutureWarning
-
-        with tm.assert_produces_warning(warn, match=msg):
-            df.iloc[:] = df
+        df.iloc[:] = df
         self.assert_frame_equal(df, orig)
 
         df.iloc[:-1] = df.iloc[:-1]
         self.assert_frame_equal(df, orig)
 
-        if isinstance(data.dtype, DatetimeTZDtype):
-            # no warning bc df.values casts to object dtype
-            warn = None
-        with tm.assert_produces_warning(warn, match=msg):
-            df.iloc[:] = df.values
+        df.iloc[:] = df.values
         self.assert_frame_equal(df, orig)
         if not using_array_manager and not using_copy_on_write:
             # GH#33457 Check that this setting occurred in-place