From 9b30bdd98a7a675fc66000c3c01947e807da982e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 25 Nov 2023 23:32:15 +0100 Subject: [PATCH 1/9] CoW: Warn for cases that go through putmask --- pandas/core/internals/base.py | 15 ++++- pandas/core/internals/blocks.py | 58 ++++++++++++++++++- pandas/core/internals/managers.py | 25 +------- pandas/tests/copy_view/test_clip.py | 8 ++- pandas/tests/copy_view/test_indexing.py | 19 ++---- pandas/tests/copy_view/test_methods.py | 25 +++++--- pandas/tests/copy_view/test_replace.py | 8 ++- pandas/tests/frame/methods/test_replace.py | 8 ++- .../indexing/test_chaining_and_caching.py | 5 +- 9 files changed, 115 insertions(+), 56 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index fe366c7375c6a..ee45707791fbf 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -14,7 +14,10 @@ import numpy as np -from pandas._config import using_copy_on_write +from pandas._config import ( + using_copy_on_write, + warn_copy_on_write, +) from pandas._libs import ( algos as libalgos, @@ -49,6 +52,11 @@ ) +class _AlreadyWarned: + def __init__(self): + self.warned_already = False + + class DataManager(PandasObject): # TODO share more methods/attributes @@ -203,12 +211,17 @@ def putmask(self, mask, new, align: bool = True) -> Self: align_keys = ["mask"] new = extract_array(new, extract_numpy=True) + already_warned = None + if warn_copy_on_write(): + already_warned = _AlreadyWarned() + return self.apply_with_block( "putmask", align_keys=align_keys, mask=mask, new=new, using_cow=using_copy_on_write(), + already_warned=already_warned, ) @final diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 535d18f99f0ef..ec58b46371149 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -18,6 +18,7 @@ from pandas._config import ( get_option, using_copy_on_write, + warn_copy_on_write, ) from pandas._libs import ( @@ -136,6 +137,29 @@ _dtype_obj = np.dtype("object") +COW_WARNING_GENERAL_MSG = """\ +Setting a value on a view: behaviour will change in pandas 3.0. +You are mutating a Series or DataFrame object, and currently this mutation will +also have effect on other Series or DataFrame objects that share data with this +object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object +will never modify another. +""" + + +COW_WARNING_SETITEM_MSG = """\ +Setting a value on a view: behaviour will change in pandas 3.0. +Currently, the mutation will also have effect on the object that shares data +with this object. For example, when setting a value in a Series that was +extracted from a column of a DataFrame, that DataFrame will also be updated: + + ser = df["col"] + ser[0] = 0 <--- in pandas 2, this also updates `df` + +In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never +modify another, and thus in the example above, `df` will not be changed. +""" + + def maybe_split(meth: F) -> F: """ If we have a multi-column block, split and operate block-wise. Otherwise @@ -1355,7 +1379,9 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: values[indexer] = casted return self - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask( + self, mask, new, using_cow: bool = False, already_warned=None + ) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1388,6 +1414,19 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: return [self.copy(deep=False)] return [self] + if ( + warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + try: casted = np_can_hold_element(values.dtype, new) @@ -2020,7 +2059,9 @@ def where( return [nb] @final - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask( + self, mask, new, using_cow: bool = False, already_warned=None + ) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -2038,6 +2079,19 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: return [self.copy(deep=False)] return [self] + if ( + warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + self = self._maybe_copy(using_cow, inplace=True) values = self.values if values.ndim == 2: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 843441e4865c7..6d6552f38c59c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -72,6 +72,8 @@ interleaved_dtype, ) from pandas.core.internals.blocks import ( + COW_WARNING_GENERAL_MSG, + COW_WARNING_SETITEM_MSG, Block, NumpyBlock, ensure_block_shape, @@ -100,29 +102,6 @@ from pandas.api.extensions import ExtensionArray -COW_WARNING_GENERAL_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -You are mutating a Series or DataFrame object, and currently this mutation will -also have effect on other Series or DataFrame objects that share data with this -object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object -will never modify another. -""" - - -COW_WARNING_SETITEM_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -Currently, the mutation will also have effect on the object that shares data -with this object. For example, when setting a value in a Series that was -extracted from a column of a DataFrame, that DataFrame will also be updated: - - ser = df["col"] - ser[0] = 0 <--- in pandas 2, this also updates `df` - -In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never -modify another, and thus in the example above, `df` will not be changed. -""" - - class BaseBlockManager(DataManager): """ Core internal data structure to implement DataFrame, Series, etc. diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py index 6a27a0633a4aa..1363500f0d6c0 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -5,12 +5,16 @@ from pandas.tests.copy_view.util import get_array -def test_clip_inplace_reference(using_copy_on_write): +def test_clip_inplace_reference(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") view = df[:] - df.clip(lower=2, inplace=True) + if warn_copy_on_write: + with tm.assert_cow_warning(): + df.clip(lower=2, inplace=True) + else: + df.clip(lower=2, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 2e623f885b648..20522218876d6 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -367,10 +367,11 @@ def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): mask = subset > 3 - # TODO(CoW-warn) should warn -> mask is a DataFrame, which ends up going through - # DataFrame._where(..., inplace=True) - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: subset[mask] = 0 + elif warn_copy_on_write: + with tm.assert_cow_warning(): + subset[mask] = 0 else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): @@ -867,18 +868,8 @@ def test_series_subset_set_with_indexer( and indexer.dtype.kind == "i" ): warn = FutureWarning - is_mask = ( - indexer_si is tm.setitem - and isinstance(indexer, np.ndarray) - and indexer.dtype.kind == "b" - ) if warn_copy_on_write: - # TODO(CoW-warn) should also warn for setting with mask - # -> Series.__setitem__ with boolean mask ends up using Series._set_values - # or Series._where depending on value being set - with tm.assert_cow_warning( - not is_mask, raise_on_extra_warnings=warn is not None - ): + with tm.assert_cow_warning(raise_on_extra_warnings=warn is not None): indexer_si(subset)[indexer] = 0 else: with tm.assert_produces_warning(warn, match=msg): diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 6416dd50daddc..f5ac2530a9ee2 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1407,11 +1407,12 @@ def test_items(using_copy_on_write, warn_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) -def test_putmask(using_copy_on_write, dtype): +def test_putmask(using_copy_on_write, dtype, warn_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype) view = df[:] df_orig = df.copy() - df[df == df] = 5 + with tm.assert_cow_warning(warn_copy_on_write): + df[df == df] = 5 if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ -1445,15 +1446,21 @@ def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype): @pytest.mark.parametrize( "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)] ) -def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): +def test_putmask_dont_copy_some_blocks( + using_copy_on_write, val, exp, warn, warn_copy_on_write +): df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5}) view = df[:] df_orig = df.copy() indexer = DataFrame( [[True, False, False], [True, False, False]], columns=list("abc") ) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - df[indexer] = val + if warn_copy_on_write: + with tm.assert_cow_warning(): + df[indexer] = val + else: + with tm.assert_produces_warning(warn, match="incompatible dtype"): + df[indexer] = val if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ -1785,13 +1792,17 @@ def test_update_frame(using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(view, expected) -def test_update_series(using_copy_on_write): +def test_update_series(using_copy_on_write, warn_copy_on_write): ser1 = Series([1.0, 2.0, 3.0]) ser2 = Series([100.0], index=[1]) ser1_orig = ser1.copy() view = ser1[:] - ser1.update(ser2) + if warn_copy_on_write: + with tm.assert_cow_warning(): + ser1.update(ser2) + else: + ser1.update(ser2) expected = Series([1.0, 100.0, 3.0]) tm.assert_series_equal(ser1, expected) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index d11a2893becdc..3d8559a1905fc 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -279,14 +279,18 @@ def test_replace_categorical(using_copy_on_write, val): @pytest.mark.parametrize("method", ["where", "mask"]) -def test_masking_inplace(using_copy_on_write, method): +def test_masking_inplace(using_copy_on_write, method, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() arr_a = get_array(df, "a") view = df[:] method = getattr(df, method) - method(df["a"] > 1.6, -1, inplace=True) + if warn_copy_on_write: + with tm.assert_cow_warning(): + method(df["a"] > 1.6, -1, inplace=True) + else: + method(df["a"] > 1.6, -1, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f07c53060a06b..cbca15ea1ed2d 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -717,7 +717,7 @@ def test_replace_value_is_none(self, datetime_frame): datetime_frame.iloc[0, 0] = orig_value datetime_frame.iloc[1, 0] = orig2 - def test_replace_for_new_dtypes(self, datetime_frame): + def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): # dtypes tsframe = datetime_frame.copy().astype(np.float32) tsframe.loc[tsframe.index[:5], "A"] = np.nan @@ -732,7 +732,11 @@ def test_replace_for_new_dtypes(self, datetime_frame): tsframe.loc[tsframe.index[:5], "B"] = -1e8 b = tsframe["B"] - b[b == -1e8] = np.nan + if warn_copy_on_write: + with tm.assert_cow_warning(): + b[b == -1e8] = np.nan + else: + b[b == -1e8] = np.nan tsframe["B"] = b msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 21aab6652a300..b406118246e88 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -164,9 +164,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) From 8585bd761a05847705dee803a30c438588f363b5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 00:29:29 +0100 Subject: [PATCH 2/9] Fix --- pandas/tests/indexing/test_chaining_and_caching.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index b406118246e88..3243e5577feb4 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -176,9 +176,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) @@ -189,9 +188,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 @@ -202,7 +200,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: - # TODO(CoW-warn) custom warning message with tm.assert_cow_warning(warn_copy_on_write): df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) From cad8aad6e320e25c2d73d346a5243029866e6190 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:21:35 +0100 Subject: [PATCH 3/9] Update base.py --- pandas/core/internals/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index ee45707791fbf..5ac82c25bcde9 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -54,6 +54,11 @@ class _AlreadyWarned: def __init__(self): + # This class is used on the manager level to the block level to + # ensure that we warn only once. The block method can update the + # warned_already option without returning a value to keep the + # interface consistent. This is only a temporary solution for + # CoW warnings. self.warned_already = False From 4aa030af74a8d6c99c9225d55d06722eed8842dc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 23:52:58 +0100 Subject: [PATCH 4/9] Update base.py --- pandas/core/internals/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 5ac82c25bcde9..60ab251e437c7 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -54,10 +54,10 @@ class _AlreadyWarned: def __init__(self): - # This class is used on the manager level to the block level to + # This class is used on the manager level to the block level to # ensure that we warn only once. The block method can update the - # warned_already option without returning a value to keep the - # interface consistent. This is only a temporary solution for + # warned_already option without returning a value to keep the + # interface consistent. This is only a temporary solution for # CoW warnings. self.warned_already = False From b5992b31ff6a18ee9a1dd35d4a0bbc07b9647dc2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 1 Dec 2023 21:43:53 +0100 Subject: [PATCH 5/9] Update test --- pandas/tests/indexing/test_chaining_and_caching.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 08e9a94dc6ead..8287e7107ba66 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -155,8 +155,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, DataFrame({"response": data})) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) @@ -167,8 +165,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, DataFrame({"response": data})) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) @@ -179,8 +175,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, df_original) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 @@ -191,8 +185,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: - with tm.assert_cow_warning(warn_copy_on_write): - df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) result = df.head() tm.assert_frame_equal(result, expected) From 9dee585247789ad613464d0fdbea624b298a6327 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 1 Dec 2023 22:12:40 +0100 Subject: [PATCH 6/9] Fixup --- .../copy_view/test_chained_assignment_deprecation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 7b08d9b80fc9b..4f53dd8a78858 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -70,7 +70,7 @@ def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): @pytest.mark.parametrize( "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] ) -def test_series_setitem(indexer, using_copy_on_write): +def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): # ensure we only get a single warning for those typical cases of chained # assignment df = DataFrame({"a": [1, 2, 3], "b": 1}) @@ -79,7 +79,11 @@ def test_series_setitem(indexer, using_copy_on_write): # fail if multiple warnings are raised with pytest.warns() as record: df["a"][indexer] = 0 - assert len(record) == 1 + assert ( + len(record) == 2 + if warn_copy_on_write and isinstance(indexer, np.ndarray) + else 1 + ) if using_copy_on_write: assert record[0].category == ChainedAssignmentError else: From 5cd4dfd5fc1fa0506b7cd71cf8112a48bf240e36 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 2 Dec 2023 18:38:35 +0100 Subject: [PATCH 7/9] CoW: Warn for replace --- pandas/core/internals/base.py | 7 ++- pandas/core/internals/blocks.py | 45 +++++++++++++++++++ .../test_chained_assignment_deprecation.py | 8 ++-- pandas/tests/copy_view/test_replace.py | 20 +++++---- 4 files changed, 68 insertions(+), 12 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 60ab251e437c7..4ba3fcfca2fc7 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -249,12 +249,16 @@ def replace(self, to_replace, value, inplace: bool) -> Self: value=value, inplace=inplace, using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) @final def replace_regex(self, **kwargs) -> Self: return self.apply_with_block( - "_replace_regex", **kwargs, using_cow=using_copy_on_write() + "_replace_regex", + **kwargs, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) @final @@ -275,6 +279,7 @@ def replace_list( inplace=inplace, regex=regex, using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) bm._consolidate_inplace() return bm diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ec58b46371149..5d4aed98c7481 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -830,6 +830,7 @@ def replace( # mask may be pre-computed if we're called from replace_list mask: npt.NDArray[np.bool_] | None = None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ replace the to_replace value with value, possible to create new @@ -874,6 +875,20 @@ def replace( # and rest? blk = self._maybe_copy(using_cow, inplace) putmask_inplace(blk.values, mask, value) + if ( + inplace + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + if not (self.is_object and value is None): # if the user *explicitly* gave None, we keep None, otherwise # may downcast to NaN @@ -934,6 +949,7 @@ def _replace_regex( inplace: bool = False, mask=None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ Replace elements by the given value. @@ -968,6 +984,20 @@ def _replace_regex( replace_regex(block.values, rx, value, mask) + if ( + inplace + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + nbs = block.convert(copy=False, using_cow=using_cow) opt = get_option("future.no_silent_downcasting") if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt: @@ -992,6 +1022,7 @@ def replace_list( inplace: bool = False, regex: bool = False, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ See BlockManager.replace_list docstring. @@ -1048,6 +1079,20 @@ def replace_list( else: rb = [self if inplace else self.copy()] + if ( + inplace + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + opt = get_option("future.no_silent_downcasting") for i, ((src, dest), mask) in enumerate(zip(pairs, masks)): convert = i == src_len # only convert once at the end diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 4f53dd8a78858..4200082b474f2 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -36,11 +36,13 @@ def test_methods_iloc_warn(using_copy_on_write): ("ffill", ()), ], ) -def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): +def test_methods_iloc_getitem_item_cache( + func, args, using_copy_on_write, warn_copy_on_write +): df = DataFrame({"a": [1, 2, 3], "b": 1}) ser = df.iloc[:, 0] - # TODO(CoW-warn) should warn about updating a view - getattr(ser, func)(*args, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write and func == "replace"): + getattr(ser, func)(*args, inplace=True) # parent that holds item_cache is dead, so don't increase ref count ser = df.copy()["a"] diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 3d8559a1905fc..a56a2cdee4425 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -48,12 +48,13 @@ def test_replace(using_copy_on_write, replace_kwargs): tm.assert_frame_equal(df, df_orig) -def test_replace_regex_inplace_refs(using_copy_on_write): +def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": ["aaa", "bbb"]}) df_orig = df.copy() view = df[:] arr = get_array(df, "a") - df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) if using_copy_on_write: assert not np.shares_memory(arr, get_array(df, "a")) assert df._mgr._has_no_reference(0) @@ -202,11 +203,12 @@ def test_replace_inplace(using_copy_on_write, to_replace): @pytest.mark.parametrize("to_replace", [1.5, [1.5]]) -def test_replace_inplace_reference(using_copy_on_write, to_replace): +def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") view = df[:] - df.replace(to_replace=to_replace, value=15.5, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace(to_replace=to_replace, value=15.5, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) @@ -354,12 +356,13 @@ def test_replace_list_none(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) -def test_replace_list_none_inplace_refs(using_copy_on_write): +def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": ["a", "b", "c"]}) arr = get_array(df, "a") df_orig = df.copy() view = df[:] - df.replace(["a"], value=None, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace(["a"], value=None, inplace=True) if using_copy_on_write: assert df._mgr._has_no_reference(0) assert not np.shares_memory(arr, get_array(df, "a")) @@ -431,7 +434,7 @@ def test_replace_listlike(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_listlike_inplace(using_copy_on_write): +def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) arr = get_array(df, "a") df.replace([200, 2], [10, 11], inplace=True) @@ -439,7 +442,8 @@ def test_replace_listlike_inplace(using_copy_on_write): view = df[:] df_orig = df.copy() - df.replace([200, 3], [10, 11], inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace([200, 3], [10, 11], inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr) tm.assert_frame_equal(view, df_orig) From e170023ef982081a3e1da1f83cf1600c1f395713 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Dec 2023 10:48:02 +0100 Subject: [PATCH 8/9] clean-up merge --- pandas/core/internals/blocks.py | 6 +++--- .../copy_view/test_chained_assignment_deprecation.py | 6 +----- pandas/tests/frame/methods/test_replace.py | 8 ++------ pandas/tests/indexing/test_chaining_and_caching.py | 2 +- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 03274a4f134e0..1af2d9e739038 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -883,7 +883,7 @@ def replace( ): if self.refs.has_reference(): warnings.warn( - COW_WARNING_SETITEM_MSG, + COW_WARNING_GENERAL_MSG, FutureWarning, stacklevel=find_stack_level(), ) @@ -992,7 +992,7 @@ def _replace_regex( ): if self.refs.has_reference(): warnings.warn( - COW_WARNING_SETITEM_MSG, + COW_WARNING_GENERAL_MSG, FutureWarning, stacklevel=find_stack_level(), ) @@ -1087,7 +1087,7 @@ def replace_list( ): if self.refs.has_reference(): warnings.warn( - COW_WARNING_SETITEM_MSG, + COW_WARNING_GENERAL_MSG, FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 25cf586a0b490..feb59619c374a 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -87,11 +87,7 @@ def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): # fail if multiple warnings are raised with pytest.warns() as record: df["a"][indexer] = 0 - assert ( - len(record) == 2 - if warn_copy_on_write and isinstance(indexer, np.ndarray) - else 1 - ) + assert len(record) == 1 if using_copy_on_write: assert record[0].category == ChainedAssignmentError else: diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 28e4d298acc2b..13e2c1a249ac2 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -717,7 +717,7 @@ def test_replace_value_is_none(self, datetime_frame): datetime_frame.iloc[0, 0] = orig_value datetime_frame.iloc[1, 0] = orig2 - def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): + def test_replace_for_new_dtypes(self, datetime_frame): # dtypes tsframe = datetime_frame.copy().astype(np.float32) tsframe.loc[tsframe.index[:5], "A"] = np.nan @@ -729,11 +729,7 @@ def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): tsframe.loc[tsframe.index[:5], "A"] = np.nan tsframe.loc[tsframe.index[-5:], "A"] = np.nan - tsframe.loc[tsframe.index[:5], "B"] = -1e8 - - b = tsframe["B"] - with tm.assert_cow_warning(warn_copy_on_write): - b[b == -1e8] = np.nan + tsframe.loc[tsframe.index[:5], "B"] = np.nan msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): # TODO: what is this even testing? diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 2e08b393d0283..b97df376ac47f 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -139,7 +139,7 @@ def test_altering_series_clears_parent_cache( class TestChaining: - def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write): + def test_setitem_chained_setfault(self, using_copy_on_write): # GH6026 data = ["right", "left", "left", "left", "right", "left", "timeout"] mdata = ["right", "left", "left", "left", "right", "left", "none"] From 5e61b42a1a1e7fedfd5ca7de1638601ef340755e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Dec 2023 11:02:56 +0100 Subject: [PATCH 9/9] update test --- pandas/tests/copy_view/test_chained_assignment_deprecation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index feb59619c374a..25935d9489730 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -47,7 +47,7 @@ def test_methods_iloc_getitem_item_cache( ): df = DataFrame({"a": [1, 2, 3], "b": 1}) ser = df.iloc[:, 0] - with tm.assert_cow_warning(warn_copy_on_write): + with tm.assert_cow_warning(warn_copy_on_write and func == "replace"): getattr(ser, func)(*args, inplace=True) # parent that holds item_cache is dead, so don't increase ref count