From 4f4f663b066972ae735040fc96f389433a1ab2ff Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 3 Jan 2023 11:03:42 +0100 Subject: [PATCH 1/8] TST: test explicit copy keyword with CoW enabled --- pandas/tests/copy_view/test_methods.py | 65 +++++++++++++++++--------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 2b3d13b982d4d..a8cba51e17773 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -77,22 +77,31 @@ def test_reset_index(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_rename_columns(using_copy_on_write): +@pytest.mark.parametrize("copy", [True, None, False]) +def test_rename_columns(using_copy_on_write, copy): # Case: renaming columns returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() - df2 = df.rename(columns=str.upper) + df2 = df.rename(columns=str.upper, copy=copy) - if using_copy_on_write: + if (using_copy_on_write and copy is not True) or copy is False: assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) df2.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) - if using_copy_on_write: + if using_copy_on_write or copy is not False: + assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + else: + assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + if using_copy_on_write and copy is not True: assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) expected = DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]}) tm.assert_frame_equal(df2, expected) - tm.assert_frame_equal(df, df_orig) + if using_copy_on_write or copy is not False: + tm.assert_frame_equal(df, df_orig) + else: + assert not df.equals(df_orig) def test_rename_columns_modify_parent(using_copy_on_write): @@ -115,14 +124,17 @@ def test_rename_columns_modify_parent(using_copy_on_write): tm.assert_frame_equal(df2, df2_orig) -def test_reindex_columns(using_copy_on_write): +@pytest.mark.parametrize("copy", [True, None, False]) +def test_reindex_columns(using_copy_on_write, copy): # Case: reindexing the column returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() - df2 = df.reindex(columns=["a", "c"]) + df2 = df.reindex(columns=["a", "c"], copy=copy) - if using_copy_on_write: + # TODO copy=False without CoW still returns a copy in this case + # TODO copy=True with CoW still returns a view + if using_copy_on_write: # and copy is not True) or copy is False: # still shares memory (df2 is a shallow copy) assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: @@ -130,7 +142,7 @@ def test_reindex_columns(using_copy_on_write): # mutating df2 triggers a copy-on-write for that column df2.iloc[0, 0] = 0 assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: + if using_copy_on_write: # and copy is not True: assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) @@ -182,6 +194,7 @@ def test_select_dtypes(using_copy_on_write): lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1), ], ) +# TODO test copy keyword def test_align_frame(using_copy_on_write, func): df = DataFrame({"a": [1, 2, 3], "b": "a"}) df_orig = df.copy() @@ -389,6 +402,7 @@ def test_assign_drop_duplicates(using_copy_on_write, method): tm.assert_frame_equal(df, df_orig) +# TODO test copy keyword def test_reindex_like(using_copy_on_write): df = DataFrame({"a": [1, 2], "b": "a"}) other = DataFrame({"b": "a", "a": [1, 2]}) @@ -444,21 +458,26 @@ def test_swaplevel(using_copy_on_write, obj): tm.assert_equal(obj, obj_orig) -def test_frame_set_axis(using_copy_on_write): +@pytest.mark.parametrize("copy", [True, None, False]) +def test_frame_set_axis(using_copy_on_write, copy): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() - df2 = df.set_axis(["a", "b", "c"], axis="index") + df2 = df.set_axis(["a", "b", "c"], axis="index", copy=copy) - if using_copy_on_write: + if (using_copy_on_write and copy is not True) or copy is False: assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) + if using_copy_on_write or copy is not False: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + else: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not df.equals(df_orig) @pytest.mark.parametrize( @@ -483,6 +502,7 @@ def test_tz_convert_localize(using_copy_on_write, func, tz): tm.assert_series_equal(ser, ser_orig) +# TODO test copy keyword def test_series_set_axis(using_copy_on_write): # GH 49473 ser = Series([1, 2, 3]) @@ -500,19 +520,22 @@ def test_series_set_axis(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) -@pytest.mark.parametrize("copy_kwargs", [{"copy": True}, {}]) +@pytest.mark.parametrize("copy", [True, None, False]) @pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}]) -def test_rename_axis(using_copy_on_write, kwargs, copy_kwargs): +def test_rename_axis(using_copy_on_write, kwargs, copy): df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a")) df_orig = df.copy() - df2 = df.rename_axis(**kwargs, **copy_kwargs) + df2 = df.rename_axis(**kwargs, copy=copy) - if using_copy_on_write and not copy_kwargs: + if (using_copy_on_write and copy is not True) or copy is False: assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write: + if using_copy_on_write or copy is not False: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) + else: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not df.equals(df_orig) From 3cc360afa9f37db80ada6fc06f89bfefc01d335b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Jan 2023 17:12:20 +0100 Subject: [PATCH 2/8] undo previous changes --- pandas/tests/copy_view/test_methods.py | 194 ++++++------------------- 1 file changed, 44 insertions(+), 150 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 399697df9e38b..2b3d13b982d4d 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -77,31 +77,22 @@ def test_reset_index(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -@pytest.mark.parametrize("copy", [True, None, False]) -def test_rename_columns(using_copy_on_write, copy): +def test_rename_columns(using_copy_on_write): # Case: renaming columns returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() - df2 = df.rename(columns=str.upper, copy=copy) + df2 = df.rename(columns=str.upper) - if (using_copy_on_write and copy is not True) or copy is False: + if using_copy_on_write: assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write or copy is not False: - assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) - else: - assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) - if using_copy_on_write and copy is not True: + assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + if using_copy_on_write: assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) expected = DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]}) tm.assert_frame_equal(df2, expected) - if using_copy_on_write or copy is not False: - tm.assert_frame_equal(df, df_orig) - else: - assert not df.equals(df_orig) + tm.assert_frame_equal(df, df_orig) def test_rename_columns_modify_parent(using_copy_on_write): @@ -124,32 +115,24 @@ def test_rename_columns_modify_parent(using_copy_on_write): tm.assert_frame_equal(df2, df2_orig) -@pytest.mark.parametrize("copy", [True, None, False]) -def test_reindex_columns(using_copy_on_write, using_array_manager, copy): +def test_reindex_columns(using_copy_on_write): # Case: reindexing the column returns a new dataframe # + afterwards modifying the result df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() - df2 = df.reindex(columns=["a", "c"], copy=copy) + df2 = df.reindex(columns=["a", "c"]) - # TODO copy=False without CoW still returns a copy in this case - # TODO copy=True with CoW or AM still returns a view - # ((using_COW or using_AM) and copy is not True) or copy is False: - if using_copy_on_write or using_array_manager: + if using_copy_on_write: # still shares memory (df2 is a shallow copy) assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column df2.iloc[0, 0] = 0 - if using_array_manager: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert df.iloc[0, 0] == 0 - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: # and copy is not True: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - tm.assert_frame_equal(df, df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) def test_drop_on_column(using_copy_on_write): @@ -199,7 +182,6 @@ def test_select_dtypes(using_copy_on_write): lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1), ], ) -# TODO test copy keyword def test_align_frame(using_copy_on_write, func): df = DataFrame({"a": [1, 2, 3], "b": "a"}) df_orig = df.copy() @@ -389,30 +371,6 @@ def test_head_tail(method, using_copy_on_write): tm.assert_frame_equal(df, df_orig) -@pytest.mark.parametrize( - "kwargs", - [ - {"before": "a", "after": "b", "axis": 1}, - {"before": 0, "after": 1, "axis": 0}, - ], -) -def test_truncate(using_copy_on_write, kwargs): - df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2}) - df_orig = df.copy() - df2 = df.truncate(**kwargs) - df2._mgr._verify_integrity() - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - - df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - - @pytest.mark.parametrize("method", ["assign", "drop_duplicates"]) def test_assign_drop_duplicates(using_copy_on_write, method): df = DataFrame({"a": [1, 2, 3]}) @@ -431,7 +389,6 @@ def test_assign_drop_duplicates(using_copy_on_write, method): tm.assert_frame_equal(df, df_orig) -# TODO test copy keyword def test_reindex_like(using_copy_on_write): df = DataFrame({"a": [1, 2], "b": "a"}) other = DataFrame({"b": "a", "a": [1, 2]}) @@ -450,23 +407,6 @@ def test_reindex_like(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_sort_index(using_copy_on_write): - # GH 49473 - ser = Series([1, 2, 3]) - ser_orig = ser.copy() - ser2 = ser.sort_index() - - if using_copy_on_write: - assert np.shares_memory(ser.values, ser2.values) - else: - assert not np.shares_memory(ser.values, ser2.values) - - # mutating ser triggers a copy-on-write for the column / block - ser2.iloc[0] = 0 - assert not np.shares_memory(ser2.values, ser.values) - tm.assert_series_equal(ser, ser_orig) - - def test_reorder_levels(using_copy_on_write): index = MultiIndex.from_tuples( [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] @@ -486,25 +426,6 @@ def test_reorder_levels(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_series_reorder_levels(using_copy_on_write): - index = MultiIndex.from_tuples( - [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] - ) - ser = Series([1, 2, 3, 4], index=index) - ser_orig = ser.copy() - ser2 = ser.reorder_levels(order=["two", "one"]) - - if using_copy_on_write: - assert np.shares_memory(ser2.values, ser.values) - else: - assert not np.shares_memory(ser2.values, ser.values) - - ser2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(ser2.values, ser.values) - tm.assert_series_equal(ser, ser_orig) - - @pytest.mark.parametrize("obj", [Series([1, 2, 3]), DataFrame({"a": [1, 2, 3]})]) def test_swaplevel(using_copy_on_write, obj): index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) @@ -523,102 +444,75 @@ def test_swaplevel(using_copy_on_write, obj): tm.assert_equal(obj, obj_orig) -@pytest.mark.parametrize("copy", [True, None, False]) -def test_frame_set_axis(using_copy_on_write, copy): +def test_frame_set_axis(using_copy_on_write): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() - df2 = df.set_axis(["a", "b", "c"], axis="index", copy=copy) + df2 = df.set_axis(["a", "b", "c"], axis="index") - if (using_copy_on_write and copy is not True) or copy is False: + if using_copy_on_write: assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 0] = 0 - if using_copy_on_write or copy is not False: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - else: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not df.equals(df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) -# TODO test copy keyword -def test_series_set_axis(using_copy_on_write): +@pytest.mark.parametrize( + "func, tz", [("tz_convert", "Europe/Berlin"), ("tz_localize", None)] +) +def test_tz_convert_localize(using_copy_on_write, func, tz): # GH 49473 - ser = Series([1, 2, 3]) + ser = Series( + [1, 2], index=date_range(start="2014-08-01 09:00", freq="H", periods=2, tz=tz) + ) ser_orig = ser.copy() - ser2 = ser.set_axis(["a", "b", "c"], axis="index") + ser2 = getattr(ser, func)("US/Central") if using_copy_on_write: - assert np.shares_memory(ser, ser2) + assert np.shares_memory(ser.values, ser2.values) else: - assert not np.shares_memory(ser, ser2) + assert not np.shares_memory(ser.values, ser2.values) # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 - assert not np.shares_memory(ser2, ser) + assert not np.shares_memory(ser2.values, ser.values) tm.assert_series_equal(ser, ser_orig) -def test_set_flags(using_copy_on_write): +def test_series_set_axis(using_copy_on_write): + # GH 49473 ser = Series([1, 2, 3]) ser_orig = ser.copy() - ser2 = ser.set_flags(allows_duplicate_labels=False) + ser2 = ser.set_axis(["a", "b", "c"], axis="index") - assert np.shares_memory(ser, ser2) + if using_copy_on_write: + assert np.shares_memory(ser, ser2) + else: + assert not np.shares_memory(ser, ser2) # mutating ser triggers a copy-on-write for the column / block ser2.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(ser2, ser) - tm.assert_series_equal(ser, ser_orig) - else: - assert np.shares_memory(ser2, ser) - expected = Series([0, 2, 3]) - tm.assert_series_equal(ser, expected) + assert not np.shares_memory(ser2, ser) + tm.assert_series_equal(ser, ser_orig) -@pytest.mark.parametrize("copy", [True, None, False]) +@pytest.mark.parametrize("copy_kwargs", [{"copy": True}, {}]) @pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}]) -def test_rename_axis(using_copy_on_write, kwargs, copy): +def test_rename_axis(using_copy_on_write, kwargs, copy_kwargs): df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a")) df_orig = df.copy() - df2 = df.rename_axis(**kwargs, copy=copy) + df2 = df.rename_axis(**kwargs, **copy_kwargs) - if (using_copy_on_write and copy is not True) or copy is False: + if using_copy_on_write and not copy_kwargs: assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 0 - if using_copy_on_write or copy is not False: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - else: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not df.equals(df_orig) - - -@pytest.mark.parametrize( - "func, tz", [("tz_convert", "Europe/Berlin"), ("tz_localize", None)] -) -def test_tz_convert_localize(using_copy_on_write, func, tz): - # GH 49473 - ser = Series( - [1, 2], index=date_range(start="2014-08-01 09:00", freq="H", periods=2, tz=tz) - ) - ser_orig = ser.copy() - ser2 = getattr(ser, func)("US/Central") - if using_copy_on_write: - assert np.shares_memory(ser.values, ser2.values) - else: - assert not np.shares_memory(ser.values, ser2.values) - - # mutating ser triggers a copy-on-write for the column / block - ser2.iloc[0] = 0 - assert not np.shares_memory(ser2.values, ser.values) - tm.assert_series_equal(ser, ser_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) From ac7a2220243b6f5a7a1a3fee37017dc282e5ba29 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Jan 2023 17:51:35 +0100 Subject: [PATCH 3/8] single parametrized test --- pandas/tests/copy_view/test_methods.py | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index bb2bee7de9206..46a9c83368e03 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -53,6 +53,40 @@ def test_copy_shallow(using_copy_on_write): assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) +@pytest.mark.parametrize("copy", [True, None, False]) +@pytest.mark.parametrize( + "method", + [ + lambda df, copy: df.rename(columns=str.lower, copy=copy), + lambda df, copy: df.reindex(columns=["a", "c"], copy=copy), + lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy), + lambda df, copy: df.rename_axis(index="test", copy=copy), + lambda df, copy: df.rename_axis(columns="test", copy=copy), + ], + ids=["rename", "reindex", "set_axis", "rename_axis0", "rename_axis1"], +) +def test_methods_copy_keyword( + request, method, copy, using_copy_on_write, using_array_manager +): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df2 = method(df, copy=copy) + + share_memory = (using_copy_on_write and copy is not True) or copy is False + + if request.node.callspec.id.startswith("reindex"): + # TODO copy=False without CoW still returns a copy in this case + if not using_copy_on_write and not using_array_manager and copy is False: + share_memory = False + # TODO copy=True with CoW still returns a view + if using_copy_on_write: + share_memory = True + + if share_memory: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # ----------------------------------------------------------------------------- # DataFrame methods returning new DataFrame using shallow copy From dd458f7230f025468626f72a465b4ecb4512f48a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Jan 2023 20:30:34 +0100 Subject: [PATCH 4/8] fix rebase --- pandas/tests/copy_view/test_methods.py | 78 ++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 46a9c83368e03..40a8086c87b9d 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -567,6 +567,30 @@ def test_head_tail(method, using_copy_on_write): tm.assert_frame_equal(df, df_orig) +@pytest.mark.parametrize( + "kwargs", + [ + {"before": "a", "after": "b", "axis": 1}, + {"before": 0, "after": 1, "axis": 0}, + ], +) +def test_truncate(using_copy_on_write, kwargs): + df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2}) + df_orig = df.copy() + df2 = df.truncate(**kwargs) + df2._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + @pytest.mark.parametrize("method", ["assign", "drop_duplicates"]) def test_assign_drop_duplicates(using_copy_on_write, method): df = DataFrame({"a": [1, 2, 3]}) @@ -637,6 +661,23 @@ def test_reindex_like(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +def test_sort_index(using_copy_on_write): + # GH 49473 + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + ser2 = ser.sort_index() + + if using_copy_on_write: + assert np.shares_memory(ser.values, ser2.values) + else: + assert not np.shares_memory(ser.values, ser2.values) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + assert not np.shares_memory(ser2.values, ser.values) + tm.assert_series_equal(ser, ser_orig) + + def test_reorder_levels(using_copy_on_write): index = MultiIndex.from_tuples( [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] @@ -656,6 +697,25 @@ def test_reorder_levels(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +def test_series_reorder_levels(using_copy_on_write): + index = MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] + ) + ser = Series([1, 2, 3, 4], index=index) + ser_orig = ser.copy() + ser2 = ser.reorder_levels(order=["two", "one"]) + + if using_copy_on_write: + assert np.shares_memory(ser2.values, ser.values) + else: + assert not np.shares_memory(ser2.values, ser.values) + + ser2.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(ser2.values, ser.values) + tm.assert_series_equal(ser, ser_orig) + + @pytest.mark.parametrize("obj", [Series([1, 2, 3]), DataFrame({"a": [1, 2, 3]})]) def test_swaplevel(using_copy_on_write, obj): index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) @@ -708,6 +768,24 @@ def test_series_set_axis(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) +def test_set_flags(using_copy_on_write): + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + ser2 = ser.set_flags(allows_duplicate_labels=False) + + assert np.shares_memory(ser, ser2) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(ser2, ser) + tm.assert_series_equal(ser, ser_orig) + else: + assert np.shares_memory(ser2, ser) + expected = Series([0, 2, 3]) + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize("copy_kwargs", [{"copy": True}, {}]) @pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}]) def test_rename_axis(using_copy_on_write, kwargs, copy_kwargs): From 12a41d3e1230469f40f1c6cc743233b73d8e0dbf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 16 Jan 2023 09:41:26 +0100 Subject: [PATCH 5/8] add more methods --- pandas/tests/copy_view/test_methods.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 40a8086c87b9d..a64cbdd7654d6 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -59,11 +59,25 @@ def test_copy_shallow(using_copy_on_write): [ lambda df, copy: df.rename(columns=str.lower, copy=copy), lambda df, copy: df.reindex(columns=["a", "c"], copy=copy), + lambda df, copy: df.reindex_like(df, copy=copy), lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy), lambda df, copy: df.rename_axis(index="test", copy=copy), lambda df, copy: df.rename_axis(columns="test", copy=copy), + # lambda df, copy: df.astype({'b': 'int64'}, copy=copy), + # lambda df, copy: df.swaplevel(0, 0, copy=copy), + lambda df, copy: df.truncate(0, 5, copy=copy), + ], + ids=[ + "rename", + "reindex", + "reindex_like", + "set_axis", + "rename_axis0", + "rename_axis1", + # "astype", # CoW not yet implemented + # "swaplevel", # only series + "truncate", ], - ids=["rename", "reindex", "set_axis", "rename_axis0", "rename_axis1"], ) def test_methods_copy_keyword( request, method, copy, using_copy_on_write, using_array_manager @@ -73,7 +87,7 @@ def test_methods_copy_keyword( share_memory = (using_copy_on_write and copy is not True) or copy is False - if request.node.callspec.id.startswith("reindex"): + if request.node.callspec.id.startswith("reindex-"): # TODO copy=False without CoW still returns a copy in this case if not using_copy_on_write and not using_array_manager and copy is False: share_memory = False From 7ece5fdfbd9ccb818e4b6639b3ba302a1a5a2ae1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 16 Jan 2023 09:51:35 +0100 Subject: [PATCH 6/8] add time related methods --- pandas/tests/copy_view/test_methods.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index a64cbdd7654d6..ad62e83e94ebd 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -9,6 +9,7 @@ Series, Timestamp, date_range, + period_range, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -66,6 +67,11 @@ def test_copy_shallow(using_copy_on_write): # lambda df, copy: df.astype({'b': 'int64'}, copy=copy), # lambda df, copy: df.swaplevel(0, 0, copy=copy), lambda df, copy: df.truncate(0, 5, copy=copy), + # lambda df, copy: df.infer_objects(copy=copy) + lambda df, copy: df.to_timestamp(copy=copy), + lambda df, copy: df.to_period(freq="D", copy=copy), + lambda df, copy: df.tz_localize("US/Central", copy=copy), + lambda df, copy: df.tz_convert("US/Central", copy=copy), ], ids=[ "rename", @@ -77,12 +83,27 @@ def test_copy_shallow(using_copy_on_write): # "astype", # CoW not yet implemented # "swaplevel", # only series "truncate", + # "infer_objects", # CoW not yet implemented + "to_timestamp", + "to_period", + "tz_localize", + "tz_convert", ], ) def test_methods_copy_keyword( request, method, copy, using_copy_on_write, using_array_manager ): - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + index = None + if "to_timestamp" in request.node.callspec.id: + index = period_range("2012-01-01", freq="D", periods=3) + elif "to_period" in request.node.callspec.id: + index = date_range("2012-01-01", freq="D", periods=3) + elif "tz_localize" in request.node.callspec.id: + index = date_range("2012-01-01", freq="D", periods=3) + elif "tz_convert" in request.node.callspec.id: + index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels") + + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index) df2 = method(df, copy=copy) share_memory = (using_copy_on_write and copy is not True) or copy is False From 3ec49a457d07278a355bf7041c18076506f97efe Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 16 Jan 2023 09:54:40 +0100 Subject: [PATCH 7/8] add set_flags --- pandas/tests/copy_view/test_methods.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index ad62e83e94ebd..2ade4885ded6a 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -72,6 +72,7 @@ def test_copy_shallow(using_copy_on_write): lambda df, copy: df.to_period(freq="D", copy=copy), lambda df, copy: df.tz_localize("US/Central", copy=copy), lambda df, copy: df.tz_convert("US/Central", copy=copy), + lambda df, copy: df.set_flags(allows_duplicate_labels=False, copy=copy), ], ids=[ "rename", @@ -88,6 +89,7 @@ def test_copy_shallow(using_copy_on_write): "to_period", "tz_localize", "tz_convert", + "set_flags", ], ) def test_methods_copy_keyword( From e6799980eaabb60fc2480c80151447b2d4288ef0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 Jan 2023 15:25:16 +0100 Subject: [PATCH 8/8] add swapaxes --- pandas/tests/copy_view/test_methods.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 5cf9103c77849..0cec5522e39cd 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -66,6 +66,7 @@ def test_copy_shallow(using_copy_on_write): lambda df, copy: df.rename_axis(columns="test", copy=copy), # lambda df, copy: df.astype({'b': 'int64'}, copy=copy), # lambda df, copy: df.swaplevel(0, 0, copy=copy), + lambda df, copy: df.swapaxes(0, 0, copy=copy), lambda df, copy: df.truncate(0, 5, copy=copy), # lambda df, copy: df.infer_objects(copy=copy) lambda df, copy: df.to_timestamp(copy=copy), @@ -83,6 +84,7 @@ def test_copy_shallow(using_copy_on_write): "rename_axis1", # "astype", # CoW not yet implemented # "swaplevel", # only series + "swapaxes", "truncate", # "infer_objects", # CoW not yet implemented "to_timestamp",