Skip to content

CoW: Enable CoW by default and remove warning build #56633

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,6 @@ jobs:
env_file: actions-312.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.11 (warnings)"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Copy-on-Write 3.10 (warnings)"
env_file: actions-310.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Copy-on-Write 3.9 (warnings)"
env_file: actions-39.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
Expand Down
3 changes: 2 additions & 1 deletion asv_bench/benchmarks/algos/isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def setup(self, dtype):
elif dtype in ["str", "string[python]", "string[pyarrow]"]:
try:
self.series = Series(
Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
Index([f"i-{i}" for i in range(N)], dtype=object)._values,
dtype=dtype,
)
except ImportError as err:
raise NotImplementedError from err
Expand Down
3 changes: 2 additions & 1 deletion asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class Dtypes:
def setup(self, dtype):
try:
self.s = Series(
Index([f"i-{i}" for i in range(10000)], dtype=object), dtype=dtype
Index([f"i-{i}" for i in range(10000)], dtype=object)._values,
dtype=dtype,
)
except ImportError as err:
raise NotImplementedError from err
Expand Down
6 changes: 2 additions & 4 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,11 @@


def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] is True
return True


def warn_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] == "warn"
return False


def using_nullable_dtypes() -> bool:
Expand Down
4 changes: 2 additions & 2 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1963,15 +1963,15 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return pd.options.mode.copy_on_write is True
return True


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is in warning mode.
"""
return pd.options.mode.copy_on_write == "warn"
return False


@pytest.fixture
Expand Down
37 changes: 11 additions & 26 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6704,8 +6704,7 @@ def copy(self, deep: bool_t | None = True) -> Self:
:ref:`gotchas <gotchas.thread-safety>` when copying in a threading
environment.

When ``copy_on_write`` in pandas config is set to ``True``, the
``copy_on_write`` config takes effect even when ``deep=False``.
Copy-on-Write protects shallow copies against accidental modifications.
This means that any changes to the copied data would make a new copy
of the data upon write (and vice versa). Changes made to either the
original or copied variable would not be reflected in the counterpart.
Expand All @@ -6731,12 +6730,15 @@ def copy(self, deep: bool_t | None = True) -> Self:
>>> deep = s.copy()
>>> shallow = s.copy(deep=False)

Shallow copy shares data and index with original.
Shallow copy shares index with original, the data is a
view of the original.

>>> s is shallow
False
>>> s.values is shallow.values and s.index is shallow.index
True
>>> s.values is shallow.values
False
>>> s.index is shallow.index
False

Deep copy has own copy of data and index.

Expand All @@ -6745,18 +6747,17 @@ def copy(self, deep: bool_t | None = True) -> Self:
>>> s.values is deep.values or s.index is deep.index
False

Updates to the data shared by shallow copy and original is reflected
in both (NOTE: this will no longer be true for pandas >= 3.0);
deep copy remains unchanged.
The shallow copy is protected against updating the original object
as well. Thus, updates will only reflect in one of both objects.

>>> s.iloc[0] = 3
>>> shallow.iloc[1] = 4
>>> s
a 3
b 4
b 2
dtype: int64
>>> shallow
a 3
a 1
b 4
dtype: int64
>>> deep
Expand All @@ -6779,22 +6780,6 @@ def copy(self, deep: bool_t | None = True) -> Self:
0 [10, 2]
1 [3, 4]
dtype: object

**Copy-on-Write is set to true**, the shallow copy is not modified
when the original data is changed:

>>> with pd.option_context("mode.copy_on_write", True):
... s = pd.Series([1, 2], index=["a", "b"])
... copy = s.copy(deep=False)
... s.iloc[0] = 100
... s
a 100
b 2
dtype: int64
>>> copy
a 1
b 2
dtype: int64
"""
data = self._mgr.copy(deep=deep)
self._clear_item_cache()
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3492,6 +3492,8 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
"cannot index with a boolean indexer that "
"is not the same length as the index"
)
if isinstance(k, (ABCSeries, Index)):
k = k._values
lvl_indexer = np.asarray(k)
if indexer is None:
lvl_indexer = lvl_indexer.copy()
Expand Down
30 changes: 0 additions & 30 deletions pandas/tests/copy_view/test_internals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
Expand Down Expand Up @@ -42,35 +41,6 @@ def test_consolidate(using_copy_on_write):
assert df.loc[0, "b"] == 0.1


@pytest.mark.single_cpu
def test_switch_options():
# ensure we can switch the value of the option within one session
# (assuming data is constructed after switching)

# using the option_context to ensure we set back to global option value
# after running the test
with pd.option_context("mode.copy_on_write", False):
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
subset = df[:]
subset.iloc[0, 0] = 0
# df updated with CoW disabled
assert df.iloc[0, 0] == 0

pd.options.mode.copy_on_write = True
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
subset = df[:]
subset.iloc[0, 0] = 0
# df not updated with CoW enabled
assert df.iloc[0, 0] == 1

pd.options.mode.copy_on_write = False
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
subset = df[:]
subset.iloc[0, 0] = 0
# df updated with CoW disabled
assert df.iloc[0, 0] == 0


@pytest.mark.parametrize("dtype", [np.intp, np.int8])
@pytest.mark.parametrize(
"locs, arr",
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@

import pytest

from pandas import (
Series,
options,
)
from pandas import Series


@pytest.fixture
Expand Down Expand Up @@ -222,4 +219,4 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return options.mode.copy_on_write is True
return True
22 changes: 0 additions & 22 deletions pandas/tests/frame/methods/test_copy.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame
import pandas._testing as tm


class TestCopy:
Expand All @@ -18,25 +15,6 @@ def test_copy_index_name_checking(self, float_frame, attr):
getattr(cp, attr).name = "foo"
assert getattr(float_frame, attr).name is None

@td.skip_copy_on_write_invalid_test
def test_copy_cache(self):
# GH#31784 _item_cache not cleared on copy causes incorrect reads after updates
df = DataFrame({"a": [1]})

df["x"] = [0]
df["a"]

df.copy()

df["a"].values[0] = -1

tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]}))

df["y"] = [0]

assert df["a"].values[0] == -1
tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]}))

def test_copy(self, float_frame, float_string_frame):
cop = float_frame.copy()
cop["E"] = cop["A"]
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
from pandas.arrays import SparseArray
Expand Down Expand Up @@ -456,8 +454,7 @@ def add3(x, y, z):
ufunc(ser, ser, df)


# TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082
@td.skip_copy_on_write_not_yet_implemented
@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to still test this with cow?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no not at the moment

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay sounds good.

def test_np_fix():
# np.fix is not a ufunc but is composed of several ufunc calls under the hood
# with `out` and `where` keywords
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def test_dask_ufunc():
s = Series([1.5, 2.3, 3.7, 4.0])
ds = dd.from_pandas(s, npartitions=2)

result = da.fix(ds).compute()
expected = np.fix(s)
result = da.log(ds).compute()
expected = np.log(s)
tm.assert_series_equal(result, expected)
finally:
pd.set_option("compute.use_numexpr", olduse)
Expand Down
14 changes: 0 additions & 14 deletions pandas/util/_test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,9 @@ def test_foo():

import pytest

from pandas._config import get_option

if TYPE_CHECKING:
from pandas._typing import F


from pandas.compat import (
IS64,
is_platform_windows,
Expand Down Expand Up @@ -144,14 +141,3 @@ def documented_fixture(fixture):
return fixture

return documented_fixture


skip_copy_on_write_not_yet_implemented = pytest.mark.xfail(
get_option("mode.copy_on_write") is True,
reason="Not yet implemented/adapted for Copy-on-Write mode",
)

skip_copy_on_write_invalid_test = pytest.mark.skipif(
get_option("mode.copy_on_write") is True,
reason="Test not valid for Copy-on-Write mode",
)