diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index b22590759ea3f..e4e207fbe3d89 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -116,6 +116,10 @@ Copy-on-Write improvements
   returning multiple times an identical, cached Series object). This ensures that those
   Series objects correctly follow the Copy-on-Write rules (:issue:`49450`)
 
+- The :class:`Series` constructor will now create a lazy copy (deferring the copy until
+  a modification to the data happens) when constructing a Series from an existing
+  Series with the default of ``copy=False`` (:issue:`50471`)
+
 Copy-on-Write can be enabled through
 
 .. code-block:: python
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 950499b1ae40d..925d22979576d 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -426,10 +426,14 @@ def __init__(
             elif isinstance(data, Series):
                 if index is None:
                     index = data.index
+                    if using_copy_on_write():
+                        data = data._mgr.copy(deep=False)
+                    else:
+                        data = data._mgr
                 else:
                     data = data.reindex(index, copy=copy)
                     copy = False
-                data = data._mgr
+                    data = data._mgr
             elif is_dict_like(data):
                 data, index = self._init_dict(data, index, dtype)
                 dtype = None
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
new file mode 100644
index 0000000000000..c04c733e5ee1d
--- /dev/null
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -0,0 +1,75 @@
+import numpy as np
+
+from pandas import Series
+
+# -----------------------------------------------------------------------------
+# Copy/view behaviour for Series / DataFrame constructors
+
+
+def test_series_from_series(using_copy_on_write):
+    # Case: constructing a Series from another Series object follows CoW rules:
+    # a new object is returned and thus mutations are not propagated
+    ser = Series([1, 2, 3], name="name")
+
+    # default is copy=False -> new Series is a shallow copy / view of original
+    result = Series(ser)
+
+    # the shallow copy still shares memory
+    assert np.shares_memory(ser.values, result.values)
+
+    if using_copy_on_write:
+        assert result._mgr.refs is not None
+
+    if using_copy_on_write:
+        # mutating new series copy doesn't mutate original
+        result.iloc[0] = 0
+        assert ser.iloc[0] == 1
+        # mutating triggered a copy-on-write -> no longer shares memory
+        assert not np.shares_memory(ser.values, result.values)
+    else:
+        # mutating shallow copy does mutate original
+        result.iloc[0] = 0
+        assert ser.iloc[0] == 0
+        # and still shares memory
+        assert np.shares_memory(ser.values, result.values)
+
+    # the same when modifying the parent
+    result = Series(ser)
+
+    if using_copy_on_write:
+        # mutating original doesn't mutate new series
+        ser.iloc[0] = 0
+        assert result.iloc[0] == 1
+    else:
+        # mutating original does mutate shallow copy
+        ser.iloc[0] = 0
+        assert result.iloc[0] == 0
+
+
+def test_series_from_series_with_reindex(using_copy_on_write):
+    # Case: constructing a Series from another Series with specifying an index
+    # that potentially requires a reindex of the values
+    ser = Series([1, 2, 3], name="name")
+
+    # passing an index that doesn't actually require a reindex of the values
+    # -> without CoW we get an actual mutating view
+    for index in [
+        ser.index,
+        ser.index.copy(),
+        list(ser.index),
+        ser.index.rename("idx"),
+    ]:
+        result = Series(ser, index=index)
+        assert np.shares_memory(ser.values, result.values)
+        result.iloc[0] = 0
+        if using_copy_on_write:
+            assert ser.iloc[0] == 1
+        else:
+            assert ser.iloc[0] == 0
+
+    # ensure that if an actual reindex is needed, we don't have any refs
+    # (mutating the result wouldn't trigger CoW)
+    result = Series(ser, index=[0, 1, 2, 3])
+    assert not np.shares_memory(ser.values, result.values)
+    if using_copy_on_write:
+        assert result._mgr.refs is None or result._mgr.refs[0] is None
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index fd312c3375240..df6b83518eaff 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -883,6 +883,3 @@ def test_dataframe_add_column_from_series():
     df.loc[2, "new"] = 100
     expected_s = Series([0, 11, 12])
     tm.assert_series_equal(s, expected_s)
-
-
-# TODO add tests for constructors