diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 803d1c914c954..b8d8e7a2bb893 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -593,13 +593,6 @@ def __init__( copy: bool | None = None, ): - if copy is None: - if isinstance(data, dict) or data is None: - # retain pre-GH#38939 default behavior - copy = True - else: - copy = False - if data is None: data = {} if dtype is not None: @@ -618,6 +611,21 @@ def __init__( manager = get_option("mode.data_manager") + if copy is None: + if isinstance(data, dict): + # retain pre-GH#38939 default behavior + copy = True + elif ( + manager == "array" + and isinstance(data, (np.ndarray, ExtensionArray)) + and data.ndim == 2 + ): + # INFO(ArrayManager) by default copy the 2D input array to get + # contiguous 1D arrays + copy = True + else: + copy = False + if isinstance(data, (BlockManager, ArrayManager)): mgr = self._init_mgr( data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 77f3db0d09df5..61d38c43aca24 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -290,6 +290,10 @@ def ndarray_to_mgr( if not len(values) and columns is not None and len(columns): values = np.empty((0, 1), dtype=object) + # if the array preparation does a copy -> avoid this for ArrayManager, + # since the copy is done on conversion to 1D arrays + copy_on_sanitize = False if typ == "array" else copy + vdtype = getattr(values, "dtype", None) if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype): # GH#19157 @@ -324,7 +328,7 @@ def ndarray_to_mgr( else: # by definition an array here # the dtypes will be coerced to a single dtype - values = _prep_ndarray(values, copy=copy) + values = _prep_ndarray(values, copy=copy_on_sanitize) if dtype is not None and not is_dtype_equal(values.dtype, dtype): shape = values.shape @@ -334,7 +338,7 @@ def ndarray_to_mgr( rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f") values = sanitize_array( - flat, None, dtype=dtype, copy=copy, raise_cast_failure=rcf + flat, None, dtype=dtype, copy=copy_on_sanitize, raise_cast_failure=rcf ) values = values.reshape(shape) @@ -363,6 +367,9 @@ def ndarray_to_mgr( values = ensure_wrapped_if_datetimelike(values) arrays = [values[:, i] for i in range(values.shape[1])] + if copy: + arrays = [arr.copy() for arr in arrays] + return ArrayManager(arrays, [index, columns], verify_integrity=False) values = values.T diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index 477099fba75e1..f755b0addfd6d 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -226,8 +226,8 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame): class TestPrivateValues: - def test_private_values_dt64tz(self, request): - + @td.skip_array_manager_invalid_test + def test_private_values_dt64tz(self): dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1) df = DataFrame(dta, columns=["A"]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7347640fc05a7..f070cf3dd20f4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -264,12 +264,17 @@ def test_constructor_dtype_nocast_view_dataframe(self): should_be_view[0][0] = 99 assert df.values[0, 0] == 99 - @td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array? - def test_constructor_dtype_nocast_view_2d_array(self): - df = DataFrame([[1, 2]]) - should_be_view = DataFrame(df.values, dtype=df[0].dtype) - should_be_view[0][0] = 97 - assert df.values[0, 0] == 97 + def test_constructor_dtype_nocast_view_2d_array(self, using_array_manager): + df = DataFrame([[1, 2], [3, 4]], dtype="int64") + if not using_array_manager: + should_be_view = DataFrame(df.values, dtype=df[0].dtype) + should_be_view[0][0] = 97 + assert df.values[0, 0] == 97 + else: + # INFO(ArrayManager) DataFrame(ndarray) doesn't necessarily preserve + # a view on the array to ensure contiguous 1D arrays + df2 = DataFrame(df.values, dtype=df[0].dtype) + assert df2._mgr.arrays[0].flags.c_contiguous @td.skip_array_manager_invalid_test def test_1d_object_array_does_not_copy(self): @@ -2111,17 +2116,29 @@ def test_constructor_frame_copy(self, float_frame): assert (cop["A"] == 5).all() assert not (float_frame["A"] == 5).all() - # TODO(ArrayManager) keep view on 2D array? - @td.skip_array_manager_not_yet_implemented - def test_constructor_ndarray_copy(self, float_frame): - df = DataFrame(float_frame.values) + def test_constructor_ndarray_copy(self, float_frame, using_array_manager): + if not using_array_manager: + df = DataFrame(float_frame.values) - float_frame.values[5] = 5 - assert (df.values[5] == 5).all() + float_frame.values[5] = 5 + assert (df.values[5] == 5).all() - df = DataFrame(float_frame.values, copy=True) - float_frame.values[6] = 6 - assert not (df.values[6] == 6).all() + df = DataFrame(float_frame.values, copy=True) + float_frame.values[6] = 6 + assert not (df.values[6] == 6).all() + else: + arr = float_frame.values.copy() + # default: copy to ensure contiguous arrays + df = DataFrame(arr) + assert df._mgr.arrays[0].flags.c_contiguous + arr[0, 0] = 100 + assert df.iloc[0, 0] != 100 + + # manually specify copy=False + df = DataFrame(arr, copy=False) + assert not df._mgr.arrays[0].flags.c_contiguous + arr[0, 0] = 1000 + assert df.iloc[0, 0] == 1000 # TODO(ArrayManager) keep view on Series? @td.skip_array_manager_not_yet_implemented