From 99f29362daea13f9e90fe01e579c4de852f19473 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 20 Jun 2023 20:56:11 +0200 Subject: [PATCH 1/3] BUG: df constructor not copying ea backed series --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/internals/construction.py | 6 +++++- pandas/tests/frame/test_constructors.py | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 90a0e8781781d..309b6ec160632 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -502,6 +502,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ +- Bug in :class:`DataFrame` constructor not copying :class:`Series` with extension dtype when given in dict (:issue:`53744`) - Bug in :class:`~arrays.ArrowExtensionArray` converting pandas non-nanosecond temporal objects from non-zero values to zero values (:issue:`53171`) - Bug in :meth:`Series.quantile` for pyarrow temporal types raising ArrowInvalid (:issue:`52678`) - Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index dc9c47a4a5e34..7dcb33780dd08 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -469,7 +469,11 @@ def dict_to_mgr( x.copy() if isinstance(x, ExtensionArray) else x.copy(deep=True) - if isinstance(x, Index) + if ( + isinstance(x, Index) + or isinstance(x, ABCSeries) + and is_1d_only_ea_dtype(x.dtype) + ) else x for x in arrays ] diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 47e307f561cf4..5c5320948ccb8 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2657,6 +2657,12 @@ def test_construct_with_strings_and_none(self): expected = DataFrame({"a": ["1", "2", None]}, dtype="str") tm.assert_frame_equal(df, expected) + def test_construct_from_dict_ea_series(self): + # GH#53744 + ser = Series([1, 2, 3], dtype="Int64") + df = DataFrame({"a": ser}) + assert not np.shares_memory(ser.values._data, df["a"].values._data) + class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): From f932c112a4d349d8f7f6d0a80188e40c309f81f9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:13:50 +0200 Subject: [PATCH 2/3] Update pandas/tests/frame/test_constructors.py Co-authored-by: Joris Van den Bossche --- pandas/tests/frame/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 9a2747fda803d..c2e5cd2f09e95 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2658,7 +2658,7 @@ def test_construct_with_strings_and_none(self): tm.assert_frame_equal(df, expected) def test_construct_from_dict_ea_series(self): - # GH#53744 + # GH#53744 - default of copy=True should also apply for Series with extension dtype ser = Series([1, 2, 3], dtype="Int64") df = DataFrame({"a": ser}) assert not np.shares_memory(ser.values._data, df["a"].values._data) From 410458d69122694fb14c06b080483f0c8622ab6e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:15:37 +0200 Subject: [PATCH 3/3] Move --- pandas/tests/frame/test_constructors.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c2e5cd2f09e95..3fbc6558b5c15 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2576,6 +2576,13 @@ def check_views(c_only: bool = False): # TODO: we can check b[0] == 0 if we stop consolidating in # setitem_with_indexer (except for datetimelike?) + def test_construct_from_dict_ea_series(self): + # GH#53744 - default of copy=True should also apply for Series with + # extension dtype + ser = Series([1, 2, 3], dtype="Int64") + df = DataFrame({"a": ser}) + assert not np.shares_memory(ser.values._data, df["a"].values._data) + def test_from_series_with_name_with_columns(self): # GH 7893 result = DataFrame(Series(1, name="foo"), columns=["bar"]) @@ -2657,12 +2664,6 @@ def test_construct_with_strings_and_none(self): expected = DataFrame({"a": ["1", "2", None]}, dtype="str") tm.assert_frame_equal(df, expected) - def test_construct_from_dict_ea_series(self): - # GH#53744 - default of copy=True should also apply for Series with extension dtype - ser = Series([1, 2, 3], dtype="Int64") - df = DataFrame({"a": ser}) - assert not np.shares_memory(ser.values._data, df["a"].values._data) - class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):