From 02c95582e149ec14f7533f67cccf43e376c78665 Mon Sep 17 00:00:00 2001 From: Justin McOmie Date: Thu, 29 Jul 2021 14:09:07 -0700 Subject: [PATCH 1/5] BUG: Pass copy argument to expanddim constructor in concat. --- pandas/core/reshape/concat.py | 2 +- .../tests/extension/decimal/test_decimal.py | 13 +-------- pandas/tests/frame/methods/test_astype.py | 27 +++++++++++++++++++ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 560735b593cd1..3be1a04d9e2a4 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -504,7 +504,7 @@ def get_result(self): cons = sample._constructor_expanddim index, columns = self.new_axes - df = cons(data, index=index) + df = cons(data, index=index, copy=self.copy) df.columns = columns return df.__finalize__(self, method="concat") diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 7a3f88d0d6c41..99d92a5bbf774 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -261,18 +261,7 @@ def test_dataframe_constructor_with_dtype(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "frame", - [ - pytest.param( - True, - marks=pytest.mark.xfail( - reason="pd.concat call inside NDFrame.astype reverts the dtype" - ), - ), - False, - ], -) +@pytest.mark.parametrize("frame", [True, False]) def test_astype_dispatches(frame): # This is a dtype-specific test that ensures Series[decimal].astype # gets all the way through to ExtensionArray.astype diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 1f1991214aad0..c8b8ac419ff5f 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -23,6 +23,7 @@ option_context, ) import pandas._testing as tm +from pandas.core.arrays.integer import coerce_to_array def _check_cast(df, v): @@ -726,3 +727,29 @@ def test_astype_categorical_to_string_missing(self): cat = df.astype("category") result = cat.astype(str) tm.assert_frame_equal(result, expected) + + +class IntegerArrayNoCopy(pd.core.arrays.IntegerArray): + # GH 42501 + + @classmethod + def _from_sequence(cls, scalars, *, dtype=None, copy=False): + values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy) + return IntegerArrayNoCopy(values, mask) + + def copy(self): + assert False + + +class Int16DtypeNoCopy(pd.Int16Dtype): + # GH 42501 + + @classmethod + def construct_array_type(cls): + return IntegerArrayNoCopy + + +def test_frame_astype_no_copy(): + # GH 42501 + df = DataFrame({"col": [1, 4, None, 5]}, dtype=object) + df = df.astype({"col": Int16DtypeNoCopy()}, copy=False) From ff6c88dc1406044f73259a5552ab8c8569cb9144 Mon Sep 17 00:00:00 2001 From: Justin McOmie Date: Thu, 5 Aug 2021 17:12:53 -0700 Subject: [PATCH 2/5] Updates for CR: frame and array equality checks, whatsnew. --- doc/source/whatsnew/v1.3.2.rst | 1 + pandas/tests/frame/methods/test_astype.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index bcb096e630d85..98ef6e1ca49cf 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -23,6 +23,7 @@ Fixed regressions - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`) +- Fixed regression in :meth:`pandas.concat` where copy=False was not honored in ``axis=1`` Series concatenation. (:issue:42501). .. --------------------------------------------------------------------------- diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index c8b8ac419ff5f..d248281218dbe 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -751,5 +751,8 @@ def construct_array_type(cls): def test_frame_astype_no_copy(): # GH 42501 - df = DataFrame({"col": [1, 4, None, 5]}, dtype=object) - df = df.astype({"col": Int16DtypeNoCopy()}, copy=False) + df = DataFrame({"a": [1, 4, None, 5], "b": [6, 7, 8, 9]}, dtype=object) + result = df.astype({"a": Int16DtypeNoCopy()}, copy=False) + + tm.assert_frame_equal(df, result, check_dtype=False) + tm.assert_numpy_array_equal(df.b.values, result.b.values, check_same="same") From 657b1988e465c4b2a1b6ae48b9d05adc36f391a0 Mon Sep 17 00:00:00 2001 From: Justin McOmie Date: Thu, 5 Aug 2021 21:07:02 -0700 Subject: [PATCH 3/5] Whatsnew entry format fix. --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 98ef6e1ca49cf..38ac95890748d 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -23,7 +23,7 @@ Fixed regressions - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`) -- Fixed regression in :meth:`pandas.concat` where copy=False was not honored in ``axis=1`` Series concatenation. (:issue:42501). +- Fixed regression in :meth:`pandas.concat` where copy=False was not honored in ``axis=1`` Series concatenation. (:issue:`42501`) .. --------------------------------------------------------------------------- From 4fcde9999014fcd790f9420fbc622a8b61cc752a Mon Sep 17 00:00:00 2001 From: Justin McOmie Date: Mon, 9 Aug 2021 09:29:24 -0700 Subject: [PATCH 4/5] Update doc/source/whatsnew/v1.3.2.rst Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 38ac95890748d..ec1774dc0dde8 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -23,7 +23,7 @@ Fixed regressions - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`) -- Fixed regression in :meth:`pandas.concat` where copy=False was not honored in ``axis=1`` Series concatenation. (:issue:`42501`) +- Fixed regression in :func:`pandas.concat` where ``copy=False`` was not honored in ``axis=1`` Series concatenation (:issue:`42501`) .. --------------------------------------------------------------------------- From e14d0066dda87c149fa1bae4ccd7a516c44adc8f Mon Sep 17 00:00:00 2001 From: Justin McOmie Date: Tue, 10 Aug 2021 23:23:34 -0600 Subject: [PATCH 5/5] Updates for CR: shares_memory call; dtype assertion. --- pandas/tests/frame/methods/test_astype.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index d248281218dbe..775a5a38768e6 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -754,5 +754,5 @@ def test_frame_astype_no_copy(): df = DataFrame({"a": [1, 4, None, 5], "b": [6, 7, 8, 9]}, dtype=object) result = df.astype({"a": Int16DtypeNoCopy()}, copy=False) - tm.assert_frame_equal(df, result, check_dtype=False) - tm.assert_numpy_array_equal(df.b.values, result.b.values, check_same="same") + assert result.a.dtype == pd.Int16Dtype() + assert np.shares_memory(df.b.values, result.b.values)