From b04980a0ce08d09da31415f34e8520209ba0d9c9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Jun 2023 13:50:02 -0700 Subject: [PATCH 1/2] Backport PR #53651: BUG: convert_dtype(dtype_backend=nullable_numpy) with ArrowDtype --- doc/source/whatsnew/v2.0.3.rst | 1 + pandas/core/dtypes/cast.py | 5 +++++ pandas/tests/frame/methods/test_convert_dtypes.py | 10 +++++++++- pandas/tests/series/methods/test_convert_dtypes.py | 8 ++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 7b6363a094d00..bba149c2b778b 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -21,6 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in :func:`DataFrame.convert_dtype` and :func:`Series.convert_dtype` when trying to convert :class:`ArrowDtype` with ``dtype_backend="nullable_numpy"`` (:issue:`53648`) - Bug in :func:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`) - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) - Bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` with ``expand=True`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`53532`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 2dbd9465be3c6..9b1aa1a708764 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -96,6 +96,8 @@ notna, ) +from pandas.io._util import _arrow_dtype_mapping + if TYPE_CHECKING: from pandas import Index from pandas.core.arrays import ( @@ -1147,6 +1149,9 @@ def convert_dtypes( pa_type = to_pyarrow_type(base_dtype) if pa_type is not None: inferred_dtype = ArrowDtype(pa_type) + elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype): + # GH 53648 + inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype] # error: Incompatible return value type (got "Union[str, Union[dtype[Any], # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]") diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 2adee158379bb..082ef025992dd 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -146,7 +146,7 @@ def test_pyarrow_engine_lines_false(self): with pytest.raises(ValueError, match=msg): df.convert_dtypes(dtype_backend="numpy") - def test_pyarrow_backend_no_convesion(self): + def test_pyarrow_backend_no_conversion(self): # GH#52872 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": [1, 2], "b": 1.5, "c": True, "d": "x"}) @@ -159,3 +159,11 @@ def test_pyarrow_backend_no_convesion(self): dtype_backend="pyarrow", ) tm.assert_frame_equal(result, expected) + + def test_convert_dtypes_pyarrow_to_np_nullable(self): + # GH 53648 + pytest.importorskip("pyarrow") + ser = pd.DataFrame(range(2), dtype="int32[pyarrow]") + result = ser.convert_dtypes(dtype_backend="numpy_nullable") + expected = pd.DataFrame(range(2), dtype="Int32") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index d91cd6a43daea..b0f5093e4951d 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -240,3 +240,11 @@ def test_convert_dtype_object_with_na_float(self, infer_objects, dtype): result = ser.convert_dtypes(infer_objects=infer_objects) expected = pd.Series([1.5, pd.NA], dtype=dtype) tm.assert_series_equal(result, expected) + + def test_convert_dtypes_pyarrow_to_np_nullable(self): + # GH 53648 + pytest.importorskip("pyarrow") + ser = pd.Series(range(2), dtype="int32[pyarrow]") + result = ser.convert_dtypes(dtype_backend="numpy_nullable") + expected = pd.Series(range(2), dtype="Int32") + tm.assert_series_equal(result, expected) From 8a868599f5461a5fdc62e9ccad753d0c7d9233c9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Jun 2023 15:16:44 -0700 Subject: [PATCH 2/2] Move import --- pandas/core/dtypes/cast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9b1aa1a708764..aef8d483a902f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1039,6 +1039,8 @@ def convert_dtypes( """ inferred_dtype: str | DtypeObj + from pandas.core.arrays.arrow.dtype import ArrowDtype + if ( convert_string or convert_integer or convert_boolean or convert_floating ) and isinstance(input_array, np.ndarray): @@ -1121,7 +1123,6 @@ def convert_dtypes( if dtype_backend == "pyarrow": from pandas.core.arrays.arrow.array import to_pyarrow_type - from pandas.core.arrays.arrow.dtype import ArrowDtype from pandas.core.arrays.string_ import StringDtype assert not isinstance(inferred_dtype, str)