diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e03a1154b0796..2bb1e4af21860 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -636,6 +636,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) +- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) - Datetimelike diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f11aefeeaaa00..dae04ba6244d4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1127,6 +1127,7 @@ def convert_dtypes( or ( inferred_dtype.kind not in "iufcb" and not isinstance(inferred_dtype, StringDtype) + and not isinstance(inferred_dtype, CategoricalDtype) ) ): if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance( diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index d0f30204758d3..ab847e2f8e81e 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm @@ -35,6 +37,19 @@ def test_convert_empty(self): empty_df = pd.DataFrame() tm.assert_frame_equal(empty_df, empty_df.convert_dtypes()) + @td.skip_if_no("pyarrow") + def test_convert_empty_categorical_to_pyarrow(self): + # GH#59934 + df = pd.DataFrame( + { + "A": pd.Categorical([None] * 5), + "B": pd.Categorical([None] * 5, categories=["B1", "B2"]), + } + ) + converted = df.convert_dtypes(dtype_backend="pyarrow") + expected = df + tm.assert_frame_equal(converted, expected) + def test_convert_dtypes_retain_column_names(self): # GH#41435 df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index d373386108ff6..324e03894e92c 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -4,6 +4,7 @@ import pytest from pandas._libs import lib +import pandas.util._test_decorators as td import pandas as pd import pandas._testing as tm @@ -298,6 +299,15 @@ def test_convert_dtypes_pyarrow_null(self): expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null())) tm.assert_series_equal(result, expected) + @td.skip_if_no("pyarrow") + @pytest.mark.parametrize("categories", [None, ["S1", "S2"]]) + def test_convert_empty_categorical_to_pyarrow(self, categories): + # GH#59934 + ser = pd.Series(pd.Categorical([None] * 5, categories=categories)) + converted = ser.convert_dtypes(dtype_backend="pyarrow") + expected = ser + tm.assert_series_equal(converted, expected) + def test_convert_dtype_pyarrow_timezone_preserve(self): # GH 60237 pytest.importorskip("pyarrow")