Skip to content

BUG: Fix inconsistency of converting empty categorical with dtype_backend='pyarrow' #61131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 17, 2025
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,7 @@ Bug fixes
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
-

Datetimelike
Expand Down
1 change: 1 addition & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,7 @@ def convert_dtypes(
or (
inferred_dtype.kind not in "iufcb"
and not isinstance(inferred_dtype, StringDtype)
and not isinstance(inferred_dtype, CategoricalDtype)
)
):
if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance(
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -35,6 +37,19 @@ def test_convert_empty(self):
empty_df = pd.DataFrame()
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())

@td.skip_if_no("pyarrow")
def test_convert_empty_categorical_to_pyarrow(self):
    # GH#59934
    # Categorical columns holding only missing values -- one built without
    # explicit categories, one with them -- are expected to come back from
    # ``convert_dtypes(dtype_backend="pyarrow")`` equal to the input frame.
    all_missing = [None] * 5
    frame = pd.DataFrame(
        {
            "A": pd.Categorical(all_missing),
            "B": pd.Categorical(all_missing, categories=["B1", "B2"]),
        }
    )
    result = frame.convert_dtypes(dtype_backend="pyarrow")
    # The unconverted frame itself serves as the expected value.
    tm.assert_frame_equal(result, frame)

def test_convert_dtypes_retain_column_names(self):
# GH#41435
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from pandas._libs import lib
import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
Expand Down Expand Up @@ -298,6 +299,15 @@ def test_convert_dtypes_pyarrow_null(self):
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
tm.assert_series_equal(result, expected)

@td.skip_if_no("pyarrow")
@pytest.mark.parametrize("categories", [None, ["S1", "S2"]])
def test_convert_empty_categorical_to_pyarrow(self, categories):
    # GH#59934
    # A categorical Series containing only missing values (with or without
    # explicit categories) is expected to be returned equal to the input by
    # ``convert_dtypes(dtype_backend="pyarrow")``.
    values = pd.Categorical([None] * 5, categories=categories)
    original = pd.Series(values)
    result = original.convert_dtypes(dtype_backend="pyarrow")
    # The unconverted Series itself serves as the expected value.
    tm.assert_series_equal(result, original)

def test_convert_dtype_pyarrow_timezone_preserve(self):
# GH 60237
pytest.importorskip("pyarrow")
Expand Down