diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 487d5d0d2accd..6f8646f12cafc 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -616,6 +616,7 @@ Categorical ^^^^^^^^^^^ - Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``. +- Bug when constructing a :class:`CategoricalIndex` with empty values and boolean categories, which raised a ``ValueError`` after a change to dtype coercion (:issue:`22702`). Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 63a1dacb47abb..216bccf7d6309 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2439,9 +2439,13 @@ def _get_codes_for_values(values, categories): """ utility routine to turn values into codes given the specified categories """ - from pandas.core.algorithms import _get_data_algo, _hashtables - if not is_dtype_equal(values.dtype, categories.dtype): + if is_dtype_equal(values.dtype, categories.dtype): + # To prevent erroneous dtype coercion in _get_data_algo, retrieve + # the underlying numpy array. 
gh-22702 + values = getattr(values, 'values', values) + categories = getattr(categories, 'values', categories) + else: values = ensure_object(values) categories = ensure_object(categories) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index b5f499ba27323..998c1182c013a 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -42,6 +42,12 @@ def test_constructor_empty(self): expected = pd.Int64Index([1, 2, 3]) tm.assert_index_equal(c.categories, expected) + def test_constructor_empty_boolean(self): + # see gh-22702 + cat = pd.Categorical([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + def test_constructor_tuples(self): values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object) result = Categorical(values) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 2221fd023b561..d49a6a6abc7c9 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -136,6 +136,12 @@ def test_construction_with_dtype(self): result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) + def test_construction_empty_with_bool_categories(self): + # see gh-22702 + cat = pd.CategoricalIndex([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + def test_construction_with_categorical_dtype(self): # construction with CategoricalDtype # GH18109