diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ec5027840dfd5..392c48354256b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -693,7 +693,7 @@ Categorical - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`) - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`) - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) -- +- Bug in :meth:`array.astype` where casting a pyarrow-backed array to a temporal :class:`CategoricalDtype` (e.g. with datetime or timedelta categories) raised an error or incorrectly converted all values to ``NaT`` (:issue:`62051`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8c5a03a6de50..581181d510c81 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3674,6 +3674,14 @@ def get_indexer( orig_target = target target = self._maybe_cast_listlike_indexer(target) + from pandas.api.types import is_timedelta64_dtype + + if target.dtype == "string[pyarrow]" and is_timedelta64_dtype(self.dtype): + from pandas.core.arrays.timedeltas import sequence_to_td64ns + + data, freq = sequence_to_td64ns(target, copy=False, unit=None) + target = type(target)(data) + self._check_indexing_method(method, limit, tolerance) if not self._index_as_unique: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9adbaadbdcdc8..d88fb536d868b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -384,6 +384,18 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: if self.tz is not None: # If we have tz, we can compare to tzaware return isinstance(dtype, 
DatetimeTZDtype) + + from pandas import ArrowDtype + + if isinstance(dtype, ArrowDtype): + import pyarrow as pa + + return ( + pa.types.is_date32(dtype.pyarrow_dtype) + or pa.types.is_date64(dtype.pyarrow_dtype) + or pa.types.is_timestamp(dtype.pyarrow_dtype) + ) + # if we dont have tz, we can only compare to tznaive return lib.is_np_dtype(dtype, "M") diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index 7ed4da69f5a99..4b9906f020c4b 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -7,14 +7,17 @@ CategoricalDtype, CategoricalIndex, DatetimeIndex, + Index, Interval, NaT, Period, Timestamp, array, + isna, to_datetime, ) import pandas._testing as tm +from pandas.core.arrays.arrow.array import ArrowExtensionArray class TestAstype: @@ -160,3 +163,20 @@ def test_astype_category_readonly_mask_values(self): result = arr.astype("category") expected = array([0, 1, 2], dtype="Int64").astype("category") tm.assert_extension_array_equal(result, expected) + + def test_arrow_array_astype_to_categorical_dtype_temporal(self): + # GH#62051 + pytest.importorskip("pyarrow") + arr = array( + ["2017-01-01", "2018-01-01", "2019-01-01"], dtype="date32[day][pyarrow]" + ) + cats = Index(["2017-01-01", "2018-01-01", "2019-01-01"], dtype="M8[s]") + dtype = CategoricalDtype(categories=cats, ordered=False) + + assert not all(isna(arr.astype(dtype))) + + arr = ArrowExtensionArray._from_sequence(["1h", "2h", "3h"]) + cats = Index(["1h", "2h", "3h"], dtype="m8[ns]") + dtype = CategoricalDtype(cats, ordered=False) + + assert not all(isna(arr.astype(dtype)))