diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 5601048c409e1..492df4f1e3612 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -535,7 +535,7 @@ Indexing - Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`) - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`) - Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`) -- +- Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`) Missing diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 72c00dfe7c65a..f8aa1656c8c30 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -68,6 +68,7 @@ from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( + is_all_strings, is_categorical_dtype, is_datetime64_any_dtype, is_datetime64_dtype, @@ -720,7 +721,7 @@ def _validate_listlike(self, value, allow_object: bool = False): value = pd_array(value) value = extract_array(value, extract_numpy=True) - if is_dtype_equal(value.dtype, "string"): + if is_all_strings(value): # We got a StringArray try: # TODO: Could use from_sequence_of_strings if implemented diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 0788ecdd8b4b5..815a0a2040ddb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -15,6 +15,7 @@ Interval, Period, algos, + lib, ) from pandas._libs.tslibs import conversion from pandas._typing import ( @@ -1788,3 +1789,23 @@ def pandas_dtype(dtype) -> DtypeObj: raise TypeError(f"dtype '{dtype}' not understood") return npdtype + + +def is_all_strings(value: ArrayLike) -> bool: + """ + Check if this is an array of strings that we should try parsing. + + Includes object-dtype ndarray containing all-strings, StringArray, + and Categorical with all-string categories. + Does not include numpy string dtypes. + """ + dtype = value.dtype + + if isinstance(dtype, np.dtype): + return ( + dtype == np.dtype("object") + and lib.infer_dtype(value, skipna=False) == "string" + ) + elif isinstance(dtype, CategoricalDtype): + return dtype.categories.inferred_type == "string" + return dtype == "string" diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 4604fad019eca..d6402e027be98 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -871,7 +871,7 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli): else: assert ser._values is values - @pytest.mark.parametrize("box", [list, np.array, pd.array]) + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) @pytest.mark.parametrize( "key", [[0, 1], slice(0, 2), np.array([True, True, False])] ) @@ -911,7 +911,7 @@ def test_setitem_td64_scalar(self, indexer_sli, scalar): indexer_sli(ser)[0] = scalar assert ser._values._data is values._data - @pytest.mark.parametrize("box", [list, np.array, pd.array]) + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) @pytest.mark.parametrize( "key", [[0, 1], slice(0, 2), np.array([True, True, False])] )