From 3236bcd934fcdb6ae903f97954ea179107e307e5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 13 Jan 2023 08:16:27 -0800 Subject: [PATCH 1/2] BUG: stringdtype.astype(dt64_or_td64) --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arrays/base.py | 12 ++++++++++++ pandas/tests/arrays/string_/test_string.py | 12 +----------- pandas/tests/series/methods/test_astype.py | 4 +--- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1c99ba0b8e412..0892d79449fe9 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -934,6 +934,7 @@ Conversion Strings ^^^^^^^ - Bug in :func:`pandas.api.dtypes.is_string_dtype` that would not return ``True`` for :class:`StringDtype` (:issue:`15585`) +- Bug in converting string dtypes to "datetime64[ns]" or "timedelta64[ns]" incorrectly raising ``TypeError`` (:issue:`36153`) - Interval diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 422b9effeface..4497583f60d71 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -55,9 +55,11 @@ from pandas.core.dtypes.cast import maybe_cast_to_extension_array from pandas.core.dtypes.common import ( + is_datetime64_dtype, is_dtype_equal, is_list_like, is_scalar, + is_timedelta64_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -580,6 +582,16 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: cls = dtype.construct_array_type() return cls._from_sequence(self, dtype=dtype, copy=copy) + elif is_datetime64_dtype(dtype): + from pandas.core.arrays import DatetimeArray + + return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy) + + elif is_timedelta64_dtype(dtype): + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy) + return np.array(self, dtype=dtype, copy=copy) def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 8d8d9ce20cefd..e1ea001819b1c 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -73,17 +73,7 @@ def test_setitem_with_scalar_string(dtype): tm.assert_extension_array_equal(arr, expected) -def test_astype_roundtrip(dtype, request): - if dtype.storage == "pyarrow": - reason = "ValueError: Could not convert object to NumPy datetime" - mark = pytest.mark.xfail(reason=reason, raises=ValueError) - request.node.add_marker(mark) - else: - mark = pytest.mark.xfail( - reason="GH#36153 casting from StringArray to dt64 fails", raises=ValueError - ) - request.node.add_marker(mark) - +def test_astype_roundtrip(dtype): ser = pd.Series(pd.date_range("2000", periods=12)) ser[0] = None diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9cc49199166b8..835687d33f9f4 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -454,9 +454,7 @@ class TestAstypeString: def test_astype_string_to_extension_dtype_roundtrip( self, data, dtype, request, nullable_string_dtype ): - if dtype == "boolean" or ( - dtype in ("datetime64[ns]", "timedelta64[ns]") and NaT in data - ): + if dtype == "boolean": mark = pytest.mark.xfail( reason="TODO StringArray.astype() with missing values #GH40566" ) From e7a127570421004eaef81ebe0088c069ed0c990b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 16 Jan 2023 08:10:24 -0800 Subject: [PATCH 2/2] fix test --- pandas/tests/series/methods/test_astype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 835687d33f9f4..024cdf9300157 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -454,7 +454,7 @@ class TestAstypeString: def test_astype_string_to_extension_dtype_roundtrip( self, data, dtype, request, nullable_string_dtype ): - if dtype == "boolean": + if dtype == "boolean" or (dtype == "timedelta64[ns]" and NaT in data): mark = pytest.mark.xfail( reason="TODO StringArray.astype() with missing values #GH40566" )