From 031fbc72c7595bbd2368fa5c6099f2666557ed7c Mon Sep 17 00:00:00 2001 From: mcortesdf Date: Tue, 10 Sep 2019 10:12:27 +0100 Subject: [PATCH 1/4] Fix to_datetime(errors='coerce') not swallowing all parser exceptions (#28299) --- doc/source/whatsnew/v0.25.1.rst | 1 + pandas/_libs/tslib.pyx | 7 ++++--- pandas/tests/indexes/datetimes/test_tools.py | 7 +++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 63dd56f4a3793..de284e320c684 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -30,6 +30,7 @@ Datetimelike - Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result (:issue:`27733`) - Bug in :meth:`Period.to_timestamp` where a :class:`Period` outside the :class:`Timestamp` implementation bounds (roughly 1677-09-21 to 2262-04-11) would return an incorrect :class:`Timestamp` instead of raising ``OutOfBoundsDatetime`` (:issue:`19643`) - Bug in iterating over :class:`DatetimeIndex` when the underlying data is read-only (:issue:`28055`) +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` while attempting to coerce errors could incorrectly lead to raising errors instead (:issue:`28299`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 01e500a80dcc4..fff2e7f69e028 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -610,6 +610,10 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) + # If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.utcoffset() + except Exception: if is_coerce: iresult[i] = NPY_NAT @@ -617,9 +621,6 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', raise 
TypeError("invalid string coercion to " "datetime") - # If the dateutil parser returned tzinfo, capture it - # to check if all arguments have the same tzinfo - tz = py_dt.utcoffset() if tz is not None: seen_datetime_offset = 1 # dateutil timezone objects cannot be hashed, so diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9af0f47f6dce9..4e1b5571f054e 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -901,6 +901,13 @@ def test_to_datetime_coerce(self): ) tm.assert_index_equal(result, expected) + def test_to_datetime_coerce_malformed(self): + # GH 28299 + ts_strings = ["200622-12-31", "111111-24-11"] + result = to_datetime(ts_strings, errors="coerce") + expected = Index([NaT, NaT]) + tm.assert_index_equal(result, expected) + def test_iso_8601_strings_with_same_offset(self): # GH 17697, 11736 ts_str = "2015-11-18 15:30:00+05:30" From 52264577ce86b9bf723b311ae512ccccf68331c6 Mon Sep 17 00:00:00 2001 From: mcortesdf Date: Tue, 10 Sep 2019 10:54:29 +0100 Subject: [PATCH 2/4] move whatsnew change to correct future (0.25.2) release notes --- doc/source/whatsnew/v0.25.1.rst | 1 - doc/source/whatsnew/v0.25.2.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index de284e320c684..63dd56f4a3793 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -30,7 +30,6 @@ Datetimelike - Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result (:issue:`27733`) - Bug in :meth:`Period.to_timestamp` where a :class:`Period` outside the :class:`Timestamp` implementation bounds (roughly 1677-09-21 to 2262-04-11) would return an incorrect :class:`Timestamp` instead of raising ``OutOfBoundsDatetime`` (:issue:`19643`) - Bug in iterating over 
:class:`DatetimeIndex` when the underlying data is read-only (:issue:`28055`) -- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` while attempting to coerce errors could incorrectly lead to raising errors instead (:issue:`28299`) Timezones ^^^^^^^^^ diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 1cdf213d81a74..ec5ee34e29708 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -19,7 +19,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` while attempting to coerce errors could incorrectly lead to raising errors instead (:issue:`28299`) - - From 26dd0b6a5c6b71e5d5666fde61f14288e3e433cc Mon Sep 17 00:00:00 2001 From: mcortesdf Date: Tue, 10 Sep 2019 13:55:21 +0100 Subject: [PATCH 3/4] move whatsnew change to version 1.0.0 release notes --- doc/source/whatsnew/v0.25.2.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index ec5ee34e29708..1cdf213d81a74 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -19,7 +19,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` while attempting to coerce errors could incorrectly lead to raising errors instead (:issue:`28299`) +- - - diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 58892b316c940..a531b3e4d8a1c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -97,7 +97,7 @@ Datetimelike - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) - Bug in 
``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) -- +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` while attempting to coerce errors could incorrectly lead to raising errors instead (:issue:`28299`) Timedelta From 50b072f37191b11fe2ac54b1b02ee07d3c871655 Mon Sep 17 00:00:00 2001 From: Miguel Date: Thu, 12 Sep 2019 10:17:26 +0100 Subject: [PATCH 4/4] update whatsnew with suggested phrasing --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0d2b08952da1a..bff0eaee96ff5 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -115,7 +115,7 @@ Datetimelike - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) - Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) -- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` while attempting to coerce errors could incorrectly lead to raising errors instead (:issue:`28299`) +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` with ``errors="coerce"`` could incorrectly lead to raising ``ValueError`` (:issue:`28299`) - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) - Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)