From f2cf9158e78e238c7bdcab14d336f9e680232d7d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Feb 2018 22:16:03 -0800 Subject: [PATCH 1/2] Fix parsing corner case closes #19382 --- pandas/_libs/tslib.pyx | 41 +++++++++++++++----- pandas/_libs/tslibs/conversion.pyx | 8 ++++ pandas/tests/indexes/datetimes/test_tools.py | 14 ++++++- pandas/tests/scalar/test_timestamp.py | 7 ++++ 4 files changed, 59 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 81df7981096ba..1ad094bb39a55 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -609,20 +609,26 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', value = tz_convert_single(value, tz, 'UTC') iresult[i] = value check_dts_bounds(&dts) + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if require_iso8601: + if _handle_error_require_iso8601(val, &iresult[i], + is_coerce, is_raise): + continue + return values + elif is_coerce: + iresult[i] = NPY_NAT + continue + raise except ValueError: # if requiring iso8601 strings, skip trying other formats if require_iso8601: - if _parse_today_now(val, &iresult[i]): + if _handle_error_require_iso8601(val, &iresult[i], + is_coerce, is_raise): continue - if is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError( - "time data %r doesn't match format " - "specified" % (val,)) - else: - return values + return values try: py_dt = parse_datetime_string(val, dayfirst=dayfirst, @@ -725,6 +731,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return oresult +cdef bint _handle_error_require_iso8601(object val, int64_t* iresult, + bint is_coerce, + bint is_raise) except? -1: + # Return True to continue, False to return values, or raise + if _parse_today_now(val, iresult): + return True + elif is_coerce: + iresult[0] = NPY_NAT + return True + elif is_raise: + raise ValueError("time data {val} doesn't match format " + "specified".format(val=val)) + return False + + cdef inline bint _parse_today_now(str val, int64_t* iresult): # We delay this check for as long as possible # because it catches relatively rare cases diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a32bfc1f6836c..4f1a053da6f1d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds, dt64_to_dtstruct, dtstruct_to_dt64, get_datetime64_unit, get_datetime64_value, pydatetime_to_dt64) +from np_datetime import OutOfBoundsDatetime from util cimport (is_string_object, is_datetime64_object, @@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, ambiguous='raise', errors='raise')[0] + + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to dateutil + # parser will return incorrect result because it will ignore + # nanoseconds + raise + except ValueError: try: ts = parse_datetime_string(ts, dayfirst=dayfirst, diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 44f3c21d23e62..e7e8ecdcd15b2 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -783,7 +783,6 @@ def test_dataframe_dtypes(self, cache): class TestToDatetimeMisc(object): - @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_iso8601(self, cache): result = to_datetime(["2012-01-01 00:00:00"], cache=cache) @@ -1596,6 +1595,19 @@ def test_coerce_of_invalid_datetimes(self): ) ) + def test_to_datetime_barely_out_of_bounds(self): + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) + + with pytest.raises(tslib.OutOfBoundsDatetime): + to_datetime(arr) + + with pytest.raises(tslib.OutOfBoundsDatetime): + # Essentially the same as above, but more directly calling + # the relevant function + tslib.array_to_datetime(arr) + def test_normalize_date(): value = date(2012, 9, 7) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 301f6da140866..2cb0741128e5e 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -15,6 +15,7 @@ from pandas.tseries import offsets +from pandas._libs.tslib import OutOfBoundsDatetime from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz @@ -410,6 +411,12 @@ def test_out_of_bounds_string(self): with pytest.raises(ValueError): Timestamp('2263-01-01') + def test_barely_out_of_bounds(self): + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + with pytest.raises(OutOfBoundsDatetime): + Timestamp('2262-04-11 23:47:16.854775808') + def test_bounds_with_different_units(self): out_of_bounds_dates = ('1677-09-21', '2262-04-12') From 9b3e4b40213ad16a69abde6dd528bce542f3faee Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Feb 2018 09:17:55 -0800 Subject: [PATCH 2/2] Whatsnew note, requested edits --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslib.pyx | 35 +++++++++----------- pandas/tests/indexes/datetimes/test_tools.py | 6 ++-- pandas/tests/scalar/test_timestamp.py | 3 +- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 69965f44d87a8..85949f671be2d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -455,6 +455,7 @@ Datetimelike - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) - Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) - Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) +- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Timezones diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 1ad094bb39a55..877d7deff6ff4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -614,9 +614,15 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # dateutil parser will return incorrect result because # it will ignore nanoseconds if require_iso8601: - if _handle_error_require_iso8601(val, &iresult[i], - is_coerce, is_raise): + if _parse_today_now(val, &iresult[i]): + continue + elif is_coerce: + iresult[i] = NPY_NAT continue + elif is_raise: + raise ValueError("time data {val} doesn't match " + "format specified" + .format(val=val)) return values elif is_coerce: iresult[i] = NPY_NAT @@ -625,9 +631,15 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', except ValueError: # if requiring iso8601 strings, skip trying other formats if require_iso8601: - if _handle_error_require_iso8601(val, &iresult[i], - is_coerce, is_raise): + if _parse_today_now(val, &iresult[i]): + continue + elif is_coerce: + iresult[i] = NPY_NAT continue + elif is_raise: + raise ValueError("time data {val} doesn't match " + "format specified" + .format(val=val)) return values try: @@ -731,21 +743,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return oresult -cdef bint _handle_error_require_iso8601(object val, int64_t* iresult, - bint is_coerce, - bint is_raise) except? -1: - # Return True to continue, False to return values, or raise - if _parse_today_now(val, iresult): - return True - elif is_coerce: - iresult[0] = NPY_NAT - return True - elif is_raise: - raise ValueError("time data {val} doesn't match format " - "specified".format(val=val)) - return False - - cdef inline bint _parse_today_now(str val, int64_t* iresult): # We delay this check for as long as possible # because it catches relatively rare cases diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index e7e8ecdcd15b2..f8b1f68ba33ce 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -17,6 +17,7 @@ from pandas._libs.tslibs import parsing from pandas.core.tools import datetimes as tools +from pandas.errors import OutOfBoundsDatetime from pandas.compat import lmap from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype @@ -1596,14 +1597,15 @@ def test_coerce_of_invalid_datetimes(self): ) def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 # GH#19382 close enough to bounds that dropping nanos would result # in an in-bounds datetime arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) - with pytest.raises(tslib.OutOfBoundsDatetime): + with pytest.raises(OutOfBoundsDatetime): to_datetime(arr) - with pytest.raises(tslib.OutOfBoundsDatetime): + with pytest.raises(OutOfBoundsDatetime): # Essentially the same as above, but more directly calling # the relevant function tslib.array_to_datetime(arr) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2cb0741128e5e..7695c94409232 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -15,10 +15,10 @@ from pandas.tseries import offsets -from pandas._libs.tslib import OutOfBoundsDatetime from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz +from pandas.errors import OutOfBoundsDatetime from pandas.compat import long, PY3 from pandas.compat.numpy import np_datetime64_compat from pandas import Timestamp, Period, Timedelta @@ -412,6 +412,7 @@ def test_out_of_bounds_string(self): Timestamp('2263-01-01') def test_barely_out_of_bounds(self): + # GH#19529 # GH#19382 close enough to bounds that dropping nanos would result # in an in-bounds datetime with pytest.raises(OutOfBoundsDatetime):