From 6cd8e0fe9b5a62b2cbb37742529c832dc2edc6ff Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 2 May 2016 20:59:29 -0400 Subject: [PATCH] BUG: More followups on to_datetime exceptions, xref #13033 closes #13059 --- doc/source/whatsnew/v0.18.1.txt | 17 ++++++-- pandas/tseries/tests/test_timeseries.py | 28 ++++++++++++- pandas/tseries/tools.py | 2 +- pandas/tslib.pyx | 52 ++++++++++++++++++------- 4 files changed, 78 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 21d8746abdd0f..c75999a33f8c9 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -478,7 +478,7 @@ In addition to this error change, several others have been made as well: ``to_datetime`` error changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'`` (:issue:`11758`, :issue:`13052`) +Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'``. Furthermore, an ``OutOfBoundsDatetime`` exception will be raised when an out-of-range value is encountered for that unit when ``errors='raise'``. (:issue:`11758`, :issue:`13052`, :issue:`13059`) Previous behaviour: @@ -490,12 +490,21 @@ Previous behaviour: In [28]: pd.to_datetime(11111111, unit='D', errors='ignore') OverflowError: Python int too large to convert to C long + In [29]: pd.to_datetime(11111111, unit='D', errors='raise') + OverflowError: Python int too large to convert to C long + New behaviour: -.. ipython:: python +.. 
code-block:: python + + In [2]: pd.to_datetime(1420043460, unit='s', errors='coerce') + Out[2]: Timestamp('2014-12-31 16:31:00') + + In [3]: pd.to_datetime(11111111, unit='D', errors='ignore') + Out[3]: 11111111 - pd.to_datetime(1420043460, unit='s', errors='coerce') - pd.to_datetime(11111111, unit='D', errors='ignore') + In [4]: pd.to_datetime(11111111, unit='D', errors='raise') + OutOfBoundsDatetime: cannot convert input with unit 'D' .. _whatsnew_0181.api.other: diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 15e9136d78243..37e708df2595d 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4249,7 +4249,7 @@ def test_unit_errors(self): 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): + with self.assertRaises(tslib.OutOfBoundsDatetime): to_datetime(values, unit='D', errors='raise') values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT'] @@ -4263,9 +4263,33 @@ def test_unit_errors(self): expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): + with self.assertRaises(tslib.OutOfBoundsDatetime): to_datetime(values, errors='raise', unit='s') + # if we have a string, then we raise a ValueError + # and NOT an OutOfBoundsDatetime + for val in ['foo', Timestamp('20130101')]: + try: + to_datetime(val, errors='raise', unit='s') + except tslib.OutOfBoundsDatetime: + raise AssertionError("incorrect exception raised") + except ValueError: + pass + + # consistency of conversions + expected = Timestamp('1970-05-09 14:25:11') + result = pd.to_datetime(11111111, unit='s', errors='raise') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='coerce') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = 
pd.to_datetime(11111111, unit='s', errors='ignore') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + def test_roundtrip(self): # test value to string and back conversions diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 10ead73968f76..a46149035dbae 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -332,7 +332,7 @@ def _convert_listlike(arg, box, format, name=None): if box: if errors == 'ignore': from pandas import Index - return Index(result, dtype=object) + return Index(result) return DatetimeIndex(result, tz='utc' if utc else None, name=name) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 9b7942400d3a9..261997122988b 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1982,9 +1982,12 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): """ convert the ndarray according to the unit if errors: - - raise: return converted values or raise + - raise: return converted values or raise OutOfBoundsDatetime + if out of range on the conversion or + ValueError for other conversions (e.g. 
a string) - ignore: return non-convertible values as the same unit - coerce: NaT for non-convertibles + """ cdef: Py_ssize_t i, j, n=len(values) @@ -2023,7 +2026,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): if not need_to_iterate: if (fvalues < _NS_LOWER_BOUND).any() or (fvalues > _NS_UPPER_BOUND).any(): - raise ValueError("cannot convert input with unit: {0}".format(unit)) + raise OutOfBoundsDatetime("cannot convert input with unit '{0}'".format(unit)) result = (iresult*m).astype('M8[ns]') iresult = result.view('i8') iresult[mask] = iNaT @@ -2046,9 +2049,14 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): else: try: iresult[i] = cast_from_unit(val, unit) - except: - if is_ignore or is_raise: - raise + except OverflowError: + if is_raise: + raise OutOfBoundsDatetime("cannot convert input {0}" + "with the unit '{1}'".format( + val, + unit)) + elif is_ignore: + raise AssertionError iresult[i] = NPY_NAT elif util.is_string_object(val): @@ -2058,24 +2066,40 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): else: try: iresult[i] = cast_from_unit(float(val), unit) + except ValueError: + if is_raise: + raise ValueError("non convertible value {0}" + "with the unit '{1}'".format( + val, + unit)) + elif is_ignore: + raise AssertionError except: - if is_ignore or is_raise: - raise + if is_raise: + raise OutOfBoundsDatetime("cannot convert input {0}" + "with the unit '{1}'".format( + val, + unit)) + elif is_ignore: + raise AssertionError iresult[i] = NPY_NAT else: - if is_ignore or is_raise: - raise ValueError + if is_raise: + raise ValueError("non convertible value {0}" + "with the unit '{1}'".format( + val, + unit)) + if is_ignore: + raise AssertionError + iresult[i] = NPY_NAT return result - except (OverflowError, ValueError) as e: - - # we cannot process and are done - if is_raise: - raise ValueError("cannot convert input with the unit: {0}".format(unit)) + except AssertionError: + 
pass # we have hit an exception # and are in ignore mode