Skip to content

BUG: More followups on to_datetime exceptions, xref #13033 #13059

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ In addition to this error change, several others have been made as well:
``to_datetime`` error changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'`` (:issue:`11758`, :issue:`13052`)
Bugs have been fixed in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'``, or with non-convertible entries and ``errors='ignore'``. Furthermore, an ``OutOfBoundsDatetime`` exception will be raised when an out-of-range value is encountered for that unit when ``errors='raise'``. (:issue:`11758`, :issue:`13052`, :issue:`13059`)

Previous behaviour:

Expand All @@ -490,12 +490,21 @@ Previous behaviour:
In [28]: pd.to_datetime(11111111, unit='D', errors='ignore')
OverflowError: Python int too large to convert to C long

In [29]: pd.to_datetime(11111111, unit='D', errors='raise')
OverflowError: Python int too large to convert to C long

New behaviour:

.. ipython:: python
.. code-block:: python

In [2]: pd.to_datetime(1420043460, unit='s', errors='coerce')
Out[2]: Timestamp('2014-12-31 16:31:00')

In [3]: pd.to_datetime(11111111, unit='D', errors='ignore')
Out[3]: 11111111

pd.to_datetime(1420043460, unit='s', errors='coerce')
pd.to_datetime(11111111, unit='D', errors='ignore')
In [4]: pd.to_datetime(11111111, unit='D', errors='raise')
OutOfBoundsDatetime: cannot convert input with unit 'D'

.. _whatsnew_0181.api.other:

Expand Down
28 changes: 26 additions & 2 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4249,7 +4249,7 @@ def test_unit_errors(self):
'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
tm.assert_index_equal(result, expected)

with self.assertRaises(ValueError):
with self.assertRaises(tslib.OutOfBoundsDatetime):
to_datetime(values, unit='D', errors='raise')

values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT']
Expand All @@ -4263,9 +4263,33 @@ def test_unit_errors(self):
expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
tm.assert_index_equal(result, expected)

with self.assertRaises(ValueError):
with self.assertRaises(tslib.OutOfBoundsDatetime):
to_datetime(values, errors='raise', unit='s')

# if we have a string, then we raise a ValueError
# and NOT an OutOfBoundsDatetime
for val in ['foo', Timestamp('20130101')]:
try:
to_datetime(val, errors='raise', unit='s')
except tslib.OutOfBoundsDatetime:
raise AssertionError("incorrect exception raised")
except ValueError:
pass

# consistency of conversions
expected = Timestamp('1970-05-09 14:25:11')
result = pd.to_datetime(11111111, unit='s', errors='raise')
self.assertEqual(result, expected)
self.assertIsInstance(result, Timestamp)

result = pd.to_datetime(11111111, unit='s', errors='coerce')
self.assertEqual(result, expected)
self.assertIsInstance(result, Timestamp)

result = pd.to_datetime(11111111, unit='s', errors='ignore')
self.assertEqual(result, expected)
self.assertIsInstance(result, Timestamp)

def test_roundtrip(self):

# test value to string and back conversions
Expand Down
2 changes: 1 addition & 1 deletion pandas/tseries/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def _convert_listlike(arg, box, format, name=None):
if box:
if errors == 'ignore':
from pandas import Index
return Index(result, dtype=object)
return Index(result)

return DatetimeIndex(result, tz='utc' if utc else None,
name=name)
Expand Down
52 changes: 38 additions & 14 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1982,9 +1982,12 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
"""
convert the ndarray according to the unit
if errors:
- raise: return converted values or raise
- raise: return converted values or raise OutOfBoundsDatetime
if out of range on the conversion or
ValueError for other conversions (e.g. a string)
- ignore: return non-convertible values as the same unit
- coerce: NaT for non-convertibles

"""
cdef:
Py_ssize_t i, j, n=len(values)
Expand Down Expand Up @@ -2023,7 +2026,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
if not need_to_iterate:

if (fvalues < _NS_LOWER_BOUND).any() or (fvalues > _NS_UPPER_BOUND).any():
raise ValueError("cannot convert input with unit: {0}".format(unit))
raise OutOfBoundsDatetime("cannot convert input with unit '{0}'".format(unit))
result = (iresult*m).astype('M8[ns]')
iresult = result.view('i8')
iresult[mask] = iNaT
Expand All @@ -2046,9 +2049,14 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
else:
try:
iresult[i] = cast_from_unit(val, unit)
except:
if is_ignore or is_raise:
raise
except OverflowError:
if is_raise:
raise OutOfBoundsDatetime("cannot convert input {0}"
"with the unit '{1}'".format(
val,
unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT

elif util.is_string_object(val):
Expand All @@ -2058,24 +2066,40 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
else:
try:
iresult[i] = cast_from_unit(float(val), unit)
except ValueError:
if is_raise:
raise ValueError("non convertible value {0}"
"with the unit '{1}'".format(
val,
unit))
elif is_ignore:
raise AssertionError
except:
if is_ignore or is_raise:
raise
if is_raise:
raise OutOfBoundsDatetime("cannot convert input {0}"
"with the unit '{1}'".format(
val,
unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT

else:

if is_ignore or is_raise:
raise ValueError
if is_raise:
raise ValueError("non convertible value {0}"
"with the unit '{1}'".format(
val,
unit))
if is_ignore:
raise AssertionError

iresult[i] = NPY_NAT

return result

except (OverflowError, ValueError) as e:

# we cannot process and are done
if is_raise:
raise ValueError("cannot convert input with the unit: {0}".format(unit))
except AssertionError:
pass

# we have hit an exception
# and are in ignore mode
Expand Down