diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 974de36cc736d..886469837d184 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -198,14 +198,14 @@ Datetimelike - Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) - Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`) - Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`, :issue:`38792`) -- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype (:issue:`38792`) +- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype or a ``timedelta`` object ouf of bounds for ``timedelta64[ns]`` dtype (:issue:`38792`, :issue:`38965`) - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) - Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) - Timedelta ^^^^^^^^^ - +- Bug in constructing :class:`Timedelta` from ``np.timedelta64`` objects with non-nanosecond units that are out of bounds for ``timedelta64[ns]`` (:issue:`38965`) - - diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index b2524c6bc6c0d..026fa719d1cc1 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -42,6 +42,7 @@ cdef extern from "numpy/ndarraytypes.h": NPY_FR_ps NPY_FR_fs NPY_FR_as + NPY_FR_GENERIC cdef extern from "src/datetime/np_datetime.h": ctypedef struct pandas_timedeltastruct: diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index bc7def817c973..f3bf45f681b1f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -24,7 +24,7 @@ PyDateTime_IMPORT cimport pandas._libs.tslibs.util as util from pandas._libs.tslibs.base cimport ABCTimestamp -from pandas._libs.tslibs.conversion cimport cast_from_unit +from pandas._libs.tslibs.conversion cimport cast_from_unit, precision_from_unit from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, @@ -32,7 +32,10 @@ from pandas._libs.tslibs.nattype cimport ( checknull_with_nat, ) from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, cmp_scalar, + get_datetime64_unit, + get_timedelta64_value, pandas_timedeltastruct, td64_to_tdstruct, ) @@ -156,7 +159,7 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: if isinstance(delta, _Timedelta): delta = delta.value if is_timedelta64_object(delta): - return delta.astype("timedelta64[ns]").item() + return get_timedelta64_value(ensure_td64ns(delta)) if is_integer_object(delta): return delta if PyDelta_Check(delta): @@ -169,6 +172,72 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: raise TypeError(type(delta)) +cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): + if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # generic -> default to nanoseconds + return "ns" + elif unit == NPY_DATETIMEUNIT.NPY_FR_us: + return "us" + elif unit == NPY_DATETIMEUNIT.NPY_FR_ms: + return "ms" + elif unit == NPY_DATETIMEUNIT.NPY_FR_s: + return "s" + elif unit == NPY_DATETIMEUNIT.NPY_FR_m: + return "m" + elif unit == NPY_DATETIMEUNIT.NPY_FR_h: + return "h" + elif unit == NPY_DATETIMEUNIT.NPY_FR_D: + return "D" + elif unit == NPY_DATETIMEUNIT.NPY_FR_W: + return "W" + elif unit == NPY_DATETIMEUNIT.NPY_FR_M: + return "M" + elif unit == NPY_DATETIMEUNIT.NPY_FR_Y: + return "Y" + else: + raise NotImplementedError(unit) + + +@cython.overflowcheck(True) +cdef object ensure_td64ns(object ts): + """ + Overflow-safe implementation of td64.astype("m8[ns]") + + Parameters + ---------- + ts : np.timedelta64 + + Returns + ------- + np.timedelta64[ns] + """ + cdef: + NPY_DATETIMEUNIT td64_unit + int64_t td64_value, mult + str unitstr + + td64_unit = get_datetime64_unit(ts) + if ( + td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns + and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC + ): + unitstr = npy_unit_to_abbrev(td64_unit) + + td64_value = get_timedelta64_value(ts) + + mult = precision_from_unit(unitstr)[0] + try: + # NB: cython#1381 this cannot be *= + td64_value = td64_value * mult + except OverflowError as err: + from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta + raise OutOfBoundsTimedelta(ts) + + return np.timedelta64(td64_value, "ns") + + return ts + + cdef convert_to_timedelta64(object ts, str unit): """ Convert an incoming object to a timedelta64 if possible. @@ -184,37 +253,37 @@ cdef convert_to_timedelta64(object ts, str unit): Return an ns based int64 """ if checknull_with_nat(ts): - return np.timedelta64(NPY_NAT) + return np.timedelta64(NPY_NAT, "ns") elif isinstance(ts, _Timedelta): # already in the proper format - ts = np.timedelta64(ts.value) + ts = np.timedelta64(ts.value, "ns") elif is_datetime64_object(ts): # only accept a NaT here if ts.astype('int64') == NPY_NAT: return np.timedelta64(NPY_NAT) elif is_timedelta64_object(ts): - ts = ts.astype(f"m8[{unit.lower()}]") + ts = ensure_td64ns(ts) elif is_integer_object(ts): if ts == NPY_NAT: - return np.timedelta64(NPY_NAT) + return np.timedelta64(NPY_NAT, "ns") else: if unit in ['Y', 'M', 'W']: ts = np.timedelta64(ts, unit) else: ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) + ts = np.timedelta64(ts, "ns") elif is_float_object(ts): if unit in ['Y', 'M', 'W']: ts = np.timedelta64(int(ts), unit) else: ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) + ts = np.timedelta64(ts, "ns") elif isinstance(ts, str): if len(ts) > 0 and ts[0] == 'P': ts = parse_iso_format_string(ts) else: ts = parse_timedelta_string(ts) - ts = np.timedelta64(ts) + ts = np.timedelta64(ts, "ns") elif is_tick_object(ts): ts = np.timedelta64(ts.nanos, 'ns') @@ -1196,7 +1265,7 @@ class Timedelta(_Timedelta): elif is_timedelta64_object(value): if unit is not None: value = value.astype(f'timedelta64[{unit}]') - value = value.astype('timedelta64[ns]') + value = ensure_td64ns(value) elif is_tick_object(value): value = np.timedelta64(value.nanos, 'ns') elif is_integer_object(value) or is_float_object(value): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 87f6e73e09d66..8065f85548f8c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -28,6 +28,7 @@ from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, + OutOfBoundsTimedelta, Period, Timedelta, Timestamp, @@ -743,8 +744,12 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, val = val.value elif isinstance(val, (np.timedelta64, timedelta)): - val = Timedelta(val).value - dtype = np.dtype("m8[ns]") + try: + val = Timedelta(val).value + except (OutOfBoundsTimedelta, OverflowError): + dtype = np.dtype(object) + else: + dtype = np.dtype("m8[ns]") elif is_bool(val): dtype = np.dtype(np.bool_) @@ -1386,7 +1391,7 @@ def try_timedelta(v): try: td_values = to_timedelta(v) - except ValueError: + except (ValueError, OverflowError): return v.reshape(shape) else: return np.asarray(td_values).reshape(shape) @@ -1618,8 +1623,16 @@ def construct_2d_arraylike_from_scalar( value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool ) -> np.ndarray: + shape = (length, width) + if dtype.kind in ["m", "M"]: value = maybe_unbox_datetimelike(value, dtype) + elif dtype == object: + if isinstance(value, (np.timedelta64, np.datetime64)): + # calling np.array below would cast to pytimedelta/pydatetime + out = np.empty(shape, dtype=object) + out.fill(value) + return out # Attempt to coerce to a numpy array try: @@ -1632,7 +1645,6 @@ def construct_2d_arraylike_from_scalar( if arr.ndim != 0: raise ValueError("DataFrame constructor not properly called!") - shape = (length, width) return np.full(shape, arr) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f408a3ddde04e..889bd98d6d85a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -10,7 +10,7 @@ import pytest import pytz -from pandas.compat.numpy import _np_version_under1p19, _np_version_under1p20 +from pandas.compat.numpy import _np_version_under1p19 from pandas.core.dtypes.common import is_integer_dtype from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype @@ -2371,16 +2371,10 @@ def test_from_timedelta_scalar_preserves_nanos(self, constructor): def test_from_timestamp_scalar_preserves_nanos(self, constructor): ts = Timestamp.now() + Timedelta(1) - obj = Series(ts, index=range(1), dtype="M8[ns]") + obj = constructor(ts, dtype="M8[ns]") assert get1(obj) == ts - def test_from_timedelta64_scalar_object(self, constructor, request): - if getattr(constructor, "func", None) is DataFrame and _np_version_under1p20: - # getattr check means we only xfail when box is None - mark = pytest.mark.xfail( - reason="np.array(td64, dtype=object) converts to int" - ) - request.node.add_marker(mark) + def test_from_timedelta64_scalar_object(self, constructor): td = Timedelta(1) td64 = td.to_timedelta64() @@ -2407,8 +2401,20 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request): with pytest.raises(TypeError, match="Cannot cast"): constructor(scalar, dtype=dtype) - def test_from_out_of_bounds_datetime(self, constructor): + @pytest.mark.parametrize("cls", [datetime, np.datetime64]) + def test_from_out_of_bounds_datetime(self, constructor, cls): scalar = datetime(9999, 1, 1) + if cls is np.datetime64: + scalar = np.datetime64(scalar, "D") + result = constructor(scalar) + + assert type(get1(result)) is cls + + @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) + def test_from_out_of_bounds_timedelta(self, constructor, cls): + scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1) + if cls is np.timedelta64: + scalar = np.timedelta64(scalar, "D") result = constructor(scalar) - assert type(get1(result)) is datetime + assert type(get1(result)) is cls diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 06bdb8a6cf0a2..64d5a5e9b3fff 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._libs.tslibs import OutOfBoundsTimedelta + from pandas import Timedelta, offsets, to_timedelta @@ -197,6 +199,31 @@ def test_overflow_on_construction(): Timedelta(timedelta(days=13 * 19999)) +def test_construction_out_of_bounds_td64(): + # TODO: parametrize over units just above/below the implementation bounds + # once GH#38964 is resolved + + # Timedelta.max is just under 106752 days + td64 = np.timedelta64(106752, "D") + assert td64.astype("m8[ns]").view("i8") < 0 # i.e. naive astype will be wrong + + msg = "106752 days" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta(td64) + + # But just back in bounds and we are OK + assert Timedelta(td64 - 1) == td64 - 1 + + td64 *= -1 + assert td64.astype("m8[ns]").view("i8") > 0 # i.e. naive astype will be wrong + + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta(td64) + + # But just back in bounds and we are OK + assert Timedelta(td64 + 1) == td64 + 1 + + @pytest.mark.parametrize( "fmt,exp", [ diff --git a/pandas/util/_exceptions.py b/pandas/util/_exceptions.py index 0723a37b1ba82..54bfcbddfc3dd 100644 --- a/pandas/util/_exceptions.py +++ b/pandas/util/_exceptions.py @@ -10,7 +10,7 @@ def rewrite_exception(old_name: str, new_name: str): try: yield except Exception as err: - msg = err.args[0] + msg = str(err.args[0]) msg = msg.replace(old_name, new_name) args: Tuple[str, ...] = (msg,) if len(err.args) > 1: