diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9d44c770f58ef..7c0ead128e10f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -476,6 +476,7 @@ Other API changes - Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) +- Passing ``datetime64`` values with resolution other than nanosecond to :func:`to_datetime` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`50369`) - Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49737`) - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`) - Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 492f45af09e80..fa560cd0853f6 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -106,6 +106,7 @@ cpdef cnp.ndarray astype_overflowsafe( cnp.dtype dtype, # ndarray[datetime64[anyunit]] bint copy=*, bint round_ok=*, + bint is_coerce=*, ) cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1 diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi index d80d26375412b..0cb0e3b0237d7 100644 --- a/pandas/_libs/tslibs/np_datetime.pyi +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -13,6 +13,7 @@ def astype_overflowsafe( dtype: np.dtype, copy: bool = ..., round_ok: bool = ..., + is_coerce: bool = ..., ) -> np.ndarray: ... def is_unitless(dtype: np.dtype) -> bool: ... def compare_mismatched_resolutions( diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index b1e4022527437..aa3411385595b 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -308,6 +308,7 @@ cpdef ndarray astype_overflowsafe( cnp.dtype dtype, bint copy=True, bint round_ok=True, + bint is_coerce=False, ): """ Convert an ndarray with datetime64[X] to datetime64[Y] @@ -385,7 +386,9 @@ cpdef ndarray astype_overflowsafe( try: check_dts_bounds(&dts, to_unit) except OutOfBoundsDatetime as err: - if is_td: + if is_coerce: + new_value = NPY_DATETIME_NAT + elif is_td: from_abbrev = np.datetime_data(values.dtype)[0] np_val = np.timedelta64(value, from_abbrev) msg = ( @@ -395,8 +398,8 @@ cpdef ndarray astype_overflowsafe( raise OutOfBoundsTimedelta(msg) from err else: raise - - new_value = npy_datetimestruct_to_datetime(to_unit, &dts) + else: + new_value = npy_datetimestruct_to_datetime(to_unit, &dts) # Analogous to: iresult[i] = new_value (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 243a7c547bbb5..267abdb8d0104 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -483,7 +483,7 @@ def _coerce_to_type(x): if is_datetime64tz_dtype(x.dtype): dtype = x.dtype elif is_datetime64_dtype(x.dtype): - x = to_datetime(x) + x = to_datetime(x).astype("datetime64[ns]", copy=False) dtype = np.dtype("datetime64[ns]") elif is_timedelta64_dtype(x.dtype): x = to_timedelta(x) @@ -527,7 +527,12 @@ def _convert_bin_to_numeric_type(bins, dtype): raise ValueError("bins must be of timedelta64 dtype") elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): if bins_dtype in ["datetime", "datetime64"]: - bins = to_datetime(bins).view(np.int64) + bins = to_datetime(bins) + if is_datetime64_dtype(bins): + # As of 2.0, to_datetime may give non-nano, so we need to convert + # here until the rest of this file recognizes non-nano + bins = bins.astype("datetime64[ns]", copy=False) + bins = bins.view(np.int64) else: raise ValueError("bins must be of datetime64 dtype") diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 27328809e23d8..eaa7339f3747a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -28,7 +28,9 @@ Timedelta, Timestamp, astype_overflowsafe, + get_unit_from_dtype, iNaT, + is_supported_unit, nat_strings, parsing, timezones as libtimezones, @@ -50,7 +52,6 @@ from pandas.core.dtypes.common import ( ensure_object, is_datetime64_dtype, - is_datetime64_ns_dtype, is_datetime64tz_dtype, is_float, is_integer, @@ -68,6 +69,7 @@ from pandas.arrays import ( DatetimeArray, IntegerArray, + PandasArray, ) from pandas.core import algorithms from pandas.core.algorithms import unique @@ -384,6 +386,8 @@ def _convert_listlike_datetimes( """ if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype="O") + elif isinstance(arg, PandasArray): + arg = np.array(arg) arg_dtype = getattr(arg, "dtype", None) # these are shortcutable @@ -395,7 +399,17 @@ def _convert_listlike_datetimes( arg = arg.tz_convert(None).tz_localize("utc") return arg - elif is_datetime64_ns_dtype(arg_dtype): + elif is_datetime64_dtype(arg_dtype): + arg_dtype = cast(np.dtype, arg_dtype) + if not is_supported_unit(get_unit_from_dtype(arg_dtype)): + # We go to closest supported reso, i.e. "s" + arg = astype_overflowsafe( + # TODO: looks like we incorrectly raise with errors=="ignore" + np.asarray(arg), + np.dtype("M8[s]"), + is_coerce=errors == "coerce", + ) + if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) elif utc: diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 5dbef7e5e1ecd..d8bad3cce9a36 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1027,23 +1027,28 @@ def test_to_datetime_dt64s_and_str(self, arg, format): @pytest.mark.parametrize( "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")] ) - def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): - msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" - with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(dt, errors="raise") + @pytest.mark.parametrize("errors", ["raise", "ignore", "coerce"]) + def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors): + # GH#50369 We cast to the nearest supported reso, i.e. "s" + ts = to_datetime(dt, errors=errors, cache=cache) + assert isinstance(ts, Timestamp) + assert ts.unit == "s" + assert ts.asm8 == dt - # TODO(2.0): The Timestamp and to_datetime behaviors should match; - # as of 2022-09-28, the Timestamp constructor has been updated - # to cast to M8[s] but to_datetime has not ts = Timestamp(dt) assert ts.unit == "s" assert ts.asm8 == dt + def test_to_datetime_dt64d_out_of_bounds(self, cache): + dt64 = np.datetime64(np.iinfo(np.int64).max, "D") + msg = "Out of bounds nanosecond timestamp" with pytest.raises(OutOfBoundsDatetime, match=msg): - Timestamp(np.datetime64(np.iinfo(np.int64).max, "D")) + Timestamp(dt64) + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime(dt64, errors="raise", cache=cache) - assert to_datetime(dt, errors="coerce", cache=cache) is NaT + assert to_datetime(dt64, errors="coerce", cache=cache) is NaT @pytest.mark.parametrize("unit", ["s", "D"]) def test_to_datetime_array_of_dt64s(self, cache, unit): @@ -2516,23 +2521,16 @@ def test_string_na_nat_conversion_with_name(self, cache): assert dresult.name == "foo" @pytest.mark.parametrize( - "dtype", - [ - "datetime64[h]", - "datetime64[m]", - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[ns]", - ], + "unit", + ["h", "m", "s", "ms", "us", "ns"], ) - def test_dti_constructor_numpy_timeunits(self, cache, dtype): + def test_dti_constructor_numpy_timeunits(self, cache, unit): # GH 9114 + dtype = np.dtype(f"M8[{unit}]") base = to_datetime(["2000-01-01T00:00", "2000-01-02T00:00", "NaT"], cache=cache) values = base.values.astype(dtype) - unit = dtype.split("[")[-1][:-1] if unit in ["h", "m"]: # we cast to closest supported unit unit = "s" @@ -2541,7 +2539,7 @@ def test_dti_constructor_numpy_timeunits(self, cache, dtype): assert expected.dtype == exp_dtype tm.assert_index_equal(DatetimeIndex(values), expected) - tm.assert_index_equal(to_datetime(values, cache=cache), base) + tm.assert_index_equal(to_datetime(values, cache=cache), expected) def test_dayfirst(self, cache): # GH 5917