From 22359ed676438e88374a298dacf780a87079ef63 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 31 Jan 2023 10:17:49 -0800 Subject: [PATCH 1/3] API: integer values and non-nano dtype --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arrays/datetimes.py | 12 ++++++++++-- pandas/tests/series/test_constructors.py | 10 ++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4039276caa0af..ebf1e04f28d6f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -595,6 +595,7 @@ Other API changes - Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) - Passing ``datetime64`` values with resolution other than nanosecond to :func:`to_datetime` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`50369`) +- Passing integer values and a non-nanosecond datetime64 dtype (e.g. "datetime64[s]") :class:`DataFrame`, :class:`Series`, or :class:`Index` will treat the values as multiples of the dtype's unit, matching the behavior of e.g. ``Series(np.array(values, dtype="M8[s]"))`` (:issue:`??`) - Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49737`) - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`) - Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d570a8822649a..7ebbe64a66d59 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -335,6 +335,7 @@ def _from_sequence_not_strict( dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous, + out_unit=unit, ) # We have to call this again after possibly inferring a tz above _validate_tz_from_dtype(dtype, tz, explicit_tz_none) @@ -1966,6 +1967,7 @@ def _sequence_to_dt64ns( dayfirst: bool = False, yearfirst: bool = False, ambiguous: TimeAmbiguous = "raise", + out_unit: str | None = None, ): """ Parameters @@ -1977,6 +1979,8 @@ def _sequence_to_dt64ns( yearfirst : bool, default False ambiguous : str, bool, or arraylike, default 'raise' See pandas._libs.tslibs.tzconversion.tz_localize_to_utc. + out_unit : str or None, default None + Desired output resolution. Returns ------- @@ -2004,6 +2008,10 @@ def _sequence_to_dt64ns( data, copy = maybe_convert_dtype(data, copy, tz=tz) data_dtype = getattr(data, "dtype", None) + out_dtype = DT64NS_DTYPE + if out_unit is not None: + out_dtype = np.dtype(f"M8[{out_unit}]") + if ( is_object_dtype(data_dtype) or is_string_dtype(data_dtype) @@ -2096,7 +2104,7 @@ def _sequence_to_dt64ns( # assume this data are epoch timestamps if data.dtype != INT64_DTYPE: data = data.astype(np.int64, copy=False) - result = data.view(DT64NS_DTYPE) + result = data.view(out_dtype) if copy: result = result.copy() @@ -2209,7 +2217,7 @@ def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes. # Note: data.astype(np.int64) fails ARM tests, see # https://github.com/pandas-dev/pandas/issues/49468. - data = data.astype("M8[ns]").view("i8") + data = data.astype("i8") copy = False elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 7f65bce873126..bd6db2befdb0b 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -52,6 +52,16 @@ class TestSeriesConstructors: + def test_from_ints_with_non_nano_dt64_dtype(self, index_or_series): + values = np.arange(10) + + res = index_or_series(values, dtype="M8[s]") + expected = index_or_series(values.astype("M8[s]")) + tm.assert_equal(res, expected) + + res = index_or_series(list(values), dtype="M8[s]") + tm.assert_equal(res, expected) + def test_from_na_value_and_interval_of_datetime_dtype(self): # GH#41805 ser = Series([None], dtype="interval[datetime64[ns]]") From 2c28cf32c8a2c301dcf827650ae46fdb96c3a195 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Feb 2023 08:07:42 -0800 Subject: [PATCH 2/3] troubleshoot circleci --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e3aec22e4c57c..0766b1c6a5262 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2211,7 +2211,7 @@ def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes. # Note: data.astype(np.int64) fails ARM tests, see # https://github.com/pandas-dev/pandas/issues/49468. - data = data.astype("i8") + data = data.astype(DT64NS_DTYPE).view("i8") copy = False elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): From 970638dfc92c6f1c97c359af067ba5e0057ffd3e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Feb 2023 12:51:29 -0800 Subject: [PATCH 3/3] Update doc/source/whatsnew/v2.0.0.rst --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7683808341792..2341ea7b6fbad 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -628,7 +628,7 @@ Other API changes - Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) - Passing ``datetime64`` values with resolution other than nanosecond to :func:`to_datetime` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`50369`) -- Passing integer values and a non-nanosecond datetime64 dtype (e.g. "datetime64[s]") :class:`DataFrame`, :class:`Series`, or :class:`Index` will treat the values as multiples of the dtype's unit, matching the behavior of e.g. ``Series(np.array(values, dtype="M8[s]"))`` (:issue:`??`) +- Passing integer values and a non-nanosecond datetime64 dtype (e.g. "datetime64[s]") :class:`DataFrame`, :class:`Series`, or :class:`Index` will treat the values as multiples of the dtype's unit, matching the behavior of e.g. ``Series(np.array(values, dtype="M8[s]"))`` (:issue:`51092`) - Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49737`) - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`) - Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`)