Skip to content

API: preserve reso in Timedelta(td64_obj) #48910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)

cdef dict attrname_to_abbrevs

Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def periods_per_day(reso: int) -> int: ...
def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...
def npy_unit_to_abbrev(reso: int) -> str: ...
def get_supported_reso(reso: int) -> int: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
Expand Down
13 changes: 13 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,19 @@ class NpyDatetimeUnit(Enum):
NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC


cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso):
    """
    Return the nearest supported resolution for ``reso``.

    Units between NPY_FR_s and NPY_FR_ns (inclusive) are returned unchanged.
    Anything below NPY_FR_s is mapped to NPY_FR_s, anything above NPY_FR_ns
    is mapped to NPY_FR_ns, and NPY_FR_GENERIC is mapped to NPY_FR_ns.
    """
    cdef:
        NPY_DATETIMEUNIT lower = NPY_DATETIMEUNIT.NPY_FR_s
        NPY_DATETIMEUNIT upper = NPY_DATETIMEUNIT.NPY_FR_ns

    if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        # TODO: or raise ValueError? Raising here gives unraisable errors,
        #  and declaring "except? -1" breaks at compile-time for unknown
        #  reasons.
        return upper

    if reso > upper:
        return upper
    if reso < lower:
        return lower
    # Already within the supported range.
    return reso


def is_supported_unit(NPY_DATETIMEUNIT reso):
return (
reso == NPY_DATETIMEUNIT.NPY_FR_ns
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ cdef class _Timedelta(timedelta):
cdef _ensure_components(_Timedelta self)
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=*)
cpdef _maybe_cast_to_matching_resos(self, _Timedelta other)
145 changes: 67 additions & 78 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ from pandas._libs.tslibs.conversion cimport (
cast_from_unit,
precision_from_unit,
)
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
from pandas._libs.tslibs.dtypes cimport (
get_supported_reso,
npy_unit_to_abbrev,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
Expand Down Expand Up @@ -939,6 +942,7 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
cdef:
_Timedelta td_base

assert value != NPY_NAT
# For millisecond and second resos, we cannot actually pass int(value) because
# many cases would fall outside of the pytimedelta implementation bounds.
# We pass 0 instead, and override seconds, microseconds, days.
Expand Down Expand Up @@ -1530,12 +1534,7 @@ cdef class _Timedelta(timedelta):
def _as_unit(self, str unit, bint round_ok=True):
dtype = np.dtype(f"m8[{unit}]")
reso = get_unit_from_dtype(dtype)
try:
return self._as_reso(reso, round_ok=round_ok)
except OverflowError as err:
raise OutOfBoundsTimedelta(
f"Cannot cast {self} to unit='{unit}' without overflow."
) from err
return self._as_reso(reso, round_ok=round_ok)

@cython.cdivision(False)
cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
Expand All @@ -1545,9 +1544,26 @@ cdef class _Timedelta(timedelta):
if reso == self._reso:
return self

value = convert_reso(self.value, self._reso, reso, round_ok=round_ok)
try:
value = convert_reso(self.value, self._reso, reso, round_ok=round_ok)
except OverflowError as err:
unit = npy_unit_to_abbrev(reso)
raise OutOfBoundsTimedelta(
f"Cannot cast {self} to unit='{unit}' without overflow."
) from err

return type(self)._from_value_and_reso(value, reso=reso)

cpdef _maybe_cast_to_matching_resos(self, _Timedelta other):
    """
    Bring ``self`` and ``other`` to a common resolution.

    When the two ``_reso`` values differ, the operand with the smaller
    ``_reso`` is converted via ``_as_reso`` to the larger one; ``_as_reso``
    raises OutOfBoundsTimedelta if the conversion overflows.

    Returns
    -------
    tuple
        ``(self, other)``, with at most one of them replaced by its
        converted counterpart.
    """
    if self._reso == other._reso:
        # Nothing to do; hand both back untouched.
        return self, other

    if self._reso < other._reso:
        return self._as_reso(other._reso), other

    return self, other._as_reso(self._reso)


# Python front end to C extension type _Timedelta
# This serves as the box for timedelta64
Expand Down Expand Up @@ -1685,10 +1701,27 @@ class Timedelta(_Timedelta):
elif PyDelta_Check(value):
value = convert_to_timedelta64(value, 'ns')
elif is_timedelta64_object(value):
if get_timedelta64_value(value) == NPY_NAT:
# Retain the resolution if possible, otherwise cast to the nearest
# supported resolution.
new_value = get_timedelta64_value(value)
if new_value == NPY_NAT:
# i.e. np.timedelta64("NaT")
return NaT
value = ensure_td64ns(value)

reso = get_datetime64_unit(value)
new_reso = get_supported_reso(reso)
if reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
try:
new_value = convert_reso(
get_timedelta64_value(value),
reso,
new_reso,
round_ok=True,
)
except (OverflowError, OutOfBoundsDatetime) as err:
raise OutOfBoundsTimedelta(value) from err
return cls._from_value_and_reso(new_value, reso=new_reso)

elif is_tick_object(value):
value = np.timedelta64(value.nanos, 'ns')
elif is_integer_object(value) or is_float_object(value):
Expand Down Expand Up @@ -1827,11 +1860,7 @@ class Timedelta(_Timedelta):
if other is NaT:
return np.nan
if other._reso != self._reso:
raise ValueError(
"division between Timedeltas with mismatched resolutions "
"are not supported. Explicitly cast to matching resolutions "
"before dividing."
)
self, other = self._maybe_cast_to_matching_resos(other)
return self.value / float(other.value)

elif is_integer_object(other) or is_float_object(other):
Expand All @@ -1858,11 +1887,7 @@ class Timedelta(_Timedelta):
if other is NaT:
return np.nan
if self._reso != other._reso:
raise ValueError(
"division between Timedeltas with mismatched resolutions "
"are not supported. Explicitly cast to matching resolutions "
"before dividing."
)
self, other = self._maybe_cast_to_matching_resos(other)
return float(other.value) / self.value

elif is_array(other):
Expand All @@ -1884,17 +1909,14 @@ class Timedelta(_Timedelta):
def __floordiv__(self, other):
# numpy does not implement floordiv for timedelta64 dtype, so we cannot
# just defer
orig = other
if _should_cast_to_timedelta(other):
# We interpret NaT as timedelta64("NaT")
other = Timedelta(other)
if other is NaT:
return np.nan
if self._reso != other._reso:
raise ValueError(
"floordivision between Timedeltas with mismatched resolutions "
"are not supported. Explicitly cast to matching resolutions "
"before dividing."
)
self, other = self._maybe_cast_to_matching_resos(other)
return self.value // other.value

elif is_integer_object(other) or is_float_object(other):
Expand All @@ -1910,9 +1932,16 @@ class Timedelta(_Timedelta):

if other.dtype.kind == 'm':
# also timedelta-like
if self._reso != NPY_FR_ns:
raise NotImplementedError
return _broadcast_floordiv_td64(self.value, other, _floordiv)
# TODO: could suppress
# RuntimeWarning: invalid value encountered in floor_divide
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be great (especially if it comes from a pandas routine) so we can turn on -W:::pandas in the future

result = self.asm8 // other
mask = other.view("i8") == NPY_NAT
if mask.any():
# We differ from numpy here
result = result.astype("f8")
result[mask] = np.nan
return result

elif other.dtype.kind in ['i', 'u', 'f']:
if other.ndim == 0:
return self // other.item()
Expand All @@ -1932,11 +1961,7 @@ class Timedelta(_Timedelta):
if other is NaT:
return np.nan
if self._reso != other._reso:
raise ValueError(
"floordivision between Timedeltas with mismatched resolutions "
"are not supported. Explicitly cast to matching resolutions "
"before dividing."
)
self, other = self._maybe_cast_to_matching_resos(other)
return other.value // self.value

elif is_array(other):
Expand All @@ -1947,9 +1972,15 @@ class Timedelta(_Timedelta):

if other.dtype.kind == 'm':
# also timedelta-like
if self._reso != NPY_FR_ns:
raise NotImplementedError
return _broadcast_floordiv_td64(self.value, other, _rfloordiv)
# TODO: could suppress
# RuntimeWarning: invalid value encountered in floor_divide
result = other // self.asm8
mask = other.view("i8") == NPY_NAT
if mask.any():
# We differ from numpy here
result = result.astype("f8")
result[mask] = np.nan
return result

# Includes integer array // Timedelta, disallowed in GH#19761
raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')
Expand Down Expand Up @@ -1999,45 +2030,3 @@ cdef bint _should_cast_to_timedelta(object obj):
return (
is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str)
)


# Floor-divide the scalar `value` by `right` (value // right); passed as the
# `operation` callback to _broadcast_floordiv_td64.
cdef _floordiv(int64_t value, right):
return value // right


cdef _rfloordiv(int64_t value, right):
# Reflected floor-division (right // value). Analogous to referencing
# operator.floordiv with swapped operands; there is no operator.rfloordiv.
return right // value


cdef _broadcast_floordiv_td64(
    int64_t value,
    ndarray other,
    object (*operation)(int64_t value, object right)
):
    """
    Apply a floor-division callback between a scalar and a timedelta64 array.

    Shared by Timedelta.__floordiv__ and Timedelta.__rfloordiv__ because
    np.timedelta64 does not implement these.

    Parameters
    ----------
    value : int64_t
        `self.value` from a Timedelta object.
    other : ndarray
        Assumed to have ``dtype.kind == 'm'`` and ``ndim != 0``.
    operation : function
        Either _floordiv or _rfloordiv.

    Returns
    -------
    ndarray
        The result of ``operation``; when ``other`` contains NaT it is cast
        to float64 with NaN at the NaT positions.
    """
    # Locate np.timedelta64('NaT') entries before converting anything.
    nat_positions = other.view('i8') == NPY_NAT

    # Work on the nanosecond integer representation of `other`.
    as_nanos = other.astype('m8[ns]', copy=False)
    out = operation(value, as_nanos.astype('i8'))

    if not nat_positions.any():
        return out

    # Integers cannot hold NaN, so switch to float before masking.
    out = out.astype('f8')
    out[nat_positions] = np.nan
    return out
10 changes: 8 additions & 2 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import (
get_unit_from_dtype,
is_supported_unit,
)
from pandas._typing import (
AxisInt,
Dtype,
Expand Down Expand Up @@ -439,10 +443,12 @@ def _cmp_method(self, other, op):
def _wrap_ndarray_result(self, result: np.ndarray):
# If we have timedelta64[ns] result, return a TimedeltaArray instead
# of a PandasArray
if result.dtype == "timedelta64[ns]":
if result.dtype.kind == "m" and is_supported_unit(
get_unit_from_dtype(result.dtype)
):
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray._simple_new(result)
return TimedeltaArray._simple_new(result, dtype=result.dtype)
return type(self)(result)

# ------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,10 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64:
if not isinstance(value, self._scalar_type) and value is not NaT:
raise ValueError("'value' should be a Timedelta.")
self._check_compatible_with(value, setitem=setitem)
if value is NaT:
return np.timedelta64(value.value, "ns")
else:
return value._as_unit(self._unit).asm8
return np.timedelta64(value.value, "ns")

def _scalar_from_string(self, value) -> Timedelta | NaTType:
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,13 @@ def array(

data = extract_array(data, extract_numpy=True)

if isinstance(data, ExtensionArray) and (
dtype is None or is_dtype_equal(dtype, data.dtype)
):
if copy:
return data.copy()
return data

# this returns None for not-found dtypes.
if isinstance(dtype, str):
dtype = registry.find(dtype) or dtype
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ def _calculate_deltas(
_times = np.asarray(
times.view(np.int64), dtype=np.float64 # type: ignore[union-attr]
)
_halflife = float(Timedelta(halflife).value)
# TODO: generalize to non-nano?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this currently assumes np.diff(_times) and _halflife are both ns, so I think as long as they are both brought to the higher of the two resolutions it should be fine.

_halflife = float(Timedelta(halflife)._as_unit("ns").value)
return np.diff(_times) / _halflife


Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array):
box = box_with_array
index = numeric_idx
expected = TimedeltaIndex([Timedelta(days=n) for n in range(len(index))])
if isinstance(scalar_td, np.timedelta64) and box not in [Index, Series]:
# TODO(2.0): once TDA.astype converts to m8, just do expected.astype
tda = expected._data
dtype = scalar_td.dtype
expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)

index = tm.box_expected(index, box)
expected = tm.box_expected(expected, box)
Expand Down Expand Up @@ -249,6 +254,14 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
index = numeric_idx[1:3]

expected = TimedeltaIndex(["3 Days", "36 Hours"])
if isinstance(three_days, np.timedelta64) and box not in [Index, Series]:
# TODO(2.0): just use expected.astype
tda = expected._data
dtype = three_days.dtype
if dtype < np.dtype("m8[s]"):
# i.e. resolution is lower -> use lowest supported resolution
dtype = np.dtype("m8[s]")
expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)

index = tm.box_expected(index, box)
expected = tm.box_expected(expected, box)
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/dtypes/cast/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,14 +463,23 @@ def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_r
[pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)],
ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"],
)
def test_maybe_promote_any_with_timedelta64(any_numpy_dtype_reduced, fill_value):
def test_maybe_promote_any_with_timedelta64(
any_numpy_dtype_reduced, fill_value, request
):
dtype = np.dtype(any_numpy_dtype_reduced)

# filling anything but timedelta with timedelta casts to object
if is_timedelta64_dtype(dtype):
expected_dtype = dtype
# for timedelta dtypes, scalar values get cast to pd.Timedelta.value
exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64()

if isinstance(fill_value, np.timedelta64) and fill_value.dtype != "m8[ns]":
mark = pytest.mark.xfail(
reason="maybe_promote not yet updated to handle non-nano "
"Timedelta scalar"
)
request.node.add_marker(mark)
else:
expected_dtype = np.dtype(object)
exp_val_for_scalar = fill_value
Expand Down
Loading