From 91c5771dde83873cba3f473bcc8168c78eb76cd9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 18:27:11 -0700 Subject: [PATCH 1/7] implement sum for TDA/TDI --- pandas/core/arrays/timedeltas.py | 11 ++++++++++ pandas/core/indexes/timedeltas.py | 2 ++ pandas/tests/arrays/test_timedeltas.py | 30 ++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 3609c68a26c0f..b3271e51c9bbe 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -14,6 +14,7 @@ precision_from_unit, ) import pandas.compat as compat +from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( @@ -384,6 +385,16 @@ def astype(self, dtype, copy=True): return self return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) + def sum(self, axis=None, skipna=True, *args, **kwargs): + nv.validate_min(args, kwargs) + nv.validate_minmax_axis(axis) + if not len(self): + return NaT + if skipna: + if self._hasnans: + return self.dropna().sum(axis=axis, *args, **kwargs) + return Timedelta(self._data.sum()) + # ---------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b03d60c7b5b37..24127ccfd48e2 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -30,6 +30,7 @@ from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, DatetimelikeDelegateMixin, + ea_passthrough, ) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name @@ -173,6 +174,7 @@ def _join_i8_wrapper(joinf, **kwargs): _datetimelike_ops = TimedeltaArray._datetimelike_ops _datetimelike_methods = TimedeltaArray._datetimelike_methods _other_ops = TimedeltaArray._other_ops + sum = ea_passthrough(TimedeltaArray.sum) # ------------------------------------------------------------------- # Constructors diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 540c3343b2a1b..dd26efb9ef6b6 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -168,3 +168,33 @@ def test_min_max_empty(self, skipna): result = arr.max(skipna=skipna) assert result is pd.NaT + + def test_sum(self): + tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) + arr = tdi._data + + result = arr.sum(skipna=True) + expected = pd.Timedelta(hours=17) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.sum(skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = arr.sum(skipna=False) + assert result is pd.NaT + + result = tdi.sum(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("skipna", [True, False]) + def test_sum_empty(self, skipna): + tdi = pd.TimedeltaIndex([]) + arr = tdi._data + + result = tdi.sum(skipna=skipna) + assert result is pd.NaT + + result = arr.sum(skipna=skipna) + assert result is pd.NaT From 7f72b77c15352264866983966071ed384d2ae7cb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 18:44:25 -0700 Subject: [PATCH 2/7] implement+test TDA/TDI std --- pandas/core/arrays/timedeltas.py | 40 +++++++++++++++++++++----- pandas/core/indexes/timedeltas.py | 1 + pandas/tests/arrays/test_timedeltas.py | 39 ++++++++++++++++--------- 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index b3271e51c9bbe..cc6929163e804 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -42,6 +42,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr import pandas.core.common as com from pandas.core.ops.invalid import invalid_comparison @@ -385,15 +386,40 @@ def astype(self, dtype, copy=True): return self return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) - def sum(self, axis=None, skipna=True, *args, **kwargs): - nv.validate_min(args, kwargs) - nv.validate_minmax_axis(axis) + def sum( + self, + axis=None, + dtype=None, + out=None, + keepdims=False, + initial=None, + skipna=True, + min_count=0, + ): + nv.validate_sum( + (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial) + ) if not len(self): return NaT - if skipna: - if self._hasnans: - return self.dropna().sum(axis=axis, *args, **kwargs) - return Timedelta(self._data.sum()) + if not skipna and self._hasnans: + return NaT + + result = nanops.nansum( + self._data, axis=axis, skipna=skipna, min_count=min_count + ) + return Timedelta(result) + + def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True): + nv.validate_stat_ddof_func( + (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std" + ) + if not len(self): + return NaT + if not skipna and self._hasnans: + return NaT + + result = nanops.nanstd(self._data, axis=axis, skipna=skipna, ddof=ddof) + return Timedelta(result) # ---------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 24127ccfd48e2..ef9a96cd0f5e6 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -175,6 +175,7 @@ def _join_i8_wrapper(joinf, **kwargs): _datetimelike_methods = TimedeltaArray._datetimelike_methods _other_ops = TimedeltaArray._other_ops sum = ea_passthrough(TimedeltaArray.sum) + std = ea_passthrough(TimedeltaArray.std) # ------------------------------------------------------------------- # Constructors diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index dd26efb9ef6b6..5f88d80975526 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -143,6 +143,18 @@ def test_setitem_objects(self, obj): class TestReductions: + @pytest.mark.parametrize("name", ["sum", "std", "min", "max"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_reductions_empty(self, name, skipna): + tdi = pd.TimedeltaIndex([]) + arr = tdi._data + + result = getattr(tdi, name)(skipna=skipna) + assert result is pd.NaT + + result = getattr(arr, name)(skipna=skipna) + assert result is pd.NaT + def test_min_max(self): arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"]) @@ -160,15 +172,6 @@ def test_min_max(self): result = arr.max(skipna=False) assert result is pd.NaT - @pytest.mark.parametrize("skipna", [True, False]) - def test_min_max_empty(self, skipna): - arr = TimedeltaArray._from_sequence([]) - result = arr.min(skipna=skipna) - assert result is pd.NaT - - result = arr.max(skipna=skipna) - assert result is pd.NaT - def test_sum(self): tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) arr = tdi._data @@ -188,13 +191,21 @@ def test_sum(self): result = tdi.sum(skipna=False) assert result is pd.NaT - @pytest.mark.parametrize("skipna", [True, False]) - def test_sum_empty(self, skipna): - tdi = pd.TimedeltaIndex([]) + def test_std(self): + tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) arr = tdi._data - result = tdi.sum(skipna=skipna) + result = arr.std(skipna=True) + expected = pd.Timedelta(hours=2) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.std(skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = arr.std(skipna=False) assert result is pd.NaT - result = arr.sum(skipna=skipna) + result = tdi.std(skipna=False) assert result is pd.NaT From 2978fa5c2405913f261ea8156c87b7315370070d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 20:01:52 -0700 Subject: [PATCH 3/7] implement+test TDA/TDI median --- pandas/core/arrays/timedeltas.py | 8 ++++++++ pandas/core/indexes/timedeltas.py | 1 + pandas/tests/arrays/test_timedeltas.py | 21 ++++++++++++++++++++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index cc6929163e804..a74b0deab5e0e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -421,6 +421,14 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=Tr result = nanops.nanstd(self._data, axis=axis, skipna=skipna, ddof=ddof) return Timedelta(result) + def median( + self, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True + ): + nv.validate_median( + (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims) + ) + return nanops.nanmedian(self._data, axis=axis, skipna=skipna) + # ---------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ef9a96cd0f5e6..1d999a7b43e89 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -176,6 +176,7 @@ def _join_i8_wrapper(joinf, **kwargs): _other_ops = TimedeltaArray._other_ops sum = ea_passthrough(TimedeltaArray.sum) std = ea_passthrough(TimedeltaArray.std) + median = ea_passthrough(TimedeltaArray.median) # ------------------------------------------------------------------- # Constructors diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 5f88d80975526..bff53bf073893 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -143,7 +143,7 @@ def test_setitem_objects(self, obj): class TestReductions: - @pytest.mark.parametrize("name", ["sum", "std", "min", "max"]) + @pytest.mark.parametrize("name", ["sum", "std", "min", "max", "median"]) @pytest.mark.parametrize("skipna", [True, False]) def test_reductions_empty(self, name, skipna): tdi = pd.TimedeltaIndex([]) @@ -209,3 +209,22 @@ def test_std(self): result = tdi.std(skipna=False) assert result is pd.NaT + + def test_median(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi._data + + result = arr.median(skipna=True) + expected = pd.Timedelta(hours=2) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.median(skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = arr.std(skipna=False) + assert result is pd.NaT + + result = tdi.std(skipna=False) + assert result is pd.NaT From ac16d03fa40e4e3443454dba71288197648097ea Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 20:21:00 -0700 Subject: [PATCH 4/7] test for np.sum --- pandas/tests/arrays/test_timedeltas.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index bff53bf073893..c5eea31d73421 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -191,6 +191,20 @@ def test_sum(self): result = tdi.sum(skipna=False) assert result is pd.NaT + def test_npsum(self): + # GH#25335 np.sum should return a Timedelta, not timedelta64 + tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) + arr = tdi._data + + result = np.sum(tdi) + expected = pd.Timedelta(hours=17) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = np.sum(arr) + assert isinstance(result, pd.Timedelta) + assert result == expected + def test_std(self): tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) arr = tdi._data From 73479672df51857fd5d5f650e89bf63efe966017 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 27 Aug 2019 18:39:42 -0700 Subject: [PATCH 5/7] _data->array --- pandas/tests/arrays/test_timedeltas.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index c5eea31d73421..b9b717c908355 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -147,7 +147,7 @@ class TestReductions: @pytest.mark.parametrize("skipna", [True, False]) def test_reductions_empty(self, name, skipna): tdi = pd.TimedeltaIndex([]) - arr = tdi._data + arr = tdi.array result = getattr(tdi, name)(skipna=skipna) assert result is pd.NaT @@ -174,7 +174,7 @@ def test_min_max(self): def test_sum(self): tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) - arr = tdi._data + arr = tdi.array result = arr.sum(skipna=True) expected = pd.Timedelta(hours=17) @@ -194,7 +194,7 @@ def test_sum(self): def test_npsum(self): # GH#25335 np.sum should return a Timedelta, not timedelta64 tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) - arr = tdi._data + arr = tdi.array result = np.sum(tdi) expected = pd.Timedelta(hours=17) @@ -207,7 +207,7 @@ def test_npsum(self): def test_std(self): tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) - arr = tdi._data + arr = tdi.array result = arr.std(skipna=True) expected = pd.Timedelta(hours=2) @@ -226,7 +226,7 @@ def test_std(self): def test_median(self): tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) - arr = tdi._data + arr = tdi.array result = arr.median(skipna=True) expected = pd.Timedelta(hours=2) From b4c14b377916b322d8711ef6eeb291c1f4d49adf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 27 Aug 2019 18:43:55 -0700 Subject: [PATCH 6/7] test with min_count --- pandas/tests/arrays/test_timedeltas.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index b9b717c908355..42e7bee97e671 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -191,6 +191,20 @@ def test_sum(self): result = tdi.sum(skipna=False) assert result is pd.NaT + result = arr.sum(min_count=9) + assert result is pd.NaT + + result = tdi.sum(min_count=9) + assert result is pd.NaT + + result = arr.sum(min_count=1) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.sum(min_count=1) + assert isinstance(result, pd.Timedelta) + assert result == expected + def test_npsum(self): # GH#25335 np.sum should return a Timedelta, not timedelta64 tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) From 4c7b4d29981c0e1cf5c842a32da33b5212964642 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Sep 2019 15:55:09 -0700 Subject: [PATCH 7/7] add types --- pandas/core/arrays/timedeltas.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a74b0deab5e0e..6c9462ff4fa4d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -391,10 +391,10 @@ def sum( axis=None, dtype=None, out=None, - keepdims=False, + keepdims: bool = False, initial=None, - skipna=True, - min_count=0, + skipna: bool = True, + min_count: int = 0, ): nv.validate_sum( (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial) @@ -409,7 +409,15 @@ def sum( ) return Timedelta(result) - def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True): + def std( + self, + axis=None, + dtype=None, + out=None, + ddof: int = 1, + keepdims: bool = False, + skipna: bool = True, + ): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std" ) @@ -422,7 +430,12 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=Tr return Timedelta(result) def median( - self, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True + self, + axis=None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_median( (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims)