From e1e42962b0bad2998e41a7233f691d8203780708 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 1 Oct 2020 14:04:48 +0200 Subject: [PATCH 1/8] Fix timedelta problem when input is more precise than nanoseconds --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 6 ++++-- pandas/tests/tools/test_to_timedelta.py | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ddee06aeab779..d4a23bd2b1874 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -303,7 +303,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64``, ``timedelta64`` or ``Period`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`, :issue:`36254`) - Inconsistency in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` setitem casting arrays of strings to datetimelike scalars but not scalar strings (:issue:`36261`) - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`) - +- Bug in :class:`Timedelta` incorrectly deleted all decimals when input had more decimals than nanoseconds (:issue:`36738`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index ee32ed53a908b..f749aba814b9d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -404,9 +404,11 @@ cdef inline int64_t parse_timedelta_string(str ts) except? 
-1: m = 10**(3 -len(frac)) * 1000 * 1000 elif len(frac) > 3 and len(frac) <= 6: m = 10**(6 -len(frac)) * 1000 - else: + elif len(frac) > 6 and len(frac) <= 9: m = 10**(9 -len(frac)) - + else: + m = 1 + frac = frac[:9] r = int(''.join(frac)) * m result += timedelta_as_neg(r, neg) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index f68d83f7f4d58..09a80c5df01f7 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -179,3 +179,9 @@ def test_to_timedelta_nullable_int64_dtype(self): result = to_timedelta(Series([1, None], dtype="Int64"), unit="days") tm.assert_series_equal(result, expected) + + def test_to_timedelta_precision_over_nanos(self): + # GH: 36738 + result = to_timedelta("8:53:08.71800000001") + expected = pd.Timedelta("8:53:08.718") + assert result == expected From 78a99381befba7a6b55e4a0391d67a0a07d9c309 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 1 Oct 2020 14:07:53 +0200 Subject: [PATCH 2/8] Modify whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d4a23bd2b1874..077a795c90f53 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -303,7 +303,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64``, ``timedelta64`` or ``Period`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`, :issue:`36254`) - Inconsistency in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` setitem casting arrays of strings to datetimelike scalars but not scalar strings (:issue:`36261`) - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`) -- Bug in :class:`Timedelta` incorrectly deleted all decimals 
when input had more decimals than nanoseconds (:issue:`36738`) +- Bug in :class:`Timedelta` incorrectly deleted all decimals when input had a higher precision than nanoseconds (:issue:`36738`) Timedelta ^^^^^^^^^ From 4d6c9dc6930ebd5d8118e8bd3b98242be2cb94a8 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Oct 2020 23:40:36 +0200 Subject: [PATCH 3/8] Add docstring and test --- pandas/_libs/tslibs/timedeltas.pyx | 3 +++ pandas/core/tools/timedeltas.py | 6 ++++++ pandas/tests/tools/test_to_timedelta.py | 5 +++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f749aba814b9d..8752d38512146 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1134,6 +1134,9 @@ class Timedelta(_Timedelta): Notes ----- The ``.value`` attribute is always in ns. + + If the precision is higher than nanoseconds, the precision of the duration is + truncated to nanoseconds. """ def __new__(cls, object value=_no_input, unit=None, **kwargs): diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 372eac29bad9e..78fbf4caa26a0 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -58,6 +58,12 @@ def to_timedelta(arg, unit=None, errors="raise"): timedelta64 or numpy.array of timedelta64 Output type returned if parsing succeeded. + Notes + ----- + + If the precision is higher than nanoseconds, the precision of the duration is + truncated to nanoseconds. + See Also -------- DataFrame.astype : Cast argument to a specified dtype. 
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 09a80c5df01f7..4e6df255d8fe4 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -180,8 +180,9 @@ def test_to_timedelta_nullable_int64_dtype(self): tm.assert_series_equal(result, expected) - def test_to_timedelta_precision_over_nanos(self): + @pytest.mark.parametrize("func", ["Timedelta", "to_timedelta"]) + def test_to_timedelta_precision_over_nanos(self, func): # GH: 36738 - result = to_timedelta("8:53:08.71800000001") expected = pd.Timedelta("8:53:08.718") + result = getattr(pd, func)("8:53:08.71800000001") assert result == expected From f4ff3e019548ad7148d5d24b58bf66e68e85ec7a Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 12 Oct 2020 00:12:52 +0200 Subject: [PATCH 4/8] Fix docstring --- pandas/core/tools/timedeltas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 78fbf4caa26a0..10d93114a92f9 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -58,18 +58,18 @@ def to_timedelta(arg, unit=None, errors="raise"): timedelta64 or numpy.array of timedelta64 Output type returned if parsing succeeded. - Notes - ----- - - If the precision is higher than nanoseconds, the precision of the duration is - truncated to nanoseconds. - See Also -------- DataFrame.astype : Cast argument to a specified dtype. to_datetime : Convert argument to datetime. convert_dtypes : Convert dtypes. + Notes + ----- + + If the precision is higher than nanoseconds, the precision of the duration is + truncated to nanoseconds. 
+ Examples -------- Parsing a single string to a Timedelta: From f79106d6e11e9ccd1fbcf9be1a87ba99d69ea847 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 12 Oct 2020 00:29:51 +0200 Subject: [PATCH 5/8] Add additional tests --- pandas/tests/tools/test_to_timedelta.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 4e6df255d8fe4..0f90f0eeb6e35 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -180,9 +180,17 @@ def test_to_timedelta_nullable_int64_dtype(self): tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + ("input", "expected"), + [ + ("8:53:08.71800000001", "8:53:08.718"), + ("8:53:08.718001", "8:53:08.718001"), + ("8:53:08.7180000001", "8:53:08.7180000001"), + ], + ) @pytest.mark.parametrize("func", ["Timedelta", "to_timedelta"]) - def test_to_timedelta_precision_over_nanos(self, func): + def test_to_timedelta_precision_over_nanos(self, input, expected, func): # GH: 36738 - expected = pd.Timedelta("8:53:08.718") - result = getattr(pd, func)("8:53:08.71800000001") + expected = pd.Timedelta(expected) + result = getattr(pd, func)(input) assert result == expected From 83111d92b0e8bb38c99e6996f30162467ee159dd Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 13:01:31 +0100 Subject: [PATCH 6/8] Address review comments --- pandas/core/tools/timedeltas.py | 1 - pandas/tests/tools/test_to_timedelta.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 61a1e30b57f33..13805c0247ace 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -68,7 +68,6 @@ def to_timedelta(arg, unit=None, errors="raise"): Notes ----- - If the precision is higher than nanoseconds, the precision of the duration is truncated to nanoseconds. 
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 63a2e56fe5a55..31ede5eb75413 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -217,11 +217,12 @@ def test_to_timedelta_nullable_int64_dtype(self): ("8:53:08.71800000001", "8:53:08.718"), ("8:53:08.718001", "8:53:08.718001"), ("8:53:08.7180000001", "8:53:08.7180000001"), + ("-8:53:08.71800000001", "-8:53:08.718"), ], ) - @pytest.mark.parametrize("func", ["Timedelta", "to_timedelta"]) + @pytest.mark.parametrize("func", [pd.Timedelta, pd.to_timedelta]) def test_to_timedelta_precision_over_nanos(self, input, expected, func): # GH: 36738 expected = pd.Timedelta(expected) - result = getattr(pd, func)(input) + result = func(input) assert result == expected From 4ce27c8017133aa4ccea2aca2ceca9dfc9b2f263 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 13:04:24 +0100 Subject: [PATCH 7/8] Add comments and testcases --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/tools/timedeltas.py | 2 +- pandas/tests/tools/test_to_timedelta.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 1a18f01d80856..b1d7089437ea4 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -404,7 +404,7 @@ Datetimelike - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) - Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`) -- Bug in :class:`Timedelta` incorrectly deleted all decimals when input had a higher precision than nanoseconds (:issue:`36738`) +- Bug in :class:`Timedelta` incorrectly truncating to sub-second portion of a string input when it has precision higher than nanoseconds (:issue:`36738`) Timedelta ^^^^^^^^^ 
diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 13805c0247ace..6a9fd7a542a44 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -69,7 +69,7 @@ def to_timedelta(arg, unit=None, errors="raise"): Notes ----- If the precision is higher than nanoseconds, the precision of the duration is - truncated to nanoseconds. + truncated to nanoseconds for string inputs. Examples -------- diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 31ede5eb75413..0cbbbf8209f35 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -218,6 +218,7 @@ def test_to_timedelta_nullable_int64_dtype(self): ("8:53:08.718001", "8:53:08.718001"), ("8:53:08.7180000001", "8:53:08.7180000001"), ("-8:53:08.71800000001", "-8:53:08.718"), + ("8:53:08.7180000089", "8:53:08.718000008"), ], ) @pytest.mark.parametrize("func", [pd.Timedelta, pd.to_timedelta]) @@ -225,4 +226,5 @@ def test_to_timedelta_precision_over_nanos(self, input, expected, func): # GH: 36738 expected = pd.Timedelta(expected) result = func(input) + print(result) assert result == expected From 8b00890ac786ffdb09663d1da2f9a812a012336c Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 19:55:15 +0100 Subject: [PATCH 8/8] Move whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/tools/test_to_timedelta.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b1d7089437ea4..fea647b919e30 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -404,13 +404,13 @@ Datetimelike - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) - Bug in :func:`to_datetime` with a read-only array incorrectly 
raising (:issue:`34857`) -- Bug in :class:`Timedelta` incorrectly truncating to sub-second portion of a string input when it has precision higher than nanoseconds (:issue:`36738`) Timedelta ^^^^^^^^^ - Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`) - Bug in parsing of ISO 8601 durations in :class:`Timedelta`, :meth:`pd.to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`) - Bug in :func:`to_timedelta` with a read-only array incorrectly raising (:issue:`34857`) +- Bug in :class:`Timedelta` incorrectly truncating to sub-second portion of a string input when it has precision higher than nanoseconds (:issue:`36738`) Timezones ^^^^^^^^^ diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 0cbbbf8209f35..585ad4a7fab51 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -226,5 +226,4 @@ def test_to_timedelta_precision_over_nanos(self, input, expected, func): # GH: 36738 expected = pd.Timedelta(expected) result = func(input) - print(result) assert result == expected