From f1a93d1e03a3f6b47cecbc3ff0f3fbdca4c15545 Mon Sep 17 00:00:00 2001 From: Hassan Kibirige Date: Tue, 29 Mar 2016 01:36:30 -0500 Subject: [PATCH 1/2] BUG: pandas.Timedelta min and max limits *Problem* Pandas Timedelta derives from `datetime.timedelta` and increases the resolution of the timedeltas to nanoseconds. As such Pandas.Timedelta has a smaller range of values. *Solution* This change modifies the advertised `min` and `max` timedeltas. --- doc/source/whatsnew/v0.18.1.txt | 1 + pandas/tseries/tests/test_timedeltas.py | 29 +++++++++++++++++++++++++ pandas/tslib.pyx | 5 +++++ 3 files changed, 35 insertions(+) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index e6ea9217347ea..afda5696923f0 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -129,6 +129,7 @@ Bug Fixes - Bug in ``Timestamp.__repr__`` that caused ``pprint`` to fail in nested structures (:issue:`12622`) +- Bug in ``Timedelta.min`` and ``Timedelta.max``, the properties now report the true minimum/maximum timedeltas as recognized by Pandas. 
(:issue:`12727`) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 4bdd0ed462852..434c44e402461 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1083,6 +1083,35 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, 'ns') self.assertNotEqual(hash(ns_td), hash(ns_td.to_pytimedelta())) + def test_implementation_limits(self): + min_td = Timedelta(Timedelta.min) + max_td = Timedelta(Timedelta.max) + + # GH 12727 + # timedelta limits correspond to int64 boundaries + self.assertTrue(min_td.value == np.iinfo(np.int64).min + 1) + self.assertTrue(max_td.value == np.iinfo(np.int64).max) + + # Beyond lower limit, a NAT before the Overflow + self.assertIsInstance(min_td - Timedelta(1, 'ns'), + pd.tslib.NaTType) + + with tm.assertRaises(OverflowError): + min_td - Timedelta(2, 'ns') + + with tm.assertRaises(OverflowError): + max_td + Timedelta(1, 'ns') + + # Same tests using the internal nanosecond values + td = Timedelta(min_td.value - 1, 'ns') + self.assertIsInstance(td, pd.tslib.NaTType) + + with tm.assertRaises(OverflowError): + Timedelta(min_td.value - 2, 'ns') + + with tm.assertRaises(OverflowError): + Timedelta(max_td.value + 1, 'ns') + class TestTimedeltaIndex(tm.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index dc089785238d9..98e6f1d1c53f4 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -2722,6 +2722,11 @@ class Timedelta(_Timedelta): __pos__ = _op_unary_method(lambda x: x, '__pos__') __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + +# Resolution is in nanoseconds +Timedelta.min = Timedelta(np.iinfo(np.int64).min+1, 'ns') +Timedelta.max = Timedelta(np.iinfo(np.int64).max, 'ns') + cdef PyTypeObject* td_type = Timedelta cdef inline bint is_timedelta(object o): From 2b78e5a9001fc52a9d364261e0e64076e431b396 Mon Sep 17 00:00:00 2001 From: Hassan Kibirige Date: Tue, 29 Mar 2016 09:35:44 -0500 
Subject: [PATCH 2/2] DOC: Timedelta & timestamp limitations --- doc/source/gotchas.rst | 21 --------------------- doc/source/timedeltas.rst | 20 ++++++++++++++++++++ doc/source/timeseries.rst | 23 ++++++++++++++++++++++- doc/source/whatsnew/v0.18.1.txt | 2 +- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index fe7ab67b7f759..490b593b4c9c2 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -356,27 +356,6 @@ such as ``numpy.logical_and``. See the `this old issue `__ for a more detailed discussion. -.. _gotchas.timestamp-limits: - -Timestamp limitations ---------------------- - -Minimum and maximum timestamps -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Since pandas represents timestamps in nanosecond resolution, the timespan that -can be represented using a 64-bit integer is limited to approximately 584 years: - -.. ipython:: python - - begin = pd.Timestamp.min - begin - - end = pd.Timestamp.max - end - -See :ref:`here <timeseries.oob>` for ways to represent data outside these bound. - Parsing Dates from Text Files ----------------------------- diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 29a75f3423cfa..fa5ffd0831706 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -109,6 +109,26 @@ The ``unit`` keyword argument specifies the unit of the Timedelta: to_timedelta(np.arange(5), unit='s') to_timedelta(np.arange(5), unit='d') +.. _timedeltas.limitations: + +Timedelta limitations +~~~~~~~~~~~~~~~~~~~~~ + +Pandas represents ``Timedeltas`` in nanosecond resolution using +64-bit integers. As such, the 64-bit integer limits determine +the ``Timedelta`` limits. + +.. ipython:: python + min_int = np.iinfo(np.int64).min + max_int = np.iinfo(np.int64).max + + # Note: the smallest integer gives a NaT + Timedelta(min_int) + Timedelta(min_int+1) == Timedelta.min + Timedelta(max_int) == Timedelta.max + + # (min_int - 1) and (max_int + 1) result in OverflowErrors + .. 
_timedeltas.operations: Operations diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c912d7d8b9818..1f9c66ea62717 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -307,6 +307,27 @@ using various combinations of parameters like ``start``, ``end``, The start and end dates are strictly inclusive. So it will not generate any dates outside of those dates if specified. +.. _timeseries.timestamp-limits: + +Timestamp limitations +--------------------- + +Minimum and maximum timestamps +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since pandas represents timestamps in nanosecond resolution, the timespan that +can be represented using a 64-bit integer is limited to approximately 584 years: + +.. ipython:: python + + begin = pd.Timestamp.min + begin + + end = pd.Timestamp.max + end + +See :ref:`here <timeseries.oob>` for ways to represent data outside these bounds. + .. _timeseries.datetimeindex: DatetimeIndex @@ -1691,7 +1712,7 @@ the quarter end: Representing out-of-bounds spans -------------------------------- -If you have data that is outside of the ``Timestamp`` bounds, see :ref:`Timestamp limitations <gotchas.timestamp-limits>`, +If you have data that is outside of the ``Timestamp`` bounds, see :ref:`Timestamp limitations <timeseries.timestamp-limits>`, then you can use a ``PeriodIndex`` and/or ``Series`` of ``Periods`` to do computations. .. ipython:: python diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index afda5696923f0..8cff34ad7b748 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -129,7 +129,7 @@ Bug Fixes - Bug in ``Timestamp.__repr__`` that caused ``pprint`` to fail in nested structures (:issue:`12622`) -- Bug in ``Timedelta.min`` and ``Timedelta.max``, the properties now report the true minimum/maximum timedeltas as recognized by Pandas. (:issue:`12727`) +- Bug in ``Timedelta.min`` and ``Timedelta.max``, the properties now report the true minimum/maximum ``timedeltas`` as recognized by Pandas. See :ref:`documentation <timedeltas.limitations>`. 
(:issue:`12727`)