diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 6eac7b4831f0f..9807639143ddb 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -15,6 +15,14 @@ def setup(self): self.int = pd.Int64Index(np.arange(N).repeat(5)) self.float = pd.Float64Index(np.random.randn(N).repeat(5)) + # Convenience naming. + self.checked_add = pd.core.nanops._checked_add_with_arr + + self.arr = np.arange(1000000) + self.arrpos = np.arange(1000000) + self.arrneg = np.arange(-1000000, 0) + self.arrmixed = np.array([1, -1]).repeat(500000) + def time_int_factorize(self): self.int.factorize() @@ -29,3 +37,21 @@ def time_int_duplicated(self): def time_float_duplicated(self): self.float.duplicated() + + def time_add_overflow_pos_scalar(self): + self.checked_add(self.arr, 1) + + def time_add_overflow_neg_scalar(self): + self.checked_add(self.arr, -1) + + def time_add_overflow_zero_scalar(self): + self.checked_add(self.arr, 0) + + def time_add_overflow_pos_arr(self): + self.checked_add(self.arr, self.arrpos) + + def time_add_overflow_neg_arr(self): + self.checked_add(self.arr, self.arrneg) + + def time_add_overflow_mixed_arr(self): + self.checked_add(self.arr, self.arrmixed) diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 9719fd87dfb2e..8470525dd01fa 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,5 +1,5 @@ from .pandas_vb_common import * -from pandas import to_timedelta +from pandas import to_timedelta, Timestamp class timedelta_convert_int(object): @@ -47,3 +47,14 @@ def time_timedelta_convert_coerce(self): def time_timedelta_convert_ignore(self): to_timedelta(self.arr, errors='ignore') + + +class timedelta_add_overflow(object): + goal_time = 0.2 + + def setup(self): + self.td = to_timedelta(np.arange(1000000)) + self.ts = Timestamp('2000') + + def test_add_td_ts(self): + self.td + self.ts diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 5180b9a092f6c..3605387d767ec 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -35,6 +35,7 @@ Bug Fixes - Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) +- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 564586eec5a8e..d7d68ad536be5 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -11,6 +11,7 @@ import pandas.hashtable as _hash from pandas import compat, lib, algos, tslib +from pandas.compat.numpy import _np_version_under1p10 from pandas.types.common import (_ensure_int64, _ensure_object, _ensure_float64, _get_dtype, is_float, is_scalar, @@ -829,9 +830,37 @@ def _checked_add_with_arr(arr, b): Raises ------ - OverflowError if any x + y exceeds the maximum int64 value. + OverflowError if any x + y exceeds the maximum or minimum int64 value. """ - if (np.iinfo(np.int64).max - b < arr).any(): - raise OverflowError("Python int too large to " - "convert to C long") + # For performance reasons, we broadcast 'b' to the new array 'b2' + # so that it has the same size as 'arr'. + if _np_version_under1p10: + if lib.isscalar(b): + b2 = np.empty(arr.shape) + b2.fill(b) + else: + b2 = b + else: + b2 = np.broadcast_to(b, arr.shape) + + # gh-14324: For each element in 'arr' and its corresponding element + # in 'b2', we check the sign of the element in 'b2'. If it is positive, + # we then check whether its sum with the element in 'arr' exceeds + # np.iinfo(np.int64).max. If so, we have an overflow error. If it + # it is negative, we then check whether its sum with the element in + # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow + # error as well. + mask1 = b2 > 0 + mask2 = b2 < 0 + + if not mask1.any(): + to_raise = (np.iinfo(np.int64).min - b2 > arr).any() + elif not mask2.any(): + to_raise = (np.iinfo(np.int64).max - b2 < arr).any() + else: + to_raise = ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]).any() or + (np.iinfo(np.int64).min - b2[mask2] > arr[mask2]).any()) + + if to_raise: + raise OverflowError("Overflow in int64 addition") return arr + b diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index f00fdd196abea..be634228b1b6e 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1004,13 +1004,20 @@ def prng(self): def test_int64_add_overflow(): # see gh-14068 - msg = "too (big|large) to convert" + msg = "Overflow in int64 addition" m = np.iinfo(np.int64).max + n = np.iinfo(np.int64).min with tm.assertRaisesRegexp(OverflowError, msg): nanops._checked_add_with_arr(np.array([m, m]), m) with tm.assertRaisesRegexp(OverflowError, msg): nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m])) + with tm.assertRaisesRegexp(OverflowError, msg): + nanops._checked_add_with_arr(np.array([n, n]), n) + with tm.assertRaisesRegexp(OverflowError, msg): + nanops._checked_add_with_arr(np.array([n, n]), np.array([n, n])) + with tm.assertRaisesRegexp(OverflowError, msg): + nanops._checked_add_with_arr(np.array([m, n]), np.array([n, n])) with tm.assertRaisesRegexp(OverflowError, msg): with tm.assert_produces_warning(RuntimeWarning): nanops._checked_add_with_arr(np.array([m, m]), diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 38e210d698035..f0d14014d6559 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1957,6 +1957,8 @@ def test_add_overflow(self): to_timedelta(106580, 'D') + Timestamp('2000') with tm.assertRaisesRegexp(OverflowError, msg): Timestamp('2000') + to_timedelta(106580, 'D') + + msg = "Overflow in int64 addition" with tm.assertRaisesRegexp(OverflowError, msg): to_timedelta([106580], 'D') + Timestamp('2000') with tm.assertRaisesRegexp(OverflowError, msg):