diff --git a/RELEASE.rst b/RELEASE.rst index ece8de259021f..1970b00c05add 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -96,6 +96,7 @@ pandas 0.11.1 - Properly convert np.datetime64 objects in a Series (GH3416_) - Raise a TypeError on invalid datetime/timedelta operations e.g. add datetimes, multiple timedelta x datetime + - Fix ``.diff`` on datelike and timedelta operations (GH3100_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -108,6 +109,7 @@ pandas 0.11.1 .. _GH3379: https://github.com/pydata/pandas/issues/3379 .. _GH3480: https://github.com/pydata/pandas/issues/3480 .. _GH2852: https://github.com/pydata/pandas/issues/2852 +.. _GH3100: https://github.com/pydata/pandas/issues/3100 .. _GH3454: https://github.com/pydata/pandas/issues/3454 .. _GH3457: https://github.com/pydata/pandas/issues/3457 .. _GH3491: https://github.com/pydata/pandas/issues/3491 diff --git a/pandas/core/common.py b/pandas/core/common.py index 893d912dcece8..4aefa73ae8ee8 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -617,9 +617,18 @@ def func(arr, indexer, out, fill_value=np.nan): def diff(arr, n, axis=0): + """ difference of n between self, + analogous to s-s.shift(n) """ + n = int(n) dtype = arr.dtype - if issubclass(dtype.type, np.integer): + na = np.nan + + if is_timedelta64_dtype(arr) or is_datetime64_dtype(arr): + dtype = 'timedelta64[ns]' + arr = arr.view('i8') + na = tslib.iNaT + elif issubclass(dtype.type, np.integer): dtype = np.float64 elif issubclass(dtype.type, np.bool_): dtype = np.object_ @@ -628,7 +637,7 @@ def diff(arr, n, axis=0): na_indexer = [slice(None)] * arr.ndim na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None) - out_arr[tuple(na_indexer)] = np.nan + out_arr[tuple(na_indexer)] = na if arr.ndim == 2 and arr.dtype.name in _diff_special: f = _diff_special[arr.dtype.name] @@ -642,7 +651,24 @@ def diff(arr, n, axis=0): lag_indexer[axis] = slice(None, -n) if n > 0 else 
slice(-n, None) lag_indexer = tuple(lag_indexer) - out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] + # need to make sure that we account for na for datelike/timedelta + # we don't actually want to subtract these i8 numbers + if dtype == 'timedelta64[ns]': + res = arr[res_indexer] + lag = arr[lag_indexer] + + mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na) + if mask.any(): + res = res.copy() + res[mask] = 0 + lag = lag.copy() + lag[mask] = 0 + + result = res-lag + result[mask] = na + out_arr[res_indexer] = result + else: + out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] return out_arr diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 981d74d8ba94b..6fbce9df753d8 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4039,6 +4039,17 @@ def test_diff(self): xp = self.ts - self.ts assert_series_equal(rs, xp) + # datetime diff (GH3100) + s = Series(date_range('20130102',periods=5)) + rs = s-s.shift(1) + xp = s.diff() + assert_series_equal(rs, xp) + + # timedelta diff + nrs = rs-rs.shift(1) + nxp = xp.diff() + assert_series_equal(nrs, nxp) + def test_pct_change(self): rs = self.ts.pct_change(fill_method=None) assert_series_equal(rs, self.ts / self.ts.shift(1) - 1)