Skip to content

Commit a025546

Browse files
committed
Merge pull request #3554 from jreback/GH3100
BUG: Fix .diff() on datelike and timedelta operations (GH3100_)
2 parents 23f6058 + 435a7a3 commit a025546

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ pandas 0.11.1
9696
- Properly convert np.datetime64 objects in a Series (GH3416_)
9797
- Raise a TypeError on invalid datetime/timedelta operations
9898
e.g. add datetimes, multiply timedelta x datetime
99+
- Fix ``.diff`` on datelike and timedelta operations (GH3100_)
99100

100101
.. _GH3164: https://github.com/pydata/pandas/issues/3164
101102
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -108,6 +109,7 @@ pandas 0.11.1
108109
.. _GH3379: https://github.com/pydata/pandas/issues/3379
109110
.. _GH3480: https://github.com/pydata/pandas/issues/3480
110111
.. _GH2852: https://github.com/pydata/pandas/issues/2852
112+
.. _GH3100: https://github.com/pydata/pandas/issues/3100
111113
.. _GH3454: https://github.com/pydata/pandas/issues/3454
112114
.. _GH3457: https://github.com/pydata/pandas/issues/3457
113115
.. _GH3491: https://github.com/pydata/pandas/issues/3491

pandas/core/common.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -617,9 +617,18 @@ def func(arr, indexer, out, fill_value=np.nan):
617617

618618

619619
def diff(arr, n, axis=0):
620+
""" difference of n between self,
621+
analogous to s-s.shift(n) """
622+
620623
n = int(n)
621624
dtype = arr.dtype
622-
if issubclass(dtype.type, np.integer):
625+
na = np.nan
626+
627+
if is_timedelta64_dtype(arr) or is_datetime64_dtype(arr):
628+
dtype = 'timedelta64[ns]'
629+
arr = arr.view('i8')
630+
na = tslib.iNaT
631+
elif issubclass(dtype.type, np.integer):
623632
dtype = np.float64
624633
elif issubclass(dtype.type, np.bool_):
625634
dtype = np.object_
@@ -628,7 +637,7 @@ def diff(arr, n, axis=0):
628637

629638
na_indexer = [slice(None)] * arr.ndim
630639
na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None)
631-
out_arr[tuple(na_indexer)] = np.nan
640+
out_arr[tuple(na_indexer)] = na
632641

633642
if arr.ndim == 2 and arr.dtype.name in _diff_special:
634643
f = _diff_special[arr.dtype.name]
@@ -642,7 +651,24 @@ def diff(arr, n, axis=0):
642651
lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
643652
lag_indexer = tuple(lag_indexer)
644653

645-
out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
654+
# need to make sure that we account for na for datelike/timedelta
655+
# we don't actually want to subtract these i8 numbers
656+
if dtype == 'timedelta64[ns]':
657+
res = arr[res_indexer]
658+
lag = arr[lag_indexer]
659+
660+
mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na)
661+
if mask.any():
662+
res = res.copy()
663+
res[mask] = 0
664+
lag = lag.copy()
665+
lag[mask] = 0
666+
667+
result = res-lag
668+
result[mask] = na
669+
out_arr[res_indexer] = result
670+
else:
671+
out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
646672

647673
return out_arr
648674

pandas/tests/test_series.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4039,6 +4039,17 @@ def test_diff(self):
40394039
xp = self.ts - self.ts
40404040
assert_series_equal(rs, xp)
40414041

4042+
# datetime diff (GH3100)
4043+
s = Series(date_range('20130102',periods=5))
4044+
rs = s-s.shift(1)
4045+
xp = s.diff()
4046+
assert_series_equal(rs, xp)
4047+
4048+
# timedelta diff
4049+
nrs = rs-rs.shift(1)
4050+
nxp = xp.diff()
4051+
assert_series_equal(nrs, nxp)
4052+
40424053
def test_pct_change(self):
40434054
rs = self.ts.pct_change(fill_method=None)
40444055
assert_series_equal(rs, self.ts / self.ts.shift(1) - 1)

0 commit comments

Comments
 (0)