diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 3e81a923a114c..70a33a6915516 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -811,6 +811,7 @@ Bug Fixes - Bug in ``read_csv`` when using the ``nrows`` or ``chunksize`` parameters if file contains only a header line (:issue:`9535`) - Bug in serialization of ``category`` types in HDF5 in presence of alternate encodings. (:issue:`10366`) - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) +- Bug in ``pd.DataFrame.diff`` when DataFrame is not consolidated (:issue:`10907`) - Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead the original dtype (:issue:`9431`) - Bug in ``DatetimeIndex.take`` and ``TimedeltaIndex.take`` may not raise ``IndexError`` against invalid index (:issue:`10295`) - Bug in ``Series([np.nan]).astype('M8[ms]')``, which now returns ``Series([pd.NaT])`` (:issue:`10747`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1d6269ae904d2..15069bf23672b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2414,7 +2414,7 @@ def _verify_integrity(self): 'tot_items: {1}'.format(len(self.items), tot_items)) - def apply(self, f, axes=None, filter=None, do_integrity_check=False, **kwargs): + def apply(self, f, axes=None, filter=None, do_integrity_check=False, consolidate=True, **kwargs): """ iterate over the blocks, collect and create a new block manager @@ -2425,6 +2425,7 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, **kwargs): filter : list, if supplied, only call the block if the filter is in the block do_integrity_check : boolean, default False. Do the block manager integrity check + consolidate: boolean, default True. Join together blocks having same dtype Returns ------- @@ -2443,6 +2444,9 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, **kwargs): else: kwargs['filter'] = filter_locs + if consolidate: + self._consolidate_inplace() + if f == 'where': align_copy = True if kwargs.get('align', True): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 58c6d15f8ada5..57a43592b3866 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10771,6 +10771,14 @@ def test_diff(self): assert_series_equal(the_diff['A'], tf['A'] - tf['A'].shift(1)) + # issue 10907 + df = pd.DataFrame({'y': pd.Series([2]), 'z': pd.Series([3])}) + df.insert(0, 'x', 1) + result = df.diff(axis=1) + expected = pd.DataFrame({'x':np.nan, 'y':pd.Series(1), 'z':pd.Series(1)}).astype('float64') + self.assert_frame_equal(result, expected) + + def test_diff_timedelta(self): # GH 4533 df = DataFrame(dict(time=[Timestamp('20130101 9:01'),