diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 92eb1d54ae676..5e7d4947aecea 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -278,7 +278,7 @@ API changes - ``DataFrame.info()`` now ends its output with a newline character (:issue:`8114`) - add ``copy=True`` argument to ``pd.concat`` to enable pass thrue of complete blocks (:issue:`8252`) -- ``.fillna`` will now raise a ``NotImplementedError`` when passed a ``DataFrame`` (:issue:`8377`) + .. _whatsnew_0150.dt: @@ -785,7 +785,7 @@ Enhancements meta-engine that automatically uses whichever version of openpyxl is installed. (:issue:`7177`) - +- ``DataFrame.fillna`` can now accept a ``DataFrame`` as a fill value (:issue:`8377`) .. _whatsnew_0150.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 30f2d6bf093f2..9f9b543f0fa7d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2231,10 +2231,10 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap - value : scalar, dict, or Series - Value to use to fill holes (e.g. 0), alternately a dict/Series of + value : scalar, dict, Series, or DataFrame + Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of values specifying which value to use for each index (for a Series) or - column (for a DataFrame). (values not in the dict/Series will not be + column (for a DataFrame). (values not in the dict/Series/DataFrame will not be filled). This value cannot be a list. axis : {0, 1}, default 0 * 0: fill column-by-column @@ -2342,7 +2342,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, inplace=inplace, downcast=downcast) elif isinstance(value, DataFrame) and self.ndim == 2: - raise NotImplementedError("can't use fillna with a DataFrame, use .where instead") + new_data = self.where(self.notnull(), value) else: raise ValueError("invalid fill value with a %s" % type(value)) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 99b1fa09a8d51..84d8c4ac39461 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7630,6 +7630,29 @@ def test_fillna_dict_series(self): with assertRaisesRegexp(NotImplementedError, 'column by column'): df.fillna(df.max(1), axis=1) + def test_fillna_dataframe(self): + # GH 8377 + df = DataFrame({'a': [nan, 1, 2, nan, nan], + 'b': [1, 2, 3, nan, nan], + 'c': [nan, 1, 2, 3, 4]}, + index = list('VWXYZ')) + + # df2 may have different index and columns + df2 = DataFrame({'a': [nan, 10, 20, 30, 40], + 'b': [50, 60, 70, 80, 90], + 'foo': ['bar']*5}, + index = list('VWXuZ')) + + result = df.fillna(df2) + + # only those columns and indices which are shared get filled + expected = DataFrame({'a': [nan, 1, 2, nan, 40], + 'b': [1, 2, 3, nan, 90], + 'c': [nan, 1, 2, 3, 4]}, + index = list('VWXYZ')) + + assert_frame_equal(result, expected) + def test_fillna_columns(self): df = DataFrame(np.random.randn(10, 10)) df.values[:, ::2] = np.nan @@ -7643,6 +7666,7 @@ def test_fillna_columns(self): expected = df.astype(float).fillna(method='ffill', axis=1) assert_frame_equal(result, expected) + def test_fillna_invalid_method(self): with assertRaisesRegexp(ValueError, 'ffil'): self.frame.fillna(method='ffil') @@ -7652,8 +7676,6 @@ def test_fillna_invalid_value(self): self.assertRaises(TypeError, self.frame.fillna, [1, 2]) # tuple self.assertRaises(TypeError, self.frame.fillna, (1, 2)) - # frame - self.assertRaises(NotImplementedError, self.frame.fillna, self.frame) # frame with series self.assertRaises(ValueError, self.frame.iloc[:,0].fillna, self.frame)