Skip to content

ENH: Allow DataFrame.fillna to accept a DataFrame as its fill value. #8388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 26, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ API changes
- ``DataFrame.info()`` now ends its output with a newline character (:issue:`8114`)
- add ``copy=True`` argument to ``pd.concat`` to enable pass through of complete blocks (:issue:`8252`)

- ``.fillna`` will now raise a ``NotImplementedError`` when passed a ``DataFrame`` (:issue:`8377`)


.. _whatsnew_0150.dt:

Expand Down Expand Up @@ -785,7 +785,7 @@ Enhancements
meta-engine that automatically uses whichever version of openpyxl is
installed. (:issue:`7177`)


- ``DataFrame.fillna`` can now accept a ``DataFrame`` as a fill value (:issue:`8377`)

.. _whatsnew_0150.performance:

Expand Down
8 changes: 4 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2231,10 +2231,10 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
Method to use for filling holes in reindexed Series
pad / ffill: propagate last valid observation forward to next valid
backfill / bfill: use NEXT valid observation to fill gap
value : scalar, dict, or Series
Value to use to fill holes (e.g. 0), alternately a dict/Series of
value : scalar, dict, Series, or DataFrame
Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of
values specifying which value to use for each index (for a Series) or
column (for a DataFrame). (values not in the dict/Series will not be
column (for a DataFrame). (values not in the dict/Series/DataFrame will not be
filled). This value cannot be a list.
axis : {0, 1}, default 0
* 0: fill column-by-column
Expand Down Expand Up @@ -2342,7 +2342,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
inplace=inplace,
downcast=downcast)
elif isinstance(value, DataFrame) and self.ndim == 2:
raise NotImplementedError("can't use fillna with a DataFrame, use .where instead")
new_data = self.where(self.notnull(), value)
else:
raise ValueError("invalid fill value with a %s" % type(value))

Expand Down
26 changes: 24 additions & 2 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7630,6 +7630,29 @@ def test_fillna_dict_series(self):
with assertRaisesRegexp(NotImplementedError, 'column by column'):
df.fillna(df.max(1), axis=1)

def test_fillna_dataframe(self):
# Checks DataFrame.fillna when the fill value is itself a DataFrame.
# GH 8377
df = DataFrame({'a': [nan, 1, 2, nan, nan],
'b': [1, 2, 3, nan, nan],
'c': [nan, 1, 2, 3, 4]},
index = list('VWXYZ'))

# df2 may have different index and columns
# (here it lacks row 'Y' and column 'c', and adds row 'u' and column 'foo')
df2 = DataFrame({'a': [nan, 10, 20, 30, 40],
'b': [50, 60, 70, 80, 90],
'foo': ['bar']*5},
index = list('VWXuZ'))

result = df.fillna(df2)

# only those columns and indices which are shared get filled
# ('Z', 'a') -> 40 and ('Z', 'b') -> 90 come from df2; ('V', 'a') stays NaN
# because df2 is NaN there too; row 'Y' and column 'c' have no counterpart
# in df2, so their holes are left as they were
expected = DataFrame({'a': [nan, 1, 2, nan, 40],
'b': [1, 2, 3, nan, 90],
'c': [nan, 1, 2, 3, 4]},
index = list('VWXYZ'))

# the extra 'foo' column and 'u' row of df2 must not appear in the result
assert_frame_equal(result, expected)

def test_fillna_columns(self):
df = DataFrame(np.random.randn(10, 10))
df.values[:, ::2] = np.nan
Expand All @@ -7643,6 +7666,7 @@ def test_fillna_columns(self):
expected = df.astype(float).fillna(method='ffill', axis=1)
assert_frame_equal(result, expected)


def test_fillna_invalid_method(self):
with assertRaisesRegexp(ValueError, 'ffil'):
self.frame.fillna(method='ffil')
Expand All @@ -7652,8 +7676,6 @@ def test_fillna_invalid_value(self):
self.assertRaises(TypeError, self.frame.fillna, [1, 2])
# tuple
self.assertRaises(TypeError, self.frame.fillna, (1, 2))
# frame
self.assertRaises(NotImplementedError, self.frame.fillna, self.frame)
# frame with series
self.assertRaises(ValueError, self.frame.iloc[:,0].fillna, self.frame)

Expand Down