diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5603730974c7e..40a6b27ee32a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3468,8 +3468,8 @@ def replace(self, to_replace, value=None, method='pad', axis=0, return self new_data = self._data - if isinstance(to_replace, dict): - if isinstance(value, dict): # {'A' : NA} -> {'A' : 0} + if isinstance(to_replace, (dict, Series)): + if isinstance(value, (dict, Series)): # {'A' : NA} -> {'A' : 0} new_data = self._data for c, src in to_replace.iteritems(): if c in value and c in self: @@ -3481,7 +3481,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0, if k in self: new_data = new_data.replace(src, value, filter = [ k ], inplace=inplace) else: - raise ValueError('Fill value must be scalar or dict') + raise ValueError('Fill value must be scalar or dict or Series') elif isinstance(to_replace, (list, np.ndarray)): # [NA, ''] -> [0, 'missing'] @@ -3501,7 +3501,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0, else: # dest iterable dict-like - if isinstance(value, dict): # NA -> {'A' : 0, 'B' : -1} + if isinstance(value, (dict, Series)): # NA -> {'A' : 0, 'B' : -1} new_data = self._data for k, v in value.iteritems(): @@ -3528,7 +3528,7 @@ def _interpolate(self, to_replace, method, axis, inplace, limit): method = com._clean_fill_method(method) - if isinstance(to_replace, dict): + if isinstance(to_replace, (dict, Series)): if axis == 1: return self.T.replace(to_replace, method=method, limit=limit).T diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4163c6ad8f60f..59f750d4570ad 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -259,10 +259,15 @@ def _try_cast_result(self, result): we may have roundtripped thru object in the mean-time """ return result - def replace(self, to_replace, value, inplace=False): + def replace(self, to_replace, value, inplace=False, filter=None): """ replace the to_replace value with value, possible to create new blocks here this is just a call to putmask """ mask = com.mask_missing(self.values, to_replace) + if filter is not None: + for i, item in enumerate(self.items): + if item not in filter: + mask[i] = False + if not mask.any(): if inplace: return [ self ] @@ -886,14 +891,15 @@ def apply(self, f, *args, **kwargs): ---------- f : the callable or function name to operate on at the block level axes : optional (if not supplied, use self.axes) - filter : callable, if supplied, only call the block if the filter is True + filter : list, if supplied, only call the block if the filter is in the block """ axes = kwargs.pop('axes',None) - filter = kwargs.pop('filter',None) + filter = kwargs.get('filter') result_blocks = [] for blk in self.blocks: if filter is not None: + kwargs['filter'] = set(kwargs['filter']) if not blk.items.isin(filter).any(): result_blocks.append(blk) continue diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f143d0fcacc2f..07d84613fd86d 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5587,6 +5587,26 @@ def test_replace(self): df = DataFrame(index=['a', 'b']) assert_frame_equal(df, df.replace(5, 7)) + def test_resplace_series_dict(self): + # from GH 3064 + df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}}) + result = df.replace(0, {'zero': 0.5, 'one': 1.0}) + expected = DataFrame({'zero': {'a': 0.5, 'b': 1}, 'one': {'a': 2.0, 'b': 1.0}}) + assert_frame_equal(result, expected) + + result = df.replace(0, df.mean()) + assert_frame_equal(result, expected) + + # series to series/dict + df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}}) + s = Series({'zero': 0.0, 'one': 2.0}) + result = df.replace(s, {'zero': 0.5, 'one': 1.0}) + expected = DataFrame({'zero': {'a': 0.5, 'b': 1}, 'one': {'a': 1.0, 'b': 0.0}}) + assert_frame_equal(result, expected) + + result = df.replace(s, df.mean()) + assert_frame_equal(result, expected) + def test_replace_mixed(self): self.mixed_frame['foo'][5:20] = nan self.mixed_frame['A'][-10:] = nan