Skip to content

BUG: replace with a dict misbehaving (GH 3064), due to incorrect filtering #3072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 17, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3468,8 +3468,8 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
return self

new_data = self._data
if isinstance(to_replace, dict):
if isinstance(value, dict): # {'A' : NA} -> {'A' : 0}
if isinstance(to_replace, (dict, Series)):
if isinstance(value, (dict, Series)): # {'A' : NA} -> {'A' : 0}
new_data = self._data
for c, src in to_replace.iteritems():
if c in value and c in self:
Expand All @@ -3481,7 +3481,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
if k in self:
new_data = new_data.replace(src, value, filter = [ k ], inplace=inplace)
else:
raise ValueError('Fill value must be scalar or dict')
raise ValueError('Fill value must be scalar or dict or Series')

elif isinstance(to_replace, (list, np.ndarray)):
# [NA, ''] -> [0, 'missing']
Expand All @@ -3501,7 +3501,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
else:

# dest iterable dict-like
if isinstance(value, dict): # NA -> {'A' : 0, 'B' : -1}
if isinstance(value, (dict, Series)): # NA -> {'A' : 0, 'B' : -1}

new_data = self._data
for k, v in value.iteritems():
Expand All @@ -3528,7 +3528,7 @@ def _interpolate(self, to_replace, method, axis, inplace, limit):

method = com._clean_fill_method(method)

if isinstance(to_replace, dict):
if isinstance(to_replace, (dict, Series)):
if axis == 1:
return self.T.replace(to_replace, method=method,
limit=limit).T
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,15 @@ def _try_cast_result(self, result):
we may have roundtripped thru object in the mean-time """
return result

def replace(self, to_replace, value, inplace=False):
def replace(self, to_replace, value, inplace=False, filter=None):
""" replace the to_replace value with value, possible to create new blocks here
this is just a call to putmask """
mask = com.mask_missing(self.values, to_replace)
if filter is not None:
for i, item in enumerate(self.items):
if item not in filter:
mask[i] = False

if not mask.any():
if inplace:
return [ self ]
Expand Down Expand Up @@ -886,14 +891,15 @@ def apply(self, f, *args, **kwargs):
----------
f : the callable or function name to operate on at the block level
axes : optional (if not supplied, use self.axes)
filter : callable, if supplied, only call the block if the filter is True
filter : list, if supplied, only call the block if the filter is in the block
"""

axes = kwargs.pop('axes',None)
filter = kwargs.pop('filter',None)
filter = kwargs.get('filter')
result_blocks = []
for blk in self.blocks:
if filter is not None:
kwargs['filter'] = set(kwargs['filter'])
if not blk.items.isin(filter).any():
result_blocks.append(blk)
continue
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5587,6 +5587,26 @@ def test_replace(self):
df = DataFrame(index=['a', 'b'])
assert_frame_equal(df, df.replace(5, 7))

def test_resplace_series_dict(self):
    """Check ``DataFrame.replace`` with dict and Series arguments (GH 3064).

    Two scenarios are exercised, each once with a plain dict of per-column
    replacement values and once with the equivalent Series produced by
    ``DataFrame.mean()`` (column means are 0.5 for 'zero' and 1.0 for 'one'):

    * a scalar ``to_replace`` with per-column replacement values;
    * a Series ``to_replace`` (one target per column) with per-column
      replacement values.
    """
    # scalar -> dict / Series: every 0 is replaced by that column's value
    frame = DataFrame({'zero': {'a': 0.0, 'b': 1},
                       'one': {'a': 2.0, 'b': 0}})
    wanted = DataFrame({'zero': {'a': 0.5, 'b': 1},
                        'one': {'a': 2.0, 'b': 1.0}})
    for fill in ({'zero': 0.5, 'one': 1.0}, frame.mean()):
        assert_frame_equal(frame.replace(0, fill), wanted)

    # Series -> dict / Series: each column has its own value to look for,
    # so only 0.0 in 'zero' and 2.0 in 'one' are touched
    frame = DataFrame({'zero': {'a': 0.0, 'b': 1},
                       'one': {'a': 2.0, 'b': 0}})
    targets = Series({'zero': 0.0, 'one': 2.0})
    wanted = DataFrame({'zero': {'a': 0.5, 'b': 1},
                        'one': {'a': 1.0, 'b': 0.0}})
    for fill in ({'zero': 0.5, 'one': 1.0}, frame.mean()):
        assert_frame_equal(frame.replace(targets, fill), wanted)

def test_replace_mixed(self):
self.mixed_frame['foo'][5:20] = nan
self.mixed_frame['A'][-10:] = nan
Expand Down