Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Fixed regressions
~~~~~~~~~~~~~~~~~
- Regression in :func:`concat` between two :class:`DataFrames` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
- Regression in :func:`read_csv` when using ``memory_map=True`` with an non-UTF8 encoding (:issue:`40986`)
-
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace is a NumPy float array (:issue:`40371`)

.. ---------------------------------------------------------------------------

Expand Down
14 changes: 12 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
TYPE_CHECKING,
Any,
Callable,
Iterable,
Sequence,
cast,
)
import warnings
Expand Down Expand Up @@ -763,8 +765,8 @@ def _replace_regex(
@final
def _replace_list(
self,
src_list: list[Any],
dest_list: list[Any],
src_list: Iterable[Any],
dest_list: Sequence[Any],
inplace: bool = False,
regex: bool = False,
) -> list[Block]:
Expand All @@ -779,6 +781,14 @@ def _replace_list(
# so un-tile here
return self.replace(src_list, dest_list[0], inplace, regex)

# https://github.com/pandas-dev/pandas/issues/40371
# the following pairs check code caused a regression so we catch that case here
# until the issue is fixed properly in can_hold_element

# error: "Iterable[Any]" has no attribute "tolist"
if hasattr(src_list, "tolist"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could maybe change this to a simple isinstance(src_list, np.array)

src_list = src_list.tolist() # type: ignore[attr-defined]

# Exclude anything that we know we won't contain
pairs = [
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1428,6 +1428,25 @@ def test_replace_bytes(self, frame_or_series):
obj = obj.replace({None: np.nan})
tm.assert_equal(obj, expected)

@pytest.mark.parametrize(
"data, to_replace, value, expected",
[
([1], [1.0], [0], [0]),
([1], [1], [0], [0]),
([1.0], [1.0], [0], [0.0]),
([1.0], [1], [0], [0.0]),
],
)
@pytest.mark.parametrize("box", [list, tuple, np.array])

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to test pd.array too? Better safe than sorry. :)

Copy link
Member

@simonjayhawkins simonjayhawkins May 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would fail. EA is not a documented allowed type for the to_replace argument of the replace method. to be fair, nor is numpy array or tuple. The safest way to restore the old undocumented behavior is to revert #38097, but not sure that a performance hit for users passing the correct type is the way forward.

Any container, even a list, with numpy scalars will still fail. This is a fix for a specific case for an arguably incorrect usage without a perf hit for correct usage.

def test_replace_list_with_mixed_type(
self, data, to_replace, value, expected, box, frame_or_series
):
# GH#40371
obj = frame_or_series(data)
expected = frame_or_series(expected)
result = obj.replace(box(to_replace), value)
tm.assert_equal(result, expected)


class TestDataFrameReplaceRegex:
@pytest.mark.parametrize(
Expand Down