Skip to content

Commit 4699c2b

Browse files
authored
BUG: isin numeric vs string (#38279)
1 parent e132d85 commit 4699c2b

File tree

2 files changed

+18
-24
lines changed

2 files changed

+18
-24
lines changed

pandas/core/algorithms.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,8 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
445445
elif isinstance(values, ABCMultiIndex):
446446
# Avoid raising in extract_array
447447
values = np.array(values)
448+
else:
449+
values = extract_array(values, extract_numpy=True)
448450

449451
comps = _ensure_arraylike(comps)
450452
comps = extract_array(comps, extract_numpy=True)
@@ -459,11 +461,14 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
459461
elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype):
460462
# e.g. comps are integers and values are datetime64s
461463
return np.zeros(comps.shape, dtype=bool)
464+
# TODO: not quite right ... Sparse/Categorical
465+
elif needs_i8_conversion(values.dtype):
466+
return isin(comps, values.astype(object))
462467

463-
comps, dtype = _ensure_data(comps)
464-
values, _ = _ensure_data(values, dtype=dtype)
465-
466-
f = htable.ismember_object
468+
elif is_extension_array_dtype(comps.dtype) or is_extension_array_dtype(
469+
values.dtype
470+
):
471+
return isin(np.asarray(comps), np.asarray(values))
467472

468473
# GH16012
469474
# Ensure np.in1d doesn't get object types or it *may* throw an exception
@@ -476,23 +481,15 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
476481
f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
477482
else:
478483
f = np.in1d
479-
elif is_integer_dtype(comps.dtype):
480-
try:
481-
values = values.astype("int64", copy=False)
482-
comps = comps.astype("int64", copy=False)
483-
f = htable.ismember_int64
484-
except (TypeError, ValueError, OverflowError):
485-
values = values.astype(object)
486-
comps = comps.astype(object)
487-
488-
elif is_float_dtype(comps.dtype):
489-
try:
490-
values = values.astype("float64", copy=False)
491-
comps = comps.astype("float64", copy=False)
492-
f = htable.ismember_float64
493-
except (TypeError, ValueError):
494-
values = values.astype(object)
495-
comps = comps.astype(object)
484+
485+
else:
486+
common = np.find_common_type([values.dtype, comps.dtype], [])
487+
values = values.astype(common, copy=False)
488+
comps = comps.astype(common, copy=False)
489+
name = common.name
490+
if name == "bool":
491+
name = "uint8"
492+
f = getattr(htable, f"ismember_{name}")
496493

497494
return f(comps, values)
498495

pandas/tests/test_algos.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,6 @@ def test_different_nans_as_float64(self):
10441044
expected = np.array([True, True])
10451045
tm.assert_numpy_array_equal(result, expected)
10461046

1047-
@pytest.mark.xfail(reason="problem related with issue #34125")
10481047
def test_isin_int_df_string_search(self):
10491048
"""Comparing df with ints (1,2) with a string at isin() ("1")
10501049
-> should not match values because int 1 is not equal to str 1"""
@@ -1053,7 +1052,6 @@ def test_isin_int_df_string_search(self):
10531052
expected_false = DataFrame({"values": [False, False]})
10541053
tm.assert_frame_equal(result, expected_false)
10551054

1056-
@pytest.mark.xfail(reason="problem related with issue #34125")
10571055
def test_isin_nan_df_string_search(self):
10581056
"""Comparing df with nan value (np.nan,2) with a string at isin() ("NaN")
10591057
-> should not match values because np.nan is not equal to str NaN"""
@@ -1062,7 +1060,6 @@ def test_isin_nan_df_string_search(self):
10621060
expected_false = DataFrame({"values": [False, False]})
10631061
tm.assert_frame_equal(result, expected_false)
10641062

1065-
@pytest.mark.xfail(reason="problem related with issue #34125")
10661063
def test_isin_float_df_string_search(self):
10671064
"""Comparing df with floats (1.4245,2.32441) with a string at isin() ("1.4245")
10681065
-> should not match values because float 1.4245 is not equal to str 1.4245"""

0 commit comments

Comments
 (0)