Skip to content

Commit 3fdf0d3

Browse files
authored
BUG: merge not raising for String and numeric merges (#56441)
* BUG: merge not raising for String and numeric merges * Add coverage
1 parent 8b5a7d8 commit 3fdf0d3

File tree

3 files changed

+20
-10
lines changed

3 files changed

+20
-10
lines changed

doc/source/whatsnew/v2.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,7 @@ Reshaping
673673
- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
674674
- Bug in :func:`merge_asof` raising incorrect error for string dtype (:issue:`56444`)
675675
- Bug in :func:`merge_asof` when using a :class:`Timedelta` tolerance on an :class:`ArrowDtype` column (:issue:`56486`)
676+
- Bug in :func:`merge` not raising when merging string columns with numeric columns (:issue:`56441`)
676677
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
677678
- Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`)
678679
- Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`)

pandas/core/reshape/merge.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,8 +1379,12 @@ def _maybe_coerce_merge_keys(self) -> None:
13791379

13801380
lk_is_cat = isinstance(lk.dtype, CategoricalDtype)
13811381
rk_is_cat = isinstance(rk.dtype, CategoricalDtype)
1382-
lk_is_object = is_object_dtype(lk.dtype)
1383-
rk_is_object = is_object_dtype(rk.dtype)
1382+
lk_is_object_or_string = is_object_dtype(lk.dtype) or is_string_dtype(
1383+
lk.dtype
1384+
)
1385+
rk_is_object_or_string = is_object_dtype(rk.dtype) or is_string_dtype(
1386+
rk.dtype
1387+
)
13841388

13851389
# if either left or right is a categorical
13861390
# then they must match exactly in categories & ordered
@@ -1477,14 +1481,14 @@ def _maybe_coerce_merge_keys(self) -> None:
14771481
# incompatible dtypes GH 9780, GH 15800
14781482

14791483
# bool values are coerced to object
1480-
elif (lk_is_object and is_bool_dtype(rk.dtype)) or (
1481-
is_bool_dtype(lk.dtype) and rk_is_object
1484+
elif (lk_is_object_or_string and is_bool_dtype(rk.dtype)) or (
1485+
is_bool_dtype(lk.dtype) and rk_is_object_or_string
14821486
):
14831487
pass
14841488

14851489
# object values are allowed to be merged
1486-
elif (lk_is_object and is_numeric_dtype(rk.dtype)) or (
1487-
is_numeric_dtype(lk.dtype) and rk_is_object
1490+
elif (lk_is_object_or_string and is_numeric_dtype(rk.dtype)) or (
1491+
is_numeric_dtype(lk.dtype) and rk_is_object_or_string
14881492
):
14891493
inferred_left = lib.infer_dtype(lk, skipna=False)
14901494
inferred_right = lib.infer_dtype(rk, skipna=False)
@@ -1523,7 +1527,7 @@ def _maybe_coerce_merge_keys(self) -> None:
15231527
# allows datetime with different resolutions
15241528
continue
15251529

1526-
elif lk_is_object and rk_is_object:
1530+
elif is_object_dtype(lk.dtype) and is_object_dtype(rk.dtype):
15271531
continue
15281532

15291533
# Houston, we have a problem!

pandas/tests/reshape/merge/test_join.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
import pandas.util._test_decorators as td
7+
68
import pandas as pd
79
from pandas import (
810
Categorical,
@@ -118,7 +120,10 @@ def test_handle_overlap_arbitrary_key(self, df, df2):
118120
assert "key1.foo" in joined
119121
assert "key2.bar" in joined
120122

121-
def test_join_on(self, target_source):
123+
@pytest.mark.parametrize(
124+
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
125+
)
126+
def test_join_on(self, target_source, infer_string):
122127
target, source = target_source
123128

124129
merged = target.join(source, on="C")
@@ -150,8 +155,8 @@ def test_join_on(self, target_source):
150155
# overlap
151156
source_copy = source.copy()
152157
msg = (
153-
"You are trying to merge on float64 and object columns for key 'A'. "
154-
"If you wish to proceed you should use pd.concat"
158+
"You are trying to merge on float64 and object|string columns for key "
159+
"'A'. If you wish to proceed you should use pd.concat"
155160
)
156161
with pytest.raises(ValueError, match=msg):
157162
target.join(source_copy, on="A")

0 commit comments

Comments
 (0)