diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 6642f5855f4fe..a05391aacd7d8 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -846,6 +846,7 @@ Reshaping
 - Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
 - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
+- Bug in :meth:`DataFrame.join` where passing a non-pandas object like a ``polars.DataFrame`` with the ``on=`` parameter raised a misleading error message instead of a ``TypeError`` (:issue:`61434`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6158e19737185..f650bd9527979 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -10885,6 +10885,31 @@ def join(
                 raise ValueError("Other Series must have a name")
             other = DataFrame({other.name: other})
 
+        # GH#61434: when ``on`` is given, ``other`` (or each element of an
+        # iterable ``other``) must be a DataFrame or Series, else TypeError.
+        if on is not None:
+            # str/bytes/bytearray are iterable but are reported as a single
+            # invalid object by the ``elif`` branch, not element by element.
+            if isinstance(other, Iterable) and not isinstance(
+                other, (DataFrame, Series, str, bytes, bytearray)
+            ):
+                invalid = next(
+                    (obj for obj in other if not isinstance(obj, (DataFrame, Series))),
+                    None,
+                )
+                if invalid is not None:
+                    raise TypeError(
+                        f"Join with 'on={on}' requires a pandas DataFrame or Series, "
+                        "or an iterable of such objects as 'other'. Got an "
+                        f"invalid element of type {type(invalid).__name__} instead."
+                    )
+            elif not isinstance(other, (DataFrame, Series)):
+                raise TypeError(
+                    f"Join with 'on={on}' requires a pandas DataFrame or Series as "
+                    "'other'. Got "
+                    f"{type(other).__name__} instead."
+                )
+
         if isinstance(other, DataFrame):
             if how == "cross":
                 return merge(
diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index aaa9485cab580..4aa0c6e686a76 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -418,6 +418,38 @@ def test_suppress_future_warning_with_sort_kw(sort):
     tm.assert_frame_equal(result, expected)
 
 
+def test_join_with_invalid_non_pandas_objects_raises_typeerror():
+    # GH#61434: with ``on=``, a non-pandas ``other`` must raise TypeError
+    df1 = DataFrame(
+        {
+            "Column2": [10, 20, 30],
+            "Column3": ["A", "B", "C"],
+            "Column4": ["Lala", "YesYes", "NoNo"],
+        }
+    )
+
+    # stand-in for a foreign frame type (e.g. polars): exposes column-like
+    # attributes but is neither a DataFrame/Series nor iterable
+    class FakeOther:
+        def __init__(self):
+            self.Column2 = [10, 20, 30]
+            self.Column3 = ["A", "B", "C"]
+
+    invalid_other = FakeOther()
+
+    # case 1: 'other' is a single invalid object -> message names its type
+    with pytest.raises(TypeError, match="Got FakeOther instead"):
+        df1.join(invalid_other, on=["Column2", "Column3"], how="inner")
+
+    # case 2: 'other' is an iterable with mixed types -> message names the
+    # type of the first invalid element
+    df2 = DataFrame({"Column2": [10, 20, 30], "Column3": ["A", "B", "C"]})
+    mixed_iterable = [df2, 42]
+
+    with pytest.raises(TypeError, match="invalid element of type int"):
+        df1.join(mixed_iterable, on=["Column2", "Column3"], how="inner")
+
+
 class TestDataFrameJoin:
     def test_join(self, multiindex_dataframe_random_data):
         frame = multiindex_dataframe_random_data