NOTE(review): the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Hunk line counts match the @@ headers, but the
indentation of context lines is inferred -- verify against the target tree
or apply with `git apply --ignore-whitespace`.
NOTE(review): the new merge test now compares `klass is np.asarray`
(identity) instead of `==`; the post-image blob id on that file's `index`
line was not regenerated.

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index d53de30187156..22f9ebd8aab98 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -407,7 +407,7 @@ Reshaping
 - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`)
 - Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
 - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`)
-
+- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`)
 
 Numeric
 ^^^^^^^
diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py
index a2180ecc4632f..738e1ea9062f6 100644
--- a/pandas/core/dtypes/api.py
+++ b/pandas/core/dtypes/api.py
@@ -55,6 +55,7 @@
                      is_dict_like,
                      is_iterator,
                      is_file_like,
+                     is_array_like,
                      is_list_like,
                      is_hashable,
                      is_named_tuple)
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index 8010a213efaf0..6fed25a0012f2 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -267,6 +267,39 @@ def is_list_like(obj):
             not isinstance(obj, string_and_binary_types))
 
 
+def is_array_like(obj):
+    """
+    Check if the object is array-like.
+
+    For an object to be considered array-like, it must be list-like and
+    have a `dtype` attribute.
+
+    Parameters
+    ----------
+    obj : The object to check.
+
+    Returns
+    -------
+    is_array_like : bool
+        Whether `obj` has array-like properties.
+
+    Examples
+    --------
+    >>> is_array_like(np.array([1, 2, 3]))
+    True
+    >>> is_array_like(pd.Series(["a", "b"]))
+    True
+    >>> is_array_like(pd.Index(["2016-01-01"]))
+    True
+    >>> is_array_like([1, 2, 3])
+    False
+    >>> is_array_like(("a", "b"))
+    False
+    """
+
+    return is_list_like(obj) and hasattr(obj, "dtype")
+
+
 def is_nested_list_like(obj):
     """
     Check if the object is list-like, and that all of its elements
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index ad2a433b5632b..8ee30bf72d313 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -10,7 +10,7 @@
 from pandas.compat import range, lzip, zip, map, filter
 import pandas.compat as compat
 
-from pandas import (Categorical, Series, DataFrame,
+from pandas import (Categorical, DataFrame,
                     Index, MultiIndex, Timedelta)
 from pandas.core.frame import _merge_doc
 from pandas.core.dtypes.common import (
@@ -18,6 +18,7 @@
     is_datetime64_dtype,
     needs_i8_conversion,
     is_int64_dtype,
+    is_array_like,
     is_categorical_dtype,
     is_integer_dtype,
     is_float_dtype,
@@ -814,13 +815,12 @@ def _get_merge_keys(self):
         join_names = []
         right_drop = []
         left_drop = []
+
         left, right = self.left, self.right
         stacklevel = 5  # Number of stack levels from df.merge
 
-        is_lkey = lambda x: isinstance(
-            x, (np.ndarray, Series)) and len(x) == len(left)
-        is_rkey = lambda x: isinstance(
-            x, (np.ndarray, Series)) and len(x) == len(right)
+        is_lkey = lambda x: is_array_like(x) and len(x) == len(left)
+        is_rkey = lambda x: is_array_like(x) and len(x) == len(right)
 
         # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A
         # user could, for example, request 'left_index' and 'left_by'. In a
diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py
index 1cbcf3f9109a4..7e6430accc546 100644
--- a/pandas/tests/api/test_types.py
+++ b/pandas/tests/api/test_types.py
@@ -30,7 +30,7 @@ class TestTypes(Base):
                'is_period_dtype', 'is_interval', 'is_interval_dtype',
                'is_re', 'is_re_compilable',
                'is_dict_like', 'is_iterator', 'is_file_like',
-               'is_list_like', 'is_hashable',
+               'is_list_like', 'is_hashable', 'is_array_like',
                'is_named_tuple',
                'pandas_dtype', 'union_categoricals', 'infer_dtype']
     deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence']
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 33c570a814e7d..b4f5d67530fbd 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -78,6 +78,23 @@ def test_is_list_like_fails(ll):
     assert not inference.is_list_like(ll)
 
 
+def test_is_array_like():
+    assert inference.is_array_like(Series([]))
+    assert inference.is_array_like(Series([1, 2]))
+    assert inference.is_array_like(np.array(["a", "b"]))
+    assert inference.is_array_like(Index(["2016-01-01"]))
+
+    class DtypeList(list):
+        dtype = "special"
+
+    assert inference.is_array_like(DtypeList())
+
+    assert not inference.is_array_like([1, 2, 3])
+    assert not inference.is_array_like(tuple())
+    assert not inference.is_array_like("foo")
+    assert not inference.is_array_like(123)
+
+
 @pytest.mark.parametrize('inner', [
     [], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
     Series([]), Series(['a']).str, (x for x in range(5))
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 70b84f7a6225b..b9a667499b7a0 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1370,6 +1370,32 @@ def f():
             household.join(log_return, how='outer')
         pytest.raises(NotImplementedError, f)
 
+    @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index])
+    def test_merge_datetime_index(self, klass):
+        # see gh-19038
+        df = DataFrame([1, 2, 3],
+                       ["2016-01-01", "2017-01-01", "2018-01-01"],
+                       columns=["a"])
+        df.index = pd.to_datetime(df.index)
+        on_vector = df.index.year
+
+        if klass is not None:
+            on_vector = klass(on_vector)
+
+        expected = DataFrame({"a": [1, 2, 3]})
+
+        if klass is np.asarray:
+            # The join key is added for ndarray.
+            expected["key_1"] = [2016, 2017, 2018]
+
+        result = df.merge(df, on=["a", on_vector], how="inner")
+        tm.assert_frame_equal(result, expected)
+
+        expected = DataFrame({"a_x": [1, 2, 3],
+                              "a_y": [1, 2, 3]})
+        result = df.merge(df, on=[df.index.year], how="inner")
+        tm.assert_frame_equal(result, expected)
+
 
 class TestMergeDtypes(object):
 