Skip to content

BUG: Allow merging on Index vectors #19073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ Reshaping
- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`)
- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`)

- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`)

Numeric
^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions pandas/core/dtypes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
is_dict_like,
is_iterator,
is_file_like,
is_array_like,
is_list_like,
is_hashable,
is_named_tuple)
Expand Down
33 changes: 33 additions & 0 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,39 @@ def is_list_like(obj):
not isinstance(obj, string_and_binary_types))


def is_array_like(obj):
    """
    Determine whether an object is array-like.

    An object counts as array-like when it is list-like and also
    exposes a `dtype` attribute.

    Parameters
    ----------
    obj : The object to check.

    Returns
    -------
    is_array_like : bool
        True when `obj` is list-like and has a `dtype` attribute,
        False otherwise.

    Examples
    --------
    >>> is_array_like(np.array([1, 2, 3]))
    True
    >>> is_array_like(pd.Series(["a", "b"]))
    True
    >>> is_array_like(pd.Index(["2016-01-01"]))
    True
    >>> is_array_like([1, 2, 3])
    False
    >>> is_array_like(("a", "b"))
    False
    """

    # Anything that is not list-like can never be array-like, so the
    # `dtype` lookup is only attempted for list-like objects.
    if not is_list_like(obj):
        return False

    return hasattr(obj, "dtype")


def is_nested_list_like(obj):
"""
Check if the object is list-like, and that all of its elements
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
from pandas.compat import range, lzip, zip, map, filter
import pandas.compat as compat

from pandas import (Categorical, Series, DataFrame,
from pandas import (Categorical, DataFrame,
Index, MultiIndex, Timedelta)
from pandas.core.frame import _merge_doc
from pandas.core.dtypes.common import (
is_datetime64tz_dtype,
is_datetime64_dtype,
needs_i8_conversion,
is_int64_dtype,
is_array_like,
is_categorical_dtype,
is_integer_dtype,
is_float_dtype,
Expand Down Expand Up @@ -814,13 +815,12 @@ def _get_merge_keys(self):
join_names = []
right_drop = []
left_drop = []

left, right = self.left, self.right
stacklevel = 5 # Number of stack levels from df.merge

is_lkey = lambda x: isinstance(
x, (np.ndarray, Series)) and len(x) == len(left)
is_rkey = lambda x: isinstance(
x, (np.ndarray, Series)) and len(x) == len(right)
is_lkey = lambda x: is_array_like(x) and len(x) == len(left)
is_rkey = lambda x: is_array_like(x) and len(x) == len(right)

# Note that pd.merge_asof() has separate 'on' and 'by' parameters. A
# user could, for example, request 'left_index' and 'left_by'. In a
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/api/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class TestTypes(Base):
'is_period_dtype', 'is_interval', 'is_interval_dtype',
'is_re', 'is_re_compilable',
'is_dict_like', 'is_iterator', 'is_file_like',
'is_list_like', 'is_hashable',
'is_list_like', 'is_hashable', 'is_array_like',
'is_named_tuple',
'pandas_dtype', 'union_categoricals', 'infer_dtype']
deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence']
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,23 @@ def test_is_list_like_fails(ll):
assert not inference.is_list_like(ll)


def test_is_array_like():
    """
    Exercise `inference.is_array_like` with inputs that should and
    should not be reported as array-like.
    """

    class DtypeList(list):
        # A plain list subclass whose only addition is a `dtype` attribute.
        dtype = "special"

    array_likes = [
        Series([]),
        Series([1, 2]),
        np.array(["a", "b"]),
        Index(["2016-01-01"]),
        DtypeList(),
    ]
    for candidate in array_likes:
        assert inference.is_array_like(candidate)

    # None of these carry a `dtype`; the last two are not list-like either.
    not_array_likes = [[1, 2, 3], tuple(), "foo", 123]
    for candidate in not_array_likes:
        assert not inference.is_array_like(candidate)


@pytest.mark.parametrize('inner', [
[], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
Series([]), Series(['a']).str, (x for x in range(5))
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,32 @@ def f():
household.join(log_return, how='outer')
pytest.raises(NotImplementedError, f)

@pytest.mark.parametrize("klass", [None, np.asarray, Series, Index])
def test_merge_datetime_index(self, klass):
    # see gh-19038
    # The join vector is `index.year` of a datetime-indexed frame,
    # wrapped by `klass` when one is given and used as-is otherwise.
    frame = DataFrame([1, 2, 3],
                      ["2016-01-01", "2017-01-01", "2018-01-01"],
                      columns=["a"])
    frame.index = pd.to_datetime(frame.index)

    years = frame.index.year
    on_vector = years if klass is None else klass(years)

    expected = DataFrame({"a": [1, 2, 3]})
    if klass is np.asarray:
        # The join key is added for ndarray.
        expected["key_1"] = [2016, 2017, 2018]

    result = frame.merge(frame, on=["a", on_vector], how="inner")
    tm.assert_frame_equal(result, expected)

    # Merging on the vector alone: column "a" is expected back from both
    # sides as "a_x" / "a_y".
    expected = DataFrame({"a_x": [1, 2, 3],
                          "a_y": [1, 2, 3]})
    result = frame.merge(frame, on=[frame.index.year], how="inner")
    tm.assert_frame_equal(result, expected)


class TestMergeDtypes(object):

Expand Down