Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions pandas/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def get_result(self):
join_index, left_indexer, right_indexer = self._get_join_info()

# this is a bit kludgy
ldata, rdata = self._get_merge_data(self.join_names)
ldata, rdata = self._get_merge_data()

# TODO: more efficiently handle group keys to avoid extra consolidation!
join_op = _BlockJoinOperation([ldata, rdata], join_index,
Expand Down Expand Up @@ -154,13 +154,19 @@ def _get_join_info(self):

return join_index, left_indexer, right_indexer

def _get_merge_data(self, join_names):
def _get_merge_data(self):
"""
Handles overlapping column names etc.
"""
ldata, rdata = self.left._data, self.right._data
lsuf, rsuf = self.suffixes
exclude_names = [x for x in join_names if x is not None]
exclude_names = set(x for x in self.join_names if x is not None)
if self.left_on is not None:
exclude_names -= set(c.name if hasattr(c, 'name') else c
for c in self.left_on)
if self.right_on is not None:
exclude_names -= set(c.name if hasattr(c, 'name') else c
for c in self.right_on)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ogrisel I'm struggling to find a case where exclude_names would be non-empty after this block of code. Any ideas (since you worked on this most recently)? I'm going to keep hacking at it a bit here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it can, actually. I'm just going to remove this code, as the test suite is pretty exhaustive and never passes a non-empty exclude argument to the _maybe_rename_join method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basically I implemented this to make the new test_handle_overlap_arbitrary_key test pass.

ldata, rdata = ldata._maybe_rename_join(rdata, lsuf, rsuf,
exclude=exclude_names,
copydata=False)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tools/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ def test_handle_overlap(self):
self.assert_('key1.foo' in joined)
self.assert_('key1.bar' in joined)

def test_handle_overlap_arbitrary_key(self):
joined = merge(self.df, self.df2,
left_on='key2', right_on='key1',
suffixes=['.foo', '.bar'])

self.assert_('key1.foo' in joined)
self.assert_('key2.bar' in joined)

def test_merge_common(self):
joined = merge(self.df, self.df2)
exp = merge(self.df, self.df2, on=['key1', 'key2'])
Expand Down