Skip to content

ENH: GH12034 RangeIndex.union with RangeIndex returns RangeIndex if possible #12109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ Range Index

A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory-saving alternative for common use cases. Its implementation is similar to the Python ``range`` object (``xrange`` in Python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed.

This will now be the default constructed index for ``NDFrame`` objects, rather than previous an ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`)
This will now be the default constructed index for ``NDFrame`` objects, rather than the previous ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`, :issue:`12109`)

Previous Behavior:

Expand Down
43 changes: 42 additions & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4307,7 +4307,48 @@ def union(self, other):
-------
union : Index
"""
# note: could return a RangeIndex in some circumstances
self._assert_can_do_setop(other)
if len(other) == 0 or self.equals(other):
return self
if len(self) == 0:
return other
if isinstance(other, RangeIndex):
start_s, step_s = self._start, self._step
end_s = self._start + self._step * (len(self) - 1)
start_o, step_o = other._start, other._step
end_o = other._start + other._step * (len(other) - 1)
if self._step < 0:
start_s, step_s, end_s = end_s, -step_s, start_s
if other._step < 0:
start_o, step_o, end_o = end_o, -step_o, start_o
if len(self) == 1 and len(other) == 1:
step_s = step_o = abs(self._start - other._start)
elif len(self) == 1:
step_s = step_o
elif len(other) == 1:
step_o = step_s
start_r = min(start_s, start_o)
end_r = max(end_s, end_o)
if step_o == step_s:
if ((start_s - start_o) % step_s == 0 and
(start_s - end_o) <= step_s and
(start_o - end_s) <= step_s):
return RangeIndex(start_r, end_r + step_s, step_s)
if ((step_s % 2 == 0) and
(abs(start_s - start_o) <= step_s / 2) and
(abs(end_s - end_o) <= step_s / 2)):
return RangeIndex(start_r, end_r + step_s / 2, step_s / 2)
elif step_o % step_s == 0:
if ((start_o - start_s) % step_s == 0 and
(start_o + step_s >= start_s) and
(end_o - step_s <= end_s)):
return RangeIndex(start_r, end_r + step_s, step_s)
elif step_s % step_o == 0:
if ((start_s - start_o) % step_o == 0 and
(start_s + step_o >= start_o) and
(end_s - step_o <= end_o)):
return RangeIndex(start_r, end_r + step_o, step_o)

return self._int64index.union(other)

def join(self, other, how='left', level=None, return_indexers=False):
Expand Down
33 changes: 33 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4130,6 +4130,39 @@ def test_union_noncomparable(self):
expected = np.concatenate((other, self.index))
self.assert_numpy_array_equal(result, expected)

def test_union(self):
    """Table-driven check of ``union`` for ``RangeIndex`` operands.

    Each scenario is a ``(left, right, wanted)`` triple.  For every triple
    the union is computed in both operand orders and additionally through
    ``left._int64index``; the first two results are compared to ``wanted``
    with ``exact=True``, the third with the default comparison.
    """
    scenarios = (
        # unit step, ascending and descending
        (RangeIndex(0, 10, 1), RangeIndex(0, 10, 1),
         RangeIndex(0, 10, 1)),
        (RangeIndex(0, 10, 1), RangeIndex(5, 20, 1),
         RangeIndex(0, 20, 1)),
        (RangeIndex(0, 10, 1), RangeIndex(10, 20, 1),
         RangeIndex(0, 20, 1)),
        (RangeIndex(0, -10, -1), RangeIndex(0, -10, -1),
         RangeIndex(0, -10, -1)),
        (RangeIndex(0, -10, -1), RangeIndex(-10, -20, -1),
         RangeIndex(-19, 1, 1)),
        # equal step sizes, starts offset by half a step
        (RangeIndex(0, 10, 2), RangeIndex(1, 10, 2),
         RangeIndex(0, 10, 1)),
        (RangeIndex(0, 11, 2), RangeIndex(1, 12, 2),
         RangeIndex(0, 12, 1)),
        (RangeIndex(0, 21, 4), RangeIndex(-2, 24, 4),
         RangeIndex(-2, 24, 2)),
        (RangeIndex(0, -20, -2), RangeIndex(-1, -21, -2),
         RangeIndex(-19, 1, 1)),
        # one step size is a multiple of the other
        (RangeIndex(0, 100, 5), RangeIndex(0, 100, 20),
         RangeIndex(0, 100, 5)),
        (RangeIndex(0, -100, -5), RangeIndex(5, -100, -20),
         RangeIndex(-95, 10, 5)),
        (RangeIndex(0, -11, -1), RangeIndex(1, -12, -4),
         RangeIndex(-11, 2, 1)),
        # operands constructed without arguments
        (RangeIndex(), RangeIndex(), RangeIndex()),
        (RangeIndex(0, -10, -2), RangeIndex(), RangeIndex(0, -10, -2)),
        # further step combinations
        (RangeIndex(0, 100, 2), RangeIndex(100, 150, 200),
         RangeIndex(0, 102, 2)),
        (RangeIndex(0, -100, -2), RangeIndex(-100, 50, 102),
         RangeIndex(-100, 4, 2)),
        (RangeIndex(0, -100, -1), RangeIndex(0, -50, -3),
         RangeIndex(-99, 1, 1)),
        (RangeIndex(0, 1, 1), RangeIndex(5, 6, 10),
         RangeIndex(0, 6, 5)),
        (RangeIndex(0, 10, 5), RangeIndex(-5, -6, -20),
         RangeIndex(-5, 10, 5)),
        # expected result is an Int64Index
        (RangeIndex(0, 3, 1), RangeIndex(4, 5, 1),
         Int64Index([0, 1, 2, 4])),
        # Int64Index operands
        (RangeIndex(0, 10, 1), Int64Index([]), RangeIndex(0, 10, 1)),
        (RangeIndex(), Int64Index([1, 5, 6]), Int64Index([1, 5, 6])),
    )

    for left, right, wanted in scenarios:
        forward = left.union(right)
        backward = right.union(left)
        via_int64 = left._int64index.union(right)

        # Both operand orders are compared to the expectation with exact=True.
        tm.assert_index_equal(forward, wanted, exact=True)
        tm.assert_index_equal(backward, wanted, exact=True)
        # The Int64Index-based union is compared with the default settings.
        tm.assert_index_equal(via_int64, wanted)

def test_nbytes(self):

# memory savings vs int index
Expand Down