Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ Range Index

A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory saving alternative for common use cases. This has a similar implementation to the python ``range`` object (``xrange`` in python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed.

This will now be the default constructed index for ``NDFrame`` objects, rather than previous an ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`)
This will now be the default constructed index for ``NDFrame`` objects, rather than previous an ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`, :issue:`12109`)

Previous Behavior:

Expand Down
43 changes: 42 additions & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4307,7 +4307,48 @@ def union(self, other):
-------
union : Index
"""
# note: could return a RangeIndex in some circumstances
self._assert_can_do_setop(other)
if len(other) == 0 or self.equals(other):
return self
if len(self) == 0:
return other
if isinstance(other, RangeIndex):
start_s, step_s = self._start, self._step
end_s = self._start + self._step * (len(self) - 1)
start_o, step_o = other._start, other._step
end_o = other._start + other._step * (len(other) - 1)
if self._step < 0:
start_s, step_s, end_s = end_s, -step_s, start_s
if other._step < 0:
start_o, step_o, end_o = end_o, -step_o, start_o
if len(self) == 1 and len(other) == 1:
step_s = step_o = abs(self._start - other._start)
elif len(self) == 1:
step_s = step_o
elif len(other) == 1:
step_o = step_s
start_r = min(start_s, start_o)
end_r = max(end_s, end_o)
if step_o == step_s:
if ((start_s - start_o) % step_s == 0 and
(start_s - end_o) <= step_s and
(start_o - end_s) <= step_s):
return RangeIndex(start_r, end_r + step_s, step_s)
if ((step_s % 2 == 0) and
(abs(start_s - start_o) <= step_s / 2) and
(abs(end_s - end_o) <= step_s / 2)):
return RangeIndex(start_r, end_r + step_s / 2, step_s / 2)
elif step_o % step_s == 0:
if ((start_o - start_s) % step_s == 0 and
(start_o + step_s >= start_s) and
(end_o - step_s <= end_s)):
return RangeIndex(start_r, end_r + step_s, step_s)
elif step_s % step_o == 0:
if ((start_s - start_o) % step_o == 0 and
(start_s + step_o >= start_o) and
(end_s - step_o <= end_o)):
return RangeIndex(start_r, end_r + step_o, step_o)

return self._int64index.union(other)

def join(self, other, how='left', level=None, return_indexers=False):
Expand Down
33 changes: 33 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4130,6 +4130,39 @@ def test_union_noncomparable(self):
expected = np.concatenate((other, self.index))
self.assert_numpy_array_equal(result, expected)

def test_union(self):
RI = RangeIndex
I64 = Int64Index
cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
(RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
(RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
(RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
(RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
(RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
(RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
(RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
(RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
(RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
(RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
(RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
(RI(), RI(), RI()),
(RI(0, -10, -2), RI(), RI(0, -10, -2)),
(RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
(RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
(RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
(RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
(RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
(RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
(RI(0, 10, 1), I64([]), RI(0, 10, 1)),
(RI(), I64([1, 5, 6]), I64([1, 5, 6]))]
for idx1, idx2, expected in cases:
res1 = idx1.union(idx2)
res2 = idx2.union(idx1)
res3 = idx1._int64index.union(idx2)
tm.assert_index_equal(res1, expected, exact=True)
tm.assert_index_equal(res2, expected, exact=True)
tm.assert_index_equal(res3, expected)

def test_nbytes(self):

# memory savings vs int index
Expand Down