From aad37a52acc43129502fd5870f1a55419aeff912 Mon Sep 17 00:00:00 2001 From: Ka Wo Chen Date: Thu, 21 Jan 2016 06:57:53 -0500 Subject: [PATCH] ENH: GH12034 RangeIndex.union returns RangeIndex if possible --- doc/source/whatsnew/v0.18.0.txt | 2 +- pandas/core/index.py | 43 ++++++++++++++++++++++++++++++++- pandas/tests/test_index.py | 33 +++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 2706cb200dd54..2be438dd7890e 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -110,7 +110,7 @@ Range Index A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory saving alternative for common use cases. This has a similar implementation to the python ``range`` object (``xrange`` in python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed. -This will now be the default constructed index for ``NDFrame`` objects, rather than previous an ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`) +This will now be the default constructed index for ``NDFrame`` objects, rather than the previous ``Int64Index``. 
(:issue:`939`, :issue:`12070`, :issue:`12071`, :issue:`12109`) Previous Behavior: diff --git a/pandas/core/index.py b/pandas/core/index.py index 1fbb717bf76d8..558da897b241e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -4307,7 +4307,48 @@ def union(self, other): ------- union : Index """ - # note: could return a RangeIndex in some circumstances + self._assert_can_do_setop(other) + if len(other) == 0 or self.equals(other): + return self + if len(self) == 0: + return other + if isinstance(other, RangeIndex): + start_s, step_s = self._start, self._step + end_s = self._start + self._step * (len(self) - 1) + start_o, step_o = other._start, other._step + end_o = other._start + other._step * (len(other) - 1) + if self._step < 0: + start_s, step_s, end_s = end_s, -step_s, start_s + if other._step < 0: + start_o, step_o, end_o = end_o, -step_o, start_o + if len(self) == 1 and len(other) == 1: + step_s = step_o = abs(self._start - other._start) + elif len(self) == 1: + step_s = step_o + elif len(other) == 1: + step_o = step_s + start_r = min(start_s, start_o) + end_r = max(end_s, end_o) + if step_o == step_s: + if ((start_s - start_o) % step_s == 0 and + (start_s - end_o) <= step_s and + (start_o - end_s) <= step_s): + return RangeIndex(start_r, end_r + step_s, step_s) + if ((step_s % 2 == 0) and + (abs(start_s - start_o) == step_s // 2) and + (abs(end_s - end_o) == step_s // 2)): + return RangeIndex(start_r, end_r + step_s // 2, step_s // 2) + elif step_o % step_s == 0: + if ((start_o - start_s) % step_s == 0 and + (start_o + step_s >= start_s) and + (end_o - step_s <= end_s)): + return RangeIndex(start_r, end_r + step_s, step_s) + elif step_s % step_o == 0: + if ((start_s - start_o) % step_o == 0 and + (start_s + step_o >= start_o) and + (end_s - step_o <= end_o)): + return RangeIndex(start_r, end_r + step_o, step_o) + return self._int64index.union(other) def join(self, other, how='left', level=None, return_indexers=False): diff --git 
a/pandas/tests/test_index.py b/pandas/tests/test_index.py index b0210c9fde2e9..68150bfbca3f9 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -4130,6 +4130,39 @@ def test_union_noncomparable(self): expected = np.concatenate((other, self.index)) self.assert_numpy_array_equal(result, expected) + def test_union(self): + RI = RangeIndex + I64 = Int64Index + cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)), + (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)), + (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)), + (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)), + (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)), + (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)), + (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)), + (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)), + (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)), + (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)), + (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)), + (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)), + (RI(), RI(), RI()), + (RI(0, -10, -2), RI(), RI(0, -10, -2)), + (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)), + (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)), + (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)), + (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)), + (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)), + (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])), + (RI(0, 10, 1), I64([]), RI(0, 10, 1)), + (RI(), I64([1, 5, 6]), I64([1, 5, 6]))] + for idx1, idx2, expected in cases: + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + res3 = idx1._int64index.union(idx2) + tm.assert_index_equal(res1, expected, exact=True) + tm.assert_index_equal(res2, expected, exact=True) + tm.assert_index_equal(res3, expected) + def test_nbytes(self): # memory savings vs int index