From aad37a52acc43129502fd5870f1a55419aeff912 Mon Sep 17 00:00:00 2001
From: Ka Wo Chen <kawoc@tepper.cmu.edu>
Date: Thu, 21 Jan 2016 06:57:53 -0500
Subject: [PATCH] ENH: GH12034 RangeIndex.union returns RangeIndex if possible

---
 doc/source/whatsnew/v0.18.0.txt |  2 +-
 pandas/core/index.py            | 43 ++++++++++++++++++++++++++++++++-
 pandas/tests/test_index.py      | 33 +++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 2706cb200dd54..2be438dd7890e 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -110,7 +110,7 @@ Range Index
 
 A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory saving alternative for common use cases. This has a similar implementation to the python ``range`` object (``xrange`` in python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed.
 
-This will now be the default constructed index for ``NDFrame`` objects, rather than previous an ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`)
+This will now be the default constructed index for ``NDFrame`` objects, rather than an ``Int64Index`` as previously. (:issue:`939`, :issue:`12070`, :issue:`12071`, :issue:`12109`)
 
 Previous Behavior:
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 1fbb717bf76d8..558da897b241e 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -4307,7 +4307,48 @@ def union(self, other):
         -------
         union : Index
         """
-        # note: could return a RangeIndex in some circumstances
+        self._assert_can_do_setop(other)
+        if len(other) == 0 or self.equals(other):
+            return self
+        if len(self) == 0:
+            return other
+        if isinstance(other, RangeIndex):
+            start_s, step_s = self._start, self._step
+            end_s = self._start + self._step * (len(self) - 1)  # last value
+            start_o, step_o = other._start, other._step
+            end_o = other._start + other._step * (len(other) - 1)
+            if self._step < 0:  # treat a descending range as ascending
+                start_s, step_s, end_s = end_s, -step_s, start_s
+            if other._step < 0:
+                start_o, step_o, end_o = end_o, -step_o, start_o
+            if len(self) == 1 and len(other) == 1:  # two points fix the step
+                step_s = step_o = abs(self._start - other._start)
+            elif len(self) == 1:  # a lone point adopts the other's step
+                step_s = step_o
+            elif len(other) == 1:
+                step_o = step_s
+            start_r, end_r = min(start_s, start_o), max(end_s, end_o)
+            if step_o == step_s:
+                half_s = step_s // 2  # floor division keeps bounds integral
+                if ((start_s - start_o) % step_s == 0 and  # same grid, no gap
+                        (start_s - end_o) <= step_s and
+                        (start_o - end_s) <= step_s):
+                    return RangeIndex(start_r, end_r + step_s, step_s)
+                if ((step_s % 2 == 0) and  # offset by exactly half a step
+                        (abs(start_s - start_o) == half_s) and
+                        (abs(end_s - end_o) == half_s)):
+                    return RangeIndex(start_r, end_r + half_s, half_s)
+            elif step_o % step_s == 0:  # other's step is a multiple
+                if ((start_o - start_s) % step_s == 0 and
+                        (start_o + step_s >= start_s) and
+                        (end_o - step_s <= end_s)):
+                    return RangeIndex(start_r, end_r + step_s, step_s)
+            elif step_s % step_o == 0:  # self's step is a multiple
+                if ((start_s - start_o) % step_o == 0 and
+                        (start_s + step_o >= start_o) and
+                        (end_s - step_o <= end_o)):
+                    return RangeIndex(start_r, end_r + step_o, step_o)
+
         return self._int64index.union(other)
 
     def join(self, other, how='left', level=None, return_indexers=False):
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index b0210c9fde2e9..68150bfbca3f9 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -4130,6 +4130,39 @@ def test_union_noncomparable(self):
         expected = np.concatenate((other, self.index))
         self.assert_numpy_array_equal(result, expected)
 
+    def test_union(self):
+        RI, I64 = RangeIndex, Int64Index
+        cases = [  # (left, right, expected union of the two)
+            (RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
+            (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
+            (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
+            (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
+            (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
+            (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
+            (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
+            (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
+            (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
+            (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
+            (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
+            (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
+            (RI(), RI(), RI()),
+            (RI(0, -10, -2), RI(), RI(0, -10, -2)),
+            (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
+            (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
+            (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
+            (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
+            (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
+            (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
+            (RI(0, 10, 1), I64([]), RI(0, 10, 1)),
+            (RI(), I64([1, 5, 6]), I64([1, 5, 6])),
+        ]
+        for left, right, expected in cases:
+            forward, backward = left.union(right), right.union(left)
+            via_int64 = left._int64index.union(right)  # materialized path
+            tm.assert_index_equal(forward, expected, exact=True)
+            tm.assert_index_equal(backward, expected, exact=True)
+            tm.assert_index_equal(via_int64, expected)
+
     def test_nbytes(self):
 
         # memory savings vs int index