BUG: Fix TimedeltaIndex.intersection for descending, unsorted and unequal-length indexes

kirkhansen · kirkhansen · commit ea61c24a51b8 · 2018-09-14T09:01:44.000-05:00
Fixes pandas-dev#17391
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1476,6 +1476,12 @@ def is_monotonic(self):
         """ alias for is_monotonic_increasing (deprecated) """
         return self.is_monotonic_increasing
 
+    @property
+    def _is_strictly_monotonic(self):
+        """ True if the index is strictly increasing or strictly decreasing """
+        return (self._is_strictly_monotonic_increasing or
+                self._is_strictly_monotonic_decreasing)
+
     @property
     def is_monotonic_increasing(self):
         """
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -36,9 +36,12 @@
 
 from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
 from pandas.core.indexes.base import Index, _index_shared_docs
+from pandas.tseries.offsets import index_offsets_equal
+import pandas.tseries.frequencies as frequencies
 from pandas.util._decorators import Appender, cache_readonly
 import pandas.core.dtypes.concat as _concat
 
+
 import pandas.core.indexes.base as ibase
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 
@@ -679,6 +682,94 @@ def astype(self, dtype, copy=True):
             raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
         return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
 
+    def _intersect_ascending(self, other):
+        """ Slice the earlier-starting index down to the range both cover """
+        # to make our life easier, "sort" the two ranges
+        if self[0] <= other[0]:
+            left, right = self, other
+        else:
+            left, right = other, self
+        # the shared range runs from the later start to the earlier end
+        start, end = right[0], min(left[-1], right[-1])
+        if end < start:
+            # no overlap: empty array of the values dtype rather than a list
+            return left.values[:0]
+        return left.values[slice(*left.slice_locs(start, end))]
+
+    def _intersect_descending(self, other):
+        """ Slice the higher-starting index down to the range both cover """
+        # this is essentially a flip of _intersect_ascending
+        if self[0] >= other[0]:
+            left, right = self, other
+        else:
+            left, right = other, self
+        # the shared range runs from the lower start to the higher end
+        start, end = right[0], max(left[-1], right[-1])
+        if end > start:
+            # no overlap: empty array of the values dtype
+            return left.values[:0]
+        return left.values[slice(*left.slice_locs(start, end))]
+
+    def intersection(self, other):
+        """
+        Specialized intersection for DatetimeIndexOpsMixin objects.
+        May be much faster than Index.intersection.
+
+        Parameters
+        ----------
+        other : Index or array-like
+
+        Returns
+        -------
+        Index
+            The elements common to ``self`` and ``other``
+        """
+        self._assert_can_do_setop(other)
+
+        if self.equals(other):
+            return self._get_consensus_name(other)
+
+        lengths = len(self), len(other)
+        if lengths[0] == 0:
+            return self
+        if lengths[1] == 0:
+            return other
+
+        if not isinstance(other, Index):
+            return Index.intersection(self, other)
+        elif (not index_offsets_equal(self, other) or
+                not self.freq.isAnchored() or
+                not self._is_strictly_monotonic or
+                not other._is_strictly_monotonic):
+            # slicing below is only valid for sorted indexes on one freq grid
+            result = Index.intersection(self, other)
+            result = self._shallow_copy(result._values, name=result.name,
+                                        tz=getattr(self, 'tz', None),
+                                        freq=None)
+            if result.freq is None:
+                result.freq = frequencies.to_offset(result.inferred_freq)
+            return result
+
+        # indexes of different lengths that do not share both end points,
+        # e.g. seconds (1, 2, ..., 8) against (2, 3, 4, 8), are handed to
+        # the generic implementation instead of being sliced
+        if lengths[0] != lengths[1] and (
+                max(self) != max(other) or min(self) != min(other)):
+            return Index.intersection(self, other)
+
+        # coerce into same order
+        self_ascending = self.is_monotonic_increasing
+        if self_ascending != other.is_monotonic_increasing:
+            other = other.sort_values(ascending=self_ascending)
+
+        if self_ascending:
+            intersected_slice = self._intersect_ascending(other)
+        else:
+            intersected_slice = self._intersect_descending(other)
+
+        intersected = self._shallow_copy(intersected_slice)
+        return intersected._get_consensus_name(other)
+
 
 def _ensure_datetimelike_to_i8(other):
     """ helper for coercing an input scalar or array to i8 """
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -978,62 +978,6 @@ def _wrap_union_result(self, other, result):
             raise ValueError('Passed item and index have different timezone')
         return self._simple_new(result, name=name, freq=None, tz=self.tz)
 
-    def intersection(self, other):
-        """
-        Specialized intersection for DatetimeIndex objects. May be much faster
-        than Index.intersection
-
-        Parameters
-        ----------
-        other : DatetimeIndex or array-like
-
-        Returns
-        -------
-        y : Index or DatetimeIndex
-        """
-        self._assert_can_do_setop(other)
-        if not isinstance(other, DatetimeIndex):
-            try:
-                other = DatetimeIndex(other)
-            except (TypeError, ValueError):
-                pass
-            result = Index.intersection(self, other)
-            if isinstance(result, DatetimeIndex):
-                if result.freq is None:
-                    result.freq = to_offset(result.inferred_freq)
-            return result
-
-        elif (other.freq is None or self.freq is None or
-              other.freq != self.freq or
-              not other.freq.isAnchored() or
-              (not self.is_monotonic or not other.is_monotonic)):
-            result = Index.intersection(self, other)
-            result = self._shallow_copy(result._values, name=result.name,
-                                        tz=result.tz, freq=None)
-            if result.freq is None:
-                result.freq = to_offset(result.inferred_freq)
-            return result
-
-        if len(self) == 0:
-            return self
-        if len(other) == 0:
-            return other
-        # to make our life easier, "sort" the two ranges
-        if self[0] <= other[0]:
-            left, right = self, other
-        else:
-            left, right = other, self
-
-        end = min(left[-1], right[-1])
-        start = right[0]
-
-        if end < start:
-            return type(self)(data=[])
-        else:
-            lslice = slice(*left.slice_locs(start, end))
-            left_chunk = left.values[lslice]
-            return self._shallow_copy(left_chunk)
-
     def _parsed_string_to_bounds(self, reso, parsed):
         """
         Calculate datetime bounds for parsed time string and its resolution.
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -402,48 +402,6 @@ def _wrap_union_result(self, other, result):
         name = self.name if self.name == other.name else None
         return self._simple_new(result, name=name, freq=None)
 
-    def intersection(self, other):
-        """
-        Specialized intersection for TimedeltaIndex objects. May be much faster
-        than Index.intersection
-
-        Parameters
-        ----------
-        other : TimedeltaIndex or array-like
-
-        Returns
-        -------
-        y : Index or TimedeltaIndex
-        """
-        self._assert_can_do_setop(other)
-        if not isinstance(other, TimedeltaIndex):
-            try:
-                other = TimedeltaIndex(other)
-            except (TypeError, ValueError):
-                pass
-            result = Index.intersection(self, other)
-            return result
-
-        if len(self) == 0:
-            return self
-        if len(other) == 0:
-            return other
-        # to make our life easier, "sort" the two ranges
-        if self[0] <= other[0]:
-            left, right = self, other
-        else:
-            left, right = other, self
-
-        end = min(left[-1], right[-1])
-        start = right[0]
-
-        if end < start:
-            return type(self)(data=[])
-        else:
-            lslice = slice(*left.slice_locs(start, end))
-            left_chunk = left.values[lslice]
-            return self._shallow_copy(left_chunk)
-
     def _maybe_promote(self, other):
         if other.inferred_type == 'timedelta':
             other = TimedeltaIndex(other)
diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas as pd
 import pandas.util.testing as tm
@@ -73,3 +74,97 @@ def test_intersection_bug_1708(self):
         result = index_1 & index_2
         expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
         tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize('idx1,idx2,expected', [
+    (pd.to_timedelta(range(2, 6), unit='s'),
+     pd.to_timedelta(range(3), unit='s'),
+     TimedeltaIndex(['00:00:02'])),
+    (pd.to_timedelta(range(3), unit='s'),
+     pd.to_timedelta(range(2, 6), unit='s'),
+     TimedeltaIndex(['00:00:02'])),
+])
+def test_intersection_intersects_ascending(idx1, idx2, expected):
+    # GH 17391: ascending ranges that overlap only at 2 seconds
+    assert idx1.intersection(idx2).equals(expected)
+
+
+@pytest.mark.parametrize('idx1,idx2,expected', [
+    (pd.to_timedelta(range(6, 3, -1), unit='s'),
+     pd.to_timedelta(range(5, 1, -1), unit='s'),
+     TimedeltaIndex(['00:00:05', '00:00:04'])),
+    (pd.to_timedelta(range(5, 1, -1), unit='s'),
+     pd.to_timedelta(range(6, 3, -1), unit='s'),
+     TimedeltaIndex(['00:00:05', '00:00:04'])),
+])
+def test_intersection_intersects_descending(idx1, idx2, expected):
+    # GH 17391: both operands are sorted in descending order
+    result = idx1.intersection(idx2)
+    assert result.equals(expected)
+
+
+def test_intersection_intersects_descending_no_intersect():
+    idx1 = pd.to_timedelta(range(6, 4, -1), unit='s')  # 6s, 5s
+    idx2 = pd.to_timedelta(range(4, 1, -1), unit='s')  # 4s, 3s, 2s
+    result = idx1.intersection(idx2)
+    assert len(result) == 0
+
+
+def test_intersection_intersects_len_1():
+    # both ranges contain only the single value 1 second
+    forward = pd.to_timedelta(range(1, 2), unit='s')
+    backward = pd.to_timedelta(range(1, 0, -1), unit='s')
+    expected = TimedeltaIndex(['00:00:01'], dtype='timedelta64[ns]')
+    result = forward.intersection(backward)
+    tm.assert_index_equal(result, expected)
+
+
+def test_intersection_can_intersect_self():
+    # intersecting an index with itself must give the index back
+    idx = pd.to_timedelta(range(1, 2), unit='s')
+    tm.assert_index_equal(idx, idx.intersection(idx))
+
+
+def test_intersection_not_sorted():
+    # same elements in two different, non-monotonic orders
+    unsorted_a = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
+    unsorted_b = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
+    # the result is expected in the order of the calling index
+    tm.assert_index_equal(unsorted_a.intersection(unsorted_b), unsorted_a)
+
+
+def test_intersection_not_unique():
+    with_dupes = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
+    unique = pd.to_timedelta((1, 2, 3, 4), unit='s')
+    # duplicates are expected to be kept whichever side is the caller
+    expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
+
+    result = with_dupes.intersection(unique)
+    tm.assert_index_equal(result, expected)
+    result = unique.intersection(with_dupes)
+    tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize("index1, index2, expected", [
+    (pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
+     pd.to_timedelta((2, 3, 4, 8), unit='s'),
+     pd.to_timedelta((2, 3, 4, 8), unit='s')),
+    (pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
+     pd.to_timedelta((2, 3, 4), unit='s'),
+     pd.to_timedelta((2, 3, 4), unit='s')),
+    (pd.to_timedelta((2, 4, 5, 6), unit='s'),
+     pd.to_timedelta((2, 3, 4), unit='s'),
+     pd.to_timedelta((2, 4), unit='s')),
+])
+def test_intersection_different_lengths(index1, index2, expected):
+    def intersect(idx1, idx2, expected):
+        result = idx1.intersection(idx2)
+        tm.assert_index_equal(result, expected)
+        result = idx2.intersection(idx1)  # operands swapped
+        tm.assert_index_equal(result, expected)
+
+    intersect(index1, index2, expected)
+    intersect(index1.sort_values(ascending=False),  # same cases, descending
+              index2.sort_values(ascending=False),
+              expected.sort_values(ascending=False)
+              )
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
@@ -112,6 +112,41 @@ def wrapper(self, other):
     return wrapper
 
 
+def apply_index_wraps(func):
+    """ Round-trip ``func``'s result through 'D' periods if self.normalize """
+    @functools.wraps(func)
+    def normalizing(self, other):
+        outcome = func(self, other)
+        return (outcome.to_period('D').to_timestamp()
+                if self.normalize else outcome)
+    return normalizing
+
+
+def _is_normalized(dt):
+    """ True when every time-of-day field of ``dt`` is zero """
+    has_time = (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or
+                dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0)
+    return not has_time
+
+
+def index_offsets_equal(first, second):
+    """
+    Check whether both indexes have a non-None ``freq`` and they are equal.
+
+    Parameters
+    ----------
+    first : Index
+    second : Index
+
+    Returns
+    -------
+    bool
+    """
+    first = getattr(first, 'freq', None)
+    second = getattr(second, 'freq', None)
+    return not (first is None or second is None or first != second)
+
+
 # ---------------------------------------------------------------------
 # DateOffset