Skip to content

Commit 22a94f4

Browse files
committed
BUG: TimedeltaIndex.intersection
Fixes pandas-dev#17391
1 parent 9bc42f9 commit 22a94f4

File tree

6 files changed

+231
-105
lines changed

6 files changed

+231
-105
lines changed

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,6 +1584,12 @@ def is_monotonic(self):
15841584
"""
15851585
return self.is_monotonic_increasing
15861586

1587+
@property
def _is_strictly_monotonic(self):
    """
    Whether the index is strictly monotonic in either direction.

    Truthy when the index is strictly increasing or strictly decreasing.
    """
    strictly_increasing = self._is_strictly_monotonic_increasing
    return strictly_increasing or self._is_strictly_monotonic_decreasing
1592+
15871593
@property
15881594
def is_monotonic_increasing(self):
15891595
"""

pandas/core/indexes/datetimelike.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929

3030
import pandas.io.formats.printing as printing
3131

32+
from pandas.tseries.offsets import index_offsets_equal
33+
import pandas.tseries.frequencies as frequencies
34+
35+
3236
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3337

3438

@@ -572,6 +576,94 @@ def _time_shift(self, periods, freq=None):
572576
result.name = self.name
573577
return result
574578

579+
def _intersect_ascending(self, other):
    """
    Intersect two ascending indexes by slicing the one that starts first.

    Parameters
    ----------
    other : Index
        Index ordered in the same (ascending) direction as ``self``.

    Returns
    -------
    array-like
        The slice of ``left.values`` (``left`` being whichever index starts
        first) bounded by the later start and the earlier end of the two
        indexes.  When the ranges do not overlap, an empty slice of the
        same array is returned.
    """
    # to make our life easier, "sort" the two ranges by their first element
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # the overlap begins where the later-starting index begins and stops at
    # whichever index finishes first
    start = right[0]
    end = min(left[-1], right[-1])

    if end < start:
        # Disjoint ranges: hand back an empty slice instead of a plain list
        # so the result has the same type and dtype as the overlapping case.
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
592+
593+
def _intersect_descending(self, other):
    """
    Intersect two descending indexes by slicing the one that starts highest.

    This is the mirror image of the ascending helper.

    Parameters
    ----------
    other : Index
        Index ordered in the same (descending) direction as ``self``.

    Returns
    -------
    array-like
        The slice of ``left.values`` (``left`` being whichever index has
        the larger first element) bounded by the smaller first element and
        the larger last element of the two indexes.  When the ranges do not
        overlap, an empty slice of the same array is returned.
    """
    # "sort" the two ranges so that ``left`` starts at the larger value
    if self[0] >= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # The overlap begins at the smaller of the two first elements, which is
    # always right[0] after the swap above, and stops at the larger of the
    # two last elements.
    start = right[0]
    end = max(left[-1], right[-1])

    if end > start:
        # Disjoint ranges.  Previously ``end`` was right[-1], which can never
        # exceed right[0] on a descending index, so this guard was dead code.
        # Return an empty slice so the result type matches the normal path.
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
606+
607+
def intersection(self, other):
    """
    Specialized intersection for DateTimeIndexOpsMixin objects.
    May be much faster than Index.intersection.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    lengths = len(self), len(other)
    if lengths[0] == 0:
        return self
    if lengths[1] == 0:
        # NOTE(review): ``other`` is returned as-is here, before the
        # isinstance check below, so an empty non-Index array-like is handed
        # back unchanged -- confirm callers are fine with that.
        return other

    if not isinstance(other, Index):
        result = Index.intersection(self, other)
        return result
    elif (not index_offsets_equal(self, other) or
          (not self._is_strictly_monotonic or
           not other._is_strictly_monotonic)):
        # Generic path.  The slicing shortcut further down is only valid when
        # both indexes share the same freq and are strictly monotonic, so
        # anything else (missing freq, differing freq, unsorted or duplicated
        # values) must be intersected element-wise.
        # NOTE(review): a shared freq does not by itself guarantee the two
        # indexes are phase-aligned -- confirm whether an additional
        # anchoring check is needed before taking the shortcut.
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=getattr(self, 'tz', None),
                                    freq=None
                                    )
        if result.freq is None:
            # set the same attribute that was just checked
            result.freq = frequencies.to_offset(result.inferred_freq)
        return result

    # handle intersecting things like this
    # idx1 = pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s')
    # idx2 = pd.to_timedelta((2, 3, 4, 8), unit='s')
    if lengths[0] != lengths[1] and (
            max(self) != max(other) or min(self) != min(other)):
        return Index.intersection(self, other)

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)
666+
575667

576668
def wrap_arithmetic_op(self, other, result):
577669
if result is NotImplemented:

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -550,66 +550,6 @@ def _wrap_setop_result(self, other, result):
550550
raise ValueError('Passed item and index have different timezone')
551551
return self._shallow_copy(result, name=name, freq=None, tz=self.tz)
552552

553-
def intersection(self, other):
554-
"""
555-
Specialized intersection for DatetimeIndex objects. May be much faster
556-
than Index.intersection
557-
558-
Parameters
559-
----------
560-
other : DatetimeIndex or array-like
561-
562-
Returns
563-
-------
564-
y : Index or DatetimeIndex
565-
"""
566-
self._assert_can_do_setop(other)
567-
568-
if self.equals(other):
569-
return self._get_reconciled_name_object(other)
570-
571-
if not isinstance(other, DatetimeIndex):
572-
try:
573-
other = DatetimeIndex(other)
574-
except (TypeError, ValueError):
575-
pass
576-
result = Index.intersection(self, other)
577-
if isinstance(result, DatetimeIndex):
578-
if result.freq is None:
579-
result.freq = to_offset(result.inferred_freq)
580-
return result
581-
582-
elif (other.freq is None or self.freq is None or
583-
other.freq != self.freq or
584-
not other.freq.isAnchored() or
585-
(not self.is_monotonic or not other.is_monotonic)):
586-
result = Index.intersection(self, other)
587-
result = self._shallow_copy(result._values, name=result.name,
588-
tz=result.tz, freq=None)
589-
if result.freq is None:
590-
result.freq = to_offset(result.inferred_freq)
591-
return result
592-
593-
if len(self) == 0:
594-
return self
595-
if len(other) == 0:
596-
return other
597-
# to make our life easier, "sort" the two ranges
598-
if self[0] <= other[0]:
599-
left, right = self, other
600-
else:
601-
left, right = other, self
602-
603-
end = min(left[-1], right[-1])
604-
start = right[0]
605-
606-
if end < start:
607-
return type(self)(data=[])
608-
else:
609-
lslice = slice(*left.slice_locs(start, end))
610-
left_chunk = left.values[lslice]
611-
return self._shallow_copy(left_chunk)
612-
613553
# --------------------------------------------------------------------
614554

615555
@Appender(_index_shared_docs['astype'])

pandas/core/indexes/timedeltas.py

Lines changed: 3 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -404,51 +404,9 @@ def _fast_union(self, other):
404404
else:
405405
return left
406406

407-
def intersection(self, other):
408-
"""
409-
Specialized intersection for TimedeltaIndex objects. May be much faster
410-
than Index.intersection
411-
412-
Parameters
413-
----------
414-
other : TimedeltaIndex or array-like
415-
416-
Returns
417-
-------
418-
y : Index or TimedeltaIndex
419-
"""
420-
self._assert_can_do_setop(other)
421-
422-
if self.equals(other):
423-
return self._get_reconciled_name_object(other)
424-
425-
if not isinstance(other, TimedeltaIndex):
426-
try:
427-
other = TimedeltaIndex(other)
428-
except (TypeError, ValueError):
429-
pass
430-
result = Index.intersection(self, other)
431-
return result
432-
433-
if len(self) == 0:
434-
return self
435-
if len(other) == 0:
436-
return other
437-
# to make our life easier, "sort" the two ranges
438-
if self[0] <= other[0]:
439-
left, right = self, other
440-
else:
441-
left, right = other, self
442-
443-
end = min(left[-1], right[-1])
444-
start = right[0]
445-
446-
if end < start:
447-
return type(self)(data=[])
448-
else:
449-
lslice = slice(*left.slice_locs(start, end))
450-
left_chunk = left.values[lslice]
451-
return self._shallow_copy(left_chunk)
407+
def _wrap_union_result(self, other, result):
    """
    Wrap raw union values via ``_simple_new`` with no freq.

    The result keeps ``self.name`` only when both operands carry the same
    name; otherwise it is unnamed.
    """
    if self.name == other.name:
        shared_name = self.name
    else:
        shared_name = None
    return self._simple_new(result, name=shared_name, freq=None)
452410

453411
def _maybe_promote(self, other):
454412
if other.inferred_type == 'timedelta':

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas.util.testing as tm
@@ -73,3 +74,97 @@ def test_intersection_bug_1708(self):
7374
result = index_1 & index_2
7475
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7576
tm.assert_index_equal(result, expected)
77+
78+
79+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(2, 6), unit='s'),
     pd.to_timedelta(range(3), unit='s'),
     TimedeltaIndex(['00:00:002'])),
    (pd.to_timedelta(range(3), unit='s'),
     pd.to_timedelta(range(2, 6), unit='s'),
     TimedeltaIndex(['00:00:002'])),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    """Overlapping ascending indexes intersect the same in either order."""
    overlap = idx1.intersection(idx2)
    assert overlap.equals(expected)
90+
91+
92+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(6, 3, -1), unit='s'),
     pd.to_timedelta(range(5, 1, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
    (pd.to_timedelta(range(5, 1, -1), unit='s'),
     pd.to_timedelta(range(6, 3, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    """Overlapping descending indexes intersect the same in either order."""
    # GH 17391
    overlap = idx1.intersection(idx2)
    assert overlap.equals(expected)
104+
105+
106+
def test_intersection_intersects_descending_no_intersect():
    """Disjoint descending indexes produce an empty intersection."""
    upper = pd.to_timedelta(range(6, 4, -1), unit='s')
    lower = pd.to_timedelta(range(4, 1, -1), unit='s')
    assert len(upper.intersection(lower)) == 0
111+
112+
113+
def test_intersection_intersects_len_1():
    """Two single-element indexes holding the same value intersect to it."""
    forward = pd.to_timedelta(range(1, 2), unit='s')
    backward = pd.to_timedelta(range(1, 0, -1), unit='s')
    expected = TimedeltaIndex(['00:00:01'],
                              dtype='timedelta64[ns]')
    tm.assert_index_equal(forward.intersection(backward), expected)
120+
121+
122+
def test_intersection_can_intersect_self():
    """Intersecting an index with itself gives back an equal index."""
    index = pd.to_timedelta(range(1, 2), unit='s')
    same = index.intersection(index)
    tm.assert_index_equal(index, same)
126+
127+
128+
def test_intersection_not_sorted():
    """Unsorted indexes with the same elements intersect to the caller."""
    shuffled = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    other = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    tm.assert_index_equal(shuffled.intersection(other), shuffled)
134+
135+
136+
def test_intersection_not_unique():
    """Duplicated elements are expected in the result for either order."""
    with_dupes = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    unique = pd.to_timedelta((1, 2, 3, 4), unit='s')

    forward = with_dupes.intersection(unique)
    tm.assert_index_equal(
        forward, pd.to_timedelta((1, 2, 2, 3, 3), unit='s'))

    backward = unique.intersection(with_dupes)
    tm.assert_index_equal(
        backward, pd.to_timedelta((1, 2, 2, 3, 3), unit='s'))
146+
147+
148+
@pytest.mark.parametrize("index1, index2, expected", [
    (pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s')),
    (pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s')),
    (pd.to_timedelta((2, 4, 5, 6), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 4), unit='s')),
])
def test_intersection_different_lengths(index1, index2, expected):
    """Indexes of unequal length intersect correctly in both sort orders."""
    def check_both_ways(left, right, wanted):
        # the intersection must not depend on which operand is the caller
        for caller, argument in ((left, right), (right, left)):
            tm.assert_index_equal(caller.intersection(argument), wanted)

    check_both_ways(index1, index2, expected)

    # repeat with every operand sorted in descending order
    descending = [idx.sort_values(ascending=False)
                  for idx in (index1, index2, expected)]
    check_both_ways(*descending)

pandas/tseries/offsets.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,41 @@ def wrapper(self, other):
103103
return wrapper
104104

105105

106+
def apply_index_wraps(func):
    """
    Decorate ``func(self, other)`` so its result honours ``self.normalize``.

    When ``self.normalize`` is truthy the result is round-tripped through a
    daily period (``to_period('D').to_timestamp()``); otherwise it is
    returned untouched.
    """
    @functools.wraps(func)
    def wrapper(self, other):
        shifted = func(self, other)
        if not self.normalize:
            return shifted
        return shifted.to_period('D').to_timestamp()
    return wrapper
114+
115+
116+
def _is_normalized(dt):
    """
    Return True when ``dt`` has no time-of-day component.

    Hour, minute, second and microsecond must all be zero, as must
    ``nanosecond`` when the object has that attribute.
    """
    for field in ('hour', 'minute', 'second', 'microsecond'):
        if getattr(dt, field) != 0:
            return False
    # objects without a nanosecond attribute are treated as having none
    if getattr(dt, 'nanosecond', 0) != 0:
        return False
    return True
121+
122+
123+
def index_offsets_equal(first, second):
    """
    Check whether both indexes have a ``freq`` and the two are equal.

    Parameters
    ----------
    first : Index
    second : Index

    Returns
    -------
    bool
        False when either object has no ``freq`` attribute, when either
        ``freq`` is None, or when the two differ; True otherwise.
    """
    first_freq = getattr(first, 'freq', None)
    second_freq = getattr(second, 'freq', None)
    if first_freq is None or second_freq is None:
        return False
    return not (first_freq != second_freq)
106141
# ---------------------------------------------------------------------
107142
# DateOffset
108143

0 commit comments

Comments
 (0)