Skip to content

Commit e12e38d

Browse files
committed
BUG: TimedeltaIndex.intersection
Fixes pandas-dev#17391
1 parent ce62a5c commit e12e38d

File tree

6 files changed

+230
-105
lines changed

6 files changed

+230
-105
lines changed

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,6 +1481,12 @@ def is_monotonic(self):
14811481
""" alias for is_monotonic_increasing (deprecated) """
14821482
return self.is_monotonic_increasing
14831483

1484+
@property
def _is_strictly_monotonic(self):
    """
    Whether the index is strictly monotonic in either direction.

    Yields the strictly-increasing flag when it is truthy, otherwise the
    strictly-decreasing flag.
    """
    increasing = self._is_strictly_monotonic_increasing
    if increasing:
        return increasing
    return self._is_strictly_monotonic_decreasing
1489+
14841490
@property
14851491
def is_monotonic_increasing(self):
14861492
"""

pandas/core/indexes/datetimelike.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,12 @@
3838
from pandas.core.arrays import PeriodArray
3939
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
4040
from pandas.core.indexes.base import Index, _index_shared_docs
41+
from pandas.tseries.offsets import index_offsets_equal
42+
import pandas.tseries.frequencies as frequencies
4143
from pandas.util._decorators import Appender, cache_readonly
4244
import pandas.core.dtypes.concat as _concat
4345

46+
4447
import pandas.core.indexes.base as ibase
4548
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
4649

@@ -742,6 +745,94 @@ def _time_shift(self, periods, freq=None):
742745
result.name = self.name
743746
return result
744747

748+
def _intersect_ascending(self, other):
    """
    Slice out the overlap of two ascending ranges.

    Parameters
    ----------
    other : Index
        Presumably ascending and on the same regular grid as ``self``;
        this helper does not verify that (TODO confirm against callers).

    Returns
    -------
    numpy.ndarray
        The values of the earlier-starting index that lie between the
        later start and the earlier end; an empty array of the same dtype
        when the ranges do not overlap.
    """
    # to make our life easier, "sort" the two ranges so ``left`` starts first
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    start = right[0]
    end = min(left[-1], right[-1])

    if end < start:
        # no overlap: an empty slice keeps the ndarray type and dtype,
        # which a bare ``[]`` would lose
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
761+
762+
def _intersect_descending(self, other):
    """
    Slice out the overlap of two descending ranges.

    This is the mirror image of the ascending case: the index that starts
    with the larger value is sliced between the other index's first value
    and the larger of the two last values.

    Parameters
    ----------
    other : Index
        Presumably descending and on the same regular grid as ``self``;
        this helper does not verify that (TODO confirm against callers).

    Returns
    -------
    numpy.ndarray
        The overlapping values in descending order; an empty array of the
        same dtype when the ranges do not overlap.
    """
    # "sort" the two ranges so ``left`` starts with the larger value
    if self[0] >= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # ``left[0] >= right[0]`` holds here, so the overlap begins at right[0]
    start = right[0]
    # the overlap stops at whichever range ends first, i.e. the larger tail;
    # using only ``right[-1]`` made the no-overlap check below unreachable
    end = max(left[-1], right[-1])

    if end > start:
        # no overlap: return an empty, correctly typed slice
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
775+
776+
def intersection(self, other):
    """
    Specialized intersection for DateTimeIndexOpsMixin objects.

    May be much faster than Index.intersection: when both indexes are
    strictly monotonic and share the same anchored frequency, the result
    is obtained by slicing instead of element-wise matching. Every other
    combination is delegated to Index.intersection.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    if len(self) == 0:
        return self

    if not isinstance(other, Index):
        # checked before the empty-``other`` shortcut so that a bare empty
        # list/array is never handed back to the caller as the result
        return Index.intersection(self, other)

    if len(other) == 0:
        return other

    # Slicing is only valid when both indexes lie on the same regular grid.
    # NOTE(review): this restores the "equal and anchored freq" requirement
    # of the former DatetimeIndex.intersection; the previous condition was
    # inverted and sent indexes *without* a common freq down the slicing
    # path (e.g. [1, 2, 4] & [1, 3, 4] came back as [1, 2, 4]).
    can_slice = (index_offsets_equal(self, other) and
                 other.freq.isAnchored() and
                 self._is_strictly_monotonic and
                 other._is_strictly_monotonic)

    if not can_slice:
        result = Index.intersection(self, other)
        attrs = {'name': result.name, 'freq': None}
        if hasattr(self, 'tz'):
            # only forward ``tz`` for index types that actually carry one
            attrs['tz'] = self.tz
        result = self._shallow_copy(result._values, **attrs)
        if result.freq is None:
            # set ``freq`` itself; assigning to ``offset`` left it unset
            result.freq = frequencies.to_offset(result.inferred_freq)
        return result

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)
835+
745836

746837
def _ensure_datetimelike_to_i8(other, to_utc=False):
747838
"""

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -756,66 +756,6 @@ def _wrap_setop_result(self, other, result):
756756
raise ValueError('Passed item and index have different timezone')
757757
return self._shallow_copy(result, name=name, freq=None, tz=self.tz)
758758

759-
def intersection(self, other):
760-
"""
761-
Specialized intersection for DatetimeIndex objects. May be much faster
762-
than Index.intersection
763-
764-
Parameters
765-
----------
766-
other : DatetimeIndex or array-like
767-
768-
Returns
769-
-------
770-
y : Index or DatetimeIndex
771-
"""
772-
self._assert_can_do_setop(other)
773-
774-
if self.equals(other):
775-
return self._get_reconciled_name_object(other)
776-
777-
if not isinstance(other, DatetimeIndex):
778-
try:
779-
other = DatetimeIndex(other)
780-
except (TypeError, ValueError):
781-
pass
782-
result = Index.intersection(self, other)
783-
if isinstance(result, DatetimeIndex):
784-
if result.freq is None:
785-
result.freq = to_offset(result.inferred_freq)
786-
return result
787-
788-
elif (other.freq is None or self.freq is None or
789-
other.freq != self.freq or
790-
not other.freq.isAnchored() or
791-
(not self.is_monotonic or not other.is_monotonic)):
792-
result = Index.intersection(self, other)
793-
result = self._shallow_copy(result._values, name=result.name,
794-
tz=result.tz, freq=None)
795-
if result.freq is None:
796-
result.freq = to_offset(result.inferred_freq)
797-
return result
798-
799-
if len(self) == 0:
800-
return self
801-
if len(other) == 0:
802-
return other
803-
# to make our life easier, "sort" the two ranges
804-
if self[0] <= other[0]:
805-
left, right = self, other
806-
else:
807-
left, right = other, self
808-
809-
end = min(left[-1], right[-1])
810-
start = right[0]
811-
812-
if end < start:
813-
return type(self)(data=[])
814-
else:
815-
lslice = slice(*left.slice_locs(start, end))
816-
left_chunk = left.values[lslice]
817-
return self._shallow_copy(left_chunk)
818-
819759
def _parsed_string_to_bounds(self, reso, parsed):
820760
"""
821761
Calculate datetime bounds for parsed time string and its resolution.

pandas/core/indexes/timedeltas.py

Lines changed: 3 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -378,51 +378,9 @@ def _fast_union(self, other):
378378
else:
379379
return left
380380

381-
def intersection(self, other):
382-
"""
383-
Specialized intersection for TimedeltaIndex objects. May be much faster
384-
than Index.intersection
385-
386-
Parameters
387-
----------
388-
other : TimedeltaIndex or array-like
389-
390-
Returns
391-
-------
392-
y : Index or TimedeltaIndex
393-
"""
394-
self._assert_can_do_setop(other)
395-
396-
if self.equals(other):
397-
return self._get_reconciled_name_object(other)
398-
399-
if not isinstance(other, TimedeltaIndex):
400-
try:
401-
other = TimedeltaIndex(other)
402-
except (TypeError, ValueError):
403-
pass
404-
result = Index.intersection(self, other)
405-
return result
406-
407-
if len(self) == 0:
408-
return self
409-
if len(other) == 0:
410-
return other
411-
# to make our life easier, "sort" the two ranges
412-
if self[0] <= other[0]:
413-
left, right = self, other
414-
else:
415-
left, right = other, self
416-
417-
end = min(left[-1], right[-1])
418-
start = right[0]
419-
420-
if end < start:
421-
return type(self)(data=[])
422-
else:
423-
lslice = slice(*left.slice_locs(start, end))
424-
left_chunk = left.values[lslice]
425-
return self._shallow_copy(left_chunk)
381+
def _wrap_union_result(self, other, result):
    """
    Build a new index from raw union values.

    The name is kept only when ``self`` and ``other`` agree on it, and
    the frequency is always reset to None.
    """
    if self.name == other.name:
        shared_name = self.name
    else:
        shared_name = None
    return self._simple_new(result, name=shared_name, freq=None)
426384

427385
def _maybe_promote(self, other):
428386
if other.inferred_type == 'timedelta':

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas.util.testing as tm
@@ -73,3 +74,97 @@ def test_intersection_bug_1708(self):
7374
result = index_1 & index_2
7475
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7576
tm.assert_index_equal(result, expected)
77+
78+
79+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(2, 6), unit='s'),
     pd.to_timedelta(range(3), unit='s'),
     TimedeltaIndex(['00:00:02'])),
    (pd.to_timedelta(range(3), unit='s'),
     pd.to_timedelta(range(2, 6), unit='s'),
     TimedeltaIndex(['00:00:02'])),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    # GH 17391
    # the two ascending ranges share only the 2 second mark, whichever
    # side the call is made from
    result = idx1.intersection(idx2)
    assert result.equals(expected)
90+
91+
92+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(6, 3, -1), unit='s'),
     pd.to_timedelta(range(5, 1, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
    (pd.to_timedelta(range(5, 1, -1), unit='s'),
     pd.to_timedelta(range(6, 3, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    # GH 17391
    # overlapping descending ranges, checked from both call sides
    overlap = idx1.intersection(idx2)
    assert overlap.equals(expected)
104+
105+
106+
def test_intersection_intersects_descending_no_intersect():
    # two descending ranges with no value in common: 6..5 and 4..2 seconds
    upper = pd.to_timedelta(range(6, 4, -1), unit='s')
    lower = pd.to_timedelta(range(4, 1, -1), unit='s')
    overlap = upper.intersection(lower)
    assert len(overlap) == 0
111+
112+
113+
def test_intersection_intersects_len_1():
    # single-element indexes, one built from an ascending range and one
    # from a descending range
    from_ascending = pd.to_timedelta(range(1, 2), unit='s')
    from_descending = pd.to_timedelta(range(1, 0, -1), unit='s')
    expected = TimedeltaIndex(['00:00:01'],
                              dtype='timedelta64[ns]')
    result = from_ascending.intersection(from_descending)
    tm.assert_index_equal(result, expected)
120+
121+
122+
def test_intersection_can_intersect_self():
    # intersecting an index with itself must give back an equal index
    idx = pd.to_timedelta(range(1, 2), unit='s')
    tm.assert_index_equal(idx, idx.intersection(idx))
126+
127+
128+
def test_intersection_not_sorted():
    # same values in two different unsorted orders; the result is expected
    # to follow the order of the calling index
    caller = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    argument = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    tm.assert_index_equal(caller.intersection(argument), caller)
134+
135+
136+
def test_intersection_not_unique():
    # one operand contains duplicates; both call directions are expected
    # to give the same result
    with_dups = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    unique = pd.to_timedelta((1, 2, 3, 4), unit='s')
    expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')

    for left, right in ((with_dups, unique), (unique, with_dups)):
        tm.assert_index_equal(left.intersection(right), expected)
146+
147+
148+
@pytest.mark.parametrize("index1, index2, expected", [
    (pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s')),
    (pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s')),
    (pd.to_timedelta((2, 4, 5, 6), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 4), unit='s')),
])
def test_intersection_different_lengths(index1, index2, expected):
    # each case is checked as given and sorted descending, and in both
    # call directions
    variants = [
        (index1, index2, expected),
        (index1.sort_values(ascending=False),
         index2.sort_values(ascending=False),
         expected.sort_values(ascending=False)),
    ]
    for left, right, wanted in variants:
        tm.assert_index_equal(left.intersection(right), wanted)
        tm.assert_index_equal(right.intersection(left), wanted)

pandas/tseries/offsets.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,41 @@ def wrapper(self, other):
112112
return wrapper
113113

114114

115+
def apply_index_wraps(func):
    """
    Decorate ``func(self, other)`` so its result is normalized on request.

    When ``self.normalize`` is truthy the wrapped result is converted with
    ``to_period('D').to_timestamp()``; otherwise it is returned untouched.
    """
    @functools.wraps(func)
    def wrapper(self, other):
        outcome = func(self, other)
        if not self.normalize:
            return outcome
        return outcome.to_period('D').to_timestamp()
    return wrapper
123+
124+
125+
def _is_normalized(dt):
    """
    Return True when ``dt`` has no sub-day component.

    Hour, minute, second and microsecond must all be zero, as must
    ``nanosecond`` when the object has that attribute.
    """
    for field in ('hour', 'minute', 'second', 'microsecond'):
        if getattr(dt, field) != 0:
            return False
    # plain datetimes have no ``nanosecond``; treat a missing one as zero
    if getattr(dt, 'nanosecond', 0) != 0:
        return False
    return True
130+
131+
132+
def index_offsets_equal(first, second):
    """
    Check whether two indexes both carry an offset and the offsets match.

    Parameters
    ----------
    first : Index
    second : Index

    Returns
    -------
    bool
        True only when both objects have a non-None ``freq`` and the two
        compare equal.
    """
    first_freq = getattr(first, 'freq', None)
    second_freq = getattr(second, 'freq', None)

    # a missing offset on either side can never count as a match
    if first_freq is None or second_freq is None:
        return False
    if first_freq != second_freq:
        return False
    return True
115150
# ---------------------------------------------------------------------
116151
# DateOffset
117152

0 commit comments

Comments
 (0)