Skip to content

Commit 918276c

Browse files
committed
BUG: TimedeltaIndex.intersection
Fixes #17391
1 parent fa557f7 commit 918276c

File tree

6 files changed

+210
-98
lines changed

6 files changed

+210
-98
lines changed

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,6 +1200,12 @@ def is_monotonic(self):
12001200
""" alias for is_monotonic_increasing (deprecated) """
12011201
return self.is_monotonic_increasing
12021202

1203+
@property
def _is_strictly_monotonic(self):
    """
    Whether the index is strictly monotonic in either direction.

    Evaluates to ``self._is_strictly_monotonic_increasing`` when that is
    truthy, and to ``self._is_strictly_monotonic_decreasing`` otherwise.
    """
    increasing = self._is_strictly_monotonic_increasing
    return increasing or self._is_strictly_monotonic_decreasing
1208+
12031209
@property
12041210
def is_monotonic_increasing(self):
12051211
"""

pandas/core/indexes/datetimelike.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from pandas._libs.period import Period
2929

3030
from pandas.core.indexes.base import Index, _index_shared_docs
31+
from pandas.tseries.offsets import index_offsets_equal
3132
from pandas.util._decorators import Appender, cache_readonly
3233
import pandas.core.dtypes.concat as _concat
3334
import pandas.tseries.frequencies as frequencies
@@ -854,6 +855,94 @@ def _concat_same_dtype(self, to_concat, name):
854855
new_data = np.concatenate([c.asi8 for c in to_concat])
855856
return self._simple_new(new_data, **attribs)
856857

858+
def _intersect_ascending(self, other):
    """
    Slice out the overlapping range of two ascending indexes.

    Parameters
    ----------
    other : Index
        Sorted ascending, like ``self``. Both indexes must be non-empty,
        because their first and last elements are read.

    Returns
    -------
    numpy.ndarray
        The values of whichever index starts first, restricted to the
        range where the two indexes overlap. When the ranges do not
        overlap this is an empty slice of those values, so the dtype is
        the same as in the overlapping case.

    Notes
    -----
    Only the end points of the two indexes are compared. The result is
    therefore a true intersection only if both indexes hold the same
    elements inside the overlapping range.
    """
    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    start = right[0]
    end = min(left[-1], right[-1])

    if end < start:
        # an empty slice instead of a bare list keeps the return type
        # (and dtype) identical on every path
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
871+
872+
def _intersect_descending(self, other):
    """
    Slice out the overlapping range of two descending indexes.

    This is the mirror image of ``_intersect_ascending``.

    Parameters
    ----------
    other : Index
        Sorted descending, like ``self``. Both indexes must be non-empty,
        because their first and last elements are read.

    Returns
    -------
    numpy.ndarray
        The values of whichever index starts highest, restricted to the
        range where the two indexes overlap. When the ranges do not
        overlap this is an empty slice of those values.

    Notes
    -----
    Only the end points of the two indexes are compared. The result is
    therefore a true intersection only if both indexes hold the same
    elements inside the overlapping range.
    """
    if self[0] >= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # the overlap begins at the lower of the two first elements (which is
    # right[0] after the swap above) and ends at the higher of the two
    # last elements
    start = right[0]
    end = max(left[-1], right[-1])

    if end > start:
        # the ranges are disjoint; return an empty, correctly typed slice
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
885+
886+
def intersection(self, other):
    """
    Specialized intersection for DateTimeIndexOpsMixin objects.
    May be much faster than Index.intersection.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in

    Notes
    -----
    The fast path only compares the end points of the two indexes and
    slices one of them. It is taken only when both indexes are strictly
    monotonic and carry the same anchored offset. Everything else goes
    through the generic ``Index.intersection``.
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    # intersecting with an empty operand gives that empty operand back
    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    if not isinstance(other, Index):
        return Index.intersection(self, other)

    # Slicing by end points is only sound when both indexes sit on the
    # same anchored grid and are sorted without duplicates. Indexes with
    # no offset, or with different offsets, can differ inside the
    # overlapping range, e.g. (1, 2, 3, 5) vs (1, 3, 4, 5).
    # index_offsets_equal() is only True when self.offset is not None,
    # so the isAnchored() call is safe.
    can_slice = (index_offsets_equal(self, other) and
                 self.offset.isAnchored() and
                 self._is_strictly_monotonic and
                 other._is_strictly_monotonic)
    if not can_slice:
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=getattr(self, 'tz', None),
                                    freq=None)
        if result.freq is None:
            result.offset = frequencies.to_offset(result.inferred_freq)
        return result

    # conservative guard: differently sized indexes whose extremes do not
    # coincide still go through the generic implementation
    if len(self) != len(other) and (
            max(self) != max(other) or min(self) != min(other)):
        return Index.intersection(self, other)

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)
945+
857946

858947
def _ensure_datetimelike_to_i8(other):
859948
""" helper for coercing an input scalar or array to i8 """

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,62 +1189,6 @@ def _wrap_union_result(self, other, result):
11891189
raise ValueError('Passed item and index have different timezone')
11901190
return self._simple_new(result, name=name, freq=None, tz=self.tz)
11911191

1192-
def intersection(self, other):
1193-
"""
1194-
Specialized intersection for DatetimeIndex objects. May be much faster
1195-
than Index.intersection
1196-
1197-
Parameters
1198-
----------
1199-
other : DatetimeIndex or array-like
1200-
1201-
Returns
1202-
-------
1203-
y : Index or DatetimeIndex
1204-
"""
1205-
self._assert_can_do_setop(other)
1206-
if not isinstance(other, DatetimeIndex):
1207-
try:
1208-
other = DatetimeIndex(other)
1209-
except (TypeError, ValueError):
1210-
pass
1211-
result = Index.intersection(self, other)
1212-
if isinstance(result, DatetimeIndex):
1213-
if result.freq is None:
1214-
result.offset = to_offset(result.inferred_freq)
1215-
return result
1216-
1217-
elif (other.offset is None or self.offset is None or
1218-
other.offset != self.offset or
1219-
not other.offset.isAnchored() or
1220-
(not self.is_monotonic or not other.is_monotonic)):
1221-
result = Index.intersection(self, other)
1222-
result = self._shallow_copy(result._values, name=result.name,
1223-
tz=result.tz, freq=None)
1224-
if result.freq is None:
1225-
result.offset = to_offset(result.inferred_freq)
1226-
return result
1227-
1228-
if len(self) == 0:
1229-
return self
1230-
if len(other) == 0:
1231-
return other
1232-
# to make our life easier, "sort" the two ranges
1233-
if self[0] <= other[0]:
1234-
left, right = self, other
1235-
else:
1236-
left, right = other, self
1237-
1238-
end = min(left[-1], right[-1])
1239-
start = right[0]
1240-
1241-
if end < start:
1242-
return type(self)(data=[])
1243-
else:
1244-
lslice = slice(*left.slice_locs(start, end))
1245-
left_chunk = left.values[lslice]
1246-
return self._shallow_copy(left_chunk)
1247-
12481192
def _parsed_string_to_bounds(self, reso, parsed):
12491193
"""
12501194
Calculate datetime bounds for parsed time string and its resolution.

pandas/core/indexes/timedeltas.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -596,48 +596,6 @@ def _wrap_union_result(self, other, result):
596596
name = self.name if self.name == other.name else None
597597
return self._simple_new(result, name=name, freq=None)
598598

599-
def intersection(self, other):
600-
"""
601-
Specialized intersection for TimedeltaIndex objects. May be much faster
602-
than Index.intersection
603-
604-
Parameters
605-
----------
606-
other : TimedeltaIndex or array-like
607-
608-
Returns
609-
-------
610-
y : Index or TimedeltaIndex
611-
"""
612-
self._assert_can_do_setop(other)
613-
if not isinstance(other, TimedeltaIndex):
614-
try:
615-
other = TimedeltaIndex(other)
616-
except (TypeError, ValueError):
617-
pass
618-
result = Index.intersection(self, other)
619-
return result
620-
621-
if len(self) == 0:
622-
return self
623-
if len(other) == 0:
624-
return other
625-
# to make our life easier, "sort" the two ranges
626-
if self[0] <= other[0]:
627-
left, right = self, other
628-
else:
629-
left, right = other, self
630-
631-
end = min(left[-1], right[-1])
632-
start = right[0]
633-
634-
if end < start:
635-
return type(self)(data=[])
636-
else:
637-
lslice = slice(*left.slice_locs(start, end))
638-
left_chunk = left.values[lslice]
639-
return self._shallow_copy(left_chunk)
640-
641599
def _maybe_promote(self, other):
642600
if other.inferred_type == 'timedelta':
643601
other = TimedeltaIndex(other)

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas.util.testing as tm
@@ -74,3 +75,97 @@ def test_intersection_bug_1708(self):
7475
result = index_1 & index_2
7576
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7677
tm.assert_index_equal(result, expected)
78+
79+
80+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(2, 6), unit='s'),
     pd.to_timedelta(range(3), unit='s'),
     TimedeltaIndex(['00:00:002'])),
    (pd.to_timedelta(range(3), unit='s'),
     pd.to_timedelta(range(2, 6), unit='s'),
     TimedeltaIndex(['00:00:002'])),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    # two ascending ranges sharing a single element, in either call order
    actual = idx1.intersection(idx2)
    assert actual.equals(expected)
91+
92+
93+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(6, 3, -1), unit='s'),
     pd.to_timedelta(range(5, 1, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
    (pd.to_timedelta(range(5, 1, -1), unit='s'),
     pd.to_timedelta(range(6, 3, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    # GH 17391
    # two descending ranges sharing two elements, in either call order
    actual = idx1.intersection(idx2)
    assert actual.equals(expected)
105+
106+
107+
def test_intersection_intersects_descending_no_intersect():
    # descending ranges (6, 5) and (4, 3, 2) have nothing in common
    upper = pd.to_timedelta(range(6, 4, -1), unit='s')
    lower = pd.to_timedelta(range(4, 1, -1), unit='s')
    assert len(upper.intersection(lower)) == 0
112+
113+
114+
def test_intersection_intersects_len_1():
    # single-element indexes built from an ascending and a descending range
    ascending = pd.to_timedelta(range(1, 2), unit='s')
    descending = pd.to_timedelta(range(1, 0, -1), unit='s')
    actual = ascending.intersection(descending)
    wanted = TimedeltaIndex(['00:00:01'], dtype='timedelta64[ns]')
    tm.assert_index_equal(actual, wanted)
121+
122+
123+
def test_intersection_can_intersect_self():
    # an index intersected with itself comes back unchanged
    idx = pd.to_timedelta(range(1, 2), unit='s')
    tm.assert_index_equal(idx, idx.intersection(idx))
127+
128+
129+
def test_intersection_not_sorted():
    # same elements, both unsorted; the result is expected to equal the
    # calling index
    caller = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    argument = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    actual = caller.intersection(argument)
    tm.assert_index_equal(actual, caller)
135+
136+
137+
def test_intersection_not_unique():
    # the same result, duplicates included, is expected whichever side
    # the call is made from
    idx1 = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    idx2 = pd.to_timedelta((1, 2, 3, 4), unit='s')

    for caller, argument in ((idx1, idx2), (idx2, idx1)):
        result = caller.intersection(argument)
        expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
        tm.assert_index_equal(result, expected)
147+
148+
149+
@pytest.mark.parametrize("index1, index2, expected", [
    (pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s')),
    (pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s')),
    (pd.to_timedelta((2, 4, 5, 6), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 4), unit='s')),
])
def test_intersection_different_lengths(index1, index2, expected):
    def check_both_ways(left, right, wanted):
        # the intersection must not depend on which side makes the call
        tm.assert_index_equal(left.intersection(right), wanted)
        tm.assert_index_equal(right.intersection(left), wanted)

    # first as given (ascending), then with every index reversed
    check_both_ways(index1, index2, expected)
    check_both_ways(index1.sort_values(ascending=False),
                    index2.sort_values(ascending=False),
                    expected.sort_values(ascending=False))

pandas/tseries/offsets.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,26 @@ def _is_normalized(dt):
119119
return False
120120
return True
121121

122+
123+
def index_offsets_equal(first, second):
    """
    Checks if the two indexes have an offset, and if they equal each other

    Parameters
    ----------
    first: Index
    second: Index

    Returns
    -------
    bool
        True only when both objects expose a non-None ``offset``
        attribute and the two offsets do not compare unequal.
    """
    left_offset = getattr(first, 'offset', None)
    right_offset = getattr(second, 'offset', None)

    # a missing offset on either side can never count as a match
    if left_offset is None or right_offset is None:
        return False
    return not (left_offset != right_offset)
141+
122142
# ---------------------------------------------------------------------
123143
# DateOffset
124144

0 commit comments

Comments
 (0)