Skip to content

Commit ed5c821

Browse files
committed
BUG: TimedeltaIndex.intersection
Fixes pandas-dev#17391
1 parent 0828c25 commit ed5c821

File tree

6 files changed

+225
-98
lines changed

6 files changed

+225
-98
lines changed

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,6 +1407,12 @@ def is_monotonic(self):
14071407
""" alias for is_monotonic_increasing (deprecated) """
14081408
return self.is_monotonic_increasing
14091409

1410+
@property
def _is_strictly_monotonic(self):
    """
    Whether the index is strictly monotonic in either direction.

    Evaluates ``_is_strictly_monotonic_increasing`` first and returns it
    when truthy; otherwise returns ``_is_strictly_monotonic_decreasing``.
    """
    increasing = self._is_strictly_monotonic_increasing
    if increasing:
        return increasing
    return self._is_strictly_monotonic_decreasing
1415+
14101416
@property
14111417
def is_monotonic_increasing(self):
14121418
"""

pandas/core/indexes/datetimelike.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
4848
from pandas.core.indexes.base import Index, _index_shared_docs
49+
from pandas.tseries.offsets import index_offsets_equal
4950
from pandas.util._decorators import Appender, cache_readonly
5051
import pandas.core.dtypes.concat as _concat
5152
import pandas.tseries.frequencies as frequencies
@@ -878,6 +879,94 @@ def astype(self, dtype, copy=True):
878879
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
879880
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
880881

882+
def _intersect_ascending(self, other):
    """
    Slice out the overlapping range of two ascending indexes.

    The index whose first element is smaller (``self`` on ties) is sliced
    between the other index's first element and the smaller of the two
    last elements.

    Parameters
    ----------
    other : Index
        Expected to be sorted ascending, like ``self``.

    Returns
    -------
    list or slice of ``.values``
        An empty list when the ranges do not overlap, otherwise the
        matching slice of the lower-starting index's ``values``.
    """
    # order the pair by first element so ``lower`` starts first
    lower, upper = (self, other) if self[0] <= other[0] else (other, self)

    last = min(lower[-1], upper[-1])
    first = upper[0]

    if last < first:
        # the ranges are disjoint
        return []

    lo, hi = lower.slice_locs(first, last)
    return lower.values[lo:hi]
895+
896+
def _intersect_descending(self, other):
    """
    Slice out the overlapping range of two descending indexes.

    Mirror image of the ascending case: the index whose first element is
    larger (``self`` on ties) is sliced between the other index's first
    element and the larger of the two last elements.

    Parameters
    ----------
    other : Index
        Expected to be sorted descending, like ``self``.

    Returns
    -------
    slice of ``.values``
        The matching slice of the higher-starting index's ``values``;
        an empty slice of the same array when the ranges do not overlap.
    """
    # "sort" the two ranges so ``left`` is the one that starts highest
    if self[0] >= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # the overlap runs from the lower of the two first elements
    # (``right[0]``) down to the higher of the two last elements
    start = right[0]
    end = max(left[-1], right[-1])

    if end > start:
        # ranges are disjoint; return an empty slice so the result has
        # the same type as the non-empty case
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
909+
910+
def intersection(self, other):
    """
    Specialized intersection for DateTimeIndexOpsMixin objects.

    May be much faster than Index.intersection: when both indexes share
    the same ``freq`` and are strictly monotonic, the result is taken as
    a slice of one of them instead of doing a generic set intersection.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    # Anything that is not an Index (list, ndarray, ...) goes straight to
    # the generic implementation. This is checked before the length
    # shortcuts below so an empty list is never handed back as-is.
    if not isinstance(other, Index):
        return Index.intersection(self, other)

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    # The slicing fast path is only taken when both indexes carry the
    # same freq and are strictly monotonic; everything else takes the
    # generic path.
    # NOTE(review): the removed DatetimeIndex.intersection additionally
    # required ``freq.isAnchored()`` -- confirm whether that is needed.
    if (not index_offsets_equal(self, other) or
            not self._is_strictly_monotonic or
            not other._is_strictly_monotonic):
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=getattr(self, 'tz', None),
                                    freq=None)
        if result.freq is None:
            result.freq = frequencies.to_offset(result.inferred_freq)
        return result

    # handle intersecting things like this
    # idx1 = pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s')
    # idx2 = pd.to_timedelta((2, 3, 4, 8), unit='s')
    if len(self) != len(other) and (
            max(self) != max(other) or min(self) != min(other)):
        return Index.intersection(self, other)

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)
969+
881970

882971
def _ensure_datetimelike_to_i8(other):
883972
""" helper for coercing an input scalar or array to i8 """

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,62 +1090,6 @@ def _wrap_union_result(self, other, result):
10901090
raise ValueError('Passed item and index have different timezone')
10911091
return self._simple_new(result, name=name, freq=None, tz=self.tz)
10921092

1093-
def intersection(self, other):
1094-
"""
1095-
Specialized intersection for DatetimeIndex objects. May be much faster
1096-
than Index.intersection
1097-
1098-
Parameters
1099-
----------
1100-
other : DatetimeIndex or array-like
1101-
1102-
Returns
1103-
-------
1104-
y : Index or DatetimeIndex
1105-
"""
1106-
self._assert_can_do_setop(other)
1107-
if not isinstance(other, DatetimeIndex):
1108-
try:
1109-
other = DatetimeIndex(other)
1110-
except (TypeError, ValueError):
1111-
pass
1112-
result = Index.intersection(self, other)
1113-
if isinstance(result, DatetimeIndex):
1114-
if result.freq is None:
1115-
result.freq = to_offset(result.inferred_freq)
1116-
return result
1117-
1118-
elif (other.freq is None or self.freq is None or
1119-
other.freq != self.freq or
1120-
not other.freq.isAnchored() or
1121-
(not self.is_monotonic or not other.is_monotonic)):
1122-
result = Index.intersection(self, other)
1123-
result = self._shallow_copy(result._values, name=result.name,
1124-
tz=result.tz, freq=None)
1125-
if result.freq is None:
1126-
result.freq = to_offset(result.inferred_freq)
1127-
return result
1128-
1129-
if len(self) == 0:
1130-
return self
1131-
if len(other) == 0:
1132-
return other
1133-
# to make our life easier, "sort" the two ranges
1134-
if self[0] <= other[0]:
1135-
left, right = self, other
1136-
else:
1137-
left, right = other, self
1138-
1139-
end = min(left[-1], right[-1])
1140-
start = right[0]
1141-
1142-
if end < start:
1143-
return type(self)(data=[])
1144-
else:
1145-
lslice = slice(*left.slice_locs(start, end))
1146-
left_chunk = left.values[lslice]
1147-
return self._shallow_copy(left_chunk)
1148-
11491093
def _parsed_string_to_bounds(self, reso, parsed):
11501094
"""
11511095
Calculate datetime bounds for parsed time string and its resolution.

pandas/core/indexes/timedeltas.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -433,48 +433,6 @@ def _wrap_union_result(self, other, result):
433433
name = self.name if self.name == other.name else None
434434
return self._simple_new(result, name=name, freq=None)
435435

436-
def intersection(self, other):
437-
"""
438-
Specialized intersection for TimedeltaIndex objects. May be much faster
439-
than Index.intersection
440-
441-
Parameters
442-
----------
443-
other : TimedeltaIndex or array-like
444-
445-
Returns
446-
-------
447-
y : Index or TimedeltaIndex
448-
"""
449-
self._assert_can_do_setop(other)
450-
if not isinstance(other, TimedeltaIndex):
451-
try:
452-
other = TimedeltaIndex(other)
453-
except (TypeError, ValueError):
454-
pass
455-
result = Index.intersection(self, other)
456-
return result
457-
458-
if len(self) == 0:
459-
return self
460-
if len(other) == 0:
461-
return other
462-
# to make our life easier, "sort" the two ranges
463-
if self[0] <= other[0]:
464-
left, right = self, other
465-
else:
466-
left, right = other, self
467-
468-
end = min(left[-1], right[-1])
469-
start = right[0]
470-
471-
if end < start:
472-
return type(self)(data=[])
473-
else:
474-
lslice = slice(*left.slice_locs(start, end))
475-
left_chunk = left.values[lslice]
476-
return self._shallow_copy(left_chunk)
477-
478436
def _maybe_promote(self, other):
479437
if other.inferred_type == 'timedelta':
480438
other = TimedeltaIndex(other)

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas.util.testing as tm
@@ -73,3 +74,97 @@ def test_intersection_bug_1708(self):
7374
result = index_1 & index_2
7475
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7576
tm.assert_index_equal(result, expected)
77+
78+
79+
# NOTE(review): '00:00:002' looks like a typo for '00:00:02' -- confirm.
@pytest.mark.parametrize('idx1,idx2,expected', [
    (
        pd.to_timedelta(range(2, 6), unit='s'),
        pd.to_timedelta(range(3), unit='s'),
        TimedeltaIndex(['00:00:002']),
    ),
    (
        pd.to_timedelta(range(3), unit='s'),
        pd.to_timedelta(range(2, 6), unit='s'),
        TimedeltaIndex(['00:00:002']),
    ),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    """Ascending indexes sharing one element intersect either way round."""
    assert idx1.intersection(idx2).equals(expected)
90+
91+
92+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (
        pd.to_timedelta(range(6, 3, -1), unit='s'),
        pd.to_timedelta(range(5, 1, -1), unit='s'),
        TimedeltaIndex(['00:00:05', '00:00:04']),
    ),
    (
        pd.to_timedelta(range(5, 1, -1), unit='s'),
        pd.to_timedelta(range(6, 3, -1), unit='s'),
        TimedeltaIndex(['00:00:05', '00:00:04']),
    ),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    """Descending indexes intersect either way round (GH 17391)."""
    # GH 17391
    assert idx1.intersection(idx2).equals(expected)
104+
105+
106+
def test_intersection_intersects_descending_no_intersect():
    """Descending indexes with no common element give an empty result."""
    upper = pd.to_timedelta(range(6, 4, -1), unit='s')
    lower = pd.to_timedelta(range(4, 1, -1), unit='s')
    assert len(upper.intersection(lower)) == 0
111+
112+
113+
def test_intersection_intersects_len_1():
    """Two single-element indexes built from opposite ranges intersect."""
    forward = pd.to_timedelta(range(1, 2), unit='s')
    backward = pd.to_timedelta(range(1, 0, -1), unit='s')
    wanted = TimedeltaIndex(['00:00:01'], dtype='timedelta64[ns]')
    tm.assert_index_equal(forward.intersection(backward), wanted)
120+
121+
122+
def test_intersection_can_intersect_self():
    """Intersecting an index with itself returns an equal index."""
    idx = pd.to_timedelta(range(1, 2), unit='s')
    tm.assert_index_equal(idx, idx.intersection(idx))
126+
127+
128+
def test_intersection_not_sorted():
    """Unsorted inputs with the same elements keep the caller's order."""
    shuffled = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    other = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    # the expected result is the first index itself
    tm.assert_index_equal(shuffled.intersection(other), shuffled)
134+
135+
136+
def test_intersection_not_unique():
    """Duplicated elements are checked in both call directions."""
    with_dupes = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    unique = pd.to_timedelta((1, 2, 3, 4), unit='s')

    forward = with_dupes.intersection(unique)
    wanted = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
    tm.assert_index_equal(forward, wanted)

    backward = unique.intersection(with_dupes)
    wanted = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
    tm.assert_index_equal(backward, wanted)
146+
147+
148+
@pytest.mark.parametrize("index1, index2, expected", [
    (
        pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
        pd.to_timedelta((2, 3, 4, 8), unit='s'),
        pd.to_timedelta((2, 3, 4, 8), unit='s'),
    ),
    (
        pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
        pd.to_timedelta((2, 3, 4), unit='s'),
        pd.to_timedelta((2, 3, 4), unit='s'),
    ),
    (
        pd.to_timedelta((2, 4, 5, 6), unit='s'),
        pd.to_timedelta((2, 3, 4), unit='s'),
        pd.to_timedelta((2, 4), unit='s'),
    ),
])
def test_intersection_different_lengths(index1, index2, expected):
    """Indexes of different lengths intersect symmetrically, both orders."""

    def check_both_ways(idx1, idx2, wanted):
        # the operation is checked with each index as the caller
        tm.assert_index_equal(idx1.intersection(idx2), wanted)
        tm.assert_index_equal(idx2.intersection(idx1), wanted)

    check_both_ways(index1, index2, expected)

    # same data, sorted descending
    check_both_ways(
        index1.sort_values(ascending=False),
        index2.sort_values(ascending=False),
        expected.sort_values(ascending=False),
    )

pandas/tseries/offsets.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,41 @@ def wrapper(self, other):
112112
return wrapper
113113

114114

115+
def apply_index_wraps(func):
    """
    Decorate a ``func(self, other)`` method with optional normalization.

    The wrapped call returns ``func``'s result unchanged unless
    ``self.normalize`` is truthy, in which case the result is passed
    through ``to_period('D').to_timestamp()`` first.
    """
    def wrapper(self, other):
        result = func(self, other)
        if not self.normalize:
            return result
        return result.to_period('D').to_timestamp()

    # copy ``func``'s metadata (name, docstring, ...) onto the wrapper
    return functools.wraps(func)(wrapper)
123+
124+
125+
def _is_normalized(dt):
    """
    Return True when ``dt`` has no time-of-day component.

    ``hour``, ``minute``, ``second`` and ``microsecond`` must all be zero;
    ``nanosecond`` is checked too when the object has that attribute.
    """
    for field in ('hour', 'minute', 'second', 'microsecond'):
        if getattr(dt, field) != 0:
            return False
    # plain datetimes have no nanosecond attribute; treat it as zero
    return not (getattr(dt, 'nanosecond', 0) != 0)
130+
131+
132+
def index_offsets_equal(first, second):
    """
    Check that both indexes have a ``freq`` and that the two are equal.

    Parameters
    ----------
    first : Index
    second : Index

    Returns
    -------
    bool
        False when either object has no ``freq`` attribute, when either
        ``freq`` is None, or when the two compare unequal; True otherwise.
    """
    first_freq = getattr(first, 'freq', None)
    second_freq = getattr(second, 'freq', None)

    if first_freq is None or second_freq is None:
        return False
    return not (first_freq != second_freq)
115150
# ---------------------------------------------------------------------
116151
# DateOffset
117152

0 commit comments

Comments
 (0)