Skip to content

Commit 407bf12

Browse files
committed
BUG: TimedeltaIndex.intersection
Fixes pandas-dev#17391
1 parent 365eac4 commit 407bf12

File tree

6 files changed

+225
-98
lines changed

6 files changed

+225
-98
lines changed

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,12 @@ def is_monotonic(self):
13921392
""" alias for is_monotonic_increasing (deprecated) """
13931393
return self.is_monotonic_increasing
13941394

1395+
@property
def _is_strictly_monotonic(self):
    """
    Whether the index is strictly monotonic in either direction.

    Truthy when the index is strictly increasing or, failing that,
    strictly decreasing.
    """
    increasing = self._is_strictly_monotonic_increasing
    if increasing:
        return increasing
    return self._is_strictly_monotonic_decreasing
1400+
13951401
@property
13961402
def is_monotonic_increasing(self):
13971403
"""

pandas/core/indexes/datetimelike.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
4848
from pandas.core.indexes.base import Index, _index_shared_docs
49+
from pandas.tseries.offsets import index_offsets_equal
4950
from pandas.util._decorators import Appender, cache_readonly
5051
import pandas.core.dtypes.concat as _concat
5152
import pandas.tseries.frequencies as frequencies
@@ -902,6 +903,94 @@ def astype(self, dtype, copy=True):
902903
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
903904
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
904905

906+
def _intersect_ascending(self, other):
    """
    Slice the overlapping stretch out of two ascending indexes.

    Parameters
    ----------
    other : Index
        Index ordered the same way as ``self``.

    Returns
    -------
    list or slice of ``.values``
        An empty list when the two ranges do not overlap, otherwise the
        values of the earlier-starting index that lie inside the overlap.
    """
    # order the pair so that ``first`` is the one that starts earliest
    first, second = (self, other) if self[0] <= other[0] else (other, self)

    upper = min(first[-1], second[-1])
    lower = second[0]

    if upper < lower:
        return []
    lo, hi = first.slice_locs(lower, upper)
    return first.values[lo:hi]
919+
920+
def _intersect_descending(self, other):
    """
    Slice the overlapping stretch out of two descending indexes.

    This is the mirror image of ``_intersect_ascending``.

    Parameters
    ----------
    other : Index
        Index ordered the same way as ``self`` (descending).

    Returns
    -------
    slice of ``.values``
        The values of the index that starts at the larger value which lie
        inside the overlap; an empty slice when the ranges are disjoint.
    """
    # "sort" the two ranges so that ``left`` starts at the larger value
    if self[0] >= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # the overlap runs from the smaller of the two first elements
    # (``right[0]`` after the swap above) down to the larger of the two
    # last elements
    start = right[0]
    end = max(left[-1], right[-1])

    if end > start:
        # disjoint ranges: hand back an empty slice of the same kind as
        # the non-empty result instead of calling ``Index()`` with no data
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
933+
934+
def intersection(self, other):
    """
    Specialized intersection for DatetimeIndexOpsMixin objects.

    May be much faster than Index.intersection.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    # an empty operand is handed back unchanged
    lengths = len(self), len(other)
    if lengths[0] == 0:
        return self
    if lengths[1] == 0:
        return other

    if not isinstance(other, Index):
        # plain array-likes go through the generic implementation
        return Index.intersection(self, other)

    # NOTE(review): the generic path is taken when the two frequencies are
    # *equal*; confirm that this polarity (rather than "missing or
    # different") is what is intended.
    if (index_offsets_equal(self, other) or
            not self._is_strictly_monotonic or
            not other._is_strictly_monotonic):
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=getattr(self, 'tz', None),
                                    freq=None)
        if result.freq is None:
            # assign through ``freq``, the attribute tested just above
            result.freq = frequencies.to_offset(result.inferred_freq)
        return result

    # handle intersecting things like this
    # idx1 = pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s')
    # idx2 = pd.to_timedelta((2, 3, 4, 8), unit='s')
    if lengths[0] != lengths[1] and (
            self.max() != other.max() or self.min() != other.min()):
        return Index.intersection(self, other)

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)
993+
905994

906995
def _ensure_datetimelike_to_i8(other):
907996
""" helper for coercing an input scalar or array to i8 """

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,62 +1169,6 @@ def _wrap_union_result(self, other, result):
11691169
raise ValueError('Passed item and index have different timezone')
11701170
return self._simple_new(result, name=name, freq=None, tz=self.tz)
11711171

1172-
def intersection(self, other):
1173-
"""
1174-
Specialized intersection for DatetimeIndex objects. May be much faster
1175-
than Index.intersection
1176-
1177-
Parameters
1178-
----------
1179-
other : DatetimeIndex or array-like
1180-
1181-
Returns
1182-
-------
1183-
y : Index or DatetimeIndex
1184-
"""
1185-
self._assert_can_do_setop(other)
1186-
if not isinstance(other, DatetimeIndex):
1187-
try:
1188-
other = DatetimeIndex(other)
1189-
except (TypeError, ValueError):
1190-
pass
1191-
result = Index.intersection(self, other)
1192-
if isinstance(result, DatetimeIndex):
1193-
if result.freq is None:
1194-
result.freq = to_offset(result.inferred_freq)
1195-
return result
1196-
1197-
elif (other.freq is None or self.freq is None or
1198-
other.freq != self.freq or
1199-
not other.freq.isAnchored() or
1200-
(not self.is_monotonic or not other.is_monotonic)):
1201-
result = Index.intersection(self, other)
1202-
result = self._shallow_copy(result._values, name=result.name,
1203-
tz=result.tz, freq=None)
1204-
if result.freq is None:
1205-
result.freq = to_offset(result.inferred_freq)
1206-
return result
1207-
1208-
if len(self) == 0:
1209-
return self
1210-
if len(other) == 0:
1211-
return other
1212-
# to make our life easier, "sort" the two ranges
1213-
if self[0] <= other[0]:
1214-
left, right = self, other
1215-
else:
1216-
left, right = other, self
1217-
1218-
end = min(left[-1], right[-1])
1219-
start = right[0]
1220-
1221-
if end < start:
1222-
return type(self)(data=[])
1223-
else:
1224-
lslice = slice(*left.slice_locs(start, end))
1225-
left_chunk = left.values[lslice]
1226-
return self._shallow_copy(left_chunk)
1227-
12281172
def _parsed_string_to_bounds(self, reso, parsed):
12291173
"""
12301174
Calculate datetime bounds for parsed time string and its resolution.

pandas/core/indexes/timedeltas.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -475,48 +475,6 @@ def _wrap_union_result(self, other, result):
475475
name = self.name if self.name == other.name else None
476476
return self._simple_new(result, name=name, freq=None)
477477

478-
def intersection(self, other):
479-
"""
480-
Specialized intersection for TimedeltaIndex objects. May be much faster
481-
than Index.intersection
482-
483-
Parameters
484-
----------
485-
other : TimedeltaIndex or array-like
486-
487-
Returns
488-
-------
489-
y : Index or TimedeltaIndex
490-
"""
491-
self._assert_can_do_setop(other)
492-
if not isinstance(other, TimedeltaIndex):
493-
try:
494-
other = TimedeltaIndex(other)
495-
except (TypeError, ValueError):
496-
pass
497-
result = Index.intersection(self, other)
498-
return result
499-
500-
if len(self) == 0:
501-
return self
502-
if len(other) == 0:
503-
return other
504-
# to make our life easier, "sort" the two ranges
505-
if self[0] <= other[0]:
506-
left, right = self, other
507-
else:
508-
left, right = other, self
509-
510-
end = min(left[-1], right[-1])
511-
start = right[0]
512-
513-
if end < start:
514-
return type(self)(data=[])
515-
else:
516-
lslice = slice(*left.slice_locs(start, end))
517-
left_chunk = left.values[lslice]
518-
return self._shallow_copy(left_chunk)
519-
520478
def _maybe_promote(self, other):
521479
if other.inferred_type == 'timedelta':
522480
other = TimedeltaIndex(other)

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas.util.testing as tm
@@ -73,3 +74,97 @@ def test_intersection_bug_1708(self):
7374
result = index_1 & index_2
7475
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7576
tm.assert_index_equal(result, expected)
77+
78+
79+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(2, 6), unit='s'),
     pd.to_timedelta(range(3), unit='s'),
     TimedeltaIndex(['00:00:02'])),
    (pd.to_timedelta(range(3), unit='s'),
     pd.to_timedelta(range(2, 6), unit='s'),
     TimedeltaIndex(['00:00:02'])),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    # two ascending ranges that share only the 2 second mark, tried with
    # either one as the caller
    result = idx1.intersection(idx2)
    assert result.equals(expected)
90+
91+
92+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(6, 3, -1), unit='s'),
     pd.to_timedelta(range(5, 1, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
    (pd.to_timedelta(range(5, 1, -1), unit='s'),
     pd.to_timedelta(range(6, 3, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    # GH 17391
    # descending ranges overlapping on 5s and 4s, in both call orders
    overlap = idx1.intersection(idx2)
    assert overlap.equals(expected)
104+
105+
106+
def test_intersection_intersects_descending_no_intersect():
    # two descending ranges with no value in common
    upper = pd.to_timedelta(range(6, 4, -1), unit='s')
    lower = pd.to_timedelta(range(4, 1, -1), unit='s')
    common = upper.intersection(lower)
    assert len(common) == 0
111+
112+
113+
def test_intersection_intersects_len_1():
    # single-element indexes, one built from an ascending range and one
    # from a descending range
    from_ascending = pd.to_timedelta(range(1, 2), unit='s')
    from_descending = pd.to_timedelta(range(1, 0, -1), unit='s')
    expected = TimedeltaIndex(['00:00:01'], dtype='timedelta64[ns]')
    result = from_ascending.intersection(from_descending)
    tm.assert_index_equal(result, expected)
120+
121+
122+
def test_intersection_can_intersect_self():
    # intersecting an index with itself gives the index back
    idx = pd.to_timedelta(range(1, 2), unit='s')
    tm.assert_index_equal(idx, idx.intersection(idx))
126+
127+
128+
def test_intersection_not_sorted():
    # same values, both unsorted but in different orders; the result is
    # expected to follow the calling index
    caller = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    argument = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    tm.assert_index_equal(caller.intersection(argument), caller)
134+
135+
136+
def test_intersection_not_unique():
    # one operand carries duplicates; the same outcome is asserted for
    # both call orders
    idx1 = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    idx2 = pd.to_timedelta((1, 2, 3, 4), unit='s')

    for caller, argument in ((idx1, idx2), (idx2, idx1)):
        result = caller.intersection(argument)
        expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
        tm.assert_index_equal(result, expected)
146+
147+
148+
@pytest.mark.parametrize("index1, index2, expected", [
    (pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s')),
    (pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s')),
    (pd.to_timedelta((2, 4, 5, 6), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 4), unit='s')),
])
def test_intersection_different_lengths(index1, index2, expected):
    def check_both_ways(left, right, wanted):
        # the intersection must not depend on which index is the caller
        tm.assert_index_equal(left.intersection(right), wanted)
        tm.assert_index_equal(right.intersection(left), wanted)

    check_both_ways(index1, index2, expected)

    # same data again, with every index sorted in descending order
    descending = [idx.sort_values(ascending=False)
                  for idx in (index1, index2, expected)]
    check_both_ways(*descending)

pandas/tseries/offsets.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,41 @@ def wrapper(self, other):
112112
return wrapper
113113

114114

115+
def apply_index_wraps(func):
    """
    Decorate ``func(self, other)`` so its result honours ``self.normalize``.

    When ``self.normalize`` is truthy the value returned by ``func`` is
    passed through ``to_period('D').to_timestamp()`` before being handed
    back; otherwise it is returned as is.
    """
    @functools.wraps(func)
    def wrapper(self, other):
        outcome = func(self, other)
        if not self.normalize:
            return outcome
        return outcome.to_period('D').to_timestamp()
    return wrapper
123+
124+
125+
def _is_normalized(dt):
    """
    Report whether ``dt`` has no time-of-day component.

    Returns True only when hour, minute, second, microsecond and (if the
    object has one) nanosecond are all zero.
    """
    for field in ('hour', 'minute', 'second', 'microsecond'):
        if getattr(dt, field) != 0:
            return False
    # plain datetimes have no nanosecond attribute; treat it as zero
    if getattr(dt, 'nanosecond', 0) != 0:
        return False
    return True
130+
131+
132+
def index_offsets_equal(first, second):
    """
    Check whether two indexes both carry an offset and those offsets agree.

    Parameters
    ----------
    first : Index
    second : Index

    Returns
    -------
    bool
        False when either object has no ``freq`` (or it is None) or the
        two ``freq`` values differ; True otherwise.
    """
    first_freq = getattr(first, 'freq', None)
    second_freq = getattr(second, 'freq', None)
    if first_freq is None or second_freq is None:
        return False
    if first_freq != second_freq:
        return False
    return True
115150
# ---------------------------------------------------------------------
116151
# DateOffset
117152

0 commit comments

Comments
 (0)