Skip to content

Commit 5af3c53

Browse files
committed
BUG: TimedeltaIndex.intersection
Fixes pandas-dev#17391
1 parent d30c4a0 commit 5af3c53

File tree

6 files changed

+227
-98
lines changed

6 files changed

+227
-98
lines changed

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,12 @@ def is_monotonic(self):
14121412
""" alias for is_monotonic_increasing (deprecated) """
14131413
return self.is_monotonic_increasing
14141414

1415+
@property
1416+
def _is_strictly_monotonic(self):
1417+
""" Checks if the index is sorted """
1418+
return (self._is_strictly_monotonic_increasing or
1419+
self._is_strictly_monotonic_decreasing)
1420+
14151421
@property
14161422
def is_monotonic_increasing(self):
14171423
"""

pandas/core/indexes/datetimelike.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@
3636

3737
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3838
from pandas.core.indexes.base import Index, _index_shared_docs
39+
from pandas.tseries.offsets import index_offsets_equal
40+
import pandas.tseries.frequencies as frequencies
3941
from pandas.util._decorators import Appender, cache_readonly
4042
import pandas.core.dtypes.concat as _concat
4143

44+
4245
import pandas.core.indexes.base as ibase
4346
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
4447

@@ -679,6 +682,94 @@ def astype(self, dtype, copy=True):
679682
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
680683
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
681684

685+
def _intersect_ascending(self, other):
686+
# to make our life easier, "sort" the two ranges
687+
if self[0] <= other[0]:
688+
left, right = self, other
689+
else:
690+
left, right = other, self
691+
692+
end = min(left[-1], right[-1])
693+
start = right[0]
694+
695+
if end < start:
696+
return []
697+
return left.values[slice(*left.slice_locs(start, end))]
698+
699+
def _intersect_descending(self, other):
700+
# this is essentially a flip of _intersect_ascending
701+
if self[0] >= other[0]:
702+
left, right = self, other
703+
else:
704+
left, right = other, self
705+
706+
start = min(left[0], right[0])
707+
end = right[-1]
708+
709+
if end > start:
710+
return Index()
711+
return left.values[slice(*left.slice_locs(start, end))]
712+
713+
def intersection(self, other):
    """
    Specialized intersection for DatetimeIndexOpsMixin objects.

    May be much faster than Index.intersection when both indexes are
    strictly monotonic and share the same frequency, because the result
    can then be obtained by slicing instead of element-wise matching.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    # Anything that is not already an Index goes through the generic
    # implementation.  This check runs before the emptiness shortcut so
    # that a raw empty list/array is never handed back to the caller.
    if not isinstance(other, Index):
        return Index.intersection(self, other)

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    # The slicing fast path is only meaningful when both indexes share a
    # frequency and are strictly monotonic; everything else falls back to
    # the generic element-wise intersection.
    if (not index_offsets_equal(self, other) or
            not self._is_strictly_monotonic or
            not other._is_strictly_monotonic):
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=getattr(self, 'tz', None),
                                    freq=None)
        if result.freq is None:
            result.freq = frequencies.to_offset(result.inferred_freq)
        return result

    # handle intersecting things like this
    # idx1 = pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s')
    # idx2 = pd.to_timedelta((2, 3, 4, 8), unit='s')
    if len(self) != len(other) and (
            self.max() != other.max() or self.min() != other.min()):
        return Index.intersection(self, other)

    # NOTE(review): the slicing below assumes both indexes sit on the same
    # grid.  Two equal-frequency indexes that are offset from each other
    # (e.g. 1s, 3s, 5s vs 2s, 4s, 6s) are not detected here -- confirm
    # whether an additional guard is wanted.

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)
772+
682773

683774
def _ensure_datetimelike_to_i8(other):
684775
""" helper for coercing an input scalar or array to i8 """

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -971,62 +971,6 @@ def _wrap_union_result(self, other, result):
971971
raise ValueError('Passed item and index have different timezone')
972972
return self._simple_new(result, name=name, freq=None, tz=self.tz)
973973

974-
def intersection(self, other):
975-
"""
976-
Specialized intersection for DatetimeIndex objects. May be much faster
977-
than Index.intersection
978-
979-
Parameters
980-
----------
981-
other : DatetimeIndex or array-like
982-
983-
Returns
984-
-------
985-
y : Index or DatetimeIndex
986-
"""
987-
self._assert_can_do_setop(other)
988-
if not isinstance(other, DatetimeIndex):
989-
try:
990-
other = DatetimeIndex(other)
991-
except (TypeError, ValueError):
992-
pass
993-
result = Index.intersection(self, other)
994-
if isinstance(result, DatetimeIndex):
995-
if result.freq is None:
996-
result.freq = to_offset(result.inferred_freq)
997-
return result
998-
999-
elif (other.freq is None or self.freq is None or
1000-
other.freq != self.freq or
1001-
not other.freq.isAnchored() or
1002-
(not self.is_monotonic or not other.is_monotonic)):
1003-
result = Index.intersection(self, other)
1004-
result = self._shallow_copy(result._values, name=result.name,
1005-
tz=result.tz, freq=None)
1006-
if result.freq is None:
1007-
result.freq = to_offset(result.inferred_freq)
1008-
return result
1009-
1010-
if len(self) == 0:
1011-
return self
1012-
if len(other) == 0:
1013-
return other
1014-
# to make our life easier, "sort" the two ranges
1015-
if self[0] <= other[0]:
1016-
left, right = self, other
1017-
else:
1018-
left, right = other, self
1019-
1020-
end = min(left[-1], right[-1])
1021-
start = right[0]
1022-
1023-
if end < start:
1024-
return type(self)(data=[])
1025-
else:
1026-
lslice = slice(*left.slice_locs(start, end))
1027-
left_chunk = left.values[lslice]
1028-
return self._shallow_copy(left_chunk)
1029-
1030974
def _parsed_string_to_bounds(self, reso, parsed):
1031975
"""
1032976
Calculate datetime bounds for parsed time string and its resolution.

pandas/core/indexes/timedeltas.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -401,48 +401,6 @@ def _wrap_union_result(self, other, result):
401401
name = self.name if self.name == other.name else None
402402
return self._simple_new(result, name=name, freq=None)
403403

404-
def intersection(self, other):
405-
"""
406-
Specialized intersection for TimedeltaIndex objects. May be much faster
407-
than Index.intersection
408-
409-
Parameters
410-
----------
411-
other : TimedeltaIndex or array-like
412-
413-
Returns
414-
-------
415-
y : Index or TimedeltaIndex
416-
"""
417-
self._assert_can_do_setop(other)
418-
if not isinstance(other, TimedeltaIndex):
419-
try:
420-
other = TimedeltaIndex(other)
421-
except (TypeError, ValueError):
422-
pass
423-
result = Index.intersection(self, other)
424-
return result
425-
426-
if len(self) == 0:
427-
return self
428-
if len(other) == 0:
429-
return other
430-
# to make our life easier, "sort" the two ranges
431-
if self[0] <= other[0]:
432-
left, right = self, other
433-
else:
434-
left, right = other, self
435-
436-
end = min(left[-1], right[-1])
437-
start = right[0]
438-
439-
if end < start:
440-
return type(self)(data=[])
441-
else:
442-
lslice = slice(*left.slice_locs(start, end))
443-
left_chunk = left.values[lslice]
444-
return self._shallow_copy(left_chunk)
445-
446404
def _maybe_promote(self, other):
447405
if other.inferred_type == 'timedelta':
448406
other = TimedeltaIndex(other)

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas.util.testing as tm
@@ -73,3 +74,97 @@ def test_intersection_bug_1708(self):
7374
result = index_1 & index_2
7475
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7576
tm.assert_index_equal(result, expected)
77+
78+
79+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(2, 6), unit='s'),
     pd.to_timedelta(range(3), unit='s'),
     TimedeltaIndex(['00:00:02'])),
    (pd.to_timedelta(range(3), unit='s'),
     pd.to_timedelta(range(2, 6), unit='s'),
     TimedeltaIndex(['00:00:02'])),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    # GH 17391
    # two ascending indexes sharing only the 2s element must intersect to
    # exactly that element regardless of which one is the caller
    result = idx1.intersection(idx2)
    assert result.equals(expected)
90+
91+
92+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (
        pd.to_timedelta(range(6, 3, -1), unit='s'),
        pd.to_timedelta(range(5, 1, -1), unit='s'),
        TimedeltaIndex(['00:00:05', '00:00:04']),
    ),
    (
        pd.to_timedelta(range(5, 1, -1), unit='s'),
        pd.to_timedelta(range(6, 3, -1), unit='s'),
        TimedeltaIndex(['00:00:05', '00:00:04']),
    ),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    # GH 17391
    # overlapping descending indexes intersect the same way in either order
    overlap = idx1.intersection(idx2)
    assert overlap.equals(expected)
104+
105+
106+
def test_intersection_intersects_descending_no_intersect():
    # descending ranges 6s..5s and 4s..2s have no element in common
    upper = pd.to_timedelta(range(6, 4, -1), unit='s')
    lower = pd.to_timedelta(range(4, 1, -1), unit='s')
    assert len(upper.intersection(lower)) == 0
111+
112+
113+
def test_intersection_intersects_len_1():
    # single-element indexes built from an ascending and a descending range
    forward = pd.to_timedelta(range(1, 2), unit='s')
    backward = pd.to_timedelta(range(1, 0, -1), unit='s')
    expected = TimedeltaIndex(['00:00:01'],
                              dtype='timedelta64[ns]')
    tm.assert_index_equal(forward.intersection(backward), expected)
120+
121+
122+
def test_intersection_can_intersect_self():
    # intersecting an index with itself must give the index back
    idx = pd.to_timedelta(range(1, 2), unit='s')
    tm.assert_index_equal(idx, idx.intersection(idx))
126+
127+
128+
def test_intersection_not_sorted():
    # same elements in two different unsorted orders: the result is
    # expected to follow the calling index
    caller = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    argument = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    tm.assert_index_equal(caller.intersection(argument), caller)
134+
135+
136+
def test_intersection_not_unique():
    # duplicates are expected to be kept, whichever index is the caller
    with_dupes = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    unique = pd.to_timedelta((1, 2, 3, 4), unit='s')

    for caller, argument in ((with_dupes, unique), (unique, with_dupes)):
        result = caller.intersection(argument)
        expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
        tm.assert_index_equal(result, expected)
146+
147+
148+
@pytest.mark.parametrize("index1, index2, expected", [
    (
        pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
        pd.to_timedelta((2, 3, 4, 8), unit='s'),
        pd.to_timedelta((2, 3, 4, 8), unit='s'),
    ),
    (
        pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
        pd.to_timedelta((2, 3, 4), unit='s'),
        pd.to_timedelta((2, 3, 4), unit='s'),
    ),
    (
        pd.to_timedelta((2, 4, 5, 6), unit='s'),
        pd.to_timedelta((2, 3, 4), unit='s'),
        pd.to_timedelta((2, 4), unit='s'),
    ),
])
def test_intersection_different_lengths(index1, index2, expected):
    # check each case as given and again with everything sorted descending;
    # within each pass the intersection is taken in both call orders
    for descending in (False, True):
        if descending:
            left, right, wanted = (
                idx.sort_values(ascending=False)
                for idx in (index1, index2, expected)
            )
        else:
            left, right, wanted = index1, index2, expected

        tm.assert_index_equal(left.intersection(right), wanted)
        tm.assert_index_equal(right.intersection(left), wanted)

pandas/tseries/offsets.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,41 @@ def wrapper(self, other):
112112
return wrapper
113113

114114

115+
def apply_index_wraps(func):
    """
    Decorate ``func(self, other)`` so its result is normalized on demand.

    When ``self.normalize`` is truthy the value returned by ``func`` is
    passed through ``to_period('D').to_timestamp()``; otherwise it is
    returned untouched.
    """
    @functools.wraps(func)
    def _maybe_normalized(self, other):
        outcome = func(self, other)
        if not self.normalize:
            return outcome
        return outcome.to_period('D').to_timestamp()
    return _maybe_normalized
123+
124+
125+
def _is_normalized(dt):
126+
if (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or
127+
dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0):
128+
return False
129+
return True
130+
131+
132+
def index_offsets_equal(first, second):
    """
    Check whether two indexes both carry a ``freq`` and those are equal.

    Parameters
    ----------
    first : Index
    second : Index

    Returns
    -------
    bool
        False when either object has no ``freq`` attribute, when either
        ``freq`` is None, or when the two differ; True otherwise.
    """
    first_freq = getattr(first, 'freq', None)
    second_freq = getattr(second, 'freq', None)

    if first_freq is None or second_freq is None:
        return False
    return not (first_freq != second_freq)
115150
# ---------------------------------------------------------------------
116151
# DateOffset
117152

0 commit comments

Comments
 (0)