Skip to content

Commit ea61c24

Browse files
committed
BUG: TimedeltaIndex.intersection
Fixes pandas-dev#17391
1 parent 6da5a72 commit ea61c24

File tree

6 files changed

+227
-98
lines changed

6 files changed

+227
-98
lines changed

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,6 +1476,12 @@ def is_monotonic(self):
14761476
""" alias for is_monotonic_increasing (deprecated) """
14771477
return self.is_monotonic_increasing
14781478

1479+
@property
def _is_strictly_monotonic(self):
    """
    Whether the index is strictly monotonic in either direction.

    Evaluates ``_is_strictly_monotonic_increasing`` first and only falls
    back to ``_is_strictly_monotonic_decreasing`` when that is falsy.
    """
    increasing = self._is_strictly_monotonic_increasing
    if increasing:
        return increasing
    return self._is_strictly_monotonic_decreasing
1484+
14791485
@property
14801486
def is_monotonic_increasing(self):
14811487
"""

pandas/core/indexes/datetimelike.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@
3636

3737
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3838
from pandas.core.indexes.base import Index, _index_shared_docs
39+
from pandas.tseries.offsets import index_offsets_equal
40+
import pandas.tseries.frequencies as frequencies
3941
from pandas.util._decorators import Appender, cache_readonly
4042
import pandas.core.dtypes.concat as _concat
4143

44+
4245
import pandas.core.indexes.base as ibase
4346
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
4447

@@ -679,6 +682,94 @@ def astype(self, dtype, copy=True):
679682
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
680683
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
681684

685+
def _intersect_ascending(self, other):
    """
    Slice out the overlapping range of two ascending indexes.

    Parameters
    ----------
    other : Index
        Second operand; like ``self`` it is indexed positionally and must
        provide ``values`` and ``slice_locs``.

    Returns
    -------
    list or ndarray
        ``[]`` when the ranges do not overlap, otherwise the values of the
        index that starts first, restricted to the shared range.
    """
    # the operand with the smaller first element becomes the one we slice
    first, second = (self, other) if self[0] <= other[0] else (other, self)

    lower = second[0]
    upper = min(first[-1], second[-1])
    if upper < lower:
        return []

    lo, hi = first.slice_locs(lower, upper)
    return first.values[lo:hi]
698+
699+
def _intersect_descending(self, other):
    """
    Slice out the overlapping range of two descending indexes.

    Mirror image of ``_intersect_ascending``: the operand with the larger
    first element is the one that gets sliced.

    Parameters
    ----------
    other : Index
        Second operand; like ``self`` it is indexed positionally and must
        provide ``values`` and ``slice_locs``.

    Returns
    -------
    ndarray
        The values of the sliced operand restricted to the shared range;
        an empty slice of those values when the ranges do not overlap.
    """
    if self[0] >= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # Upper bound of the overlap: `left` starts at or above `right`, so the
    # smaller of the two heads is always right[0].
    start = right[0]
    # Lower bound of the overlap is the larger of the two tails. Using
    # right[-1] alone made the no-overlap check below unreachable, because
    # right[-1] can never exceed right[0] on a descending index.
    end = max(left[-1], right[-1])

    if end > start:
        # disjoint ranges: hand back an empty, correctly typed slice
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]
712+
713+
def intersection(self, other):
    """
    Specialized intersection for DatetimeIndexOpsMixin objects.

    May be much faster than Index.intersection.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    # Non-Index input always goes through the generic implementation. This
    # is checked before the empty short-circuits so that an empty list or
    # array is never handed back to the caller unchanged.
    if not isinstance(other, Index):
        return Index.intersection(self, other)

    lengths = len(self), len(other)
    if lengths[0] == 0:
        return self
    if lengths[1] == 0:
        return other

    # Slicing one operand is only a valid intersection when both indexes
    # share the same anchored offset and are strictly monotonic; anything
    # else has to use the generic element-wise implementation.
    can_slice = (index_offsets_equal(self, other) and
                 self.freq.isAnchored() and
                 self._is_strictly_monotonic and
                 other._is_strictly_monotonic)
    if not can_slice:
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=getattr(self, 'tz', None),
                                    freq=None)
        if result.freq is None:
            # store the inferred offset on `freq`, not on `offset`
            result.freq = frequencies.to_offset(result.inferred_freq)
        return result

    # handle intersecting things like this
    # idx1 = pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s')
    # idx2 = pd.to_timedelta((2, 3, 4, 8), unit='s')
    # NOTE(review): with a shared anchored offset this guard looks
    # redundant -- confirm before removing it.
    if lengths[0] != lengths[1] and (
            max(self) != max(other) or min(self) != min(other)):
        return Index.intersection(self, other)

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)
772+
682773

683774
def _ensure_datetimelike_to_i8(other):
684775
""" helper for coercing an input scalar or array to i8 """

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -978,62 +978,6 @@ def _wrap_union_result(self, other, result):
978978
raise ValueError('Passed item and index have different timezone')
979979
return self._simple_new(result, name=name, freq=None, tz=self.tz)
980980

981-
def intersection(self, other):
982-
"""
983-
Specialized intersection for DatetimeIndex objects. May be much faster
984-
than Index.intersection
985-
986-
Parameters
987-
----------
988-
other : DatetimeIndex or array-like
989-
990-
Returns
991-
-------
992-
y : Index or DatetimeIndex
993-
"""
994-
self._assert_can_do_setop(other)
995-
if not isinstance(other, DatetimeIndex):
996-
try:
997-
other = DatetimeIndex(other)
998-
except (TypeError, ValueError):
999-
pass
1000-
result = Index.intersection(self, other)
1001-
if isinstance(result, DatetimeIndex):
1002-
if result.freq is None:
1003-
result.freq = to_offset(result.inferred_freq)
1004-
return result
1005-
1006-
elif (other.freq is None or self.freq is None or
1007-
other.freq != self.freq or
1008-
not other.freq.isAnchored() or
1009-
(not self.is_monotonic or not other.is_monotonic)):
1010-
result = Index.intersection(self, other)
1011-
result = self._shallow_copy(result._values, name=result.name,
1012-
tz=result.tz, freq=None)
1013-
if result.freq is None:
1014-
result.freq = to_offset(result.inferred_freq)
1015-
return result
1016-
1017-
if len(self) == 0:
1018-
return self
1019-
if len(other) == 0:
1020-
return other
1021-
# to make our life easier, "sort" the two ranges
1022-
if self[0] <= other[0]:
1023-
left, right = self, other
1024-
else:
1025-
left, right = other, self
1026-
1027-
end = min(left[-1], right[-1])
1028-
start = right[0]
1029-
1030-
if end < start:
1031-
return type(self)(data=[])
1032-
else:
1033-
lslice = slice(*left.slice_locs(start, end))
1034-
left_chunk = left.values[lslice]
1035-
return self._shallow_copy(left_chunk)
1036-
1037981
def _parsed_string_to_bounds(self, reso, parsed):
1038982
"""
1039983
Calculate datetime bounds for parsed time string and its resolution.

pandas/core/indexes/timedeltas.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -402,48 +402,6 @@ def _wrap_union_result(self, other, result):
402402
name = self.name if self.name == other.name else None
403403
return self._simple_new(result, name=name, freq=None)
404404

405-
def intersection(self, other):
406-
"""
407-
Specialized intersection for TimedeltaIndex objects. May be much faster
408-
than Index.intersection
409-
410-
Parameters
411-
----------
412-
other : TimedeltaIndex or array-like
413-
414-
Returns
415-
-------
416-
y : Index or TimedeltaIndex
417-
"""
418-
self._assert_can_do_setop(other)
419-
if not isinstance(other, TimedeltaIndex):
420-
try:
421-
other = TimedeltaIndex(other)
422-
except (TypeError, ValueError):
423-
pass
424-
result = Index.intersection(self, other)
425-
return result
426-
427-
if len(self) == 0:
428-
return self
429-
if len(other) == 0:
430-
return other
431-
# to make our life easier, "sort" the two ranges
432-
if self[0] <= other[0]:
433-
left, right = self, other
434-
else:
435-
left, right = other, self
436-
437-
end = min(left[-1], right[-1])
438-
start = right[0]
439-
440-
if end < start:
441-
return type(self)(data=[])
442-
else:
443-
lslice = slice(*left.slice_locs(start, end))
444-
left_chunk = left.values[lslice]
445-
return self._shallow_copy(left_chunk)
446-
447405
def _maybe_promote(self, other):
448406
if other.inferred_type == 'timedelta':
449407
other = TimedeltaIndex(other)

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
import pandas.util.testing as tm
@@ -73,3 +74,97 @@ def test_intersection_bug_1708(self):
7374
result = index_1 & index_2
7475
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7576
tm.assert_index_equal(result, expected)
77+
78+
79+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(2, 6), unit='s'),
     pd.to_timedelta(range(3), unit='s'),
     TimedeltaIndex(['00:00:002'])),
    (pd.to_timedelta(range(3), unit='s'),
     pd.to_timedelta(range(2, 6), unit='s'),
     TimedeltaIndex(['00:00:002'])),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    # overlapping ascending ranges, exercised with the operands in both orders
    intersected = idx1.intersection(idx2)
    assert intersected.equals(expected)
90+
91+
92+
@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(6, 3, -1), unit='s'),
     pd.to_timedelta(range(5, 1, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
    (pd.to_timedelta(range(5, 1, -1), unit='s'),
     pd.to_timedelta(range(6, 3, -1), unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    # GH 17391
    # overlapping descending ranges, exercised with the operands in both orders
    intersected = idx1.intersection(idx2)
    assert intersected.equals(expected)
104+
105+
106+
def test_intersection_intersects_descending_no_intersect():
    # two descending ranges with no value in common
    upper = pd.to_timedelta(range(6, 4, -1), unit='s')
    lower = pd.to_timedelta(range(4, 1, -1), unit='s')
    assert len(upper.intersection(lower)) == 0
111+
112+
113+
def test_intersection_intersects_len_1():
    # single-element indexes built from an ascending and a descending range
    ascending = pd.to_timedelta(range(1, 2), unit='s')
    descending = pd.to_timedelta(range(1, 0, -1), unit='s')
    expected = TimedeltaIndex(['00:00:01'],
                              dtype='timedelta64[ns]')
    tm.assert_index_equal(ascending.intersection(descending), expected)
120+
121+
122+
def test_intersection_can_intersect_self():
    # intersecting an index with itself must give the index back
    idx = pd.to_timedelta(range(1, 2), unit='s')
    tm.assert_index_equal(idx, idx.intersection(idx))
126+
127+
128+
def test_intersection_not_sorted():
    # same values in both operands, neither of them sorted; the result is
    # expected to follow the order of the left operand
    left = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    right = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    tm.assert_index_equal(left.intersection(right), left)
134+
135+
136+
def test_intersection_not_unique():
    # one operand contains duplicates; the same result is expected whichever
    # side the duplicated index is on
    with_dupes = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    unique = pd.to_timedelta((1, 2, 3, 4), unit='s')

    expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
    tm.assert_index_equal(with_dupes.intersection(unique), expected)

    expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
    tm.assert_index_equal(unique.intersection(with_dupes), expected)
146+
147+
148+
@pytest.mark.parametrize("index1, index2, expected", [
    (pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s')),
    (pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s')),
    (pd.to_timedelta((2, 4, 5, 6), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 4), unit='s')),
])
def test_intersection_different_lengths(index1, index2, expected):
    def check_both_orders(left, right, wanted):
        # the expected result is the same whichever operand comes first
        for first, second in ((left, right), (right, left)):
            tm.assert_index_equal(first.intersection(second), wanted)

    # ascending inputs as given ...
    check_both_orders(index1, index2, expected)
    # ... and the same data sorted in descending order
    check_both_orders(index1.sort_values(ascending=False),
                      index2.sort_values(ascending=False),
                      expected.sort_values(ascending=False))

pandas/tseries/offsets.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,41 @@ def wrapper(self, other):
112112
return wrapper
113113

114114

115+
def apply_index_wraps(func):
    """
    Decorate a ``func(self, other)`` method so that its result is converted
    to daily periods and back to timestamps whenever ``self.normalize`` is
    truthy; otherwise the result is returned untouched.
    """
    @functools.wraps(func)
    def wrapper(self, other):
        applied = func(self, other)
        if not self.normalize:
            return applied
        return applied.to_period('D').to_timestamp()
    return wrapper
123+
124+
125+
def _is_normalized(dt):
    """
    Return True when every time-of-day field of ``dt`` is zero.

    ``nanosecond`` is optional and treated as 0 when ``dt`` does not have it.
    """
    return (dt.hour == 0 and dt.minute == 0 and dt.second == 0 and
            dt.microsecond == 0 and getattr(dt, 'nanosecond', 0) == 0)
130+
131+
132+
def index_offsets_equal(first, second):
    """
    Checks if the two indexes have an offset, and if they equal each other

    Parameters
    ----------
    first: Index
    second: Index

    Returns
    -------
    bool
        False when either argument has no ``freq`` attribute, when either
        ``freq`` is None, or when the two differ; True otherwise.
    """
    first_freq = getattr(first, 'freq', None)
    second_freq = getattr(second, 'freq', None)
    if first_freq is None or second_freq is None:
        return False
    return not (first_freq != second_freq)
115150
# ---------------------------------------------------------------------
116151
# DateOffset
117152

0 commit comments

Comments
 (0)