Skip to content

BUG: TimedeltaIndex.intersection #22114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1326,6 +1326,7 @@ Timedelta
- Bug in :class:`Timedelta` and :func:`to_timedelta()` where the supported unit strings were inconsistent (:issue:`21762`)
- Bug in :class:`TimedeltaIndex` division where dividing by another :class:`TimedeltaIndex` raised ``TypeError`` instead of returning a :class:`Float64Index` (:issue:`23829`, :issue:`22631`)
- Bug in :class:`TimedeltaIndex` comparison operations where comparing against non-``Timedelta``-like objects would raise ``TypeError`` instead of returning all-``False`` for ``__eq__`` and all-``True`` for ``__ne__`` (:issue:`24056`)
- Bug in :meth:`TimedeltaIndex.intersection` when intersecting a monotonically decreasing :class:`TimedeltaIndex` (:issue:`17391`)

Timezones
^^^^^^^^^
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,12 @@ def is_monotonic(self):
"""
return self.is_monotonic_increasing

@property
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we actually using this? (it seems you in-lined it below), I actually prefer that to making a new method

Copy link
Contributor Author

@kirkhansen kirkhansen Sep 14, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The names are similar, but I'm calling this function twice in the intersection function to help me deal with the boolean logic (already hard for me to follow, even with this addition). I can remove this, and put this inline if you really want it that way.

def _is_strictly_monotonic(self):
    """
    Report whether the index is strictly monotonic in either direction.

    Returns
    -------
    bool
        The value of ``_is_strictly_monotonic_increasing`` when it is
        truthy, otherwise the value of
        ``_is_strictly_monotonic_decreasing``.
    """
    increasing = self._is_strictly_monotonic_increasing
    # Only consult the decreasing check when the increasing one fails,
    # mirroring short-circuit ``or`` evaluation.
    return increasing if increasing else self._is_strictly_monotonic_decreasing

@property
def is_monotonic_increasing(self):
"""
Expand Down
89 changes: 89 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@

import pandas.io.formats.printing as printing

from pandas.tseries.offsets import index_offsets_equal
import pandas.tseries.frequencies as frequencies

_index_doc_kwargs = dict(ibase._index_doc_kwargs)


Expand Down Expand Up @@ -572,6 +575,92 @@ def _time_shift(self, periods, freq=None):
result.name = self.name
return result

def _fast_intersection(self, other):
    """
    Intersect two sorted indexes by slicing instead of element matching.

    Speedy intersection that works only if certain assumptions are met.
    See intersection for details.

    Parameters
    ----------
    other : Index
        The index to intersect with. It is re-sorted to match the
        direction of ``self`` when the two run in opposite directions.
        The caller is expected to have verified that both indexes are
        non-empty and sorted.

    Returns
    -------
    array-like
        The slice of the underlying ``values`` of whichever index starts
        first, restricted to the range both indexes cover. The slice is
        empty (but keeps the dtype of ``values``) when the ranges do not
        overlap.
    """
    # Coerce into the same order
    ascending = self.is_monotonic_increasing
    if ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=ascending)

    # "left" is the index whose first element comes first in sort order.
    if ascending:
        self_leads = self[0] <= other[0]
    else:
        self_leads = self[0] >= other[0]
    left, right = (self, other) if self_leads else (other, self)

    # The overlap begins where the later-starting index begins and stops
    # where the earlier-finishing index stops.
    start = right[0]
    if ascending:
        end = min(left[-1], right[-1])
        disjoint = end < start
    else:
        end = max(left[-1], right[-1])
        disjoint = end > start

    if disjoint:
        # Empty slice rather than ``[]`` so the dtype is preserved.
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]

def intersection(self, other):
    """
    Specialized intersection for DateTimeIndexOpsMixin objects.
    May be much faster than Index.intersection.

    The fast, slice-based path is used only when all of the following
    hold; otherwise the generic ``Index.intersection`` is used:

    1. ``index_offsets_equal(self, other)`` is true
    2. ``other.freq`` is anchored
    3. Both indexes are strictly monotonic (increasing or decreasing)

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in.
        If the two are equal, the name-reconciled object is returned; if
        either is empty, that empty operand is returned as is.
    """
    # Validate the operand before doing any work.
    self._assert_can_do_setop(other)

    # Identical contents: nothing to compute, only the name to reconcile.
    if self.equals(other):
        return self._get_reconciled_name_object(other)

    # An empty operand is returned unchanged.
    lengths = len(self), len(other)
    if lengths[0] == 0:
        return self
    if lengths[1] == 0:
        return other

    # Fall back to the regular Index.intersection if the conditions
    # aren't just right for the fast intersection.
    # NOTE(review): ``other.freq.isAnchored()`` is evaluated whenever
    # ``index_offsets_equal`` returns True; if that helper can return True
    # while ``other.freq`` is None this raises AttributeError -- confirm.
    if (not index_offsets_equal(self, other) or
            not other.freq.isAnchored() or  # for period intersections with freq
            (not self._is_strictly_monotonic or
             not other._is_strictly_monotonic)):
        result = Index.intersection(self, other)
        # Cast an empty result back to this index's dtype.
        if result.empty:
            result = result.astype(self.dtype)
        freq = self.freq or other.freq
        result = self._shallow_copy(result._values, name=result.name,
                                    freq=freq)
        # No freq carried over: fall back to the one inferred from the data.
        if result.freq is None:
            result.freq = frequencies.to_offset(result.inferred_freq)
        return result

    # Conditions met!
    intersected_slice = self._fast_intersection(other)
    name = ops.get_op_result_name(self, other)
    return self._shallow_copy(intersected_slice, name=name)


def wrap_arithmetic_op(self, other, result):
if result is NotImplemented:
Expand Down
60 changes: 0 additions & 60 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,66 +550,6 @@ def _wrap_setop_result(self, other, result):
raise ValueError('Passed item and index have different timezone')
return self._shallow_copy(result, name=name, freq=None, tz=self.tz)

def intersection(self, other):
    """
    Specialized intersection for DatetimeIndex objects. May be much faster
    than Index.intersection

    Parameters
    ----------
    other : DatetimeIndex or array-like
        Converted to a DatetimeIndex when possible; if the conversion
        raises TypeError or ValueError it is used as passed.

    Returns
    -------
    y : Index or DatetimeIndex
    """
    self._assert_can_do_setop(other)

    # Identical contents: only the name needs reconciling.
    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if not isinstance(other, DatetimeIndex):
        # Best-effort coercion; a failure is deliberately ignored and the
        # generic intersection below handles whatever ``other`` is.
        try:
            other = DatetimeIndex(other)
        except (TypeError, ValueError):
            pass
        result = Index.intersection(self, other)
        if isinstance(result, DatetimeIndex):
            if result.freq is None:
                result.freq = to_offset(result.inferred_freq)
        return result

    # Generic path when the freqs are missing, differ, are not anchored,
    # or either index is not monotonic.
    elif (other.freq is None or self.freq is None or
          other.freq != self.freq or
          not other.freq.isAnchored() or
          (not self.is_monotonic or not other.is_monotonic)):
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=result.tz, freq=None)
        if result.freq is None:
            result.freq = to_offset(result.inferred_freq)
        return result

    # An empty operand is returned unchanged.
    if len(self) == 0:
        return self
    if len(other) == 0:
        return other
    # to make our life easier, "sort" the two ranges
    # ("left" is the index with the smaller first element)
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # Overlap runs from the later start to the earlier end.
    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        # Ranges do not overlap.
        return type(self)(data=[])
    else:
        lslice = slice(*left.slice_locs(start, end))
        left_chunk = left.values[lslice]
        return self._shallow_copy(left_chunk)

# --------------------------------------------------------------------

@Appender(_index_shared_docs['astype'])
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
Value of `side` parameter should be validated in caller.

"""
assert kind in ['ix', 'loc', 'getitem']
assert kind in ['ix', 'loc', 'getitem', None]

if isinstance(label, datetime):
return Period(label, freq=self.freq)
Expand Down
48 changes: 3 additions & 45 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,51 +404,9 @@ def _fast_union(self, other):
else:
return left

def intersection(self, other):
    """
    Specialized intersection for TimedeltaIndex objects. May be much faster
    than Index.intersection

    Parameters
    ----------
    other : TimedeltaIndex or array-like
        Converted to a TimedeltaIndex when possible; if the conversion
        raises TypeError or ValueError it is used as passed.

    Returns
    -------
    y : Index or TimedeltaIndex
    """
    self._assert_can_do_setop(other)

    # Identical contents: only the name needs reconciling.
    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if not isinstance(other, TimedeltaIndex):
        # Best-effort coercion; a failure is deliberately ignored and the
        # generic intersection below handles whatever ``other`` is.
        try:
            other = TimedeltaIndex(other)
        except (TypeError, ValueError):
            pass
        result = Index.intersection(self, other)
        return result

    # An empty operand is returned unchanged.
    if len(self) == 0:
        return self
    if len(other) == 0:
        return other
    # NOTE(review): no monotonicity or freq check is made before the
    # range-based logic below; the first/last-element comparisons appear
    # to assume both indexes are sorted ascending -- confirm.
    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # Overlap runs from the later start to the earlier end.
    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        # Ranges do not overlap.
        return type(self)(data=[])
    else:
        lslice = slice(*left.slice_locs(start, end))
        left_chunk = left.values[lslice]
        return self._shallow_copy(left_chunk)
def _wrap_union_result(self, other, result):
    """
    Wrap the raw result of a union in a new index object.

    The result keeps ``self.name`` only when both operands share the same
    name; otherwise it is unnamed. ``freq`` is always passed as ``None``.
    """
    shared_name = None
    if self.name == other.name:
        shared_name = self.name
    return self._simple_new(result, name=shared_name, freq=None)

def _maybe_promote(self, other):
if other.inferred_type == 'timedelta':
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_intersection2(self):

third = Index(['a', 'b', 'c'])
result = first.intersection(third)
expected = pd.Index([], dtype=object)
expected = DatetimeIndex([])
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("tz", [None, 'Asia/Tokyo', 'US/Eastern',
Expand All @@ -151,7 +151,7 @@ def test_intersection(self, tz):
expected3 = date_range('6/1/2000', '6/20/2000', freq='D', name=None)

rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx')
expected4 = DatetimeIndex([], name='idx')
expected4 = DatetimeIndex([], name='idx', freq='D')

for (rng, expected) in [(rng2, expected2), (rng3, expected3),
(rng4, expected4)]:
Expand Down Expand Up @@ -181,14 +181,14 @@ def test_intersection(self, tz):
# GH 7880
rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz,
name='idx')
expected4 = DatetimeIndex([], tz=tz, name='idx')
expected4 = DatetimeIndex([], tz=tz, name='idx', freq='D')

for (rng, expected) in [(rng2, expected2), (rng3, expected3),
(rng4, expected4)]:
result = base.intersection(rng)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq is None
assert result.freq == expected.freq
assert result.tz == expected.tz

def test_intersection_empty(self):
Expand Down
Loading