Skip to content

BUG: TimedeltaIndex.intersection #17433

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,12 @@ def is_monotonic(self):
""" alias for is_monotonic_increasing (deprecated) """
return self.is_monotonic_increasing

@property
def _is_strictly_monotonic(self):
    """
    Whether the index is strictly monotonic in either direction.

    Evaluates to ``_is_strictly_monotonic_increasing`` when that is truthy,
    otherwise to ``_is_strictly_monotonic_decreasing``.
    """
    increasing = self._is_strictly_monotonic_increasing
    if increasing:
        return increasing
    return self._is_strictly_monotonic_decreasing

@property
def is_monotonic_increasing(self):
"""
Expand Down
89 changes: 89 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pandas._libs.period import Period

from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.tseries.offsets import index_offsets_equal
from pandas.util._decorators import Appender, cache_readonly
import pandas.core.dtypes.concat as _concat
import pandas.tseries.frequencies as frequencies
Expand Down Expand Up @@ -854,6 +855,94 @@ def _concat_same_dtype(self, to_concat, name):
new_data = np.concatenate([c.asi8 for c in to_concat])
return self._simple_new(new_data, **attribs)

def _intersect_ascending(self, other):
    """
    Slice out the range shared by ``self`` and ``other``.

    Assumes both indexes are non-empty and sorted ascending; neither is
    checked here.

    Parameters
    ----------
    other : Index
        The index to intersect with; it must support positional indexing,
        ``slice_locs`` and ``values`` just like ``self``.

    Returns
    -------
    array-like
        The slice of ``left.values`` (``left`` being whichever index starts
        first) that lies between the later start and the earlier end.
        When the ranges do not overlap, an empty slice of the same values.
    """
    # to make our life easier, "sort" the two ranges so that ``left`` is
    # the one that starts first
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    start = right[0]
    end = min(left[-1], right[-1])

    if end < start:
        # Disjoint ranges: hand back an empty slice of the underlying values
        # instead of a bare list, so both branches return the same kind of
        # object.
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]

def _intersect_descending(self, other):
    """
    Slice out the range shared by ``self`` and ``other`` (descending order).

    This is essentially a flip of ``_intersect_ascending``. Assumes both
    indexes are non-empty and sorted descending; neither is checked here.

    Parameters
    ----------
    other : Index
        The index to intersect with; it must support positional indexing,
        ``slice_locs`` and ``values`` just like ``self``.

    Returns
    -------
    array-like
        The slice of ``left.values`` (``left`` being whichever index starts
        with the larger value) between the smaller first value and the
        larger last value. When the ranges do not overlap, an empty slice
        of the same values.
    """
    # ``left`` is the index whose first (largest) value is greatest
    if self[0] >= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    start = min(left[0], right[0])
    # The shared range stops at whichever index runs out first, i.e. the
    # larger of the two last values. Using only ``right[-1]`` would make
    # the disjoint check below unreachable.
    end = max(left[-1], right[-1])

    if end > start:
        # Disjoint ranges: return an empty slice of the values rather than
        # constructing an ``Index`` without data.
        return left.values[:0]
    return left.values[slice(*left.slice_locs(start, end))]

def intersection(self, other):
    """
    Specialized intersection for DateTimeIndexOpsMixin objects.
    May be much faster than Index.intersection.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    Index
        A shallow copied intersection between the two things passed in
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_consensus_name(other)

    if len(self) == 0:
        return self

    # Non-Index input must be dispatched before the empty-``other``
    # shortcut below; otherwise an empty list/array would be handed back
    # unchanged instead of an Index.
    if not isinstance(other, Index):
        return Index.intersection(self, other)

    if len(other) == 0:
        return other

    if (index_offsets_equal(self, other) or
            not self._is_strictly_monotonic or
            not other._is_strictly_monotonic):
        # generic path: intersect, re-wrap as our own type, re-infer freq
        result = Index.intersection(self, other)
        result = self._shallow_copy(result._values, name=result.name,
                                    tz=getattr(self, 'tz', None),
                                    freq=None)
        if result.freq is None:
            result.offset = frequencies.to_offset(result.inferred_freq)
        return result

    # handle intersecting things like this
    # idx1 = pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s')
    # idx2 = pd.to_timedelta((2, 3, 4, 8), unit='s')
    if len(self) != len(other) and (
            max(self) != max(other) or min(self) != min(other)):
        return Index.intersection(self, other)

    # NOTE(review): indexes that reach this point are strictly monotonic
    # but need not share an offset. The slicing helpers keep every value
    # of one index inside the shared range, so e.g. (1, 2, 3, 5) against
    # (1, 3, 4, 5) would presumably yield (1, 2, 3, 5). Confirm whether
    # this path should be limited to regular, aligned indexes.

    # coerce into same order
    self_ascending = self.is_monotonic_increasing
    if self_ascending != other.is_monotonic_increasing:
        other = other.sort_values(ascending=self_ascending)

    if self_ascending:
        intersected_slice = self._intersect_ascending(other)
    else:
        intersected_slice = self._intersect_descending(other)

    intersected = self._shallow_copy(intersected_slice)
    return intersected._get_consensus_name(other)


def _ensure_datetimelike_to_i8(other):
""" helper for coercing an input scalar or array to i8 """
Expand Down
56 changes: 0 additions & 56 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1189,62 +1189,6 @@ def _wrap_union_result(self, other, result):
raise ValueError('Passed item and index have different timezone')
return self._simple_new(result, name=name, freq=None, tz=self.tz)

def intersection(self, other):
    """
    Specialized intersection for DatetimeIndex objects. May be much faster
    than Index.intersection

    Parameters
    ----------
    other : DatetimeIndex or array-like

    Returns
    -------
    y : Index or DatetimeIndex
    """
    self._assert_can_do_setop(other)

    if not isinstance(other, DatetimeIndex):
        try:
            other = DatetimeIndex(other)
        except (TypeError, ValueError):
            # not coercible; the generic implementation gets it unchanged
            pass
        generic = Index.intersection(self, other)
        if isinstance(generic, DatetimeIndex) and generic.freq is None:
            generic.offset = to_offset(generic.inferred_freq)
        return generic

    # The slicing shortcut further down is only attempted when both
    # indexes carry the same anchored offset and are monotonic.
    offsets_differ = (other.offset is None or self.offset is None or
                      other.offset != self.offset or
                      not other.offset.isAnchored())
    if offsets_differ or not (self.is_monotonic and other.is_monotonic):
        generic = Index.intersection(self, other)
        generic = self._shallow_copy(generic._values, name=generic.name,
                                     tz=generic.tz, freq=None)
        if generic.freq is None:
            generic.offset = to_offset(generic.inferred_freq)
        return generic

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    # "sort" the two ranges: ``left`` is whichever one starts first
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    start = right[0]
    end = min(left[-1], right[-1])

    if end < start:
        return type(self)(data=[])

    bounds = left.slice_locs(start, end)
    return self._shallow_copy(left.values[slice(*bounds)])

def _parsed_string_to_bounds(self, reso, parsed):
"""
Calculate datetime bounds for parsed time string and its resolution.
Expand Down
42 changes: 0 additions & 42 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,48 +596,6 @@ def _wrap_union_result(self, other, result):
name = self.name if self.name == other.name else None
return self._simple_new(result, name=name, freq=None)

def intersection(self, other):
    """
    Specialized intersection for TimedeltaIndex objects. May be much faster
    than Index.intersection

    Parameters
    ----------
    other : TimedeltaIndex or array-like

    Returns
    -------
    y : Index or TimedeltaIndex
    """
    self._assert_can_do_setop(other)
    if not isinstance(other, TimedeltaIndex):
        try:
            other = TimedeltaIndex(other)
        except (TypeError, ValueError):
            # not coercible; the generic implementation gets it unchanged
            pass
        result = Index.intersection(self, other)
        return result

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    # The slicing shortcut below keeps every value of one index that falls
    # inside the shared range. That is only the intersection when both
    # indexes are ascending with the same regular step; anything else
    # (no freq, different freq, descending, unsorted) goes through the
    # generic implementation.
    if (self.freq is None or other.freq is None or
            self.freq != other.freq or
            not self.is_monotonic_increasing or
            not other.is_monotonic_increasing):
        return Index.intersection(self, other)

    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        return type(self)(data=[])
    if start not in left:
        # Same step but shifted against each other: slicing would return
        # values that ``right`` does not contain.
        return Index.intersection(self, other)

    lslice = slice(*left.slice_locs(start, end))
    left_chunk = left.values[lslice]
    return self._shallow_copy(left_chunk)

def _maybe_promote(self, other):
if other.inferred_type == 'timedelta':
other = TimedeltaIndex(other)
Expand Down
95 changes: 95 additions & 0 deletions pandas/tests/indexes/timedeltas/test_setops.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm
Expand Down Expand Up @@ -74,3 +75,97 @@ def test_intersection_bug_1708(self):
result = index_1 & index_2
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
tm.assert_index_equal(result, expected)


@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta(range(2, 6), unit='s'),
     pd.to_timedelta(range(3), unit='s'),
     TimedeltaIndex(['00:00:02'])),
    (pd.to_timedelta(range(3), unit='s'),
     pd.to_timedelta(range(2, 6), unit='s'),
     TimedeltaIndex(['00:00:02'])),
])
def test_intersection_intersects_ascending(idx1, idx2, expected):
    """Two ascending indexes that overlap in one value intersect to it.

    Both argument orders are parametrized; the expected literal is the
    well-formed ``HH:MM:SS`` spelling of two seconds.
    """
    result = idx1.intersection(idx2)
    assert result.equals(expected)


@pytest.mark.parametrize('idx1,idx2,expected', [
    (pd.to_timedelta([6, 5, 4], unit='s'),
     pd.to_timedelta([5, 4, 3, 2], unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
    (pd.to_timedelta([5, 4, 3, 2], unit='s'),
     pd.to_timedelta([6, 5, 4], unit='s'),
     TimedeltaIndex(['00:00:05', '00:00:04'])),
])
def test_intersection_intersects_descending(idx1, idx2, expected):
    """Descending indexes intersect to their shared values (GH 17391)."""
    common = idx1.intersection(idx2)
    assert common.equals(expected)


def test_intersection_intersects_descending_no_intersect():
    """Descending indexes with no value in common intersect to nothing."""
    upper = pd.to_timedelta([6, 5], unit='s')
    lower = pd.to_timedelta([4, 3, 2], unit='s')
    assert len(upper.intersection(lower)) == 0


def test_intersection_intersects_len_1():
    """Single-element indexes holding the same value intersect to it."""
    expected = TimedeltaIndex(['00:00:01'],
                              dtype='timedelta64[ns]')
    first = pd.to_timedelta([1], unit='s')
    second = pd.to_timedelta([1], unit='s')
    tm.assert_index_equal(first.intersection(second), expected)


def test_intersection_can_intersect_self():
    """Intersecting an index with itself gives back an equal index."""
    idx = pd.to_timedelta([1], unit='s')
    tm.assert_index_equal(idx, idx.intersection(idx))


def test_intersection_not_sorted():
    """Unsorted indexes with the same values intersect in caller order."""
    caller = pd.to_timedelta((1, 3, 2, 5, 4), unit='s')
    argument = pd.to_timedelta((1, 2, 3, 5, 4), unit='s')
    # the expectation is the calling index itself, element order included
    tm.assert_index_equal(caller.intersection(argument), caller)


def test_intersection_not_unique():
    """Duplicates in one operand are expected in the result either way."""
    with_dupes = pd.to_timedelta((1, 2, 2, 3, 3, 5), unit='s')
    unique = pd.to_timedelta((1, 2, 3, 4), unit='s')

    # same expectation regardless of which side the call is made on
    for caller, argument in ((with_dupes, unique), (unique, with_dupes)):
        expected = pd.to_timedelta((1, 2, 2, 3, 3), unit='s')
        tm.assert_index_equal(caller.intersection(argument), expected)


@pytest.mark.parametrize("index1, index2, expected", [
    (pd.to_timedelta((1, 2, 3, 4, 5, 6, 7, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s'),
     pd.to_timedelta((2, 3, 4, 8), unit='s')),
    (pd.to_timedelta((1, 2, 3, 4, 5), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s')),
    (pd.to_timedelta((2, 4, 5, 6), unit='s'),
     pd.to_timedelta((2, 3, 4), unit='s'),
     pd.to_timedelta((2, 4), unit='s')),
])
def test_intersection_different_lengths(index1, index2, expected):
    """Indexes of different lengths intersect correctly.

    Each case is checked in both argument orders, first as given and then
    with all three indexes sorted descending.
    """
    orderings = [
        (index1, index2, expected),
        (index1.sort_values(ascending=False),
         index2.sort_values(ascending=False),
         expected.sort_values(ascending=False)),
    ]
    for left, right, wanted in orderings:
        tm.assert_index_equal(left.intersection(right), wanted)
        tm.assert_index_equal(right.intersection(left), wanted)
20 changes: 20 additions & 0 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,26 @@ def _is_normalized(dt):
return False
return True


def index_offsets_equal(first, second):
    """
    Check that both objects have an offset and that the offsets are equal.

    Parameters
    ----------
    first : Index
    second : Index

    Returns
    -------
    bool
        False when either object lacks an ``offset`` attribute, has it set
        to None, or the two offsets compare unequal; True otherwise.
    """
    first_offset = getattr(first, 'offset', None)
    if first_offset is None:
        return False

    second_offset = getattr(second, 'offset', None)
    if second_offset is None:
        return False

    if first_offset != second_offset:
        return False
    return True

# ---------------------------------------------------------------------
# DateOffset

Expand Down