From 046426927bbef3d95ef50efbd999ad162bd56895 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Sun, 3 Feb 2019 15:57:00 +0000 Subject: [PATCH 01/15] Moving intersection method from DatetimeIndex and Timedeltaindex to datetimelike. Point PeriodIndex intersection to Index.intersection --- pandas/core/indexes/datetimelike.py | 78 +++++++++++++++++++++++++++++ pandas/core/indexes/datetimes.py | 73 --------------------------- pandas/core/indexes/period.py | 3 ++ pandas/core/indexes/timedeltas.py | 46 ----------------- 4 files changed, 81 insertions(+), 119 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index da4e7040097a2..9901cdb067e41 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -26,6 +26,7 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.tools.timedeltas import to_timedelta +from pandas.tseries.frequencies import to_offset import pandas.io.formats.printing as printing @@ -532,6 +533,83 @@ def isin(self, values): return algorithms.isin(self.asi8, values.asi8) + def intersection(self, other, sort=False): + """ + Specialized intersection for DatetimeIndex and TimedeltaIndex objects. + May be much faster than Index.intersection + + Parameters + ---------- + other : DatetimeIndex or TimedeltaIndex or array-like + sort : False or None, default False + Sort the resulting index if possible. + + .. versionadded:: 0.24.0 + + .. versionchanged:: 0.24.1 + + Changed the default to ``False`` to match the behaviour + from before 0.24.0. 
+ + Returns + ------- + y : Index or DatetimeIndex or TimedeltaIndex + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + + if self.equals(other): + return self._get_reconciled_name_object(other) + + if not isinstance(other, type(self)): + try: + other = self(other) + except (TypeError, ValueError): + pass + result = Index.intersection(self, other, sort=sort) + if isinstance(result, type(self)): + if result.freq is None: + result.freq = to_offset(result.inferred_freq) + return result + + elif (other.freq is None or self.freq is None or + other.freq != self.freq or + not other.freq.isAnchored() or + (not self.is_monotonic or not other.is_monotonic)): + result = Index.intersection(self, other, sort=sort) + # Invalidate the freq of `result`, which may not be correct at + # this point, depending on the values. + result.freq = None + if hasattr(self, 'tz'): + result = self._shallow_copy(result._values, name=result.name, + tz=result.tz, freq=None) + else: + result = self._shallow_copy(result._values, name=result.name, + freq=None) + if result.freq is None: + result.freq = to_offset(result.inferred_freq) + return result + + if len(self) == 0: + return self + if len(other) == 0: + return other + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + end = min(left[-1], right[-1]) + start = right[0] + + if end < start: + return type(self)(data=[]) + else: + lslice = slice(*left.slice_locs(start, end)) + left_chunk = left.values[lslice] + return self._shallow_copy(left_chunk) + @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs) def repeat(self, repeats, axis=None): nv.validate_repeat(tuple(), dict(axis=axis)) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cb7f60a12d610..b14c7aa2490ff 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -617,79 +617,6 @@ def 
_wrap_setop_result(self, other, result): name = get_op_result_name(self, other) return self._shallow_copy(result, name=name, freq=None, tz=self.tz) - def intersection(self, other, sort=False): - """ - Specialized intersection for DatetimeIndex objects. May be much faster - than Index.intersection - - Parameters - ---------- - other : DatetimeIndex or array-like - sort : False or None, default False - Sort the resulting index if possible. - - .. versionadded:: 0.24.0 - - .. versionchanged:: 0.24.1 - - Changed the default to ``False`` to match the behaviour - from before 0.24.0. - - Returns - ------- - y : Index or DatetimeIndex - """ - self._validate_sort_keyword(sort) - self._assert_can_do_setop(other) - - if self.equals(other): - return self._get_reconciled_name_object(other) - - if not isinstance(other, DatetimeIndex): - try: - other = DatetimeIndex(other) - except (TypeError, ValueError): - pass - result = Index.intersection(self, other, sort=sort) - if isinstance(result, DatetimeIndex): - if result.freq is None: - result.freq = to_offset(result.inferred_freq) - return result - - elif (other.freq is None or self.freq is None or - other.freq != self.freq or - not other.freq.isAnchored() or - (not self.is_monotonic or not other.is_monotonic)): - result = Index.intersection(self, other, sort=sort) - # Invalidate the freq of `result`, which may not be correct at - # this point, depending on the values. 
- result.freq = None - result = self._shallow_copy(result._values, name=result.name, - tz=result.tz, freq=None) - if result.freq is None: - result.freq = to_offset(result.inferred_freq) - return result - - if len(self) == 0: - return self - if len(other) == 0: - return other - # to make our life easier, "sort" the two ranges - if self[0] <= other[0]: - left, right = self, other - else: - left, right = other, self - - end = min(left[-1], right[-1]) - start = right[0] - - if end < start: - return type(self)(data=[]) - else: - lslice = slice(*left.slice_locs(start, end)) - left_chunk = left.values[lslice] - return self._shallow_copy(left_chunk) - # -------------------------------------------------------------------- def _get_time_micros(self): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fb3d4f09cfe5e..3a8892af3e1ae 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -802,6 +802,9 @@ def join(self, other, how='left', level=None, return_indexers=False, return self._apply_meta(result), lidx, ridx return self._apply_meta(result) + def intersection(self, other, sort=False): + return Index.intersection(self, other, sort=sort) + def _assert_can_do_setop(self, other): super(PeriodIndex, self)._assert_can_do_setop(other) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 830925535dab1..34030b77263d7 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -444,52 +444,6 @@ def _fast_union(self, other): else: return left - def intersection(self, other): - """ - Specialized intersection for TimedeltaIndex objects. 
May be much faster - than Index.intersection - - Parameters - ---------- - other : TimedeltaIndex or array-like - - Returns - ------- - y : Index or TimedeltaIndex - """ - self._assert_can_do_setop(other) - - if self.equals(other): - return self._get_reconciled_name_object(other) - - if not isinstance(other, TimedeltaIndex): - try: - other = TimedeltaIndex(other) - except (TypeError, ValueError): - pass - result = Index.intersection(self, other) - return result - - if len(self) == 0: - return self - if len(other) == 0: - return other - # to make our life easier, "sort" the two ranges - if self[0] <= other[0]: - left, right = self, other - else: - left, right = other, self - - end = min(left[-1], right[-1]) - start = right[0] - - if end < start: - return type(self)(data=[]) - else: - lslice = slice(*left.slice_locs(start, end)) - left_chunk = left.values[lslice] - return self._shallow_copy(left_chunk) - def _maybe_promote(self, other): if other.inferred_type == 'timedelta': other = TimedeltaIndex(other) From 119a995ad880d04a27539ae510a37bbb66ad5097 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Sun, 3 Feb 2019 16:48:17 +0000 Subject: [PATCH 02/15] add tests for timedeltaindex --- .../tests/indexes/timedeltas/test_setops.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index f7c3f764df0a0..2a5a77f0dc578 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -3,6 +3,7 @@ import pandas as pd from pandas import Int64Index, TimedeltaIndex, timedelta_range import pandas.util.testing as tm +import pytest class TestTimedeltaIndex(object): @@ -73,3 +74,80 @@ def test_intersection_bug_1708(self): result = index_1 & index_2 expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def 
test_intersection_equal(self, sort): + # for equal indicies intersection should return the original index + first = timedelta_range('1 day', periods=4, freq='h') + second = timedelta_range('1 day', periods=4, freq='h') + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_zero_length(self, sort): + index_1 = timedelta_range('1 day', periods=4, freq='h') + index_2 = timedelta_range('1 day', periods=0, freq='h') + inter = index_1.intersection(index_2, sort=sort) + tm.assert_index_equal(index_2, inter) + inter_2 = index_2.intersection(index_1, sort=sort) + tm.assert_index_equal(index_2, inter_2) + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection(self, sort): + # GH 4690 (with tz) + base = timedelta_range('1 day', periods=4, freq='h', name='idx') + + # if target has the same name, it is preserved + rng2 = timedelta_range('1 day', periods=5, freq='h', name='idx') + expected2 = timedelta_range('1 day', periods=4, freq='h', name='idx') + + # if target name is different, it will be reset + rng3 = timedelta_range('1 day', periods=5, freq='h', name='other') + expected3 = timedelta_range('1 day', periods=4, freq='h', name=None) + + rng4 = timedelta_range('1 day', periods=10, freq='h', name='idx')[5:] + expected4 = TimedeltaIndex([], name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize("sort", [None, False]) + def intersection_non_monotonic(self, sort): + # non-monotonic + base = TimedeltaIndex(['1 hour', '2 hour', + '4 hour', '3 hour'], + 
name='idx') + + rng2 = TimedeltaIndex(['5 hour', '2 hour', + '4 hour', '9 hour'], + name='idx') + expected2 = TimedeltaIndex(['2 hour', '4 hour'], + name='idx') + + rng3 = TimedeltaIndex(['2 hour', '5 hour', + '5 hour', '1 hour'], + name='other') + expected3 = TimedeltaIndex(['1 hour', '2 hour'], + name=None) + + rng4 = base[::-1] + expected4 = base + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq is None From 78821ba4d725e493af96d9cbc658e3000496ea65 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Sun, 3 Feb 2019 16:57:04 +0000 Subject: [PATCH 03/15] add whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 49b2349851479..85e7f613eb6e3 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -36,6 +36,7 @@ Other Enhancements - :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`) - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) +- :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` have been moved to :meth:`Datetimelike.intersection` and tests were added for :meth:`TimedeltaIndex.intersection` (:issue:`24966`). .. 
_whatsnew_0250.api_breaking: From c8d8155bdd529e35b2916e8b62a216969819c4b8 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Sun, 3 Feb 2019 17:39:48 +0000 Subject: [PATCH 04/15] fixing isort issue --- pandas/core/indexes/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9901cdb067e41..78390dfa0a3e0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -26,9 +26,9 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.tools.timedeltas import to_timedelta -from pandas.tseries.frequencies import to_offset import pandas.io.formats.printing as printing +from pandas.tseries.frequencies import to_offset _index_doc_kwargs = dict(ibase._index_doc_kwargs) From e6dba96b6ce511c0acb0fe56b4ecd941416888ed Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Sun, 3 Feb 2019 23:24:22 +0000 Subject: [PATCH 05/15] isort issue --- pandas/tests/indexes/timedeltas/test_setops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 2a5a77f0dc578..fd8e68a2fa92b 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -1,9 +1,9 @@ import numpy as np +import pytest import pandas as pd from pandas import Int64Index, TimedeltaIndex, timedelta_range import pandas.util.testing as tm -import pytest class TestTimedeltaIndex(object): From 4a115a2150089f094d8885702325af550cf31120 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Wed, 6 Feb 2019 23:48:54 +0000 Subject: [PATCH 06/15] update whatsnew --- doc/source/whatsnew/v0.25.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 85e7f613eb6e3..d68c19f643557 
100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -37,6 +37,7 @@ Other Enhancements - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) - :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` have been moved to :meth:`Datetimelike.intersection` and tests were added for :meth:`TimedeltaIndex.intersection` (:issue:`24966`). +- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword. .. _whatsnew_0250.api_breaking: @@ -248,7 +249,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- +- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indicies in some cases an empty Index was returned when in fact an intersection existed. - - From da2506b44d1fdc2bf2fa18a792d21d2fbcab252c Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Wed, 6 Feb 2019 23:49:16 +0000 Subject: [PATCH 07/15] update docstrings --- pandas/core/indexes/datetimelike.py | 23 ++--------------------- pandas/core/indexes/datetimes.py | 24 ++++++++++++++++++++++++ pandas/core/indexes/period.py | 1 + pandas/core/indexes/timedeltas.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 78390dfa0a3e0..cb94c59e6e646 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -534,27 +534,6 @@ def isin(self, values): return algorithms.isin(self.asi8, values.asi8) def intersection(self, other, sort=False): - """ - Specialized intersection for DatetimeIndex and TimedeltaIndex objects. 
- May be much faster than Index.intersection - - Parameters - ---------- - other : DatetimeIndex or TimedeltaIndex or array-like - sort : False or None, default False - Sort the resulting index if possible. - - .. versionadded:: 0.24.0 - - .. versionchanged:: 0.24.1 - - Changed the default to ``False`` to match the behaviour - from before 0.24.0. - - Returns - ------- - y : Index or DatetimeIndex or TimedeltaIndex - """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -577,6 +556,7 @@ def intersection(self, other, sort=False): not other.freq.isAnchored() or (not self.is_monotonic or not other.is_monotonic)): result = Index.intersection(self, other, sort=sort) + # Invalidate the freq of `result`, which may not be correct at # this point, depending on the values. result.freq = None @@ -594,6 +574,7 @@ def intersection(self, other, sort=False): return self if len(other) == 0: return other + # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b14c7aa2490ff..cf330cac2582d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -613,6 +613,30 @@ def _fast_union(self, other, sort=None): else: return left + def intersection(self, other, sort=False): + """ + Specialized intersection for DatetimeIndex objects. + May be much faster than Index.intersection + + Parameters + ---------- + other : DatetimeIndex or array-like + sort : False or None, default False + Sort the resulting index if possible. + + .. versionadded:: 0.24.0 + + .. versionchanged:: 0.24.1 + + Changed the default to ``False`` to match the behaviour + from before 0.24.0. 
+ + Returns + ------- + y : Index or DatetimeIndex or TimedeltaIndex + """ + return super().intersection(other, sort=sort) + def _wrap_setop_result(self, other, result): name = get_op_result_name(self, other) return self._shallow_copy(result, name=name, freq=None, tz=self.tz) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 3a8892af3e1ae..f55aab6cd593a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -802,6 +802,7 @@ def join(self, other, how='left', level=None, return_indexers=False, return self._apply_meta(result), lidx, ridx return self._apply_meta(result) + @Appender(Index.intersection.__doc__) def intersection(self, other, sort=False): return Index.intersection(self, other, sort=sort) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 34030b77263d7..72a5f603c5346 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -383,6 +383,34 @@ def join(self, other, how='left', level=None, return_indexers=False, return_indexers=return_indexers, sort=sort) + def intersection(self, other, sort=False): + """ + Specialized intersection for TimedeltaIndex objects. + May be much faster than Index.intersection + + Parameters + ---------- + other : TimedeltaIndex or array-like + sort : False or None, default False + Sort the resulting index if possible. + + .. versionadded:: 0.24.0 + + .. versionchanged:: 0.24.1 + + Changed the default to ``False`` to match the behaviour + from before 0.24.0. + + .. versionchanged:: 0.25.0 + + The `sort` keyword has been added to TimedeltaIndex as well. 
+ + Returns + ------- + y : Index or DatetimeIndex or TimedeltaIndex + """ + return super().intersection(other, sort=sort) + def _wrap_joined_index(self, joined, other): name = get_op_result_name(self, other) if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and From 8e2a80bc6f2adca2dd617dbdabb6ff8a621f6f29 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Wed, 6 Feb 2019 23:49:39 +0000 Subject: [PATCH 08/15] parametrize tests --- .../tests/indexes/timedeltas/test_setops.py | 115 ++++++++++-------- 1 file changed, 63 insertions(+), 52 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index fd8e68a2fa92b..897d12d2b33b4 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -4,6 +4,7 @@ import pandas as pd from pandas import Int64Index, TimedeltaIndex, timedelta_range import pandas.util.testing as tm +from pandas.tseries.offsets import Hour class TestTimedeltaIndex(object): @@ -89,65 +90,75 @@ def test_intersection_equal(self, sort): inter = first.intersection(first, sort=sort) assert inter is first + @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)]) @pytest.mark.parametrize("sort", [None, False]) - def test_intersection_zero_length(self, sort): - index_1 = timedelta_range('1 day', periods=4, freq='h') - index_2 = timedelta_range('1 day', periods=0, freq='h') + def test_intersection_zero_length(self, period_1, period_2, sort): + index_1 = timedelta_range('1 day', periods=period_1, freq='h') + index_2 = timedelta_range('1 day', periods=period_2, freq='h') inter = index_1.intersection(index_2, sort=sort) - tm.assert_index_equal(index_2, inter) - inter_2 = index_2.intersection(index_1, sort=sort) - tm.assert_index_equal(index_2, inter_2) - + tm.assert_index_equal(timedelta_range('1 day', periods=0, freq='h'), + inter) + + @pytest.mark.parametrize("rng, expected", + # if target has the same name, it is 
preserved + [(timedelta_range('1 day', periods=5, + freq='h', name='idx'), + timedelta_range('1 day', periods=4, + freq='h', name='idx')), + # if target name is different, it will be reset + (timedelta_range('1 day', periods=5, + freq='h', name='other'), + timedelta_range('1 day', periods=4, + freq='h', name=None)), + # if no overlap exists return empty index + (timedelta_range('1 day', periods=10, + freq='h', name='idx')[5:], + TimedeltaIndex([], name='idx')) + ]) @pytest.mark.parametrize("sort", [None, False]) - def test_intersection(self, sort): + def test_intersection(self, rng, expected, sort): # GH 4690 (with tz) base = timedelta_range('1 day', periods=4, freq='h', name='idx') - - # if target has the same name, it is preserved - rng2 = timedelta_range('1 day', periods=5, freq='h', name='idx') - expected2 = timedelta_range('1 day', periods=4, freq='h', name='idx') - - # if target name is different, it will be reset - rng3 = timedelta_range('1 day', periods=5, freq='h', name='other') - expected3 = timedelta_range('1 day', periods=4, freq='h', name=None) - - rng4 = timedelta_range('1 day', periods=10, freq='h', name='idx')[5:] - expected4 = TimedeltaIndex([], name='idx') - - for (rng, expected) in [(rng2, expected2), (rng3, expected3), - (rng4, expected4)]: - result = base.intersection(rng) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert result.freq == expected.freq - + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize("rng, expected", + # part intersection works + [(TimedeltaIndex(['5 hour', '2 hour', + '4 hour', '9 hour'], + name='idx'), + TimedeltaIndex(['2 hour', '4 hour'], + name='idx')), + # reordered part intersection + (TimedeltaIndex(['2 hour', '5 hour', + '5 hour', '1 hour'], + name='other'), + TimedeltaIndex(['1 hour', 
'2 hour'], + name=None)), + # reveresed index + (TimedeltaIndex(['1 hour', '2 hour', + '4 hour', '3 hour'], + name='idx')[::-1], + TimedeltaIndex(['1 hour', '2 hour', + '4 hour', '3 hour'], + name='idx'))]) @pytest.mark.parametrize("sort", [None, False]) - def intersection_non_monotonic(self, sort): + def test_intersection_non_monotonic(self, rng, expected, sort): # non-monotonic - base = TimedeltaIndex(['1 hour', '2 hour', - '4 hour', '3 hour'], + base = TimedeltaIndex(['1 hour', '2 hour', '4 hour', '3 hour'], name='idx') + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name - rng2 = TimedeltaIndex(['5 hour', '2 hour', - '4 hour', '9 hour'], - name='idx') - expected2 = TimedeltaIndex(['2 hour', '4 hour'], - name='idx') - - rng3 = TimedeltaIndex(['2 hour', '5 hour', - '5 hour', '1 hour'], - name='other') - expected3 = TimedeltaIndex(['1 hour', '2 hour'], - name=None) - - rng4 = base[::-1] - expected4 = base - - for (rng, expected) in [(rng2, expected2), (rng3, expected3), - (rng4, expected4)]: - result = base.intersection(rng, sort=sort) - if sort is None: - expected = expected.sort_values() - tm.assert_index_equal(result, expected) - assert result.name == expected.name + # if reveresed order, frequency is still the same + if all(base == rng[::-1]) and sort is None: + assert isinstance(result.freq, Hour) + else: assert result.freq is None From 05c29164eff3aaedcfe6f80752c502520484fdc0 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Thu, 7 Feb 2019 08:55:42 +0000 Subject: [PATCH 09/15] add compatibility of super with python2 --- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cf330cac2582d..090a3a1d9bade 100644 --- a/pandas/core/indexes/datetimes.py +++ 
b/pandas/core/indexes/datetimes.py @@ -635,7 +635,7 @@ def intersection(self, other, sort=False): ------- y : Index or DatetimeIndex or TimedeltaIndex """ - return super().intersection(other, sort=sort) + return super(DatetimeIndex, self).intersection(other, sort=sort) def _wrap_setop_result(self, other, result): name = get_op_result_name(self, other) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 72a5f603c5346..41802df3e58cc 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -409,7 +409,7 @@ def intersection(self, other, sort=False): ------- y : Index or DatetimeIndex or TimedeltaIndex """ - return super().intersection(other, sort=sort) + return super(TimedeltaIndex, self).intersection(other, sort=sort) def _wrap_joined_index(self, joined, other): name = get_op_result_name(self, other) From 8554cf9eb73885e570dbc476cd898dc4089963cf Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Fri, 29 Mar 2019 19:25:05 +0000 Subject: [PATCH 10/15] Added PR review comments --- doc/source/whatsnew/v0.25.0.rst | 6 +++--- pandas/core/indexes/datetimelike.py | 2 ++ pandas/tests/indexes/timedeltas/test_setops.py | 5 ++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d68c19f643557..ced3423f60352 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -36,8 +36,8 @@ Other Enhancements - :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`) - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) -- :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` have been moved to 
:meth:`Datetimelike.intersection` and tests were added for :meth:`TimedeltaIndex.intersection` (:issue:`24966`). -- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword. +- :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` have been moved to :meth:`Datetimelike.intersection` (:issue:`24966`) +- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) .. _whatsnew_0250.api_breaking: @@ -249,7 +249,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indicies in some cases an empty Index was returned when in fact an intersection existed. +- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indicies in some cases an empty Index was returned when in fact an intersection existed (:issue:`25913`) - - diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cb94c59e6e646..4e76c7af80a1c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -541,6 +541,8 @@ def intersection(self, other, sort=False): return self._get_reconciled_name_object(other) if not isinstance(other, type(self)): + # try converting other type to own type and ignore Type/ValueErrors + # caused e.g. 
by trying calling TimedeltaIndex on another object try: other = self(other) except (TypeError, ValueError): diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 897d12d2b33b4..a5bea2b025909 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -4,6 +4,7 @@ import pandas as pd from pandas import Int64Index, TimedeltaIndex, timedelta_range import pandas.util.testing as tm + from pandas.tseries.offsets import Hour @@ -78,6 +79,7 @@ def test_intersection_bug_1708(self): @pytest.mark.parametrize("sort", [None, False]) def test_intersection_equal(self, sort): + # GH 24471 Test intersection outcome given the sort keyword # for equal indicies intersection should return the original index first = timedelta_range('1 day', periods=4, freq='h') second = timedelta_range('1 day', periods=4, freq='h') @@ -93,6 +95,7 @@ def test_intersection_equal(self, sort): @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)]) @pytest.mark.parametrize("sort", [None, False]) def test_intersection_zero_length(self, period_1, period_2, sort): + # GH 24471 test for non overlap the intersection should be zero length index_1 = timedelta_range('1 day', periods=period_1, freq='h') index_2 = timedelta_range('1 day', periods=period_2, freq='h') inter = index_1.intersection(index_2, sort=sort) @@ -148,7 +151,7 @@ def test_intersection(self, rng, expected, sort): name='idx'))]) @pytest.mark.parametrize("sort", [None, False]) def test_intersection_non_monotonic(self, rng, expected, sort): - # non-monotonic + # 24471 non-monotonic base = TimedeltaIndex(['1 hour', '2 hour', '4 hour', '3 hour'], name='idx') result = base.intersection(rng, sort=sort) From 666b5238f28020e6c00008df5485d56de4cc167b Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Tue, 2 Apr 2019 10:28:15 +0100 Subject: [PATCH 11/15] jreback comments --- doc/source/whatsnew/v0.25.0.rst | 5 +- 
pandas/core/indexes/datetimelike.py | 12 ++-- .../tests/indexes/timedeltas/test_setops.py | 68 +++++++++---------- 3 files changed, 37 insertions(+), 48 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ced3423f60352..dd138e624055c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -32,12 +32,11 @@ Other Enhancements - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) - :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a mononotically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`) +- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) - :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`) - :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`) - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) -- :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` have been moved to :meth:`Datetimelike.intersection` (:issue:`24966`) -- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) .. 
_whatsnew_0250.api_breaking: @@ -249,7 +248,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indicies in some cases an empty Index was returned when in fact an intersection existed (:issue:`25913`) +- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indicies in some cases an empty ``Index`` was returned when in fact an intersection existed (:issue:`25913`) - - diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 4e76c7af80a1c..b7ecf30303960 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -541,12 +541,6 @@ def intersection(self, other, sort=False): return self._get_reconciled_name_object(other) if not isinstance(other, type(self)): - # try converting other type to own type and ignore Type/ValueErrors - # caused e.g. by trying calling TimedeltaIndex on another object - try: - other = self(other) - except (TypeError, ValueError): - pass result = Index.intersection(self, other, sort=sort) if isinstance(result, type(self)): if result.freq is None: @@ -573,9 +567,9 @@ def intersection(self, other, sort=False): return result if len(self) == 0: - return self + return self.copy() if len(other) == 0: - return other + return other.copy() # to make our life easier, "sort" the two ranges if self[0] <= other[0]: @@ -583,6 +577,8 @@ def intersection(self, other, sort=False): else: left, right = other, self + # after sorting, the intersection always starts with the right index + # and ends with the index whose last element is smallest end = min(left[-1], right[-1]) start = right[0] diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index a5bea2b025909..f9350da5cf52e 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -101,23 +101,21 @@ def test_intersection_zero_length(self, period_1,
period_2, sort): inter = index_1.intersection(index_2, sort=sort) tm.assert_index_equal(timedelta_range('1 day', periods=0, freq='h'), inter) - - @pytest.mark.parametrize("rng, expected", - # if target has the same name, it is preserved - [(timedelta_range('1 day', periods=5, - freq='h', name='idx'), - timedelta_range('1 day', periods=4, - freq='h', name='idx')), - # if target name is different, it will be reset - (timedelta_range('1 day', periods=5, - freq='h', name='other'), - timedelta_range('1 day', periods=4, - freq='h', name=None)), - # if no overlap exists return empty index - (timedelta_range('1 day', periods=10, - freq='h', name='idx')[5:], - TimedeltaIndex([], name='idx')) - ]) + tm.assert_copy(index_1, inter) + tm.assert_copy(index_2, inter) + + @pytest.mark.parametrize( + "rng, expected", + # if target has the same name, it is preserved + [ + (timedelta_range('1 day', periods=5, freq='h', name='idx'), + timedelta_range('1 day', periods=4, freq='h', name='idx')), + # if target name is different, it will be reset + (timedelta_range('1 day', periods=5, freq='h', name='other'), + timedelta_range('1 day', periods=4, freq='h', name=None)), + # if no overlap exists return empty index + (timedelta_range('1 day', periods=10, freq='h', name='idx')[5:], + TimedeltaIndex([], name='idx'))]) @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, rng, expected, sort): # GH 4690 (with tz) @@ -129,26 +127,22 @@ def test_intersection(self, rng, expected, sort): assert result.name == expected.name assert result.freq == expected.freq - @pytest.mark.parametrize("rng, expected", - # part intersection works - [(TimedeltaIndex(['5 hour', '2 hour', - '4 hour', '9 hour'], - name='idx'), - TimedeltaIndex(['2 hour', '4 hour'], - name='idx')), - # reordered part intersection - (TimedeltaIndex(['2 hour', '5 hour', - '5 hour', '1 hour'], - name='other'), - TimedeltaIndex(['1 hour', '2 hour'], - name=None)), - # reveresed index - (TimedeltaIndex(['1 hour', '2 
hour', - '4 hour', '3 hour'], - name='idx')[::-1], - TimedeltaIndex(['1 hour', '2 hour', - '4 hour', '3 hour'], - name='idx'))]) + @pytest.mark.parametrize( + "rng, expected", + # part intersection works + [ + (TimedeltaIndex(['5 hour', '2 hour', '4 hour', '9 hour'], + name='idx'), + TimedeltaIndex(['2 hour', '4 hour'], name='idx')), + # reordered part intersection + (TimedeltaIndex(['2 hour', '5 hour', '5 hour', '1 hour'], + name='other'), + TimedeltaIndex(['1 hour', '2 hour'], name=None)), + # reversed index + (TimedeltaIndex(['1 hour', '2 hour', '4 hour', '3 hour'], + name='idx')[::-1], + TimedeltaIndex(['1 hour', '2 hour', '4 hour', '3 hour'], + name='idx'))]) @pytest.mark.parametrize("sort", [None, False]) def test_intersection_non_monotonic(self, rng, expected, sort): # 24471 non-monotonic From ed6c61ddd15d4256a692ce63c2d52bd6db0d0b4f Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Tue, 2 Apr 2019 10:42:00 +0100 Subject: [PATCH 12/15] added test --- pandas/tests/indexes/timedeltas/test_setops.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index f9350da5cf52e..ff7be03e38d97 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -101,8 +101,14 @@ def test_intersection_zero_length(self, period_1, period_2, sort): inter = index_1.intersection(index_2, sort=sort) tm.assert_index_equal(timedelta_range('1 day', periods=0, freq='h'), inter) - tm.assert_copy(index_1, inter) - tm.assert_copy(index_2, inter) + + @pytest.mark.parametrize('sort', [None, False]) + def test_zero_length_input_index(self, sort): + index_1 = timedelta_range('1 day', periods=0, freq='h') + index_2 = timedelta_range('1 day', periods=3, freq='h') + inter = index_1.intersection(index_2, sort=sort) + assert index_1 is not inter + assert index_2 is not inter @pytest.mark.parametrize( "rng, expected", From
a9a63f7a40df380e40ebb6951215d46f959618be Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Thu, 4 Apr 2019 20:17:52 +0100 Subject: [PATCH 13/15] further feedback --- pandas/core/indexes/datetimelike.py | 10 +++++----- pandas/core/indexes/timedeltas.py | 2 +- pandas/tests/indexes/timedeltas/test_setops.py | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index b7ecf30303960..19bda211b5a6e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -540,6 +540,11 @@ def intersection(self, other, sort=False): if self.equals(other): return self._get_reconciled_name_object(other) + if len(self) == 0: + return self.copy() + if len(other) == 0: + return other.copy() + if not isinstance(other, type(self)): result = Index.intersection(self, other, sort=sort) if isinstance(result, type(self)): @@ -566,11 +571,6 @@ def intersection(self, other, sort=False): result.freq = to_offset(result.inferred_freq) return result - if len(self) == 0: - return self.copy() - if len(other) == 0: - return other.copy() - # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 41802df3e58cc..2a61de9390eef 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -403,7 +403,7 @@ def intersection(self, other, sort=False): .. versionchanged:: 0.25.0 - The `sort` keyword has been added to TimedeltaIndex as well. 
+ The `sort` keyword is added Returns ------- diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index ff7be03e38d97..02c43a5f934c6 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -99,8 +99,8 @@ def test_intersection_zero_length(self, period_1, period_2, sort): index_1 = timedelta_range('1 day', periods=period_1, freq='h') index_2 = timedelta_range('1 day', periods=period_2, freq='h') inter = index_1.intersection(index_2, sort=sort) - tm.assert_index_equal(timedelta_range('1 day', periods=0, freq='h'), - inter) + tm.assert_index_equal(inter, + timedelta_range('1 day', periods=0, freq='h')) @pytest.mark.parametrize('sort', [None, False]) def test_zero_length_input_index(self, sort): @@ -109,6 +109,7 @@ def test_zero_length_input_index(self, sort): inter = index_1.intersection(index_2, sort=sort) assert index_1 is not inter assert index_2 is not inter + tm.assert_copy(inter, index_1) @pytest.mark.parametrize( "rng, expected", From 2d8ed0bdc637e906f735aceb32f2036fc640824c Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Sat, 13 Apr 2019 17:10:58 +0100 Subject: [PATCH 14/15] result/expected and comment --- pandas/tests/indexes/timedeltas/test_setops.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 02c43a5f934c6..e148ff5e0a183 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -98,18 +98,19 @@ def test_intersection_zero_length(self, period_1, period_2, sort): # GH 24471 test for non overlap the intersection should be zero length index_1 = timedelta_range('1 day', periods=period_1, freq='h') index_2 = timedelta_range('1 day', periods=period_2, freq='h') - inter = index_1.intersection(index_2, sort=sort) - tm.assert_index_equal(inter, - 
timedelta_range('1 day', periods=0, freq='h')) + expected = timedelta_range('1 day', periods=0, freq='h') + result = index_1.intersection(index_2, sort=sort) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize('sort', [None, False]) def test_zero_length_input_index(self, sort): + # GH 24966 test that 0-len intersections are copied index_1 = timedelta_range('1 day', periods=0, freq='h') index_2 = timedelta_range('1 day', periods=3, freq='h') - inter = index_1.intersection(index_2, sort=sort) - assert index_1 is not inter - assert index_2 is not inter - tm.assert_copy(inter, index_1) + result = index_1.intersection(index_2, sort=sort) + assert index_1 is not result + assert index_2 is not result + tm.assert_copy(result, index_1) @pytest.mark.parametrize( "rng, expected", From c7e27bfa46094e61a3633989c93be5cf4de1a622 Mon Sep 17 00:00:00 2001 From: Florian Roessler Date: Mon, 29 Apr 2019 19:31:14 +0100 Subject: [PATCH 15/15] update --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/indexes/timedeltas.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 8c9ad2b1fa746..9bf6e55807efa 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -278,7 +278,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indicies in some cases an empty ``Index`` was returned when in fact an intersection existed (:issue:`25913`) +- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indices in some cases an empty ``Index`` was returned when in fact an intersection existed (:issue:`25913`) - - diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5b36bcf983c53..49fcb4b9899de 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -403,7 +403,7 @@ def intersection(self, other, sort=False): Returns ------- - y : Index or
DatetimeIndex or TimedeltaIndex + y : Index or TimedeltaIndex """ return super(TimedeltaIndex, self).intersection(other, sort=sort)