diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1fb9b5ae695a0..0f31078d7bf43 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -502,7 +502,7 @@ Performance Improvements - Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`) - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) -- Improved performance when slicing :class:`RangeIndex` (:issue:`26565`) +- Improved performance of slicing and other selected operations on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) - Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8538687ca3e91..b8c020ff0edb1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4013,11 +4013,7 @@ def __contains__(self, key): @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) def contains(self, key): - hash(key) - try: - return key in self._engine - except (TypeError, ValueError): - return False + return key in self def __hash__(self): raise TypeError("unhashable type: %r" % type(self).__name__) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 82fd7342c027c..14ebc3c7e8e2a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,6 +1,7 @@ from datetime import timedelta import operator from sys import getsizeof +from typing import Union 
import warnings import numpy as np @@ -334,6 +335,14 @@ def is_monotonic_decreasing(self): def has_duplicates(self): return False + def __contains__(self, key: Union[int, np.integer]) -> bool: + hash(key) + try: + key = ensure_python_int(key) + except TypeError: + return False + return key in self._range + @Appender(_index_shared_docs['get_loc']) def get_loc(self, key, method=None, tolerance=None): if is_integer(key) and method is None and tolerance is None: @@ -640,6 +649,12 @@ def __floordiv__(self, other): return self._simple_new(start, start + 1, 1, name=self.name) return self._int64index // other + def all(self) -> bool: + return 0 not in self._range + + def any(self) -> bool: + return any(self._range) + @classmethod def _add_numeric_methods_binary(cls): """ add in numeric methods, specialized to RangeIndex """ @@ -725,4 +740,3 @@ def _evaluate_numeric_binop(self, other): RangeIndex._add_numeric_methods() -RangeIndex._add_logical_methods() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index bca50186827de..6eece0ed8efee 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -245,10 +245,9 @@ def test_dtype(self): assert self.index.dtype == np.int64 def test_cached_data(self): - # GH 26565 - # Calling RangeIndex._data caches an int64 array of the same length as - # self at self._cached_data. - # This tests whether _cached_data is being set by various operations. + # GH 26565, GH 26617 + # Calling RangeIndex._data caches an int64 array of the same length at + # self._cached_data. 
This test checks whether _cached_data has been set idx = RangeIndex(0, 100, 10) assert idx._cached_data is None @@ -262,6 +261,24 @@ def test_cached_data(self): idx.get_loc(20) assert idx._cached_data is None + 90 in idx + assert idx._cached_data is None + + 91 in idx + assert idx._cached_data is None + + idx.contains(90) + assert idx._cached_data is None + + idx.contains(91) + assert idx._cached_data is None + + idx.all() + assert idx._cached_data is None + + idx.any() + assert idx._cached_data is None + df = pd.DataFrame({'a': range(10)}, index=idx) df.loc[50]