diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index fd47ca14dc788..64175c9d9892c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -525,7 +525,7 @@ Performance Improvements - Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`) - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) -- Improved performance of slicing and other selected operation on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`) +- Improved performance of slicing and other selected operations on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`, :issue:`26722`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) - Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4fb9c4197109f..5bf97f44edeed 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -273,8 +273,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if isinstance(data, RangeIndex): return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) elif isinstance(data, range): - return RangeIndex.from_range(data, copy=copy, dtype=dtype, - name=name) + return RangeIndex.from_range(data, dtype=dtype, name=name) # categorical elif is_categorical_dtype(data) or is_categorical_dtype(dtype): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7daeb9b644a9b..ab39969af8db0 100644 --- a/pandas/core/indexes/range.py +++ 
b/pandas/core/indexes/range.py @@ -6,7 +6,7 @@ import numpy as np -from pandas._libs import index as libindex, lib +from pandas._libs import index as libindex import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly @@ -82,16 +82,15 @@ def __new__(cls, start=None, stop=None, step=None, "removed in a future version.", FutureWarning, stacklevel=2) if fastpath: - return cls._simple_new(start, stop, step, name=name) + return cls._simple_new(range(start, stop, step), name=name) cls._validate_dtype(dtype) # RangeIndex if isinstance(start, RangeIndex): - if name is None: - name = start.name - return cls._simple_new(name=name, - **dict(start._get_data_as_items())) + name = start.name if name is None else name + start = start._range + return cls._simple_new(start, dtype=dtype, name=name) # validate the arguments if com._all_none(start, stop, step): @@ -108,10 +107,11 @@ def __new__(cls, start=None, stop=None, step=None, if step == 0: raise ValueError("Step must not be zero") - return cls._simple_new(start, stop, step, name) + rng = range(start, stop, step) + return cls._simple_new(rng, dtype=dtype, name=name) @classmethod - def from_range(cls, data, name=None, dtype=None, **kwargs): + def from_range(cls, data, name=None, dtype=None): """ Create RangeIndex from a range object. @@ -124,26 +124,21 @@ def from_range(cls, data, name=None, dtype=None, **kwargs): '{0}(...) 
must be called with object coercible to a ' 'range, {1} was passed'.format(cls.__name__, repr(data))) - start, stop, step = data.start, data.stop, data.step - return cls(start, stop, step, dtype=dtype, name=name, **kwargs) + cls._validate_dtype(dtype) + return cls._simple_new(data, dtype=dtype, name=name) @classmethod - def _simple_new(cls, start, stop=None, step=None, name=None, - dtype=None, **kwargs): + def _simple_new(cls, values, name=None, dtype=None, **kwargs): result = object.__new__(cls) # handle passed None, non-integers - if start is None and stop is None: + if values is None: # empty - start, stop, step = 0, 0, 1 + values = range(0, 0, 1) + elif not isinstance(values, range): + return Index(values, dtype=dtype, name=name, **kwargs) - if start is None or not is_integer(start): - try: - return cls(start, stop, step, name=name, **kwargs) - except TypeError: - return Index(start, stop, step, name=name, **kwargs) - - result._range = range(start, stop or 0, step or 1) + result._range = values result.name = name for k, v in kwargs.items(): @@ -360,8 +355,7 @@ def tolist(self): def _shallow_copy(self, values=None, **kwargs): if values is None: name = kwargs.get("name", self.name) - return self._simple_new( - name=name, **dict(self._get_data_as_items())) + return self._simple_new(self._range, name=name) else: kwargs.setdefault('name', self.name) return self._int64index._shallow_copy(values, **kwargs) @@ -480,11 +474,13 @@ def intersection(self, other, sort=False): tmp_start = first.start + (second.start - first.start) * \ first.step // gcd * s new_step = first.step * second.step // gcd - new_index = self._simple_new(tmp_start, int_high, new_step) + new_range = range(tmp_start, int_high, new_step) + new_index = self._simple_new(new_range) # adjust index to limiting interval new_start = new_index._min_fitting_element(int_low) - new_index = self._simple_new(new_start, new_index.stop, new_index.step) + new_range = range(new_start, new_index.stop, new_index.step) + 
new_index = self._simple_new(new_range) if (self.step < 0 and other.step < 0) is not (new_index.step < 0): new_index = new_index[::-1] @@ -609,12 +605,10 @@ def __getitem__(self, key): """ Conserve RangeIndex type for scalar and slice keys. """ - if is_scalar(key): - if not lib.is_integer(key): - raise IndexError("only integers, slices (`:`), " - "ellipsis (`...`), numpy.newaxis (`None`) " - "and integer or boolean " - "arrays are valid indices") + if isinstance(key, slice): + new_range = self._range[key] + return self._simple_new(new_range, name=self.name) + elif is_integer(key): new_key = int(key) try: return self._range[new_key] @@ -622,10 +616,11 @@ def __getitem__(self, key): raise IndexError("index {key} is out of bounds for axis 0 " "with size {size}".format(key=key, size=len(self))) - if isinstance(key, slice): - new_range = self._range[key] - return self.from_range(new_range, name=self.name) - + elif is_scalar(key): + raise IndexError("only integers, slices (`:`), " + "ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean " + "arrays are valid indices") # fall back to Int64Index return super().__getitem__(key) @@ -640,10 +635,12 @@ def __floordiv__(self, other): start = self.start // other step = self.step // other stop = start + len(self) * step - return self._simple_new(start, stop, step, name=self.name) + new_range = range(start, stop, step or 1) + return self._simple_new(new_range, name=self.name) if len(self) == 1: start = self.start // other - return self._simple_new(start, start + 1, 1, name=self.name) + new_range = range(start, start + 1, 1) + return self._simple_new(new_range, name=self.name) return self._int64index // other def all(self) -> bool: diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 6eece0ed8efee..3f474b0166b15 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -94,8 +94,9 @@ def test_constructor_same(self): def 
test_constructor_range(self): - with pytest.raises(TypeError): - RangeIndex(range(1, 5, 2)) + msg = "Value needs to be a scalar value, was type " + with pytest.raises(TypeError, match=msg): + result = RangeIndex(range(1, 5, 2)) result = RangeIndex.from_range(range(1, 5, 2)) expected = RangeIndex(1, 5, 2) @@ -120,6 +121,9 @@ def test_constructor_range(self): with pytest.raises(TypeError): Index(range(1, 5, 2), dtype='float64') + msg = r'^from_range\(\) got an unexpected keyword argument' + with pytest.raises(TypeError, match=msg): + pd.RangeIndex.from_range(range(10), copy=True) def test_constructor_name(self): # GH12288