Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ Performance improvements
- Performance improvement in various :class:`MultiIndex` set and indexing operations (:issue:`53955`)
- Performance improvement when doing various reshaping operations on :class:`arrays.IntegerArrays` & :class:`arrays.FloatingArray` by avoiding doing unnecessary validation (:issue:`53013`)
- Performance improvement when indexing with pyarrow timestamp and duration dtypes (:issue:`53368`)
- Performance improvement when passing an array to :meth:`RangeIndex.take`, :meth:`DataFrame.loc`, or :meth:`DataFrame.iloc` and the DataFrame is using a RangeIndex (:issue:`53387`)
-

.. ---------------------------------------------------------------------------
Expand Down
28 changes: 13 additions & 15 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1266,17 +1266,17 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:

# TODO: other value-dependent functions to standardize here include
# Index._find_common_type_compat
def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
"""
Find the type/dtype for a the result of an operation between these objects.
Find the type/dtype for the result of an operation between objects.

This is similar to find_common_type, but looks at the objects instead
of just their dtypes. This can be useful in particular when one of the
objects does not have a `dtype`.
This is similar to find_common_type, but looks at the right object instead
of just its dtype. This can be useful in particular when the right
object does not have a `dtype`.

Parameters
----------
left : np.ndarray or ExtensionArray
left_dtype : np.dtype or ExtensionDtype
right : Any

Returns
Expand All @@ -1291,26 +1291,24 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
new_dtype: DtypeObj

if (
isinstance(left, np.ndarray)
and left.dtype.kind in "iuc"
isinstance(left_dtype, np.dtype)
and left_dtype.kind in "iuc"
and (lib.is_integer(right) or lib.is_float(right))
):
# e.g. with int8 dtype and right=512, we want to end up with
# np.int16, whereas infer_dtype_from(512) gives np.int64,
# which will make us upcast too far.
if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f":
if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
right = int(right)
new_dtype = np.result_type(left_dtype, right)

new_dtype = np.result_type(left, right)

elif is_valid_na_for_dtype(right, left.dtype):
elif is_valid_na_for_dtype(right, left_dtype):
# e.g. IntervalDtype[int] and None/np.nan
new_dtype = ensure_dtype_can_hold_na(left.dtype)
new_dtype = ensure_dtype_can_hold_na(left_dtype)

else:
dtype, _ = infer_dtype_from(right)

new_dtype = find_common_type([left.dtype, dtype])
new_dtype = find_common_type([left_dtype, dtype])

return new_dtype

Expand Down
7 changes: 5 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,6 @@ def take(
taken = values.take(
indices, allow_fill=allow_fill, fill_value=self._na_value
)
# _constructor so RangeIndex-> Index with an int64 dtype
return self._constructor._simple_new(taken, name=self.name)

@final
Expand Down Expand Up @@ -5560,6 +5559,10 @@ def equals(self, other: Any) -> bool:
if not isinstance(other, Index):
return False

if len(self) != len(other):
# quickly return if the lengths are different
return False

if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
# if other is not object, use other's logic for coercion
return other.equals(self)
Expand Down Expand Up @@ -6290,7 +6293,7 @@ def _find_common_type_compat(self, target) -> DtypeObj:
):
return _dtype_obj

dtype = find_result_type(self._values, target)
dtype = find_result_type(self.dtype, target)
dtype = common_dtype_categorical_compat([self, target], dtype)
return dtype

Expand Down
42 changes: 42 additions & 0 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

if TYPE_CHECKING:
from pandas._typing import (
Axis,
Dtype,
NaPosition,
Self,
Expand Down Expand Up @@ -1105,3 +1106,44 @@ def _arith_method(self, other, op):
except (ValueError, TypeError, ZeroDivisionError):
# test_arithmetic_explicit_conversions
return super()._arith_method(other, op)

def take(
self,
indices,
axis: Axis = 0,
allow_fill: bool = True,
fill_value=None,
**kwargs,
):
if kwargs:
nv.validate_take((), kwargs)
if is_scalar(indices):
raise TypeError("Expected indices to be array-like")
indices = ensure_platform_int(indices)

# raise an exception if allow_fill is True and fill_value is not None
self._maybe_disallow_fill(allow_fill, fill_value, indices)

if len(indices) == 0:
taken = np.array([], dtype=self.dtype)
else:
ind_max = indices.max()
if ind_max >= len(self):
raise IndexError(
f"index {ind_max} is out of bounds for axis 0 with size {len(self)}"
)
ind_min = indices.min()
if ind_min < -len(self):
raise IndexError(
f"index {ind_min} is out of bounds for axis 0 with size {len(self)}"
)
taken = indices.astype(self.dtype, casting="safe")
if ind_min < 0:
taken %= len(self)
if self.step != 1:
taken *= self.step
if self.start != 0:
taken += self.start

# _constructor so RangeIndex-> Index with an int64 dtype
return self._constructor._simple_new(taken, name=self.name)
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ def coerce_to_target_dtype(self, other) -> Block:
we can also safely try to coerce to the same dtype
and will receive the same block
"""
new_dtype = find_result_type(self.values, other)
new_dtype = find_result_type(self.values.dtype, other)

return self.astype(new_dtype, copy=False)

Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/numeric/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,17 @@ def test_range_float_union_dtype(self):
result = other.union(index)
tm.assert_index_equal(result, expected)

def test_range_uint64_union_dtype(self):
# https://github.com/pandas-dev/pandas/issues/26778
index = RangeIndex(start=0, stop=3)
other = Index([0, 10], dtype=np.uint64)
result = index.union(other)
expected = Index([0, 1, 2, 10], dtype=object)
tm.assert_index_equal(result, expected)

result = other.union(index)
tm.assert_index_equal(result, expected)

def test_float64_index_difference(self):
# https://github.com/pandas-dev/pandas/issues/35217
float_index = Index([1.0, 2, 3])
Expand Down
42 changes: 42 additions & 0 deletions pandas/tests/indexes/ranges/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,52 @@ def test_take_fill_value(self):
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)

def test_take_raises_index_error(self):
idx = RangeIndex(1, 4, name="xxx")

msg = "index -5 is out of bounds for (axis 0 with )?size 3"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -5]))

msg = "index -4 is out of bounds for (axis 0 with )?size 3"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -4]))

# no errors
result = idx.take(np.array([1, -3]))
expected = Index([2, 1], dtype=np.int64, name="xxx")
tm.assert_index_equal(result, expected)

def test_take_accepts_empty_array(self):
idx = RangeIndex(1, 4, name="foo")
result = idx.take(np.array([]))
expected = Index([], dtype=np.int64, name="foo")
tm.assert_index_equal(result, expected)

# empty index
idx = RangeIndex(0, name="foo")
result = idx.take(np.array([]))
expected = Index([], dtype=np.int64, name="foo")
tm.assert_index_equal(result, expected)

def test_take_accepts_non_int64_array(self):
idx = RangeIndex(1, 4, name="foo")
result = idx.take(np.array([2, 1], dtype=np.uint32))
expected = Index([3, 2], dtype=np.int64, name="foo")
tm.assert_index_equal(result, expected)

def test_take_when_index_has_step(self):
idx = RangeIndex(1, 11, 3, name="foo") # [1, 4, 7, 10]
result = idx.take(np.array([1, 0, -1, -4]))
expected = Index([4, 1, 10, 1], dtype=np.int64, name="foo")
tm.assert_index_equal(result, expected)

def test_take_when_index_has_negative_step(self):
idx = RangeIndex(11, -4, -2, name="foo") # [11, 9, 7, 5, 3, 1, -1, -3]
result = idx.take(np.array([1, 0, -1, -8]))
expected = Index([9, 11, -3, 11], dtype=np.int64, name="foo")
tm.assert_index_equal(result, expected)


class TestWhere:
def test_where_putmask_range_cast(self):
Expand Down
14 changes: 12 additions & 2 deletions pandas/tests/indexes/ranges/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def test_dtype(self, simple_index):
assert index.dtype == np.int64

def test_cache(self):
# GH 26565, GH26617, GH35432
# GH 26565, GH26617, GH35432, GH53387
# This test checks whether _cache has been set.
# Calling RangeIndex._cache["_data"] creates an int64 array of the same length
# as the RangeIndex and stores it in _cache.
Expand Down Expand Up @@ -264,11 +264,21 @@ def test_cache(self):
df.iloc[5:10]
assert idx._cache == {}

# after calling take, _cache may contain other keys, but not "_data"
idx.take([3, 0, 1])
assert "_data" not in idx._cache

df.loc[[50]]
assert "_data" not in idx._cache

df.iloc[[5, 6, 7, 8, 9]]
assert "_data" not in idx._cache

# idx._cache should contain a _data entry after call to idx._data
idx._data
assert isinstance(idx._data, np.ndarray)
assert idx._data is idx._data # check cached value is reused
assert len(idx._cache) == 1
assert "_data" in idx._cache
expected = np.arange(0, 100, 10, dtype="int64")
tm.assert_numpy_array_equal(idx._cache["_data"], expected)

Expand Down