diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b36499c340fd9..cac7b9d8677b0 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -945,6 +945,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returning :class:`MultiIndex` in wrong order if indexer has duplicates (:issue:`40978`) - Bug in :meth:`DataFrame.__setitem__` raising ``TypeError`` when using a str subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`) - Bug in :meth:`PeriodIndex.get_loc` failing to raise ``KeyError`` when given a :class:`Period` with a mismatched ``freq`` (:issue:`41670`) +- Bug in ``.loc.__getitem__`` with a :class:`UInt64Index` and negative-integer keys raising ``OverflowError`` instead of ``KeyError`` in some cases, wrapping around to positive integers in others (:issue:`41777`) Missing ^^^^^^^ diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f7cec262ca302..3351bb7cac7d6 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -106,7 +106,8 @@ cdef class IndexEngine: try: return self.mapping.get_item(val) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): + # GH#41775 OverflowError e.g. if we are uint64 and val is -1 raise KeyError(val) cdef inline _get_loc_duplicates(self, object val): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 124903446220d..db718916d7fd7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5410,6 +5410,7 @@ def _find_common_type_compat(self, target) -> DtypeObj: return np.dtype("object") dtype = find_common_type([self.dtype, target_dtype]) + if dtype.kind in ["i", "u"]: # TODO: what about reversed with self being categorical? 
if ( diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 5f24eb0cfaad6..3dc46f04d1d45 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -49,7 +49,6 @@ TimedeltaArray, ) from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin -import pandas.core.common as com import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, @@ -599,7 +598,7 @@ def _convert_arr_indexer(self, keyarr): try: return self._data._validate_listlike(keyarr, allow_object=True) except (ValueError, TypeError): - return com.asarray_tuplesafe(keyarr) + return super()._convert_arr_indexer(keyarr) class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index de7c522b4fbec..e6526bd0eaf2f 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -37,7 +37,6 @@ ) from pandas.core.dtypes.generic import ABCSeries -import pandas.core.common as com from pandas.core.indexes.base import ( Index, maybe_extract_name, @@ -250,21 +249,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): # we will try to coerce to integers return self._maybe_cast_indexer(label) - @doc(Index._convert_arr_indexer) - def _convert_arr_indexer(self, keyarr) -> np.ndarray: - if not is_unsigned_integer_dtype(self.dtype): - return super()._convert_arr_indexer(keyarr) - - # Cast the indexer to uint64 if possible so that the values returned - # from indexing are also uint64. 
- dtype = None - if is_integer_dtype(keyarr) or ( - lib.infer_dtype(keyarr, skipna=False) == "integer" - ): - dtype = np.dtype(np.uint64) - - return com.asarray_tuplesafe(keyarr, dtype=dtype) - # ---------------------------------------------------------------- @doc(Index._shallow_copy) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ab868a3d3713d..dcccd42c52c8c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1010,18 +1010,32 @@ def test_loc_copy_vs_view(self): def test_loc_uint64(self): # GH20722 # Test whether loc accept uint64 max value as index. - s = Series([1, 2], index=[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]) + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) - result = s.loc[np.iinfo("uint64").max - 1] - expected = s.iloc[0] + result = ser.loc[umax - 1] + expected = ser.iloc[0] assert result == expected - result = s.loc[[np.iinfo("uint64").max - 1]] - expected = s.iloc[[0]] + result = ser.loc[[umax - 1]] + expected = ser.iloc[[0]] tm.assert_series_equal(result, expected) - result = s.loc[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] - tm.assert_series_equal(result, s) + result = ser.loc[[umax - 1, umax]] + tm.assert_series_equal(result, ser) + + def test_loc_uint64_disallow_negative(self): + # GH#41775 + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[-1] + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[[-1]] def test_loc_setitem_empty_append_expands_rows(self): # GH6173, various appends to an empty dataframe