diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 322f431a37a79..a5bd0dbe95808 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -404,4 +404,5 @@ Bug Fixes - Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`) - Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`) +- Bug in ``MultiIndex.__contains__`` which raised an ``IndexError`` for large multiindices (:issue:`10645`) - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) diff --git a/pandas/index.pyx b/pandas/index.pyx index 1678e3b280ee5..9259ed922eb13 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -143,6 +143,11 @@ cdef class IndexEngine: return self._get_loc_duplicates(val) values = self._get_index_values() loc = _bin_search(values, val) # .searchsorted(val, side='left') + + # GH10675 + if len(values) <= loc or 0 > loc: + raise KeyError(val) + if util.get_value_at(values, loc) != val: raise KeyError(val) return loc diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 81c6366b4cb41..9c0b488a53265 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1253,6 +1253,14 @@ def test_get_loc(self): with tm.assertRaises(TypeError): idx.get_loc('a', method='nearest') + def test_get_loc_keyerror(self): + # GH10645 + mi = pd.MultiIndex.from_arrays([range(100), range(100)]) + self.assertRaises(KeyError, lambda: mi.get_loc((1000001, 0))) + + mi = pd.MultiIndex.from_arrays([range(1000000), range(1000000)]) + self.assertRaises(KeyError, lambda: mi.get_loc((1000001, 0))) + def test_slice_locs(self): for dtype in [int, float]: idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))