From af28c7fa033bdfa9eaad9917d5f750116a7cc7c3 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 11 Nov 2020 21:14:42 +0100 Subject: [PATCH 1/5] Bug in loc did not raise KeyError when missing combination with slice(None) was given --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexing.py | 2 ++ pandas/tests/indexing/multiindex/test_loc.py | 14 ++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..7e99fc900a46e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -470,6 +470,7 @@ Indexing - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) +- Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c5e331a104726..2c7a8320824ad 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1107,6 +1107,8 @@ def _getitem_axis(self, key, axis: int): # nested tuple slicing if is_nested_tuple(key, labels): locs = labels.get_locs(key) + if not len(locs): + raise KeyError(key) indexer = [slice(None)] * self.ndim indexer[axis] = locs return self.obj.iloc[tuple(indexer)] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 646520a9ac54f..24da240d09525 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -591,6 +591,20 @@ def test_missing_key_raises_keyerror2(self): with pytest.raises(KeyError, match=r"\(0, 3\)"): ser.loc[0, 3] + def test_missing_key_combination(self): + # GH: 19556 + mi = MultiIndex.from_arrays( + [ + np.array(["a", "a", "b", "b"]), + np.array(["1", "2", "2", "3"]), + np.array(["c", "d", "c", "d"]), + ], + names=["one", "two", "three"], + ) + df = pd.DataFrame(np.random.rand(4, 3), index=mi) + with pytest.raises(KeyError, match=r"\('b', '1', slice\(None, None, None\)\)"): + df.loc[("b", "1", slice(None)), :] + def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data): df = multiindex_year_month_day_dataframe_random_data From 5d68a7bc215e2892a5d350a224f5336f1cd5e9d7 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 11 Nov 2020 21:23:25 +0100 Subject: [PATCH 2/5] Fix pattern --- pandas/tests/indexing/multiindex/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 24da240d09525..776281f8bc714 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -601,7 +601,7 @@ def test_missing_key_combination(self): ], names=["one", "two", "three"], ) - df = pd.DataFrame(np.random.rand(4, 3), index=mi) + df = DataFrame(np.random.rand(4, 3), index=mi) with pytest.raises(KeyError, match=r"\('b', '1', slice\(None, None, None\)\)"): df.loc[("b", "1", slice(None)), :] From 97368da7a44b1919aa9149581ae6401d0c44f750 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 13:14:27 +0100 Subject: [PATCH 3/5] Raise in get_locs --- pandas/core/indexes/multi.py | 19 ++++++++++++++----- pandas/core/indexing.py | 2 -- pandas/tests/indexing/multiindex/test_loc.py | 5 ++++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5a3f2b0853c4f..20113c23e9c8a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3128,19 +3128,26 @@ def _convert_to_indexer(r) -> Int64Index: r = r.nonzero()[0] return Int64Index(r) - def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index: + def _update_indexer( + idxr: Optional[Index], indexer: Optional[Index], key + ) -> Index: if indexer is None: indexer = Index(np.arange(n)) if idxr is None: return indexer - return indexer.intersection(idxr) + indexer_intersection = indexer.intersection(idxr) + if indexer_intersection.empty and not idxr.empty and not indexer.empty: + raise KeyError(key) + return indexer_intersection for i, k in enumerate(seq): if com.is_bool_indexer(k): # a boolean indexer, must be the same length! k = np.asarray(k) - indexer = _update_indexer(_convert_to_indexer(k), indexer=indexer) + indexer = _update_indexer( + _convert_to_indexer(k), indexer=indexer, key=seq + ) elif is_list_like(k): # a collection of labels to include from this level (these @@ -3160,14 +3167,14 @@ def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index: continue if indexers is not None: - indexer = _update_indexer(indexers, indexer=indexer) + indexer = _update_indexer(indexers, indexer=indexer, key=seq) else: # no matches we are done return np.array([], dtype=np.int64) elif com.is_null_slice(k): # empty slice - indexer = _update_indexer(None, indexer=indexer) + indexer = _update_indexer(None, indexer=indexer, key=seq) elif isinstance(k, slice): @@ -3177,6 +3184,7 @@ def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index: self._get_level_indexer(k, level=i, indexer=indexer) ), indexer=indexer, + key=seq, ) else: # a single label @@ -3185,6 +3193,7 @@ def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index: self.get_loc_level(k, level=i, drop_level=False)[0] ), indexer=indexer, + key=seq, ) # empty indexer diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2c7a8320824ad..c5e331a104726 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1107,8 +1107,6 @@ def _getitem_axis(self, key, axis: int): # nested tuple slicing if is_nested_tuple(key, labels): locs = labels.get_locs(key) - if not len(locs): - raise KeyError(key) indexer = [slice(None)] * self.ndim indexer[axis] = locs return self.obj.iloc[tuple(indexer)] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 776281f8bc714..b427eb9926343 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -602,8 +602,11 @@ def test_missing_key_combination(self): names=["one", "two", "three"], ) df = DataFrame(np.random.rand(4, 3), index=mi) - with pytest.raises(KeyError, match=r"\('b', '1', slice\(None, None, None\)\)"): + msg = r"\('b', '1', slice\(None, None, None\)\)" + with pytest.raises(KeyError, match=msg): df.loc[("b", "1", slice(None)), :] + with pytest.raises(KeyError, match=msg): + df.index.get_locs(("b", "1", slice(None))) def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data): From 8e02be2924bc5beebd4f600f9c2e676b6f92b43c Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 11:50:03 +0100 Subject: [PATCH 4/5] Add test --- pandas/tests/indexing/multiindex/test_loc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index b427eb9926343..5d37812d27a1c 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -607,6 +607,8 @@ def test_missing_key_combination(self): df.loc[("b", "1", slice(None)), :] with pytest.raises(KeyError, match=msg): df.index.get_locs(("b", "1", slice(None))) + with pytest.raises(KeyError, match=r"\('b', '1'\)"): + df.loc[('b', '1'), :] def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data): From c74d0c64c2915c4b251510f2de015c53f99a6c7c Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 13:19:11 +0100 Subject: [PATCH 5/5] Run black --- pandas/tests/indexing/multiindex/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 5d37812d27a1c..a7c8740ce06ed 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -608,7 +608,7 @@ def test_missing_key_combination(self): with pytest.raises(KeyError, match=msg): df.index.get_locs(("b", "1", slice(None))) with pytest.raises(KeyError, match=r"\('b', '1'\)"): - df.loc[('b', '1'), :] + df.loc[("b", "1"), :] def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):