From 855f710c9af8dfea1e381eea08d10795c67126cb Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 8 Nov 2020 23:29:20 +0100 Subject: [PATCH 1/3] BUG: Fix return of missing values when applying loc to single level of MultiIndex --- doc/source/whatsnew/v1.2.0.rst | 2 ++ pandas/core/indexes/base.py | 4 ++-- pandas/tests/indexing/multiindex/test_loc.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d07db18ee5df0..f41f65ad802d6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -1,4 +1,5 @@ .. _whatsnew_120: +.. _whatsnew_120: What's new in 1.2.0 (??) ------------------------ @@ -466,6 +467,7 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`) - Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`) - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) +- Bug in :meth:`DataFrame.loc` returning the requested key plus missing values when ``loc`` was applied to a single level of a :class:`MultiIndex` (:issue:`27104`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 545d1d834fe2d..0a938dbc100e1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3842,9 +3842,9 @@ def _get_leaf_sorter(labels): else: left_lev_indexer = ensure_int64(left_lev_indexer) rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) - + old_codes = left.codes[level] new_lev_codes = algos.take_nd( - rev_indexer, left.codes[level], allow_fill=False + rev_indexer, old_codes[old_codes != -1], allow_fill=False ) new_codes = list(left.codes) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 
d79af1ea6b804..202bfde7cc26a 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -598,3 +598,15 @@ def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_da result = ser[2000, 5] expected = df.loc[2000, 5]["A"] tm.assert_series_equal(result, expected) + + +def test_loc_with_nan(): + # GH: 27104 + df = DataFrame( + {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]} + ).set_index(["ind1", "ind2"]) + result = df.loc[["a"]] + expected = DataFrame( + {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"]) + ) + tm.assert_frame_equal(result, expected) From a7a50921bd1adece4da07c318c169a8e421bea59 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 00:06:57 +0100 Subject: [PATCH 2/3] Delete duplicate --- doc/source/whatsnew/v1.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f41f65ad802d6..b5cc17d2f1235 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -1,5 +1,4 @@ .. _whatsnew_120: -.. _whatsnew_120: What's new in 1.2.0 (??) ------------------------ From f12c0500e9fa98478c575523f84d571a4ae97f06 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 02:09:29 +0100 Subject: [PATCH 3/3] Add non list case --- pandas/tests/indexing/multiindex/test_loc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 202bfde7cc26a..5a9da262dc54d 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -610,3 +610,7 @@ def test_loc_with_nan(): {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"]) ) tm.assert_frame_equal(result, expected) + + result = df.loc["a"] + expected = DataFrame({"col": [1]}, index=Index([1], name="ind2")) + tm.assert_frame_equal(result, expected)