From a9a4ff4635d8e2d0a62b61b2e1699b3e5de26984 Mon Sep 17 00:00:00 2001 From: Theodor Athanasiadis Date: Thu, 21 Jan 2021 23:45:59 +0200 Subject: [PATCH 1/6] TST: Added test case for Multiindex slicing with NaNs issue #25154 --- .../tests/indexing/multiindex/test_getitem.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 0ad9f947d2039..5568561794007 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -231,6 +231,26 @@ def test_frame_getitem_nan_multiindex(nulls_fixture): tm.assert_frame_equal(result, expected) +def test_frame_getitem_nan_cols_multiindex(nulls_fixture): + # Slicing MultiIndex including levels with nan values, for more information + # see GH#25154 + data = [[1, 2, 3], [4, 5, 6]] + index = ["First", nulls_fixture] + columns = MultiIndex.from_tuples([("a", "foo"), ("b", "foo"), ("b", nulls_fixture)]) + df = DataFrame(data=data, columns=columns, index=index, dtype="int64") + + # Slicing out 'b', ['foo', nan] + cols = (["b"], ["foo", nulls_fixture]) + result = df.loc[:, cols] + expected_columns = MultiIndex.from_tuples([("b", "foo"), ("b", nulls_fixture)]) + expected_index = ["First", nulls_fixture] + expected = DataFrame( + [[2, 3], [5, 6]], columns=expected_columns, index=expected_index, dtype="int64" + ) + + tm.assert_frame_equal(result, expected) + + # ---------------------------------------------------------------------------- # test indexing of DataFrame with multi-level Index with duplicates # ---------------------------------------------------------------------------- From da9a5f08ed39238ef0f780eb0306deb3e57adad0 Mon Sep 17 00:00:00 2001 From: Theodor Athanasiadis Date: Sun, 24 Jan 2021 01:01:48 +0200 Subject: [PATCH 2/6] Removed unnecessary definitions. 
--- pandas/tests/indexing/multiindex/test_getitem.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 2d543f5c456b4..ef52692f0253d 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -232,18 +232,22 @@ def test_frame_getitem_nan_multiindex(nulls_fixture): def test_frame_getitem_nan_cols_multiindex(nulls_fixture): # Slicing MultiIndex including levels with nan values, for more information # see GH#25154 - data = [[1, 2, 3], [4, 5, 6]] index = ["First", nulls_fixture] - columns = MultiIndex.from_tuples([("a", "foo"), ("b", "foo"), ("b", nulls_fixture)]) - df = DataFrame(data=data, columns=columns, index=index, dtype="int64") + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples( + [("a", "foo"), ("b", "foo"), ("b", nulls_fixture)] + ), + index=index, + dtype="int64", + ) # Slicing out 'b', ['foo', nan] cols = (["b"], ["foo", nulls_fixture]) result = df.loc[:, cols] expected_columns = MultiIndex.from_tuples([("b", "foo"), ("b", nulls_fixture)]) - expected_index = ["First", nulls_fixture] expected = DataFrame( - [[2, 3], [5, 6]], columns=expected_columns, index=expected_index, dtype="int64" + [[2, 3], [5, 6]], columns=expected_columns, index=index, dtype="int64" ) tm.assert_frame_equal(result, expected) From df14980fb41f0d4c6519eff5fc66bbdbfe57e2f3 Mon Sep 17 00:00:00 2001 From: Theodor Athanasiadis Date: Fri, 5 Feb 2021 00:22:23 +0200 Subject: [PATCH 3/6] Addition of test cases. 
--- .../tests/indexing/multiindex/test_getitem.py | 86 +++++++++++++++---- 1 file changed, 71 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index ef52692f0253d..b008f047c539e 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -19,7 +19,6 @@ [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))], ) def test_series_getitem_multiindex(access_method, level1_value, expected): - # GH 6018 # series regression getitem with a multi-index @@ -87,7 +86,8 @@ def test_series_getitem_returns_scalar( (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), - (lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s) + (lambda s: s.__getitem__(len(s)), KeyError, ""), + # match should include len(s) (lambda s: s[len(s)], KeyError, ""), # match should include len(s) ( lambda s: s.iloc[len(s)], @@ -229,28 +229,84 @@ def test_frame_getitem_nan_multiindex(nulls_fixture): tm.assert_frame_equal(result, expected) -def test_frame_getitem_nan_cols_multiindex(nulls_fixture): +@pytest.mark.parametrize( + "indexer,expected", + [ + ( + (["b"], ["bar", np.nan]), + ( + DataFrame( + [[2, 3], [5, 6]], + columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]), + dtype="int64", + ) + ), + ), + ( + (["a", "b"]), + ( + DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples( + [("a", "foo"), ("b", "bar"), ("b", np.nan)] + ), + dtype="int64", + ) + ), + ), + ( + (["b"]), + ( + DataFrame( + [[2, 3], [5, 6]], + columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]), + dtype="int64", + ) + ), + ), + ( + (["b"], ["bar"]), + ( + DataFrame( + [[2], [5]], + columns=MultiIndex.from_tuples([("b", "bar")]), + dtype="int64", + ) + ), + ), + ( + (["b"], 
[np.nan]), + ( + DataFrame( + [[3], [6]], + columns=MultiIndex.from_tuples([("b", np.nan)]), + dtype="int64", + ) + ), + ), + (("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))), + ], +) +def test_frame_getitem_nan_cols_multiindex( + indexer, + expected, + nulls_fixture, +): # Slicing MultiIndex including levels with nan values, for more information # see GH#25154 - index = ["First", nulls_fixture] df = DataFrame( [[1, 2, 3], [4, 5, 6]], columns=MultiIndex.from_tuples( - [("a", "foo"), ("b", "foo"), ("b", nulls_fixture)] + [("a", "foo"), ("b", "bar"), ("b", nulls_fixture)] ), - index=index, dtype="int64", ) - # Slicing out 'b', ['foo', nan] - cols = (["b"], ["foo", nulls_fixture]) - result = df.loc[:, cols] - expected_columns = MultiIndex.from_tuples([("b", "foo"), ("b", nulls_fixture)]) - expected = DataFrame( - [[2, 3], [5, 6]], columns=expected_columns, index=index, dtype="int64" - ) - - tm.assert_frame_equal(result, expected) + result = df.loc[:, indexer] + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected, check_column_type=False) + elif isinstance(result, Series): + tm.assert_series_equal(result, expected) # ---------------------------------------------------------------------------- From 071256c9bc0bd69e3e33acab3c226c68e9960032 Mon Sep 17 00:00:00 2001 From: Theodor Athanasiadis Date: Fri, 5 Feb 2021 01:07:44 +0200 Subject: [PATCH 4/6] Modifications in test_frame_getitem_nan_cols_multiindex to use tm.assert_equal --- pandas/tests/indexing/multiindex/test_getitem.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index b008f047c539e..73e35e7598c3e 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -279,7 +279,9 @@ def test_frame_getitem_nan_multiindex(nulls_fixture): ( DataFrame( [[3], [6]], - 
columns=MultiIndex.from_tuples([("b", np.nan)]), + columns=MultiIndex( + codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]] + ), dtype="int64", ) ), @@ -303,10 +305,7 @@ def test_frame_getitem_nan_cols_multiindex( ) result = df.loc[:, indexer] - if isinstance(result, DataFrame): - tm.assert_frame_equal(result, expected, check_column_type=False) - elif isinstance(result, Series): - tm.assert_series_equal(result, expected) + tm.assert_equal(result, expected) # ---------------------------------------------------------------------------- From 548ca8a34052856e82add2bd09b56330c5236a53 Mon Sep 17 00:00:00 2001 From: Theodor Athanasiadis Date: Fri, 5 Feb 2021 01:59:22 +0200 Subject: [PATCH 5/6] Reverting changes in test_series_getitem_indexing_errors --- pandas/tests/indexing/multiindex/test_getitem.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 73e35e7598c3e..9d6b43bd48d07 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -86,8 +86,7 @@ def test_series_getitem_returns_scalar( (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), - (lambda s: s.__getitem__(len(s)), KeyError, ""), - # match should include len(s) + (lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s) (lambda s: s[len(s)], KeyError, ""), # match should include len(s) ( lambda s: s.iloc[len(s)], From ada89892f7dc645a9383886cc86fc69abe3ad902 Mon Sep 17 00:00:00 2001 From: Theodor Athanasiadis Date: Fri, 5 Feb 2021 02:46:40 +0200 Subject: [PATCH 6/6] Added new line in test_series_getitem_multiindex --- pandas/tests/indexing/multiindex/test_getitem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py 
b/pandas/tests/indexing/multiindex/test_getitem.py index 9d6b43bd48d07..d0ef95d2fa56c 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -19,6 +19,7 @@ [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))], ) def test_series_getitem_multiindex(access_method, level1_value, expected): + # GH 6018 # series regression getitem with a multi-index