From e48b556d274ed6eecf3dbd9a92af5199d98df67a Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 21 Aug 2021 17:26:51 +0530 Subject: [PATCH 1/3] TST: groupby.first/last retains categorical dtype --- pandas/tests/groupby/test_categorical.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 63ae54cafc900..6472a566e9196 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1741,3 +1741,15 @@ def test_groupby_categorical_indices_unused_categories(): assert result.keys() == expected.keys() for key in result.keys(): tm.assert_numpy_array_equal(result[key], expected[key]) + + +def test_groupby_last_first_preserve_categoricaldtype(): + # GH#33090 + df = DataFrame({"a": [1, 2, 3]}) + df["b"] = df["a"].astype("category") + result = df.groupby("a")["b"].last() + expected = Series(Categorical([1, 2, 3]), name="b", index=[1, 2, 3]) + expected.index.name = "a" + tm.assert_series_equal(expected, result) + result = df.groupby("a")["b"].first() + tm.assert_series_equal(expected, result) From 247369357dd4ad75f83b7500479cadb8e11fa759 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 21 Aug 2021 20:09:36 +0530 Subject: [PATCH 2/3] parametrized over first/last --- pandas/tests/groupby/test_categorical.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 6472a566e9196..45252006425ff 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1743,13 +1743,12 @@ def test_groupby_categorical_indices_unused_categories(): tm.assert_numpy_array_equal(result[key], expected[key]) -def test_groupby_last_first_preserve_categoricaldtype(): +@pytest.mark.parametrize("func", ["first", "last"]) +def test_groupby_last_first_preserve_categoricaldtype(func): # GH#33090 df = DataFrame({"a": [1, 2, 3]}) df["b"] = df["a"].astype("category") - result = df.groupby("a")["b"].last() + result = getattr(df.groupby("a")["b"], func)() expected = Series(Categorical([1, 2, 3]), name="b", index=[1, 2, 3]) expected.index.name = "a" tm.assert_series_equal(expected, result) - result = df.groupby("a")["b"].first() - tm.assert_series_equal(expected, result) From 2c92b6a37480d88143f693eb10279e7a57e7a35f Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Thu, 26 Aug 2021 14:42:00 +0530 Subject: [PATCH 3/3] suggested change --- pandas/tests/groupby/test_categorical.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 45252006425ff..d989cde09380a 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1749,6 +1749,7 @@ def test_groupby_last_first_preserve_categoricaldtype(func): df = DataFrame({"a": [1, 2, 3]}) df["b"] = df["a"].astype("category") result = getattr(df.groupby("a")["b"], func)() - expected = Series(Categorical([1, 2, 3]), name="b", index=[1, 2, 3]) - expected.index.name = "a" + expected = Series( + Categorical([1, 2, 3]), name="b", index=Index([1, 2, 3], name="a") + ) tm.assert_series_equal(expected, result)