From 35a4dd665ce1992964e6146aa4e87fb321808d3b Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 28 Mar 2020 14:16:15 +0100 Subject: [PATCH 1/7] BUG: Add test for #28641 to ensure that error does not occur again --- pandas/tests/groupby/aggregate/test_aggregate.py | 7 +++++++ pandas/tests/groupby/test_categorical.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index e860ea1a3d052..3317636f5f1b0 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -773,6 +773,13 @@ def test_aggregate_mixed_types(): tm.assert_frame_equal(result, expected) +def test_aggregate_categorical_lost_index(): + # GH: 28641 + result = pd.DataFrame({"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()}).groupby("A").agg({"B": "min"}) + expected = pd.DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A")) + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="Not implemented.") def test_aggregate_udf_na_extension_type(): # https://github.com/pandas-dev/pandas/pull/31359 diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 9ea5252b91e13..c2c0be859085c 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1380,3 +1380,11 @@ def test_groupby_agg_non_numeric(): result = df.groupby([1, 2, 1]).nunique() tm.assert_frame_equal(result, expected) + + +def test_groupy_first_returns_categorical(): + # GH 28641: Issue mentioned in first comment. 
+ df = pd.DataFrame({"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()}) + result = df.groupby("A")["B"].first() + expected = pd.Series(["b"], index=pd.Index([1997], name="A"), name="B") + tm.assert_series_equal(result, expected) From 3f7bd8004d4e5d59b8b7bc01dba7b1f0095eebc1 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 28 Mar 2020 14:18:27 +0100 Subject: [PATCH 2/7] BUG: Run black pandas --- pandas/tests/groupby/aggregate/test_aggregate.py | 8 +++++++- pandas/tests/groupby/test_categorical.py | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3317636f5f1b0..8537e5ec115f2 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -775,7 +775,13 @@ def test_aggregate_mixed_types(): def test_aggregate_categorical_lost_index(): # GH: 28641 - result = pd.DataFrame({"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()}).groupby("A").agg({"B": "min"}) + result = ( + pd.DataFrame( + {"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()} + ) + .groupby("A") + .agg({"B": "min"}) + ) expected = pd.DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A")) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index c2c0be859085c..5f47504489363 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1384,7 +1384,9 @@ def test_groupby_agg_non_numeric(): def test_groupy_first_returns_categorical(): # GH 28641: Issue mentioned in first comment. 
- df = pd.DataFrame({"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()}) + df = pd.DataFrame( + {"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()} + ) result = df.groupby("A")["B"].first() expected = pd.Series(["b"], index=pd.Index([1997], name="A"), name="B") tm.assert_series_equal(result, expected) From 7bb76ce08219ddf0c2e74086f4f62c32f84c5996 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 31 Mar 2020 17:27:24 +0200 Subject: [PATCH 3/7] Implement requested review changes --- pandas/tests/groupby/aggregate/test_aggregate.py | 12 ++++-------- pandas/tests/groupby/test_categorical.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 8537e5ec115f2..78448b298a9ef 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -774,14 +774,10 @@ def test_aggregate_mixed_types(): def test_aggregate_categorical_lost_index(): - # GH: 28641 - result = ( - pd.DataFrame( - {"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()} - ) - .groupby("A") - .agg({"B": "min"}) - ) + # GH: 28641 groupby drops index, when grouping over categorical column with min/max + ds = pd.Series(["b"], dtype="category").cat.as_ordered() + df = pd.DataFrame({"A": [1997], "B": ds}) + result = df.groupby("A").agg({"B": "min"}) expected = pd.DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A")) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 5f47504489363..81f7250b1982b 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1383,7 +1383,7 @@ def test_groupby_agg_non_numeric(): def test_groupy_first_returns_categorical(): - # GH 28641: Issue mentioned in first comment. 
+ # GH 28641: groupby drops index, when grouping over categorical column with first df = pd.DataFrame( {"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()} ) From 145f171778847902030e7aba2fe07b8c4958ca38 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 31 Mar 2020 20:04:16 +0200 Subject: [PATCH 4/7] Implement requested review changes --- pandas/tests/groupby/aggregate/test_aggregate.py | 6 ++++-- pandas/tests/groupby/test_categorical.py | 9 ++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 78448b298a9ef..fd03ebd024387 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -773,11 +773,12 @@ def test_aggregate_mixed_types(): tm.assert_frame_equal(result, expected) -def test_aggregate_categorical_lost_index(): +@pytest.mark.parametrize("func", ["min", "max"]) +def test_aggregate_categorical_lost_index(func: str): # GH: 28641 groupby drops index, when grouping over categorical column with min/max ds = pd.Series(["b"], dtype="category").cat.as_ordered() df = pd.DataFrame({"A": [1997], "B": ds}) - result = df.groupby("A").agg({"B": "min"}) + result = df.groupby("A").agg({"B": func}) expected = pd.DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A")) tm.assert_frame_equal(result, expected) @@ -785,6 +786,7 @@ def test_aggregate_categorical_lost_index(): @pytest.mark.xfail(reason="Not implemented.") def test_aggregate_udf_na_extension_type(): # https://github.com/pandas-dev/pandas/pull/31359 + # GH 31256 # This is currently failing to cast back to Int64Dtype. # The presence of the NA causes two problems # 1. 
NA is not an instance of Int64Dtype.type (numpy.int64) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 81f7250b1982b..d342411c8c8a0 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1382,11 +1382,14 @@ def test_groupby_agg_non_numeric(): tm.assert_frame_equal(result, expected) -def test_groupy_first_returns_categorical(): - # GH 28641: groupby drops index, when grouping over categorical column with first +@pytest.mark.parametrize("func", ["first", "last"]) +def test_groupy_first_returns_categorical(func): + # GH 28641: groupby drops index, when grouping over categorical column with + # first/last df = pd.DataFrame( {"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()} ) - result = df.groupby("A")["B"].first() + df_grouped = df.groupby("A")["B"] + result = getattr(df_grouped, func)() expected = pd.Series(["b"], index=pd.Index([1997], name="A"), name="B") tm.assert_series_equal(result, expected) From 85f7f6fb10cb541bccefa4f19e2178e77d283d2d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 1 Apr 2020 22:06:31 +0200 Subject: [PATCH 5/7] Rename test --- pandas/tests/groupby/test_categorical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index d342411c8c8a0..2f6bf3ff4c3db 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1383,9 +1383,9 @@ def test_groupby_agg_non_numeric(): @pytest.mark.parametrize("func", ["first", "last"]) -def test_groupy_first_returns_categorical(func): +def test_groupy_first_returned_categorical_instead_of_dataframe(func): # GH 28641: groupby drops index, when grouping over categorical column with - # first/last + # first/last. Returned Categorical instead of DataFrame previously. 
df = pd.DataFrame( {"A": [1997], "B": pd.Series(["b"], dtype="category").cat.as_ordered()} ) From 8628b839305ccf363af344538ff750ada5fb3070 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 4 Apr 2020 01:30:21 +0200 Subject: [PATCH 6/7] Change xfail Message in test --- pandas/tests/groupby/aggregate/test_aggregate.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index fd03ebd024387..28e6b57fd03d0 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -783,10 +783,9 @@ def test_aggregate_categorical_lost_index(func: str): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(reason="Not implemented.") +@pytest.mark.xfail(reason="Not implemented;see GH 31256") def test_aggregate_udf_na_extension_type(): # https://github.com/pandas-dev/pandas/pull/31359 - # GH 31256 # This is currently failing to cast back to Int64Dtype. # The presence of the NA causes two problems # 1. NA is not an instance of Int64Dtype.type (numpy.int64) From a7577919e4ab53c8f31116677f5edf95d6233541 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 12 Apr 2020 19:03:34 +0200 Subject: [PATCH 7/7] Merge tests at end of file --- pandas/tests/groupby/test_categorical.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 873a98837e433..9ff66d21cac5f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1395,7 +1395,6 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func): tm.assert_series_equal(result, expected) - def test_read_only_category_no_sort(): # GH33410 cats = np.array([1, 2])