From 78daa2c3a950595018ec828b83690a0d494e2750 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sun, 6 Oct 2019 04:07:20 +0100 Subject: [PATCH 1/6] Add test for pivot table with categorical data --- pandas/tests/reshape/test_pivot.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 582084e3bfb5a..433664067109b 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2554,3 +2554,28 @@ def test_margin_normalize(self): names=["A", "B"], ) tm.assert_frame_equal(result, expected) + + def test_pivot_with_categorical(self, dropna): + # gh-21370 + idx = [np.nan, "low", "high", "low", np.nan] + col = [np.nan, "A", "B", np.nan, "A"] + df = pd.DataFrame( + { + "In": pd.Categorical(idx, categories=["low", "high"], ordered=True), + "Col": pd.Categorical(col, categories=["A", "B"], ordered=True), + "Val": range(1, 6), + } + ) + result = df.pivot_table(index="In", columns="Col", values="Val") + + expected_cols = pd.CategoricalIndex(["A", "B"], ordered=True, name="Col") + + expected = pd.DataFrame( + data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols + ) + expected.index = Index( + pd.Categorical(["low", "high"], categories=["low", "high"], ordered=True), + name="In", + ) + + tm.assert_frame_equal(result, expected) From f67be6af0afe51e349af1544a7b9dc8b09d8cc88 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sun, 6 Oct 2019 04:27:49 +0100 Subject: [PATCH 2/6] Add second test case for only columns/value --- pandas/tests/reshape/test_pivot.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 433664067109b..a237fcc0d9087 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2566,6 +2566,7 @@ def test_pivot_with_categorical(self, dropna): "Val": range(1, 6), } ) + # case with index/columns/value result = 
df.pivot_table(index="In", columns="Col", values="Val") expected_cols = pd.CategoricalIndex(["A", "B"], ordered=True, name="Col") @@ -2579,3 +2580,12 @@ def test_pivot_with_categorical(self, dropna): ) tm.assert_frame_equal(result, expected) + + # case with columns/value + result = df.pivot_table(columns="Col", values="Val") + + expected = pd.DataFrame( + data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"]) + ) + + tm.assert_frame_equal(result, expected) From e0a180a58e9a3a6abecba487967174a47c4bc61b Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sun, 6 Oct 2019 04:30:26 +0100 Subject: [PATCH 3/6] Remove unused dropna --- pandas/tests/reshape/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index a237fcc0d9087..5a27d9ba7e102 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2555,7 +2555,7 @@ def test_margin_normalize(self): ) tm.assert_frame_equal(result, expected) - def test_pivot_with_categorical(self, dropna): + def test_pivot_with_categorical(self): # gh-21370 idx = [np.nan, "low", "high", "low", np.nan] col = [np.nan, "A", "B", np.nan, "A"] From a682baf21e88c8ecd91f5e575fe885920e2fc7dd Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Mon, 7 Oct 2019 01:41:17 +0100 Subject: [PATCH 4/6] Push test up and add observed fixture --- pandas/tests/reshape/test_pivot.py | 70 +++++++++++++++--------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 5a27d9ba7e102..3f6566ee7f473 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1656,6 +1656,41 @@ def test_categorical_margins_category(self, observed): table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) tm.assert_frame_equal(table, expected) + def test_pivot_with_categorical(self, observed): + # gh-21370 + idx =
[np.nan, "low", "high", "low", np.nan] + col = [np.nan, "A", "B", np.nan, "A"] + df = pd.DataFrame( + { + "In": pd.Categorical(idx, categories=["low", "high"], ordered=True), + "Col": pd.Categorical(col, categories=["A", "B"], ordered=True), + "Val": range(1, 6), + } + ) + # case with index/columns/value + result = df.pivot_table(index="In", columns="Col", values="Val") + + expected_cols = pd.CategoricalIndex(["A", "B"], ordered=True, name="Col") + + expected = pd.DataFrame( + data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols + ) + expected.index = Index( + pd.Categorical(["low", "high"], categories=["low", "high"], ordered=True), + name="In", + ) + + tm.assert_frame_equal(result, expected) + + # case with columns/value + result = df.pivot_table(columns="Col", values="Val") + + expected = pd.DataFrame( + data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"]) + ) + + tm.assert_frame_equal(result, expected) + def test_categorical_aggfunc(self, observed): # GH 9534 df = pd.DataFrame( @@ -2554,38 +2589,3 @@ def test_margin_normalize(self): names=["A", "B"], ) tm.assert_frame_equal(result, expected) - - def test_pivot_with_categorical(self): - # gh-21370 - idx = [np.nan, "low", "high", "low", np.nan] - col = [np.nan, "A", "B", np.nan, "A"] - df = pd.DataFrame( - { - "In": pd.Categorical(idx, categories=["low", "high"], ordered=True), - "Col": pd.Categorical(col, categories=["A", "B"], ordered=True), - "Val": range(1, 6), - } - ) - # case with index/columns/value - result = df.pivot_table(index="In", columns="Col", values="Val") - - expected_cols = pd.CategoricalIndex(["A", "B"], ordered=True, name="Col") - - expected = pd.DataFrame( - data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols - ) - expected.index = Index( - pd.Categorical(["low", "high"], categories=["low", "high"], ordered=True), - name="In", - ) - - tm.assert_frame_equal(result, expected) - - # case with columns/value - result = df.pivot_table(columns="Col", values="Val") - - expected 
= pd.DataFrame( - data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"]) - ) - - tm.assert_frame_equal(result, expected) From f19e1bfcb62844ae0aaf92405088167e23996add Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Mon, 7 Oct 2019 21:26:39 +0100 Subject: [PATCH 5/6] Replace ordered=True with ordered fixture --- pandas/tests/reshape/test_pivot.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3f6566ee7f473..c608a0f6bbc42 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1656,27 +1656,35 @@ def test_categorical_margins_category(self, observed): table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) tm.assert_frame_equal(table, expected) - def test_pivot_with_categorical(self, observed): + def test_pivot_with_categorical(self, observed, ordered_fixture): # gh-21370 idx = [np.nan, "low", "high", "low", np.nan] col = [np.nan, "A", "B", np.nan, "A"] df = pd.DataFrame( { - "In": pd.Categorical(idx, categories=["low", "high"], ordered=True), - "Col": pd.Categorical(col, categories=["A", "B"], ordered=True), + "In": pd.Categorical( + idx, categories=["low", "high"], ordered=ordered_fixture + ), + "Col": pd.Categorical( + col, categories=["A", "B"], ordered=ordered_fixture + ), "Val": range(1, 6), } ) # case with index/columns/value result = df.pivot_table(index="In", columns="Col", values="Val") - expected_cols = pd.CategoricalIndex(["A", "B"], ordered=True, name="Col") + expected_cols = pd.CategoricalIndex( + ["A", "B"], ordered=ordered_fixture, name="Col" + ) expected = pd.DataFrame( data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols ) expected.index = Index( - pd.Categorical(["low", "high"], categories=["low", "high"], ordered=True), + pd.Categorical( + ["low", "high"], categories=["low", "high"], ordered=ordered_fixture + ), name="In", ) From
e2c028ad3c9d9cbb1fc971293bfbfc90afbb3b7a Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Thu, 10 Oct 2019 01:45:05 +0100 Subject: [PATCH 6/6] Pass in observed keyword argument --- pandas/tests/reshape/test_pivot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index c608a0f6bbc42..a8386d21ba27f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1672,7 +1672,9 @@ def test_pivot_with_categorical(self, observed, ordered_fixture): } ) # case with index/columns/value - result = df.pivot_table(index="In", columns="Col", values="Val") + result = df.pivot_table( + index="In", columns="Col", values="Val", observed=observed + ) expected_cols = pd.CategoricalIndex( ["A", "B"], ordered=ordered_fixture, name="Col" @@ -1691,7 +1693,7 @@ def test_pivot_with_categorical(self, observed, ordered_fixture): tm.assert_frame_equal(result, expected) # case with columns/value - result = df.pivot_table(columns="Col", values="Val") + result = df.pivot_table(columns="Col", values="Val", observed=observed) expected = pd.DataFrame( data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"])