From 225ad4306c95e3d0f40d859866b88a07038dfc9c Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 14:51:03 -0600 Subject: [PATCH 01/12] Add test --- pandas/tests/groupby/test_apply.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 18ad5d90b3f60..d88c78ed9df59 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -866,3 +866,24 @@ def fct(group): [[1.0, 2.0], [3.0], [np.nan]], index=pd.Index(["a", "b", "none"], name="A") ) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("contains_na", [True, False]) +def test_apply_mean_with_nullable_integer(contains_na): + # https://github.com/pandas-dev/pandas/issues/32219 + if contains_na: + values = { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + } + else: + values = {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]} + + groups = pd.DataFrame(values, dtype="Int64").groupby("a") + result = groups.mean() + + idx = pd.Index([1, 2, 3], dtype=object, name="a") + values = np.array([1.5] * 3, dtype=float) + expected = pd.DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) From fc683e1f54698a19d1dddc96142f01834729245d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 14:51:18 -0600 Subject: [PATCH 02/12] Catch TypeError --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1bb512aee39e2..555e496080cdd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1083,7 +1083,7 @@ def _cython_agg_blocks( result = type(block.values)._from_sequence( result.ravel(), dtype=block.values.dtype ) - except ValueError: + except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) From dec6560d900dad26dd28334f07600852c445b099 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 14:51:29 -0600 Subject: [PATCH 03/12] Release note --- doc/source/whatsnew/v1.0.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index f491774991090..91752ef157a49 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -80,6 +80,7 @@ Bug fixes - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) - Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +- Fixed bug where :meth:`DataFrameGroupBy.mean` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) .. --------------------------------------------------------------------------- From 60c3ccb245a7df39d94f7dfa9bf688eb6e94da37 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 14:59:02 -0600 Subject: [PATCH 04/12] Nit --- pandas/tests/groupby/test_apply.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index d88c78ed9df59..3e2f2d318fc43 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -883,7 +883,7 @@ def test_apply_mean_with_nullable_integer(contains_na): result = groups.mean() idx = pd.Index([1, 2, 3], dtype=object, name="a") - values = np.array([1.5] * 3, dtype=float) - expected = pd.DataFrame({"b": values}, index=idx) + arr = np.array([1.5] * 3, dtype=float) + expected = pd.DataFrame({"b": arr}, index=idx) tm.assert_frame_equal(result, expected) From c5bd077867ca7cc35644f2cbb5f6d9450789a233 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 15:29:32 -0600 Subject: [PATCH 05/12] Change parameter --- pandas/tests/groupby/test_apply.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 3e2f2d318fc43..0fb208aa427e3 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -868,17 +868,18 @@ def fct(group): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("contains_na", [True, False]) -def test_apply_mean_with_nullable_integer(contains_na): - # https://github.com/pandas-dev/pandas/issues/32219 - if contains_na: - values = { +@pytest.mark.parametrize( + "values", + [ + { "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], - } - else: - values = {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]} - + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +def test_apply_mean_with_nullable_integer(values): + # https://github.com/pandas-dev/pandas/issues/32219 groups = pd.DataFrame(values, dtype="Int64").groupby("a") result = groups.mean() From 780c4cd2614d920ff3b65354a09ade1d76cc3fb8 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 20:27:32 -0600 Subject: [PATCH 06/12] Add median and var tests --- pandas/tests/groupby/test_apply.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0fb208aa427e3..b7a03a22ddc68 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -878,13 +878,36 @@ def fct(group): {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, ], ) -def test_apply_mean_with_nullable_integer(values): +@pytest.mark.parametrize("function", ["mean", "median"]) +def test_apply_mean_median_with_nullable_integer(values, function): # https://github.com/pandas-dev/pandas/issues/32219 groups = pd.DataFrame(values, dtype="Int64").groupby("a") - result = groups.mean() + result = getattr(groups, function)() idx = pd.Index([1, 2, 3], dtype=object, name="a") arr = np.array([1.5] * 3, dtype=float) expected = pd.DataFrame({"b": arr}, index=idx) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +def test_apply_var_with_nullable_integer(values): + # https://github.com/pandas-dev/pandas/issues/32219 + groups = pd.DataFrame(values, dtype="Int64").groupby("a") + result = groups.var() + + idx = pd.Index([1, 2, 3], dtype=object, name="a") + arr = np.array([0.5] * 3, dtype=float) + expected = pd.DataFrame({"b": arr}, index=idx) + + tm.assert_frame_equal(result, expected) From 6d4a94b7714cf58d814de310cd4284d87023c4d0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 20:32:58 -0600 Subject: [PATCH 07/12] Make one test --- pandas/tests/groupby/test_apply.py | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b7a03a22ddc68..3a9c83dbcf79e 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -878,36 +878,15 @@ def fct(group): {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, ], ) -@pytest.mark.parametrize("function", ["mean", "median"]) -def test_apply_mean_median_with_nullable_integer(values, function): +@pytest.mark.parametrize("function", ["mean", "median", "var"]) +def test_apply_to_nullable_integer_returns_float(values, function): # https://github.com/pandas-dev/pandas/issues/32219 groups = pd.DataFrame(values, dtype="Int64").groupby("a") result = getattr(groups, function)() + output = 0.5 if function == "var" else 1.5 + arr = np.array([output] * 3, dtype=float) idx = pd.Index([1, 2, 3], dtype=object, name="a") - arr = np.array([1.5] * 3, dtype=float) - expected = pd.DataFrame({"b": arr}, index=idx) - - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "values", - [ - { - "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], - }, - {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, - ], -) -def test_apply_var_with_nullable_integer(values): - # https://github.com/pandas-dev/pandas/issues/32219 - groups = pd.DataFrame(values, dtype="Int64").groupby("a") - result = groups.var() - - idx = pd.Index([1, 2, 3], dtype=object, name="a") - arr = np.array([0.5] * 3, dtype=float) expected = pd.DataFrame({"b": arr}, index=idx) tm.assert_frame_equal(result, expected) From 651de5eb6938baf4b360c0d980841067c7e00dc4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 20:36:47 -0600 Subject: [PATCH 08/12] Update release note --- doc/source/whatsnew/v1.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 91752ef157a49..7a6192f02c416 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -80,7 +80,7 @@ Bug fixes - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) - Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) -- Fixed bug where :meth:`DataFrameGroupBy.mean` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) +- Fixed bug where :meth:`DataFrameGroupBy.mean`, :meth:`DataFrameGroupBy.median`, :meth:`DataFrameGroupBy.var`, and :meth:`DataFrameGroupBy.std` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) .. --------------------------------------------------------------------------- From c60dbcbb04d3a9f8fb5c7679a1e5f50488c298a7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 22:51:49 -0500 Subject: [PATCH 09/12] Move test --- pandas/tests/groupby/test_apply.py | 24 ------------------------ pandas/tests/groupby/test_function.py | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 3a9c83dbcf79e..18ad5d90b3f60 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -866,27 +866,3 @@ def fct(group): [[1.0, 2.0], [3.0], [np.nan]], index=pd.Index(["a", "b", "none"], name="A") ) tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "values", - [ - { - "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], - }, - {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, - ], -) -@pytest.mark.parametrize("function", ["mean", "median", "var"]) -def test_apply_to_nullable_integer_returns_float(values, function): - # https://github.com/pandas-dev/pandas/issues/32219 - groups = pd.DataFrame(values, dtype="Int64").groupby("a") - result = getattr(groups, function)() - - output = 0.5 if function == "var" else 1.5 - arr = np.array([output] * 3, dtype=float) - idx = pd.Index([1, 2, 3], dtype=object, name="a") - expected = pd.DataFrame({"b": arr}, index=idx) - - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 83080aa98648f..8debf5ac6de16 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1605,3 +1605,27 @@ def test_groupby_mean_no_overflow(): } ) assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840 + + +@pytest.mark.parametrize( + "values", + [ + { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +@pytest.mark.parametrize("function", ["mean", "median", "var"]) +def test_apply_to_nullable_integer_returns_float(values, function): + # https://github.com/pandas-dev/pandas/issues/32219 + groups = pd.DataFrame(values, dtype="Int64").groupby("a") + result = getattr(groups, function)() + + output = 0.5 if function == "var" else 1.5 + arr = np.array([output] * 3, dtype=float) + idx = pd.Index([1, 2, 3], dtype=object, name="a") + expected = pd.DataFrame({"b": arr}, index=idx) + + tm.assert_frame_equal(result, expected) From bcf986777575e98478658d1862dedcf6474176f6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 11 Mar 2020 09:45:00 -0500 Subject: [PATCH 10/12] Add more testing --- pandas/tests/groupby/test_function.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 2ae57d65da4be..93b517189ee49 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1618,14 +1618,23 @@ def test_groupby_mean_no_overflow(): ], ) @pytest.mark.parametrize("function", ["mean", "median", "var"]) -def test_apply_to_nullable_integer_returns_float(values, function): +@pytest.mark.parametrize("use_agg", [True, False]) +def test_apply_to_nullable_integer_returns_float(values, function, use_agg): # https://github.com/pandas-dev/pandas/issues/32219 - groups = pd.DataFrame(values, dtype="Int64").groupby("a") - result = getattr(groups, function)() - output = 0.5 if function == "var" else 1.5 arr = np.array([output] * 3, dtype=float) idx = pd.Index([1, 2, 3], dtype=object, name="a") expected = pd.DataFrame({"b": arr}, index=idx) - tm.assert_frame_equal(result, expected) + groups = pd.DataFrame(values, dtype="Int64").groupby("a") + + if use_agg: + result = groups.agg(function) + tm.assert_frame_equal(result, expected) + + result = groups.agg([function]) + expected.columns = MultiIndex.from_tuples([("b", function)]) + tm.assert_frame_equal(result, expected) + else: + result = getattr(groups, function)() + tm.assert_frame_equal(result, expected) From 66fa0de03fc81069459187f8bcbe67b8a0afd52a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 11 Mar 2020 09:50:09 -0500 Subject: [PATCH 11/12] Fix merge --- doc/source/whatsnew/v1.0.2.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 4923d0a577f1b..10191ef1923c7 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -88,9 +88,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.convert_dtypes` where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`) - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) - Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) -- Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) - Fixed bug where :meth:`DataFrameGroupBy.mean`, :meth:`DataFrameGroupBy.median`, :meth:`DataFrameGroupBy.var`, and :meth:`DataFrameGroupBy.std` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) -- Fixed bug in :meth:`DataFrame.convert_dtypes`, where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`) **Strings** From 3dadd53cfef28bebc52300c017fe218edeb0c70d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 11 Mar 2020 15:55:24 -0500 Subject: [PATCH 12/12] No param --- pandas/tests/groupby/test_function.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 93b517189ee49..9c33843cdcecc 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1618,8 +1618,7 @@ def test_groupby_mean_no_overflow(): ], ) @pytest.mark.parametrize("function", ["mean", "median", "var"]) -@pytest.mark.parametrize("use_agg", [True, False]) -def test_apply_to_nullable_integer_returns_float(values, function, use_agg): +def test_apply_to_nullable_integer_returns_float(values, function): # https://github.com/pandas-dev/pandas/issues/32219 output = 0.5 if function == "var" else 1.5 arr = np.array([output] * 3, dtype=float) @@ -1628,13 +1627,12 @@ def test_apply_to_nullable_integer_returns_float(values, function, use_agg): groups = pd.DataFrame(values, dtype="Int64").groupby("a") - if use_agg: - result = groups.agg(function) - tm.assert_frame_equal(result, expected) + result = getattr(groups, function)() + tm.assert_frame_equal(result, expected) - result = groups.agg([function]) - expected.columns = MultiIndex.from_tuples([("b", function)]) - tm.assert_frame_equal(result, expected) - else: - result = getattr(groups, function)() - tm.assert_frame_equal(result, expected) + result = groups.agg(function) + tm.assert_frame_equal(result, expected) + + result = groups.agg([function]) + expected.columns = MultiIndex.from_tuples([("b", function)]) + tm.assert_frame_equal(result, expected)