From 44aca56effaceb7a424ffb9cd8d9ddd590c01267 Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Sat, 27 Apr 2019 18:16:10 +0200 Subject: [PATCH 1/9] Fix .transform crash when SeriesGroupBy is empty (pandas-dev#26208) --- pandas/core/groupby/generic.py | 7 +++++-- pandas/tests/groupby/test_transform.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f8b9ddce6000e..eeaa3f908893c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -917,8 +917,11 @@ def transform(self, func, *args, **kwargs): s = klass(res, indexer) results.append(s) - from pandas.core.reshape.concat import concat - result = concat(results).sort_index() + if results: + from pandas.core.reshape.concat import concat + result = concat(results).sort_index() + else: + result = Series([]) # we will only try to coerce the result type if # we have a numeric dtype, as these are *always* udfs diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index e330329644269..fb6fa0aecc7bb 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -880,3 +880,13 @@ def test_transform_absent_categories(func): result = getattr(df.y.groupby(df.x), func)() expected = df.y assert_series_equal(result, expected) + + +def test_transform_empty_df(): + # 26208 + # handle empty SeriesGroupBy + d = pd.DataFrame({1: [], 2: []}) + g = d.groupby(1) + result = g[2].transform(lambda x: x) + expected = pd.Series([], name=2) + assert_series_equal(result, expected) From cbffeb68c65c83ebb8e5221c0cefcccc6f555541 Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Sat, 27 Apr 2019 18:16:10 +0200 Subject: [PATCH 2/9] Fix .transform crash when SeriesGroupBy is empty (pandas-dev#26208) --- pandas/core/groupby/generic.py | 7 +++++-- pandas/tests/groupby/test_transform.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f8b9ddce6000e..eeaa3f908893c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -917,8 +917,11 @@ def transform(self, func, *args, **kwargs): s = klass(res, indexer) results.append(s) - from pandas.core.reshape.concat import concat - result = concat(results).sort_index() + if results: + from pandas.core.reshape.concat import concat + result = concat(results).sort_index() + else: + result = Series([]) # we will only try to coerce the result type if # we have a numeric dtype, as these are *always* udfs diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index e330329644269..fb6fa0aecc7bb 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -880,3 +880,13 @@ def test_transform_absent_categories(func): result = getattr(df.y.groupby(df.x), func)() expected = df.y assert_series_equal(result, expected) + + +def test_transform_empty_df(): + # 26208 + # handle empty SeriesGroupBy + d = pd.DataFrame({1: [], 2: []}) + g = d.groupby(1) + result = g[2].transform(lambda x: x) + expected = pd.Series([], name=2) + assert_series_equal(result, expected) From d86d07bd8dcc3531eb81eddb9cd03e00742f39ec Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Sat, 27 Apr 2019 19:55:10 +0200 Subject: [PATCH 3/9] whats new for issue #26208 --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 485a0a06fc247..56777f23698c7 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -391,6 +391,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty SeriesGroupBy would raise error. Now returns empty Series, just like .apply and .agg. (:issue:`26208`) Reshaping From c85491db5c0c53f147f6b6329ce6dfbd63e169fe Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Fri, 3 May 2019 23:11:11 +0200 Subject: [PATCH 4/9] Simplify what's new comment --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 56777f23698c7..e4bf729d44138 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -391,7 +391,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) -- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty SeriesGroupBy would raise error. Now returns empty Series, just like .apply and .agg. (:issue:`26208`) +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error. Reshaping From 9b5c16a08ab5331bb84378c2eb97355c6d7d08ea Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Fri, 3 May 2019 23:22:26 +0200 Subject: [PATCH 5/9] Add comment to explain change --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index eeaa3f908893c..0a0b672f3360f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -917,7 +917,7 @@ def transform(self, func, *args, **kwargs): s = klass(res, indexer) results.append(s) - if results: + if results: # otherwise concat raises on empty results from pandas.core.reshape.concat import concat result = concat(results).sort_index() else: From cf92a15d21f1d539b18d022050b20aea400c65cd Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Sun, 12 May 2019 22:10:07 +0200 Subject: [PATCH 6/9] Update fixes according to requests by reviewer. --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/groupby/generic.py | 3 ++- pandas/tests/groupby/test_grouping.py | 11 +++++++++++ pandas/tests/groupby/test_transform.py | 10 ---------- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e4bf729d44138..a6127c91a7a25 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -391,7 +391,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) -- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error. +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`) Reshaping diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0a0b672f3360f..f1681062a5630 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -917,7 +917,8 @@ def transform(self, func, *args, **kwargs): s = klass(res, indexer) results.append(s) - if results: # otherwise concat raises on empty results + # check for empty "results" to avoid concat ValueError + if results: from pandas.core.reshape.concat import concat result = concat(results).sort_index() else: diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 013c370899de1..13c1ceb5eee20 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -556,6 +556,17 @@ def test_list_grouper_with_nat(self): expected = {pd.Timestamp('2011-01-01'): 365} tm.assert_dict_equal(result.groups, expected) + @pytest.mark.parametrize('func', ['transform']) + def test_evaluate_with_empty_groups(self, func): + # 26208 + # test transform'ing empty groups + # (not testing other agg fns, because they return + # different index objects. + d = pd.DataFrame({1: [], 2: []}) + g = d.groupby(1) + result = getattr(g[2], func)(lambda x: x) + expected = pd.Series([], name=2) + assert_series_equal(result, expected) # get_group # -------------------------------- diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index fb6fa0aecc7bb..e330329644269 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -880,13 +880,3 @@ def test_transform_absent_categories(func): result = getattr(df.y.groupby(df.x), func)() expected = df.y assert_series_equal(result, expected) - - -def test_transform_empty_df(): - # 26208 - # handle empty SeriesGroupBy - d = pd.DataFrame({1: [], 2: []}) - g = d.groupby(1) - result = g[2].transform(lambda x: x) - expected = pd.Series([], name=2) - assert_series_equal(result, expected) From 61b652c61deb4f7cf1d39844be01e8a2436a1048 Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Tue, 14 May 2019 18:48:23 +0200 Subject: [PATCH 7/9] Add other test cases, per reviewer req. --- pandas/tests/groupby/test_grouping.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 6924eede41c12..b3a4181c79456 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -552,18 +552,22 @@ def test_list_grouper_with_nat(self): expected = {pd.Timestamp('2011-01-01'): 365} tm.assert_dict_equal(result.groups, expected) - @pytest.mark.parametrize('func', ['transform']) - def test_evaluate_with_empty_groups(self, func): + @pytest.mark.parametrize('func,expected', [ + ('transform', pd.Series([], name=2, index=pd.RangeIndex(0, 0, 1))), + ('agg', pd.Series([], name=2, index=pd.Float64Index([], name=1))), + ('apply', pd.Series([], name=2, index=pd.Float64Index([], name=1))), + ]) + def test_evaluate_with_empty_groups(self, func, expected): # 26208 # test transform'ing empty groups # (not testing other agg fns, because they return # different index objects. - d = pd.DataFrame({1: [], 2: []}) - g = d.groupby(1) + df = pd.DataFrame({1: [], 2: []}) + g = df.groupby(1) result = getattr(g[2], func)(lambda x: x) - expected = pd.Series([], name=2) assert_series_equal(result, expected) + # get_group # -------------------------------- From bbbc7f4915edb6474bcc14ee5308428cb84f4f64 Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Tue, 14 May 2019 18:53:26 +0200 Subject: [PATCH 8/9] fix pep8 --- pandas/tests/groupby/test_grouping.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index b3a4181c79456..72b795f2c249d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -552,7 +552,9 @@ def test_list_grouper_with_nat(self): expected = {pd.Timestamp('2011-01-01'): 365} tm.assert_dict_equal(result.groups, expected) - @pytest.mark.parametrize('func,expected', [ + @pytest.mark.parametrize( + 'func,expected', + [ ('transform', pd.Series([], name=2, index=pd.RangeIndex(0, 0, 1))), ('agg', pd.Series([], name=2, index=pd.Float64Index([], name=1))), ('apply', pd.Series([], name=2, index=pd.Float64Index([], name=1))), From 807b7ccf664e7adfca07eed8db0fbac775d09f2b Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Tue, 14 May 2019 19:10:27 +0200 Subject: [PATCH 9/9] Remove [] when creating empty Series, per reviewer. --- pandas/core/groupby/generic.py | 2 +- pandas/tests/groupby/test_grouping.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f72d27f6b9b51..f4c53151bddc1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -921,7 +921,7 @@ def transform(self, func, *args, **kwargs): from pandas.core.reshape.concat import concat result = concat(results).sort_index() else: - result = Series([]) + result = Series() # we will only try to coerce the result type if # we have a numeric dtype, as these are *always* udfs diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 72b795f2c249d..4c84c29ff98cb 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -555,9 +555,9 @@ def test_list_grouper_with_nat(self): @pytest.mark.parametrize( 'func,expected', [ - ('transform', pd.Series([], name=2, index=pd.RangeIndex(0, 0, 1))), - ('agg', pd.Series([], name=2, index=pd.Float64Index([], name=1))), - ('apply', pd.Series([], name=2, index=pd.Float64Index([], name=1))), + ('transform', pd.Series(name=2, index=pd.RangeIndex(0, 0, 1))), + ('agg', pd.Series(name=2, index=pd.Float64Index([], name=1))), + ('apply', pd.Series(name=2, index=pd.Float64Index([], name=1))), ]) def test_evaluate_with_empty_groups(self, func, expected): # 26208