From 11703aa0301400fdfd69ef8aa3f4a6c9a29a42bf Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 15 May 2018 10:57:02 -0700 Subject: [PATCH 1/7] Added test for failure --- pandas/tests/frame/test_apply.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index af39c8f01cf73..9e599b36d4e31 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -554,6 +554,14 @@ def test_apply_non_numpy_dtype(self): result = df.apply(lambda x: x) assert_frame_equal(result, df) + def test_apply_dup_names_multi_agg(self): + # GH 21063 + df = pd.DataFrame([[0, 1], [2, 3]], columns=['a', 'a']) + expected = pd.DataFrame([[0, 1]], columns=['a', 'a'], index=['min']) + result = df.agg(['min']) + + tm.assert_frame_equal(result, expected) + class TestInferOutputShape(object): # the user has supplied an opaque UDF where From ee782642e1626c3f15b94c3aa12c30f9119ab365 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 15 May 2018 11:26:24 -0700 Subject: [PATCH 2/7] Prevented unlimited recursive aggregation --- pandas/core/base.py | 5 ++--- pandas/core/frame.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5022beabef76b..fddcf70c75287 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -590,9 +590,9 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis): # multiples else: - for col in obj: + for index, col in enumerate(obj): try: - colg = self._gotitem(col, ndim=1, subset=obj[col]) + colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, [index]]) results.append(colg.aggregate(arg)) keys.append(col) except (TypeError, DataError): @@ -675,7 +675,6 @@ def _gotitem(self, key, ndim, subset=None): subset : object, default None subset to act on """ - # create a new object to prevent aliasing if subset is None: subset = self.obj diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 
0437c479c9d81..202abfbac1d19 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5748,7 +5748,7 @@ def _gotitem(self, key, ndim, subset=None): subset = self # TODO: _shallow_copy(subset)? - return self[key] + return subset[key] _agg_doc = dedent(""" The aggregation operations are always performed over an axis, either the From 254af5f935c769be2c2a3047ab4fefba5ded5fc1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 15 May 2018 11:28:04 -0700 Subject: [PATCH 3/7] LINT fixup --- pandas/core/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index fddcf70c75287..d174d34ff9333 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -592,7 +592,8 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis): else: for index, col in enumerate(obj): try: - colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, [index]]) + colg = self._gotitem(col, ndim=1, + subset=obj.iloc[:, [index]]) results.append(colg.aggregate(arg)) keys.append(col) except (TypeError, DataError): From 2d4cf40749436fef86869dfd954afc5e83feb319 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 15 May 2018 15:27:22 -0700 Subject: [PATCH 4/7] Fixed regression against groupby apply --- pandas/core/base.py | 2 +- pandas/core/frame.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 6958414ff5141..aa051c6f5eaef 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -593,7 +593,7 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis): for index, col in enumerate(obj): try: colg = self._gotitem(col, ndim=1, - subset=obj.iloc[:, [index]]) + subset=obj.iloc[:, index]) results.append(colg.aggregate(arg)) keys.append(col) except (TypeError, DataError): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2273598aef7f4..b5521ead747bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5746,6 +5746,8 @@ def _gotitem(self, key, ndim, 
subset=None): """ if subset is None: subset = self + elif subset.ndim == 1: + subset = self._constructor(subset) # TODO: _shallow_copy(subset)? return subset[key] From f0135e8aadc5cb7c4080bab4a238e6913218842b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 15 May 2018 16:01:16 -0700 Subject: [PATCH 5/7] Added type annotations to internal method --- pandas/core/frame.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b5521ead747bc..30af23407308b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5731,7 +5731,12 @@ def diff(self, periods=1, axis=0): # ---------------------------------------------------------------------- # Function application - def _gotitem(self, key, ndim, subset=None): + def _gotitem(self, + key, # type: Union[str, List[str]] + ndim, # type: int + subset=None # type: Union[Series, DataFrame, None] + ): + # type: (...) -> Union[Series, DataFrame] """ sub-classes to define return a sliced object From 4a24f734047d387ce242c36dba16eb69388a3ca1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 16 May 2018 14:12:03 -0700 Subject: [PATCH 6/7] Updated whatsnew --- doc/source/whatsnew/v0.23.1.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5c9c3e2931bd9..338364a943edf 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -43,7 +43,10 @@ Documentation Changes Bug Fixes ~~~~~~~~~ -- +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) - Conversion From c0dc3f4119de05ae33d0650f4001661513a5807e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 16 May 2018 17:08:59 -0700 Subject: [PATCH 7/7] Simplified DataFrame _gotitem implementation --- pandas/core/frame.py | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 30af23407308b..77a67c048a48d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5751,8 +5751,8 @@ def _gotitem(self, """ if subset is None: subset = self - elif subset.ndim == 1: - subset = self._constructor(subset) + elif subset.ndim == 1: # is Series + return subset # TODO: _shallow_copy(subset)? return subset[key]