From 86f1fd2f05b14950c24af7fb1f0a2039dfa1ffad Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Thu, 19 Jan 2023 21:14:51 +0800 Subject: [PATCH 01/18] BUG: Fix agg ingore arg/kwargs when given list like func --- pandas/core/apply.py | 77 +++++++++++++++++++++++--- pandas/tests/apply/test_frame_apply.py | 19 +++++++ 2 files changed, 89 insertions(+), 7 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index c28da1bc758cd..872bc63466dcc 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -123,14 +123,18 @@ def __init__( self.result_type = result_type # curry if needed - if ( - (kwargs or args) - and not isinstance(func, (np.ufunc, str)) - and not is_list_like(func) - ): + if (kwargs or args) and not isinstance(func, (np.ufunc, str)): + if not is_list_like(func): - def f(x): - return func(x, *args, **kwargs) + def f(x): + return func(x, *args, **kwargs) + + else: + # GH 50624 + # only explicit arg passing is supported temporarily + # eg. df.agg([foo], a=1) + # df.agg([foo], 1) is not supported + f = _recreate_func(func, kwargs) else: f = func @@ -1506,3 +1510,62 @@ def validate_func_kwargs( no_arg_message = "Must provide 'func' or named aggregation **kwargs." raise TypeError(no_arg_message) return columns, func + + +def _check_arg(callable: Callable, arg: str) -> bool: + # GH 50624 + """To check if arg is a valid argument for callable. + + Parameters + ---------- + callable : Callable + arg : str + + Returns + ------- + bool + + Examples + -------- + >>> def func(x, a=1, b=2): + ... pass + >>> _check_arg(func, 'a') + True + """ + argspec = inspect.getfullargspec(callable) + args_names = argspec.args + argspec.kwonlyargs + return arg in args_names + + +def _recreate_func(callable_list: list[Callable], kwargs: dict) -> list[partial[Any]]: + # GH 50624 + """Recreate callable with kwargs. 
+ + Parameters + ---------- + callable_list : list[Callable] + kwargs : dict + + Returns + ------- + list[[partial[Any]] + + Examples + -------- + >>> def func(x, a=1, b=2): + ... pass + >>> _recreate_func([func], {'a': 3, 'b': 4}) + [, a=3, b=4] + """ + func_kwargs = {} + for single_func in callable_list: + single_func_kwargs = { + k: v for k, v in kwargs.items() if _check_arg(single_func, k) + } + func_kwargs[single_func] = single_func_kwargs + + f = [ + partial(single_func, **kwargs) + for single_func, kwargs in func_kwargs.items() + ] + return f diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 7aaad4d2ad081..51a3444ff48ca 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1623,3 +1623,22 @@ def test_any_apply_keyword_non_zero_axis_regression(): result = df.apply("any", 1) tm.assert_series_equal(result, expected) + + +def test_agg_list_like_arg(): + # GH 50624 + df = DataFrame({"x": [1, 2, 3]}) + + def foo1(x, a=1, b=2): + return x + a + b + + def foo2(x, c=3, d=4): + return x + c + d + + result = df.agg([foo1, foo2], 0, a=5, b=6, c=7, d=8) + expected = DataFrame( + [[12, 16], [13, 17], [14, 18]], + columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), + index=[0, 1, 2], + ) + tm.assert_frame_equal(result, expected) From b0d23a04056334f32b32b6c22706be0b90a96f20 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 20 Jan 2023 16:41:46 +0800 Subject: [PATCH 02/18] Fix test --- pandas/core/apply.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 872bc63466dcc..78301b8fe7326 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -129,13 +129,14 @@ def __init__( def f(x): return func(x, *args, **kwargs) - else: + elif type(func) == list and all(i for i in func if callable(i)): # GH 50624 # only explicit arg passing is supported temporarily # eg. 
df.agg([foo], a=1) # df.agg([foo], 1) is not supported f = _recreate_func(func, kwargs) - + else: + f = func else: f = func @@ -1564,8 +1565,6 @@ def _recreate_func(callable_list: list[Callable], kwargs: dict) -> list[partial[ } func_kwargs[single_func] = single_func_kwargs - f = [ - partial(single_func, **kwargs) - for single_func, kwargs in func_kwargs.items() - ] - return f + return [ + partial(single_func, **kwargs) for single_func, kwargs in func_kwargs.items() + ] From 76a675cbe1e1a46a0fa672a2780fb6c6c30e7274 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 20 Jan 2023 17:55:03 +0800 Subject: [PATCH 03/18] Fix mypy --- pandas/core/apply.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 78301b8fe7326..9032141c3ad36 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1538,7 +1538,9 @@ def _check_arg(callable: Callable, arg: str) -> bool: return arg in args_names -def _recreate_func(callable_list: list[Callable], kwargs: dict) -> list[partial[Any]]: +def _recreate_func( + callable_list: list[Callable], kwargs: dict +) -> list[partial[Any]] | Callable[[Any], Any]: # GH 50624 """Recreate callable with kwargs. 
@@ -1549,7 +1551,7 @@ def _recreate_func(callable_list: list[Callable], kwargs: dict) -> list[partial[ Returns ------- - list[[partial[Any]] + list[partial[Any]] | Callable[[Any], Any] Examples -------- From abe58c8009d2291643d48fdc29ab18751830b5d4 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 29 Jan 2023 14:18:05 +0800 Subject: [PATCH 04/18] More appreciate way --- pandas/core/apply.py | 110 ++++++++++++------------- pandas/tests/apply/test_frame_apply.py | 2 +- 2 files changed, 52 insertions(+), 60 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9032141c3ad36..3ac91319fd4a9 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -123,20 +123,16 @@ def __init__( self.result_type = result_type # curry if needed - if (kwargs or args) and not isinstance(func, (np.ufunc, str)): - if not is_list_like(func): - - def f(x): - return func(x, *args, **kwargs) - - elif type(func) == list and all(i for i in func if callable(i)): - # GH 50624 - # only explicit arg passing is supported temporarily - # eg. 
df.agg([foo], a=1) - # df.agg([foo], 1) is not supported - f = _recreate_func(func, kwargs) - else: - f = func + + if ( + (kwargs or args) + and not isinstance(func, (np.ufunc, str)) + and not is_list_like(func) + ): + + def f(x): + return func(x, *args, **kwargs) + else: f = func @@ -335,9 +331,13 @@ def agg_list_like(self) -> DataFrame | Series: # degenerate case if selected_obj.ndim == 1: + args_remain, kwargs_remain = self.args, self.kwargs for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - new_res = colg.aggregate(a) + args_pass, kwargs_pass, args_remain, kwargs_remain = _map_args( + a, args_remain, kwargs_remain + ) + new_res = colg.aggregate(a, self.axis, *args_pass, **kwargs_pass) results.append(new_res) # make sure we find a good name @@ -349,7 +349,7 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(arg) + new_res = colg.aggregate(arg, self.axis, *self.args, **self.kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) @@ -1513,60 +1513,52 @@ def validate_func_kwargs( return columns, func -def _check_arg(callable: Callable, arg: str) -> bool: +def _map_args( + func: AggFuncType, args: tuple, kwargs: dict +) -> tuple[tuple, dict, tuple, dict]: # GH 50624 - """To check if arg is a valid argument for callable. - - Parameters - ---------- - callable : Callable - arg : str - - Returns - ------- - bool - - Examples - -------- - >>> def func(x, a=1, b=2): - ... pass - >>> _check_arg(func, 'a') - True """ - argspec = inspect.getfullargspec(callable) - args_names = argspec.args + argspec.kwonlyargs - return arg in args_names - - -def _recreate_func( - callable_list: list[Callable], kwargs: dict -) -> list[partial[Any]] | Callable[[Any], Any]: - # GH 50624 - """Recreate callable with kwargs. + Map arguments to function. 
+ But for some cases with unnamed arguments, it will cause error. Parameters ---------- - callable_list : list[Callable] + func : function + args : tuple kwargs : dict Returns ------- - list[partial[Any]] | Callable[[Any], Any] + args_pass : tuple + Args should be passed to func + kwargs_pass : dict + Kwargs should be passed to func + args_remain : tuple + Args should be passed to other functions + kwargs_remain : dict + Kwargs should be passed to other functions Examples -------- - >>> def func(x, a=1, b=2): - ... pass - >>> _recreate_func([func], {'a': 3, 'b': 4}) - [, a=3, b=4] + >>> def f(a=1, b=2): + ... return a, b + >>> _map_args(f, (1,), {'b': 2, 'c': 3}) + ((1,), {'b': 2}, (), {'c': 3}) + >>> _map_args(f, (1, 2, 3), {'b': 4}) # maybe some unexpected results + ((1, 2), {}, (3,), {'b':4}) """ - func_kwargs = {} - for single_func in callable_list: - single_func_kwargs = { - k: v for k, v in kwargs.items() if _check_arg(single_func, k) - } - func_kwargs[single_func] = single_func_kwargs + argspec = inspect.getfullargspec(func) + args_names = argspec.args + argspec.kwonlyargs - return [ - partial(single_func, **kwargs) for single_func, kwargs in func_kwargs.items() - ] + if len(args) >= len(args_names): + args_pass = args[: len(args_names)] + args_remain = args[len(args_names) :] + kwargs_pass = {} + kwargs_remain = kwargs + else: + args_pass = args + args_remain = () + kwargs_pass = {k: v for k, v in kwargs.items() if k in args_names} + kwargs_remain = {k: v for k, v in kwargs.items() if k not in args_names} + + return args_pass, kwargs_pass, args_remain, kwargs_remain diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 51a3444ff48ca..df84fadc643ec 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1635,7 +1635,7 @@ def foo1(x, a=1, b=2): def foo2(x, c=3, d=4): return x + c + d - result = df.agg([foo1, foo2], 0, a=5, b=6, c=7, d=8) + result = df.agg([foo1, foo2], 
0, 5, b=6, c=7, d=8) expected = DataFrame( [[12, 16], [13, 17], [14, 18]], columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), From 5ce782a8559c1f5c01c05641161e5bf4b3535b69 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 29 Jan 2023 14:20:01 +0800 Subject: [PATCH 05/18] Delete blank line --- pandas/core/apply.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 3ac91319fd4a9..d823bda9bf230 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -123,7 +123,6 @@ def __init__( self.result_type = result_type # curry if needed - if ( (kwargs or args) and not isinstance(func, (np.ufunc, str)) From c1e48b7b3db0ac8fdc9e57b3c60f3b0e9f400eae Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 29 Jan 2023 14:38:57 +0800 Subject: [PATCH 06/18] Fix test failure --- pandas/core/apply.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index d823bda9bf230..32510ef9bf5f6 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -5,6 +5,7 @@ from contextlib import nullcontext from functools import partial import inspect +from types import FunctionType from typing import ( TYPE_CHECKING, Any, @@ -333,10 +334,17 @@ def agg_list_like(self) -> DataFrame | Series: args_remain, kwargs_remain = self.args, self.kwargs for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - args_pass, kwargs_pass, args_remain, kwargs_remain = _map_args( - a, args_remain, kwargs_remain - ) - new_res = colg.aggregate(a, self.axis, *args_pass, **kwargs_pass) + if not isinstance(a, (np.ufunc, str)) and isinstance( + a, FunctionType + ): + args_pass, kwargs_pass, args_remain, kwargs_remain = _map_args( + a, args_remain, kwargs_remain + ) + new_res = colg.aggregate( + a, self.axis, *args_pass, **kwargs_pass + ) + else: + new_res = colg.aggregate(a) results.append(new_res) # make sure we find a good 
name From 0daad25ec756ad8a3034b1d5d9e93d2b036a80e5 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 29 Jan 2023 18:25:47 +0800 Subject: [PATCH 07/18] Fix test failure --- pandas/core/apply.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 32510ef9bf5f6..6eee935389bd7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -340,9 +340,7 @@ def agg_list_like(self) -> DataFrame | Series: args_pass, kwargs_pass, args_remain, kwargs_remain = _map_args( a, args_remain, kwargs_remain ) - new_res = colg.aggregate( - a, self.axis, *args_pass, **kwargs_pass - ) + new_res = colg.aggregate(a, *args_pass, **kwargs_pass) else: new_res = colg.aggregate(a) results.append(new_res) @@ -356,7 +354,7 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(arg, self.axis, *self.args, **self.kwargs) + new_res = colg.aggregate(arg, *self.args, **self.kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) From eab348cef1e618ce2cbdc4f92630dd55cf72bfad Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 1 Feb 2023 23:34:47 +0800 Subject: [PATCH 08/18] Easy version --- pandas/core/apply.py | 67 +------------------------- pandas/tests/apply/test_frame_apply.py | 19 +++++--- 2 files changed, 13 insertions(+), 73 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 6eee935389bd7..1a8d48b10c74c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -5,7 +5,6 @@ from contextlib import nullcontext from functools import partial import inspect -from types import FunctionType from typing import ( TYPE_CHECKING, Any, @@ -330,21 +329,10 @@ def agg_list_like(self) -> DataFrame | Series: with context_manager: # degenerate case if selected_obj.ndim == 1: - - args_remain, 
kwargs_remain = self.args, self.kwargs for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - if not isinstance(a, (np.ufunc, str)) and isinstance( - a, FunctionType - ): - args_pass, kwargs_pass, args_remain, kwargs_remain = _map_args( - a, args_remain, kwargs_remain - ) - new_res = colg.aggregate(a, *args_pass, **kwargs_pass) - else: - new_res = colg.aggregate(a) + new_res = colg.aggregate(a, self.axis, *self.args, **self.kwargs) results.append(new_res) - # make sure we find a good name name = com.get_callable_name(a) or a keys.append(name) @@ -354,7 +342,7 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(arg, *self.args, **self.kwargs) + new_res = colg.aggregate(arg, self.axis, *self.args, **self.kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) @@ -1516,54 +1504,3 @@ def validate_func_kwargs( no_arg_message = "Must provide 'func' or named aggregation **kwargs." raise TypeError(no_arg_message) return columns, func - - -def _map_args( - func: AggFuncType, args: tuple, kwargs: dict -) -> tuple[tuple, dict, tuple, dict]: - # GH 50624 - """ - Map arguments to function. - But for some cases with unnamed arguments, it will cause error. - - Parameters - ---------- - func : function - args : tuple - kwargs : dict - - Returns - ------- - args_pass : tuple - Args should be passed to func - kwargs_pass : dict - Kwargs should be passed to func - args_remain : tuple - Args should be passed to other functions - kwargs_remain : dict - Kwargs should be passed to other functions - - Examples - -------- - >>> def f(a=1, b=2): - ... 
return a, b - >>> _map_args(f, (1,), {'b': 2, 'c': 3}) - ((1,), {'b': 2}, (), {'c': 3}) - >>> _map_args(f, (1, 2, 3), {'b': 4}) # maybe some unexpected results - ((1, 2), {}, (3,), {'b':4}) - """ - argspec = inspect.getfullargspec(func) - args_names = argspec.args + argspec.kwonlyargs - - if len(args) >= len(args_names): - args_pass = args[: len(args_names)] - args_remain = args[len(args_names) :] - kwargs_pass = {} - kwargs_remain = kwargs - else: - args_pass = args - args_remain = () - kwargs_pass = {k: v for k, v in kwargs.items() if k in args_names} - kwargs_remain = {k: v for k, v in kwargs.items() if k not in args_names} - - return args_pass, kwargs_pass, args_remain, kwargs_remain diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index df84fadc643ec..a0e667dc8f243 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1625,20 +1625,23 @@ def test_any_apply_keyword_non_zero_axis_regression(): tm.assert_series_equal(result, expected) -def test_agg_list_like_arg(): +def test_agg_list_like_func_with_args(): # GH 50624 df = DataFrame({"x": [1, 2, 3]}) - def foo1(x, a=1, b=2): - return x + a + b + def foo1(x, a=1, c=0): + return x + a + c - def foo2(x, c=3, d=4): - return x + c + d + def foo2(x, b=2, c=0): + return x + b + c - result = df.agg([foo1, foo2], 0, 5, b=6, c=7, d=8) + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + df.agg([foo1, foo2], 0, 3, b=3, c=4) + + result = df.agg([foo1, foo2], 0, 3, c=4) expected = DataFrame( - [[12, 16], [13, 17], [14, 18]], + [[8, 8], [9, 9], [10, 10]], columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), - index=[0, 1, 2], ) tm.assert_frame_equal(result, expected) From 54d27e1ae9fc4ed163084fcf591cd5db49ab129a Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 1 Feb 2023 23:56:25 +0800 Subject: [PATCH 09/18] Add blank lines --- pandas/core/apply.py | 2 ++ 1 
file changed, 2 insertions(+) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 1a8d48b10c74c..f7884a00acc0c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -329,10 +329,12 @@ def agg_list_like(self) -> DataFrame | Series: with context_manager: # degenerate case if selected_obj.ndim == 1: + for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) new_res = colg.aggregate(a, self.axis, *self.args, **self.kwargs) results.append(new_res) + # make sure we find a good name name = com.get_callable_name(a) or a keys.append(name) From b98b35e64901c46aa9909c23233e53fdb58af1a2 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Thu, 2 Feb 2023 15:30:24 +0800 Subject: [PATCH 10/18] Avoid incorrect axis --- pandas/core/apply.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f7884a00acc0c..b7516eabda77e 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -327,12 +327,16 @@ def agg_list_like(self) -> DataFrame | Series: else: context_manager = nullcontext() with context_manager: + + if self.args: + self.args = (self.axis,) + self.args + # degenerate case if selected_obj.ndim == 1: for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - new_res = colg.aggregate(a, self.axis, *self.args, **self.kwargs) + new_res = colg.aggregate(a, *self.args, **self.kwargs) results.append(new_res) # make sure we find a good name @@ -344,7 +348,7 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(arg, self.axis, *self.args, **self.kwargs) + new_res = colg.aggregate(arg, *self.args, **self.kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) From 234cff6d9d5e53b3826c4344b41921539acaec52 Mon Sep 17 00:00:00 2001 From: luke
<2736230899@qq.com> Date: Fri, 3 Feb 2023 13:08:48 +0800 Subject: [PATCH 11/18] Add whatsnew 2.0.0 --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c814e585672cb..4c6a5b2247e12 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1193,8 +1193,8 @@ Reshaping - Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`) - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`) - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`) +- Bug in :func:`agg` would ignore arguments when passed a list of functions (:issue:`50863`) - - Sparse ^^^^^^ - Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`,:issue:`50087`) From 6957a7cf3090d39496f071091366f192aba3d502 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 3 Feb 2023 13:10:18 +0800 Subject: [PATCH 12/18] Add blank line --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4c6a5b2247e12..33abd5a3f5db2 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1195,6 +1195,7 @@ Reshaping - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`) - Bug in :func:`agg` would ignore arguments when passed a list of functions (:issue:`50863`) - + Sparse ^^^^^^ - Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`,:issue:`50087`) From 
a3ef451d2b773606a788affc965929b79284372b Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 3 Feb 2023 22:09:05 +0800 Subject: [PATCH 13/18] More appreciate --- pandas/core/apply.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b7516eabda77e..608984301d727 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -328,15 +328,12 @@ def agg_list_like(self) -> DataFrame | Series: context_manager = nullcontext() with context_manager: - if self.args: - self.args = (self.axis,) + self.args - # degenerate case if selected_obj.ndim == 1: for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - new_res = colg.aggregate(a, *self.args, **self.kwargs) + new_res = colg.aggregate(a, self.axis, *self.args, **self.kwargs) results.append(new_res) # make sure we find a good name @@ -348,7 +345,7 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(arg, *self.args, **self.kwargs) + new_res = colg.aggregate(arg, self.axis, *self.args, **self.kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) From 1d46712eca8d2395a8b9ae337452004ad420763f Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 3 Feb 2023 22:10:10 +0800 Subject: [PATCH 14/18] Delete blank line --- pandas/core/apply.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 608984301d727..f7884a00acc0c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -327,7 +327,6 @@ def agg_list_like(self) -> DataFrame | Series: else: context_manager = nullcontext() with context_manager: - # degenerate case if selected_obj.ndim == 1: From 6f28f2f16626a4db99208d4989d944faa83ad67a Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Mon, 6 Feb 2023 
21:51:58 +0800 Subject: [PATCH 15/18] Whether Series or DataFrame --- pandas/core/apply.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f7884a00acc0c..881bf5d8bb232 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -332,14 +332,18 @@ def agg_list_like(self) -> DataFrame | Series: for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - new_res = colg.aggregate(a, self.axis, *self.args, **self.kwargs) + if isinstance(colg, (ABCSeries, ABCDataFrame)): + new_res = colg.aggregate( + a, self.axis, *self.args, **self.kwargs + ) + else: + new_res = colg.aggregate(a, *self.args, **self.kwargs) results.append(new_res) # make sure we find a good name name = com.get_callable_name(a) or a keys.append(name) - # multiples else: indices = [] for index, col in enumerate(selected_obj): From b4056acc6ce4e0cb386569b7059a99b26cb1e805 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Tue, 7 Feb 2023 12:15:32 +0800 Subject: [PATCH 16/18] Fix test --- pandas/core/apply.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 881bf5d8bb232..f29a6ce4c0b82 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -348,7 +348,12 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(arg, self.axis, *self.args, **self.kwargs) + if isinstance(colg, (ABCSeries, ABCDataFrame)): + new_res = colg.aggregate( + arg, self.axis, *self.args, **self.kwargs + ) + else: + new_res = colg.aggregate(arg, *self.args, **self.kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) From 7eb2c942deb86d0189eb6fcb4abdacef93bd05ad Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 10 Feb 2023 00:39:06 +0800 
Subject: [PATCH 17/18] Fix Groupby and add tests --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/groupby/generic.py | 6 +-- pandas/tests/apply/test_series_apply.py | 20 ++++++++ .../tests/groupby/aggregate/test_aggregate.py | 46 +++++++++++++++++++ pandas/tests/resample/test_resample_api.py | 25 ++++++++++ 5 files changed, 95 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 95a949292a3b5..0720dac605672 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1306,7 +1306,7 @@ Reshaping - Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`) - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`) - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`) -- Bug in :func:`agg` would ignore arguments when passed a list of functions (:issue:`50863`) +- Bug in :meth:`DataFrame.agg`, :meth:`Series.agg`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) - Sparse diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cdb5dddf03a64..c470a669d8441 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -268,7 +268,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # Catch instances of lists / tuples # but not the class list / tuple itself. 
func = maybe_mangle_lambdas(func) - ret = self._aggregate_multiple_funcs(func) + ret = self._aggregate_multiple_funcs(func, *args, **kwargs) if relabeling: # columns is not narrowed by mypy from relabeling flag assert columns is not None # for mypy @@ -303,7 +303,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate - def _aggregate_multiple_funcs(self, arg) -> DataFrame: + def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame: if isinstance(arg, dict): if self.as_index: # GH 15931 @@ -328,7 +328,7 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame: for idx, (name, func) in enumerate(arg): key = base.OutputKey(label=name, position=idx) - results[key] = self.aggregate(func) + results[key] = self.aggregate(func, *args, **kwargs) if any(isinstance(x, DataFrame) for x in results.values()): from pandas import concat diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 53dee6e15c3e0..30f040b4197eb 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -107,6 +107,26 @@ def f(x, a=0, b=0, c=0): tm.assert_series_equal(result, expected) +def test_agg_list_like_func_with_args(): + # GH 50624 + + s = Series([1, 2, 3]) + + def foo1(x, a=1, c=0): + return x + a + c + + def foo2(x, b=2, c=0): + return x + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + s.agg([foo1, foo2], 0, 3, b=3, c=4) + + result = s.agg([foo1, foo2], 0, 3, c=4) + expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]}) + tm.assert_frame_equal(result, expected) + + def test_series_map_box_timestamps(): # GH#2689, GH#2627 ser = Series(pd.date_range("1/1/2000", periods=10)) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index e7be78be55620..41c6e490378f7 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py 
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1468,3 +1468,49 @@ def test_agg_of_mode_list(test, constant): expected = expected.set_index(0) tm.assert_frame_equal(result, expected) + + +def test__dataframe_groupy_agg_list_like_func_with_args(): + # GH 50624 + df = DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}) + df = df.groupby("y") + + def foo1(x, a=1, c=0): + return x.sum() + a + c + + def foo2(x, b=2, c=0): + return x.sum() + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + df.agg([foo1, foo2], 3, b=3, c=4) + + result = df.agg([foo1, foo2], 3, c=4) + expected = DataFrame( + [[8, 8], [9, 9], [10, 10]], + index=Index(["a", "b", "c"], name="y"), + columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), + ) + tm.assert_frame_equal(result, expected) + + +def test__series_groupy_agg_list_like_func_with_args(): + # GH 50624 + data = Series([1, 2, 3]) + data = data.groupby(data) + + def foo1(x, a=1, c=0): + return x.sum() + a + c + + def foo2(x, b=2, c=0): + return x.sum() + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + data.agg([foo1, foo2], 3, b=3, c=4) + + result = data.agg([foo1, foo2], 3, c=4) + expected = DataFrame( + [[8, 8], [9, 9], [10, 10]], index=Index([1, 2, 3]), columns=["foo1", "foo2"] + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index e6e924793389d..c7b9de13af2a6 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -633,6 +633,31 @@ def test_try_aggregate_non_existing_column(): df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]}) +def test_agg_list_like_func_with_args(): + # 50624 + df = DataFrame( + {"x": [1, 2, 3]}, index=date_range("2020-01-01", periods=3, freq="D") + ) + + def foo1(x, a=1, c=0): + return x + a + c + + def 
foo2(x, b=2, c=0): + return x + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + df.agg([foo1, foo2], 0, 3, b=3, c=4) + + result = df.agg([foo1, foo2], 0, 3, c=4) + expected = DataFrame( + [[8, 8], [9, 9], [10, 10]], + index=date_range("2020-01-01", periods=3, freq="D"), + columns=pd.MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), + ) + tm.assert_frame_equal(result, expected) + + def test_selection_api_validation(): # GH 13500 index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") From 371c068222cf9ad158a1eb4f38038e7adc222eb3 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 12 Feb 2023 12:21:49 +0800 Subject: [PATCH 18/18] Rename variables and split bugfix --- doc/source/whatsnew/v2.0.0.rst | 3 ++- pandas/tests/groupby/aggregate/test_aggregate.py | 14 +++++++------- pandas/tests/resample/test_resample_api.py | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index eba04d49329b6..e305df7d525fa 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1304,6 +1304,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"ngroup"`` argument (:issue:`45986`) - Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`) - Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) +- Bug in :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) - Reshaping @@ -1317,7 +1318,7 @@ Reshaping - Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`) - Bug in :meth:`DataFrame.explode` raising 
``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`) - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`) -- Bug in :meth:`DataFrame.agg`, :meth:`Series.agg`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) +- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would ignore arguments when passed a list of functions (:issue:`50863`) - Sparse diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 41c6e490378f7..22c9bbd74395d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1473,7 +1473,7 @@ def test_agg_of_mode_list(test, constant): def test__dataframe_groupy_agg_list_like_func_with_args(): # GH 50624 df = DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}) - df = df.groupby("y") + gb = df.groupby("y") def foo1(x, a=1, c=0): return x.sum() + a + c @@ -1483,9 +1483,9 @@ def foo2(x, b=2, c=0): msg = r"foo1\(\) got an unexpected keyword argument 'b'" with pytest.raises(TypeError, match=msg): - df.agg([foo1, foo2], 3, b=3, c=4) + gb.agg([foo1, foo2], 3, b=3, c=4) - result = df.agg([foo1, foo2], 3, c=4) + result = gb.agg([foo1, foo2], 3, c=4) expected = DataFrame( [[8, 8], [9, 9], [10, 10]], index=Index(["a", "b", "c"], name="y"), @@ -1496,8 +1496,8 @@ def foo2(x, b=2, c=0): def test__series_groupy_agg_list_like_func_with_args(): # GH 50624 - data = Series([1, 2, 3]) - data = data.groupby(data) + s = Series([1, 2, 3]) + sgb = s.groupby(s) def foo1(x, a=1, c=0): return x.sum() + a + c @@ -1507,9 +1507,9 @@ def foo2(x, b=2, c=0): msg = r"foo1\(\) got an unexpected keyword argument 'b'" with pytest.raises(TypeError, match=msg): - data.agg([foo1, foo2], 3, b=3, c=4) + sgb.agg([foo1, foo2], 3, b=3, c=4) - result = data.agg([foo1, foo2], 3, 
c=4) + result = sgb.agg([foo1, foo2], 3, c=4) expected = DataFrame( [[8, 8], [9, 9], [10, 10]], index=Index([1, 2, 3]), columns=["foo1", "foo2"] ) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index c7b9de13af2a6..0b8dc8f3e8ac4 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -647,9 +647,9 @@ def foo2(x, b=2, c=0): msg = r"foo1\(\) got an unexpected keyword argument 'b'" with pytest.raises(TypeError, match=msg): - df.agg([foo1, foo2], 0, 3, b=3, c=4) + df.resample("D").agg([foo1, foo2], 3, b=3, c=4) - result = df.agg([foo1, foo2], 0, 3, c=4) + result = df.resample("D").agg([foo1, foo2], 3, c=4) expected = DataFrame( [[8, 8], [9, 9], [10, 10]], index=date_range("2020-01-01", periods=3, freq="D"),