From 384930275b4e7a6e8aa5e0f1b729bbe746fb1c04 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 28 Feb 2021 11:28:35 -0500
Subject: [PATCH] POC: aggregate always aggregates

---
 pandas/core/apply.py                          |  9 ++++-----
 pandas/core/generic.py                        |  3 +--
 pandas/core/groupby/generic.py                |  2 +-
 pandas/core/groupby/groupby.py                |  2 +-
 pandas/core/groupby/ops.py                    | 11 +----------
 pandas/tests/apply/test_series_apply.py       | 19 +++++++++++++++----
 .../tests/groupby/aggregate/test_aggregate.py | 13 +++++--------
 pandas/tests/groupby/aggregate/test_other.py  |  3 +--
 pandas/tests/groupby/test_groupby.py          | 10 ++++------
 pandas/tests/test_multilevel.py               |  2 +-
 10 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index c159abe55b38c..5112d8fd41c3d 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -674,7 +674,9 @@ def agg(self):
             result = result.T if result is not None else result
 
         if result is None:
-            result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)
+            results, res_index = self.apply_series_generator()
+            result = self.obj._constructor_sliced(results)
+            result.index = res_index
 
         return result
 
@@ -1018,10 +1020,7 @@ def agg(self):
             # we cannot FIRST try the vectorized evaluation, because
             # then .agg and .apply would have different semantics if the
             # operation is actually defined on the Series, e.g. str
-            try:
-                result = self.obj.apply(f, *args, **kwargs)
-            except (ValueError, AttributeError, TypeError):
-                result = f(self.obj, *args, **kwargs)
+            result = f(self.obj, *args, **kwargs)
 
         return result
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 140f456926763..8cd7afbca256b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -10285,9 +10285,8 @@ def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
         grouped = self.groupby(level=level, axis=axis, sort=False)
         if hasattr(grouped, name) and skipna:
             return getattr(grouped, name)(**kwargs)
-        axis = self._get_axis_number(axis)
         method = getattr(type(self), name)
-        applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs)
+        applyf = lambda x: method(x, skipna=skipna, **kwargs)
         return grouped.aggregate(applyf)
 
     @final
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2de5e81360a93..d030396f7e2f9 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1010,7 +1010,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
         if result is None:
 
             # grouper specific aggregations
-            if self.grouper.nkeys > 1:
+            if not self._obj_with_exclusions.empty or self.grouper.nkeys > 1:
                 return self._python_agg_general(func, *args, **kwargs)
             elif args or kwargs:
                 result = self._aggregate_frame(func, *args, **kwargs)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e11c296783476..a57bb86e2ff6d 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1092,7 +1092,7 @@ def _agg_general(
 
             # apply a non-cython aggregation
             if result is None:
-                result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
+                result = self.aggregate(lambda x: npfunc(x))
             return result.__finalize__(self.obj, method="groupby")
 
     def _cython_agg_general(
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index a61e8872a7ce7..c691b55062101 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -760,7 +760,6 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
 
         counts = np.zeros(ngroups, dtype=int)
         result = np.empty(ngroups, dtype="O")
-        initialized = False
 
         splitter = get_splitter(obj, group_index, ngroups, axis=0)
 
@@ -768,16 +767,8 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
 
             # Each step of this loop corresponds to
             #  libreduction._BaseGrouper._apply_to_group
-            res = func(group)
-            res = libreduction.extract_result(res)
-
-            if not initialized:
-                # We only do this validation on the first iteration
-                libreduction.check_result_array(res, 0)
-                initialized = True
-
             counts[label] = group.shape[0]
-            result[label] = res
+            result[label] = func(group)
 
         result = lib.maybe_convert_objects(result, try_float=False)
         result = maybe_cast_result(result, obj, numeric_only=True)
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
index dcb5de29da320..e0323306d996b 100644
--- a/pandas/tests/apply/test_series_apply.py
+++ b/pandas/tests/apply/test_series_apply.py
@@ -298,17 +298,27 @@ def test_demo():
     tm.assert_series_equal(result, expected)
 
 
-def test_agg_apply_evaluate_lambdas_the_same(string_series):
+def test_agg_apply_evaluate_lambdas(string_series):
     # test that we are evaluating row-by-row first
     # before vectorized evaluation
+    expected = string_series.astype(str)
+
     result = string_series.apply(lambda x: str(x))
-    expected = string_series.agg(lambda x: str(x))
     tm.assert_series_equal(result, expected)
 
     result = string_series.apply(str)
-    expected = string_series.agg(str)
     tm.assert_series_equal(result, expected)
 
+    # GH 35725
+    # agg always aggregates: the function is applied to the whole Series
+    expected = str(string_series)
+
+    result = string_series.agg(lambda x: str(x))
+    assert result == expected
+
+    result = string_series.agg(str)
+    assert result == expected
+
 
 def test_with_nested_series(datetime_series):
     # GH 2316
@@ -318,7 +328,8 @@ def test_with_nested_series(datetime_series):
     tm.assert_frame_equal(result, expected)
 
     result = datetime_series.agg(lambda x: Series([x, x ** 2], index=["x", "x^2"]))
-    tm.assert_frame_equal(result, expected)
+    expected = Series([datetime_series, datetime_series ** 2], index=["x", "x^2"])
+    tm.assert_series_equal(result, expected)
 
 
 def test_replicate_describe(string_series):
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index b7df1c8382daa..dfcfd4ff813d0 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -47,12 +47,10 @@ def test_agg_regression1(tsframe):
 
 def test_agg_must_agg(df):
     grouped = df.groupby("A")["C"]
-
-    msg = "Must produce aggregated value"
-    with pytest.raises(Exception, match=msg):
-        grouped.agg(lambda x: x.describe())
-    with pytest.raises(Exception, match=msg):
-        grouped.agg(lambda x: x.index[:2])
+    result = grouped.agg(lambda x: x.describe())
+    expected = Series({name: group.describe() for name, group in grouped}, name="C")
+    expected.index.name = "A"
+    tm.assert_series_equal(result, expected)
 
 
 def test_agg_ser_multi_key(df):
@@ -127,9 +125,8 @@ def test_groupby_aggregation_multi_level_column():
         data=lst,
         columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]),
     )
-
     result = df.groupby(level=1, axis=1).sum()
-    expected = DataFrame({0: [2.0, 1, 1, 1], 1: [1, 0, 1, 1]})
+    expected = DataFrame({0: [2, 1, 1, 1], 1: [1, 0, 1, 1]})
 
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 681192881c301..d38ae10be54c7 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -605,9 +605,8 @@ def test_agg_lambda_with_timezone():
     )
     result = df.groupby("tag").agg({"date": lambda e: e.head(1)})
     expected = DataFrame(
-        [pd.Timestamp("2018-01-01", tz="UTC")],
+        {"date": [df["date"].iloc[:1]]},
         index=Index([1], name="tag"),
-        columns=["date"],
     )
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index afde1daca74c1..179170126e455 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -75,11 +75,9 @@ def test_basic(dtype):
     agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
     assert agged[1] == 21
 
-    # corner cases
-    msg = "Must produce aggregated value"
-    # exception raised is type Exception
-    with pytest.raises(Exception, match=msg):
-        grouped.aggregate(lambda x: x * 2)
+    result = grouped.aggregate(lambda x: x * 2)
+    expected = Series({name: group * 2 for name, group in grouped})
+    tm.assert_series_equal(result, expected)
 
 
 def test_groupby_nonobject_dtype(mframe, df_mixed_floats):
@@ -1026,7 +1024,7 @@ def test_groupby_with_hier_columns():
     result = df.groupby(level=0).apply(lambda x: x.mean())
     tm.assert_index_equal(result.columns, columns)
 
-    result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1))
+    result = df.groupby(level=0, axis=1).agg(lambda x: x.mean())
     tm.assert_index_equal(result.columns, Index(["A", "B"]))
     tm.assert_index_equal(result.index, df.index)
 
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 8e6a636a8f602..b6d006439cfb6 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -214,7 +214,7 @@ def test_frame_group_ops(
 
         def aggf(x):
             pieces.append(x)
-            return getattr(x, op)(skipna=skipna, axis=axis)
+            return getattr(x, op)(skipna=skipna)
 
         leftside = grouped.agg(aggf)
         rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna)