From 432ffba3f6ccaf9466ee3110039944fddbe636f2 Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Fri, 16 Sep 2022 02:49:57 +0100
Subject: [PATCH 01/10] fix to maintain consistency for apply UDF on empty
 inputs

---
 pandas/core/groupby/ops.py         | 14 +++++++++-----
 pandas/tests/groupby/test_apply.py |  8 ++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ba808e1f2e07f..23f97fcdfd3df 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -844,7 +844,6 @@ def apply(
             if not mutated and not _is_indexed_like(res, group_axes, axis):
                 mutated = True
             result_values.append(res)
-
         # getattr pattern for __name__ is needed for functools.partial objects
         if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
             "idxmin",
@@ -852,10 +851,15 @@ def apply(
             "nanargmin",
             "nanargmax",
         ]:
-            # If group_keys is empty, then no function calls have been made,
-            #  so we will not have raised even if this is an invalid dtype.
-            #  So do one dummy call here to raise appropriate TypeError.
-            f(data.iloc[:0])
+            try:
+                #  If group_keys is empty, then no function calls have been made,
+                #  so we will not have raised even if this is an invalid dtype.
+                #  So do one dummy call here to raise appropriate TypeError.
+                f(data.iloc[:0])
+            except IndexError:
+                # If IndexError is raised,
+                # maintain consistency for all operations on empty groups
+                pass
 
         return result_values, mutated
 
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index b064c12f89c21..0909b1b408708 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1331,3 +1331,11 @@ def test_result_name_when_one_group(name):
     expected = Series([1, 2], name=name)
 
     tm.assert_series_equal(result, expected)
+
+
+def test_empty_df():
+    empty_df = pd.DataFrame({"a": [], "b": []})
+    result = empty_df.groupby("a").b.apply(lambda x: x.values[-1])
+    expected = empty_df.groupby("a").b.take([0])
+
+    tm.assert_series_equal(result, expected)

From 519fa101f26bfa89038ebb1ce2d6a23c5f2f76ad Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Fri, 16 Sep 2022 03:12:52 +0100
Subject: [PATCH 02/10] use DataFrame instead of pd.DataFrame for test

---
 pandas/tests/groupby/test_apply.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 0909b1b408708..79ff6d5f96d42 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1334,8 +1334,10 @@ def test_result_name_when_one_group(name):
 
 
 def test_empty_df():
-    empty_df = pd.DataFrame({"a": [], "b": []})
+    empty_df = DataFrame({"a": [], "b": []})
+
+    # Both operations should return an empty series instead of IndexError for apply UDF
     result = empty_df.groupby("a").b.apply(lambda x: x.values[-1])
-    expected = empty_df.groupby("a").b.take([0])
+    expected = empty_df.groupby("a").b.agg("sum")
 
     tm.assert_series_equal(result, expected)

From f734276cb08dd5d210497b8e5368c057424748c0 Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Mon, 19 Sep 2022 23:12:11 +0100
Subject: [PATCH 03/10] only make the dummy call in apply when df is empty and
 the function name is mad, skew, sum or prod

---
 pandas/core/groupby/ops.py         | 23 +++++++++--------------
 pandas/tests/groupby/test_apply.py | 22 ++++++++++++++++++++--
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 23f97fcdfd3df..1344f0ce45912 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -845,21 +845,16 @@ def apply(
                 mutated = True
             result_values.append(res)
         # getattr pattern for __name__ is needed for functools.partial objects
-        if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
-            "idxmin",
-            "idxmax",
-            "nanargmin",
-            "nanargmax",
+        if len(group_keys) == 0 and getattr(f, "__name__", None) in [
+            "mad",
+            "skew",
+            "sum",
+            "prod",
         ]:
-            try:
-                #  If group_keys is empty, then no function calls have been made,
-                #  so we will not have raised even if this is an invalid dtype.
-                #  So do one dummy call here to raise appropriate TypeError.
-                f(data.iloc[:0])
-            except IndexError:
-                # If IndexError is raised,
-                # maintain consistency for all operations on empty groups
-                pass
+            # If group_keys is empty, then no function calls have been made,
+            #  so we will not have raised even if this is an invalid dtype.
+            #  So do one dummy call here to raise appropriate TypeError.
+            f(data.iloc[:0])
 
         return result_values, mutated
 
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 79ff6d5f96d42..5f8c3513b70ab 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1337,7 +1337,25 @@ def test_empty_df():
     empty_df = DataFrame({"a": [], "b": []})
 
     # Both operations should return an empty series instead of IndexError for apply UDF
-    result = empty_df.groupby("a").b.apply(lambda x: x.values[-1])
-    expected = empty_df.groupby("a").b.agg("sum")
+    result = empty_df.groupby("a", group_keys=True).b.apply(lambda x: x.values[-1])
+    expected = empty_df.groupby("a", group_keys=True).b.agg("sum")
 
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "error_type",
+    [
+        TypeError,
+        ValueError,
+        IndexError,
+    ],
+)
+def test_udf_raise_error_on_empty_df(error_type):
+    empty_df = DataFrame({"a": [], "b": []})
+
+    def f(group):
+        raise error_type
+
+    # Exception should not be raised.
+    empty_df.groupby("a", group_keys=True).b.apply(f)

From 1ad10e123aed6e7f72c34dbe4d5238bfd8a34738 Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Thu, 22 Sep 2022 18:34:22 +0100
Subject: [PATCH 04/10] improve test for udfs on empty inputs

---
 pandas/tests/groupby/test_apply.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 5f8c3513b70ab..3a8928f85cc01 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -3,6 +3,7 @@
     datetime,
 )
 from io import StringIO
+from tkinter import S
 
 import numpy as np
 import pytest
@@ -1334,13 +1335,19 @@ def test_result_name_when_one_group(name):
 
 
 def test_empty_df():
+    # GH 47985
     empty_df = DataFrame({"a": [], "b": []})
 
     # Both operations should return an empty series instead of IndexError for apply UDF
-    result = empty_df.groupby("a", group_keys=True).b.apply(lambda x: x.values[-1])
-    expected = empty_df.groupby("a", group_keys=True).b.agg("sum")
+    result1 = empty_df.groupby("a", group_keys=True).b.apply(lambda x: x.values[-1])
+    result2 = empty_df.groupby("a", group_keys=True).b.agg("sum")
 
-    tm.assert_series_equal(result, expected)
+    expected = Series(
+        [], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
+    )
+
+    tm.assert_series_equal(result1, expected)
+    tm.assert_series_equal(result2, expected)
 
 
 @pytest.mark.parametrize(
@@ -1352,10 +1359,16 @@ def test_empty_df():
     ],
 )
 def test_udf_raise_error_on_empty_df(error_type):
+    # GH 47985
     empty_df = DataFrame({"a": [], "b": []})
 
     def f(group):
         raise error_type
 
     # Exception should not be raised.
-    empty_df.groupby("a", group_keys=True).b.apply(f)
+    result = empty_df.groupby("a", group_keys=True).b.apply(f)
+    expected = Series(
+        [], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
+    )
+
+    tm.assert_series_equal(result, expected)

From 29646583b0e2c67737ab88625165c5345b81f74f Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Thu, 22 Sep 2022 18:45:06 +0100
Subject: [PATCH 05/10] remove accidental tkinter import from test_apply.py

---
 pandas/tests/groupby/test_apply.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 3a8928f85cc01..4cc2014192533 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -3,7 +3,6 @@
     datetime,
 )
 from io import StringIO
-from tkinter import S
 
 import numpy as np
 import pytest

From 98b303024c3b021fc7a6ec770fe1e01ebb0bc4d2 Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Fri, 23 Sep 2022 19:02:53 +0100
Subject: [PATCH 06/10] remove unrelated test

---
 pandas/tests/groupby/test_apply.py | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 4cc2014192533..a1e827f4e115c 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1347,27 +1347,3 @@ def test_empty_df():
 
     tm.assert_series_equal(result1, expected)
     tm.assert_series_equal(result2, expected)
-
-
-@pytest.mark.parametrize(
-    "error_type",
-    [
-        TypeError,
-        ValueError,
-        IndexError,
-    ],
-)
-def test_udf_raise_error_on_empty_df(error_type):
-    # GH 47985
-    empty_df = DataFrame({"a": [], "b": []})
-
-    def f(group):
-        raise error_type
-
-    # Exception should not be raised.
-    result = empty_df.groupby("a", group_keys=True).b.apply(f)
-    expected = Series(
-        [], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
-    )
-
-    tm.assert_series_equal(result, expected)

From 60941f4522db2480d2a5e466c73438edaa47afad Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Sun, 25 Sep 2022 01:13:39 +0100
Subject: [PATCH 07/10] change test for empty df

---
 pandas/tests/groupby/test_apply.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index a1e827f4e115c..7ec978460803c 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1333,17 +1333,19 @@ def test_result_name_when_one_group(name):
     tm.assert_series_equal(result, expected)
 
 
-def test_empty_df():
+@pytest.mark.parametrize(
+    "apply_func", [lambda x: x.values[-1], lambda gb: gb["b"].iloc[0]]
+)
+@pytest.mark.parametrize("op", ["mad", "skew", "sum", "prod"])
+def test_empty_df(apply_func, op):
     # GH 47985
     empty_df = DataFrame({"a": [], "b": []})
+    gb = empty_df.groupby("a", group_keys=True)
+    group = getattr(gb, "b")
 
-    # Both operations should return an empty series instead of IndexError for apply UDF
-    result1 = empty_df.groupby("a", group_keys=True).b.apply(lambda x: x.values[-1])
-    result2 = empty_df.groupby("a", group_keys=True).b.agg("sum")
-
+    result = group.apply(apply_func) if apply_func else group.agg(op)
     expected = Series(
         [], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
     )
 
-    tm.assert_series_equal(result1, expected)
-    tm.assert_series_equal(result2, expected)
+    tm.assert_series_equal(result, expected)

From ee08f0fb1e36c4fb0bfa74b92fa2371558ad9192 Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Mon, 26 Sep 2022 07:49:24 +0100
Subject: [PATCH 08/10] fix test for udf on empty df

---
 pandas/tests/groupby/test_apply.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 7ec978460803c..47ea6a99ffea9 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1334,16 +1334,23 @@ def test_result_name_when_one_group(name):
 
 
 @pytest.mark.parametrize(
-    "apply_func", [lambda x: x.values[-1], lambda gb: gb["b"].iloc[0]]
+    "method, op",
+    [
+        ("apply", lambda gb: gb.values[-1]),
+        ("apply", lambda gb: gb["b"].iloc[0]),
+        ("agg", "mad"),
+        ("agg", "skew"),
+        ("agg", "prod"),
+        ("agg", "sum"),
+    ],
 )
-@pytest.mark.parametrize("op", ["mad", "skew", "sum", "prod"])
-def test_empty_df(apply_func, op):
+def test_empty_df(method, op):
     # GH 47985
     empty_df = DataFrame({"a": [], "b": []})
     gb = empty_df.groupby("a", group_keys=True)
     group = getattr(gb, "b")
 
-    result = group.apply(apply_func) if apply_func else group.agg(op)
+    result = getattr(group, method)(op)
     expected = Series(
         [], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
     )

From 348b14affce7e80024fcc00fe9e1c8a3952d5e97 Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Tue, 27 Sep 2022 00:26:28 +0100
Subject: [PATCH 09/10] add to whatsnew documentation

---
 doc/source/whatsnew/v1.5.1.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst
index da0bd746e3da5..4eec6f2927b3a 100644
--- a/doc/source/whatsnew/v1.5.1.rst
+++ b/doc/source/whatsnew/v1.5.1.rst
@@ -86,6 +86,9 @@ Bug fixes
 - Bug in :meth:`DataFrame.pivot_table` raising unexpected ``FutureWarning`` when setting datetime column as index (:issue:`48683`)
 -
 
+Groupby
+^^^^^^^
+- Bug in :meth:`DataFrameGroupBy.apply` invokes user defined function when called on an empty dataframe (:issue:`47985`)
 .. ---------------------------------------------------------------------------
 
 .. _whatsnew_151.other:

From 6972fd4eefd1b1ec4f882dca68ce788197ebf16c Mon Sep 17 00:00:00 2001
From: th3nn3ss <chuksmcdennis@yahoo.com>
Date: Tue, 27 Sep 2022 00:39:18 +0100
Subject: [PATCH 10/10] fix whatsnew v1.5.1 documentation

---
 doc/source/whatsnew/v1.5.1.rst | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst
index 3dcad77c72969..3157723167020 100644
--- a/doc/source/whatsnew/v1.5.1.rst
+++ b/doc/source/whatsnew/v1.5.1.rst
@@ -77,6 +77,7 @@ Fixed regressions
 - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
 - Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`)
 - Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
+- Fixed regression in :meth:`.DataFrameGroupBy.apply` where the user-defined function was called on an empty DataFrame (:issue:`47985`)
 
 .. ---------------------------------------------------------------------------
 
@@ -90,9 +91,6 @@ Bug fixes
 - Bug in :meth:`DataFrame.pivot_table` raising unexpected ``FutureWarning`` when setting datetime column as index (:issue:`48683`)
 -
 
-Groupby
-^^^^^^^
-- Bug in :meth:`DataFrameGroupBy.apply` invokes user defined function when called on an empty dataframe (:issue:`47985`)
 .. ---------------------------------------------------------------------------
 
 .. _whatsnew_151.other: