From 4c5eddd63e94bacddb96bf61f81a6a8fcd9c33f0 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 20 Aug 2020 21:19:10 -0700
Subject: [PATCH 1/9] REF: remove unnecessary try/except

---
 pandas/core/groupby/generic.py | 69 ++++++++++++++++------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 166631e69f523..51532a75d2d4a 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -31,7 +31,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -60,6 +60,7 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
+from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -1034,32 +1035,31 @@ def _cython_agg_blocks(
 
         no_result = object()
 
-        def cast_result_block(result, block: "Block", how: str) -> "Block":
-            # see if we can cast the block to the desired dtype
+        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
+            # see if we can cast the values to the desired dtype
             # this may not be the original dtype
             assert not isinstance(result, DataFrame)
             assert result is not no_result
 
-            dtype = maybe_cast_result_dtype(block.dtype, how)
+            dtype = maybe_cast_result_dtype(values.dtype, how)
             result = maybe_downcast_numeric(result, dtype)
 
-            if block.is_extension and isinstance(result, np.ndarray):
-                # e.g. block.values was an IntegerArray
-                # (1, N) case can occur if block.values was Categorical
+            if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
+                # e.g. values was an IntegerArray
+                # (1, N) case can occur if values was Categorical
                 # and result is ndarray[object]
                 # TODO(EA2D): special casing not needed with 2D EAs
                 assert result.ndim == 1 or result.shape[0] == 1
                 try:
                     # Cast back if feasible
-                    result = type(block.values)._from_sequence(
-                        result.ravel(), dtype=block.values.dtype
+                    result = type(values)._from_sequence(
+                        result.ravel(), dtype=values.dtype
                     )
                 except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)
 
-            agg_block: "Block" = block.make_block(result)
-            return agg_block
+            return result
 
         def blk_func(block: "Block") -> List["Block"]:
             new_blocks: List["Block"] = []
@@ -1093,33 +1093,30 @@ def blk_func(block: "Block") -> List["Block"]:
                 # Categoricals. This will be done later by self._reindex_output()
                 # Doing it here creates an error. See GH#34951
                 sgb = get_groupby(obj, self.grouper, observed=True)
-                try:
-                    result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
-                except TypeError:
-                    # we may have an exception in trying to aggregate
-                    # continue and exclude the block
-                    raise
+                result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+
+                result = cast(DataFrame, result)
+                # unwrap DataFrame to get array
+                if len(result._mgr.blocks) != 1:
+                    # We've split an object block! Everything we've assumed
+                    # about a single block input returning a single block output
+                    # is a lie. To keep the code-path for the typical non-split case
+                    # clean, we choose to clean up this mess later on.
+                    assert len(locs) == result.shape[1]
+                    for i, loc in enumerate(locs):
+                        agg_block = result.iloc[:, [i]]._mgr.blocks[0]
+                        agg_block.mgr_locs = [loc]
+                        new_blocks.append(agg_block)
                 else:
-                    result = cast(DataFrame, result)
-                    # unwrap DataFrame to get array
-                    if len(result._mgr.blocks) != 1:
-                        # We've split an object block! Everything we've assumed
-                        # about a single block input returning a single block output
-                        # is a lie. To keep the code-path for the typical non-split case
-                        # clean, we choose to clean up this mess later on.
-                        assert len(locs) == result.shape[1]
-                        for i, loc in enumerate(locs):
-                            agg_block = result.iloc[:, [i]]._mgr.blocks[0]
-                            agg_block.mgr_locs = [loc]
-                            new_blocks.append(agg_block)
-                    else:
-                        result = result._mgr.blocks[0].values
-                        if isinstance(result, np.ndarray) and result.ndim == 1:
-                            result = result.reshape(1, -1)
-                        agg_block = cast_result_block(result, block, how)
-                        new_blocks = [agg_block]
+                    result = result._mgr.blocks[0].values
+                    if isinstance(result, np.ndarray) and result.ndim == 1:
+                        result = result.reshape(1, -1)
+                    res_values = cast_agg_result(result, block.values, how)
+                    agg_block = block.make_block(res_values)
+                    new_blocks = [agg_block]
             else:
-                agg_block = cast_result_block(result, block, how)
+                res_values = cast_agg_result(result, block.values, how)
+                agg_block = block.make_block(res_values)
                 new_blocks = [agg_block]
             return new_blocks
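A minimal sketch (not pandas-internal code) of the cast-back round-trip the new
cast_agg_result helper performs: a cython aggregation hands back a plain ndarray,
and we try to rebuild the input's ExtensionArray type from it, keeping the ndarray
when the values don't fit the dtype. _from_sequence is a private EA constructor;
behavior assumed for pandas ~1.1.

    import numpy as np
    import pandas as pd

    values = pd.array([1, 2, 3, 4], dtype="Int64")  # ExtensionArray input
    result = np.array([3.0, 7.0])  # float ndarray, as a cython agg would return
    try:
        # Cast back if feasible, mirroring cast_agg_result above
        result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
    except (ValueError, TypeError):
        # keep the ndarray when the round-trip is lossy (e.g. true floats)
        pass
    print(result.dtype)  # Int64 -- the extension dtype survived the agg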
From 42649fbb855a895ee5818d7dc80bdbd0ce0e9f5a Mon Sep 17 00:00:00 2001
From: Karthik Mathur <22126205+mathurk1@users.noreply.github.com>
Date: Fri, 21 Aug 2020 17:34:51 -0500
Subject: [PATCH 2/9] TST: add test for agg on ordered categorical cols (#35630)

---
 .../tests/groupby/aggregate/test_aggregate.py | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index ce9d4b892d775..8fe450fe6abfc 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1063,6 +1063,85 @@ def test_groupby_get_by_index():
     pd.testing.assert_frame_equal(res, expected)
 
 
+@pytest.mark.parametrize(
+    "grp_col_dict, exp_data",
+    [
+        ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}),
+        ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}),
+        ({"nr": "min"}, {"nr": [1, 5]}),
+    ],
+)
+def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
+    # test single aggregations on ordered categorical cols GH27800
+
+    # create the input dataframe
+    input_df = pd.DataFrame(
+        {
+            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
+            "cat_ord": list("aabbccdd"),
+            "cat": list("aaaabbbb"),
+        }
+    )
+
+    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
+    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
+    result_df = input_df.groupby("cat").agg(grp_col_dict)
+
+    # create expected dataframe
+    cat_index = pd.CategoricalIndex(
+        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
+    )
+
+    expected_df = pd.DataFrame(data=exp_data, index=cat_index)
+
+    tm.assert_frame_equal(result_df, expected_df)
+
+
+@pytest.mark.parametrize(
+    "grp_col_dict, exp_data",
+    [
+        ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]),
+        ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]),
+        ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]),
+    ],
+)
+def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
+    # test combined aggregations on ordered categorical cols GH27800
+
+    # create the input dataframe
+    input_df = pd.DataFrame(
+        {
+            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
+            "cat_ord": list("aabbccdd"),
+            "cat": list("aaaabbbb"),
+        }
+    )
+
+    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
+    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
+    result_df = input_df.groupby("cat").agg(grp_col_dict)
+
+    # create expected dataframe
+    cat_index = pd.CategoricalIndex(
+        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
+    )
+
+    # unpack the grp_col_dict to create the multi-index tuple
+    # this tuple will be used to create the expected dataframe index
+    multi_index_list = []
+    for k, v in grp_col_dict.items():
+        if isinstance(v, list):
+            for value in v:
+                multi_index_list.append([k, value])
+        else:
+            multi_index_list.append([k, v])
+    multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list))
+
+    expected_df = pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index)
+
+    tm.assert_frame_equal(result_df, expected_df)
+
+
 def test_nonagg_agg():
     # GH 35490 - Single/Multiple agg of non-agg function give same results
     # TODO: agg should raise for functions that don't aggregate
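For reference, a small usage sketch of the behavior these tests pin down (assuming
a pandas build that includes the GH27800 fix): dict-based agg over a categorical
group key, where min/max on an ordered categorical column aggregate without raising.

    import pandas as pd

    df = pd.DataFrame(
        {"nr": [1, 2, 3, 4], "cat_ord": list("abab"), "cat": list("aabb")}
    )
    df = df.astype({"cat": "category", "cat_ord": "category"})
    df["cat_ord"] = df["cat_ord"].cat.as_ordered()
    # min/max are well-defined for ordered categoricals, so this aggregates
    print(df.groupby("cat").agg({"nr": "min", "cat_ord": "max"}))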
From 47121ddc1c655f428c6c3fcea8fbf02eba85600a Mon Sep 17 00:00:00 2001
From: tkmz-n <60312218+tkmz-n@users.noreply.github.com>
Date: Sat, 22 Aug 2020 07:42:50 +0900
Subject: [PATCH 3/9] TST: resample does not yield empty groups (#10603)
 (#35799)

---
 pandas/tests/resample/test_timedelta.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py
index 0fbb60c176b30..3fa85e62d028c 100644
--- a/pandas/tests/resample/test_timedelta.py
+++ b/pandas/tests/resample/test_timedelta.py
@@ -150,3 +150,18 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
     tm.assert_index_equal(result.index, expected_index)
     assert result.index.freq == expected_index.freq
     assert not np.isnan(result[-1])
+
+
+def test_resample_with_timedelta_yields_no_empty_groups():
+    # GH 10603
+    df = pd.DataFrame(
+        np.random.normal(size=(10000, 4)),
+        index=pd.timedelta_range(start="0s", periods=10000, freq="3906250n"),
+    )
+    result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))
+
+    expected = pd.DataFrame(
+        [[768.0] * 4] * 12 + [[528.0] * 4],
+        index=pd.timedelta_range(start="1s", periods=13, freq="3s"),
+    )
+    tm.assert_frame_equal(result, expected)
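The expected counts follow from the sampling rate: one row every 3906250 ns = 1/256 s,
so a 3 s bin holds 768 rows, and slicing from "1s" drops the first 256 rows, leaving
9744 = 12 * 768 + 528, i.e. twelve full bins plus a 528-row tail. A quick check of
that arithmetic:

    samples_per_sec = 10**9 // 3906250   # 256 rows per second
    per_bin = 3 * samples_per_sec        # 768 rows per 3s bin
    remaining = 10000 - samples_per_sec  # slicing from "1s" drops 256 rows
    print(divmod(remaining, per_bin))    # (12, 528), matching `expected`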
From 1decb3e0ee1923a29b8eded7507bcb783b3870d0 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 21 Aug 2020 18:48:02 -0700
Subject: [PATCH 4/9] revert accidental rebase

---
 pandas/core/groupby/generic.py | 61 ++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 4b1f6cfe0a662..60e23b14eaf09 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -30,7 +30,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -59,7 +59,6 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
-from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -1034,31 +1033,32 @@ def _cython_agg_blocks(
 
         no_result = object()
 
-        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
-            # see if we can cast the values to the desired dtype
+        def cast_result_block(result, block: "Block", how: str) -> "Block":
+            # see if we can cast the block to the desired dtype
             # this may not be the original dtype
             assert not isinstance(result, DataFrame)
             assert result is not no_result
 
-            dtype = maybe_cast_result_dtype(values.dtype, how)
+            dtype = maybe_cast_result_dtype(block.dtype, how)
             result = maybe_downcast_numeric(result, dtype)
 
-            if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
-                # e.g. values was an IntegerArray
-                # (1, N) case can occur if values was Categorical
+            if block.is_extension and isinstance(result, np.ndarray):
+                # e.g. block.values was an IntegerArray
+                # (1, N) case can occur if block.values was Categorical
                 # and result is ndarray[object]
                 # TODO(EA2D): special casing not needed with 2D EAs
                 assert result.ndim == 1 or result.shape[0] == 1
                 try:
                     # Cast back if feasible
-                    result = type(values)._from_sequence(
-                        result.ravel(), dtype=values.dtype
+                    result = type(block.values)._from_sequence(
+                        result.ravel(), dtype=block.values.dtype
                     )
                 except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)
 
-            return result
+            agg_block: "Block" = block.make_block(result)
+            return agg_block
 
         def blk_func(block: "Block") -> List["Block"]:
             new_blocks: List["Block"] = []
@@ -1092,25 +1092,28 @@ def blk_func(block: "Block") -> List["Block"]:
                 # Categoricals. This will be done later by self._reindex_output()
                 # Doing it here creates an error. See GH#34951
                 sgb = get_groupby(obj, self.grouper, observed=True)
-                result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
-
-                assert isinstance(result, (Series, DataFrame))  # for mypy
-                # In the case of object dtype block, it may have been split
-                # in the operation. We un-split here.
-                result = result._consolidate()
-                assert isinstance(result, (Series, DataFrame))  # for mypy
-                assert len(result._mgr.blocks) == 1
-
-                # unwrap DataFrame to get array
-                result = result._mgr.blocks[0].values
-                if isinstance(result, np.ndarray) and result.ndim == 1:
-                    result = result.reshape(1, -1)
-                res_values = cast_agg_result(result, block.values, how)
-                agg_block = block.make_block(res_values)
-                new_blocks = [agg_block]
+                try:
+                    result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+                except TypeError:
+                    # we may have an exception in trying to aggregate
+                    # continue and exclude the block
+                    raise
+                else:
+                    assert isinstance(result, (Series, DataFrame))  # for mypy
+                    # In the case of object dtype block, it may have been split
+                    # in the operation. We un-split here.
+                    result = result._consolidate()
+                    assert isinstance(result, (Series, DataFrame))  # for mypy
+                    assert len(result._mgr.blocks) == 1
+
+                    # unwrap DataFrame to get array
+                    result = result._mgr.blocks[0].values
+                    if isinstance(result, np.ndarray) and result.ndim == 1:
+                        result = result.reshape(1, -1)
+                    agg_block = cast_result_block(result, block, how)
+                    new_blocks = [agg_block]
             else:
-                res_values = cast_agg_result(result, block.values, how)
-                agg_block = block.make_block(res_values)
+                agg_block = cast_result_block(result, block, how)
                 new_blocks = [agg_block]
             return new_blocks
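Background on the un-split step restored above, as a hedged sketch (pandas block
internals, so the details vary by version): applying a function column-wise to an
object block can leave the result spread over several same-dtype blocks, and
_consolidate() merges them back so the single-block assumption holds.

    import pandas as pd

    df = pd.DataFrame({"a": ["x", "y"]})
    df["b"] = ["z", "w"]  # setitem typically appends a second object block
    print(len(df._mgr.blocks))                 # 2 on pandas ~1.1
    print(len(df._consolidate()._mgr.blocks))  # 1: same-dtype blocks merged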
From 51205a51dd75c791848c353e9af3d8b46aa4afd6 Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 26 Aug 2020 18:51:45 -0700
Subject: [PATCH 5/9] REF/BUG: don't go through cython for EA indexes

---
 pandas/core/groupby/generic.py | 50 +++++++++++++++++++++++++++++-----
 pandas/core/groupby/ops.py     |  5 ++++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2afa56b50c3c7..36db78a77c511 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -74,7 +74,14 @@
     get_groupby,
 )
 from pandas.core.groupby.numba_ import generate_numba_func, split_for_numba
-from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same
+from pandas.core.indexes.api import (
+    DatetimeIndex,
+    Index,
+    MultiIndex,
+    PeriodIndex,
+    TimedeltaIndex,
+    all_indexes_same,
+)
 import pandas.core.indexes.base as ibase
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
@@ -262,17 +269,46 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
         if self.grouper.nkeys > 1:
             return self._python_agg_general(func, *args, **kwargs)
 
-        try:
-            return self._python_agg_general(func, *args, **kwargs)
-        except (ValueError, KeyError):
-            # TODO: KeyError is raised in _python_agg_general,
-            #  see test_groupby.test_basic
+        if isinstance(
+            self._selected_obj.index, (DatetimeIndex, TimedeltaIndex, PeriodIndex)
+        ):
+            # using _python_agg_general would end up incorrectly patching
+            #  _index_data in reduction.pyx
             result = self._aggregate_named(func, *args, **kwargs)
+        else:
+            try:
+                return self._python_agg_general(func, *args, **kwargs)
+            except (ValueError, KeyError):
+                # TODO: KeyError is raised in _python_agg_general,
+                #  see test_groupby.test_basic
+                result = self._aggregate_named(func, *args, **kwargs)
 
         index = Index(sorted(result), name=self.grouper.names[0])
+        if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
+            # TODO: do we _always_ want to do this?
+            #  shouldn't this be done later in e.g. _wrap_aggregated_output?
+            index = index._with_freq("infer")
+
+        result_index = self.grouper.result_index
+
+        if (
+            result_index.dtype == index.dtype
+            and result_index.freq is not None
+            and index.freq is None
+        ):
+            # TODO: will dtype equality always hold?
+            if len(index) == 1:
+                index.freq = result_index.freq
+
+            elif len(index) == 2:
+                if index[0] + result_index.freq == index[1]:
+                    # infer_freq doesn't handle length-2 indexes
+                    index.freq = result_index.freq
+
         ret = create_series_with_explicit_dtype(
             result, index=index, dtype_if_empty=object
         )
+        ret.name = self._selected_obj.name  # test_metadata_propagation_indiv
 
         if not self.as_index:  # pragma: no cover
             print("Warning, ignoring as_index=True")
@@ -478,7 +514,7 @@ def _get_index() -> Index:
     def _aggregate_named(self, func, *args, **kwargs):
         result = {}
 
-        for name, group in self:
+        for name, group in self:  # TODO: could we have duplicate names?
             group.name = name
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index c6171a55359fe..66a9f1353d3c5 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -672,6 +672,11 @@ def _aggregate_series_pure_python(
                 #  e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                 #  FIXME: are we potentially losing important res.index info?
                 res = res.item()
+            elif group.dtype == object:
+                # TODO: is this at all right?
+                #  e.g. test_agg_over_numpy_arrays where we have entries
+                #  that are each ndarrays
+                pass
             else:
                 raise ValueError("Function does not reduce")
         result = np.empty(ngroups, dtype="O")
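The freq bookkeeping added above exists because rebuilding the index from sorted
group keys discards freq, and inference needs at least three points. A sketch of
both facts, using the same private _with_freq helper the patch itself calls
(pandas ~1.1 assumed):

    import pandas as pd

    idx = pd.date_range("2020-01-01", periods=4, freq="D")
    rebuilt = pd.Index(sorted(idx))  # freq is lost in the round-trip
    print(rebuilt.freq)                      # None
    print(rebuilt._with_freq("infer").freq)  # <Day>; a length-2 index stays None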
From f453c5b3c74a86d4012b9478a3b64204f7cd81dc Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 26 Aug 2020 20:46:43 -0700
Subject: [PATCH 6/9] Implement _aggregate_maybe_named

---
 pandas/core/groupby/generic.py | 30 +++++++++++++++++++++++++++---
 pandas/core/groupby/ops.py     |  5 -----
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 36db78a77c511..9b72157ddd087 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -274,14 +274,14 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
         ):
             # using _python_agg_general would end up incorrectly patching
             #  _index_data in reduction.pyx
-            result = self._aggregate_named(func, *args, **kwargs)
+            result = self._aggregate_maybe_named(func, *args, **kwargs)
         else:
             try:
                 return self._python_agg_general(func, *args, **kwargs)
             except (ValueError, KeyError):
                 # TODO: KeyError is raised in _python_agg_general,
                 #  see test_groupby.test_basic
-                result = self._aggregate_named(func, *args, **kwargs)
+                result = self._aggregate_maybe_named(func, *args, **kwargs)
 
         index = Index(sorted(result), name=self.grouper.names[0])
         if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
@@ -511,11 +511,35 @@ def _get_index() -> Index:
         )
         return self._reindex_output(result)
 
+    def _aggregate_maybe_named(self, func, *args, **kwargs):
+        """
+        Try the named-aggregator first, then unnamed, which better matches
+        what libreduction does.
+        """
+        try:
+            return self._aggregate_named(func, *args, **kwargs)
+        except KeyError:
+            return self._aggregate_unnamed(func, *args, **kwargs)
+
     def _aggregate_named(self, func, *args, **kwargs):
         result = {}
 
         for name, group in self:  # TODO: could we have duplicate names?
-            group.name = name
+            group.name = name  # only difference vs _aggregate_unnamed
+            output = func(group, *args, **kwargs)
+            if isinstance(output, (Series, Index, np.ndarray)):
+                raise ValueError("Must produce aggregated value")
+            result[name] = output
+
+        return result
+
+    def _aggregate_unnamed(self, func, *args, **kwargs):
+        """
+        Pure-python analogue of what _python_agg_general does.
+        """
+        result = {}
+
+        for name, group in self:  # TODO: could we have duplicate names?
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
                 raise ValueError("Must produce aggregated value")
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 66a9f1353d3c5..c6171a55359fe 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -672,11 +672,6 @@ def _aggregate_series_pure_python(
                 #  e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                 #  FIXME: are we potentially losing important res.index info?
                 res = res.item()
-            elif group.dtype == object:
-                # TODO: is this at all right?
-                #  e.g. test_agg_over_numpy_arrays where we have entries
-                #  that are each ndarrays
-                pass
             else:
                 raise ValueError("Function does not reduce")
         result = np.empty(ngroups, dtype="O")
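Why a "named" path at all: user functions passed to SeriesGroupBy.agg may read
group.name, which _aggregate_named sets before each call; the unnamed fallback
covers functions that break when a name is attached. A small demonstration of the
name-dependent case:

    import pandas as pd

    ser = pd.Series([1, 2, 3, 4], index=["a", "a", "b", "b"])
    out = ser.groupby(level=0).agg(lambda g: g.sum() if g.name == "a" else g.max())
    print(out)  # a -> 3 (sum), b -> 4 (max)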
From 2ae2124fab275218268b680f5d5ce9e4bbefebe9 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 27 Aug 2020 09:01:00 -0700
Subject: [PATCH 7/9] de-duplicate

---
 pandas/core/groupby/generic.py | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9b72157ddd087..7927a77141b3d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -517,32 +517,28 @@ def _aggregate_maybe_named(self, func, *args, **kwargs):
         what libreduction does.
         """
         try:
-            return self._aggregate_named(func, *args, **kwargs)
+            return self._aggregate_named(func, *args, named=True, **kwargs)
         except KeyError:
-            return self._aggregate_unnamed(func, *args, **kwargs)
+            return self._aggregate_named(func, *args, named=False, **kwargs)
 
-    def _aggregate_named(self, func, *args, **kwargs):
+    def _aggregate_named(self, func, *args, named: bool = True, **kwargs):
         result = {}
 
         for name, group in self:  # TODO: could we have duplicate names?
-            group.name = name  # only difference vs _aggregate_unnamed
-            output = func(group, *args, **kwargs)
-            if isinstance(output, (Series, Index, np.ndarray)):
-                raise ValueError("Must produce aggregated value")
-            result[name] = output
-
-        return result
-
-    def _aggregate_unnamed(self, func, *args, **kwargs):
-        """
-        Pure-python analogue of what _python_agg_general does.
-        """
-        result = {}
-
-        for name, group in self:  # TODO: could we have duplicate names?
+            if named:
+                group.name = name
+
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
-                raise ValueError("Must produce aggregated value")
+                if (
+                    isinstance(output, Series)
+                    and len(output) == 1
+                    and name in output.index
+                ):
+                    # FIXME: kludge for test_resampler_grouper.test_apply
+                    output = output.iloc[0]
+                else:
+                    raise ValueError("Must produce aggregated value")
             result[name] = output
 
         return result

From 98a91a321ff758682bff573ecee1b0bf2e0e6d2e Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 27 Aug 2020 14:48:49 -0700
Subject: [PATCH 8/9] avoid passing RangeIndex to libreduction

---
 pandas/core/groupby/ops.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index c6171a55359fe..98e4539adbe24 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -45,7 +45,7 @@
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import base, grouper
-from pandas.core.indexes.api import Index, MultiIndex, ensure_index
+from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index
 from pandas.core.series import Series
 from pandas.core.sorting import (
     compress_group_index,
@@ -620,8 +620,10 @@ def agg_series(
             # TODO: can we get a performant workaround for EAs backed by ndarray?
             return self._aggregate_series_pure_python(obj, func)
 
-        elif obj.index._has_complex_internals:
+        elif obj.index._has_complex_internals or isinstance(obj.index, RangeIndex):
             # Preempt TypeError in _aggregate_series_fast
+            #  exclude RangeIndex because patching it in libreduction would
+            #  silently be incorrect
             return self._aggregate_series_pure_python(obj, func)
 
         try:
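For context on the RangeIndex exclusion (a sketch of internals; private attributes
shown for illustration only): a RangeIndex stores start/stop/step rather than an
ndarray buffer, so the ndarray that libreduction would slice and patch is only a
lazily materialized copy, and mutating it would silently fail to round-trip.

    import pandas as pd

    idx = pd.RangeIndex(5)
    print(idx.start, idx.stop, idx.step)  # 0 5 1 -- the actual stored state
    print(idx._data)                      # int64 ndarray materialized on demand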
From c230f72b502446ac3c4a65fe7c79c7314b158bb0 Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 1 Sep 2020 19:29:18 -0700
Subject: [PATCH 9/9] simplify

---
 pandas/core/groupby/generic.py     | 23 ++---------------------
 pandas/tests/resample/test_base.py | 13 ++++++++-----
 2 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 45833a882fc0f..20dfb3e8fddd8 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -282,27 +282,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 #  see test_groupby.test_basic
                 result = self._aggregate_maybe_named(func, *args, **kwargs)
 
-        index = Index(sorted(result), name=self.grouper.names[0])
-        if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
-            # TODO: do we _always_ want to do this?
-            #  shouldn't this be done later in e.g. _wrap_aggregated_output?
-            index = index._with_freq("infer")
-
-        result_index = self.grouper.result_index
-
-        if (
-            result_index.dtype == index.dtype
-            and result_index.freq is not None
-            and index.freq is None
-        ):
-            # TODO: will dtype equality always hold?
-            if len(index) == 1:
-                index.freq = result_index.freq
-
-            elif len(index) == 2:
-                if index[0] + result_index.freq == index[1]:
-                    # infer_freq doesn't handle length-2 indexes
-                    index.freq = result_index.freq
+        index = self.grouper.result_index
+        assert index.name == self.grouper.names[0]
 
         ret = create_series_with_explicit_dtype(
             result, index=index, dtype_if_empty=object
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index 28d33ebb23c20..5827b1f456bd7 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -195,14 +195,17 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
 
 
 @all_ts
-def test_apply_to_empty_series(empty_series_dti):
+@pytest.mark.parametrize("freq", ["M", "D", "H"])
+def test_apply_to_empty_series(empty_series_dti, freq):
     # GH 14313
     s = empty_series_dti
 
-    for freq in ["M", "D", "H"]:
-        result = s.resample(freq).apply(lambda x: 1)
-        expected = s.resample(freq).apply(np.sum)
+    result = s.resample(freq).apply(lambda x: 1)
+    expected = s.resample(freq).apply(np.sum)
+
+    assert result.index.dtype == expected.index.dtype
 
-        tm.assert_series_equal(result, expected, check_dtype=False)
+    tm.assert_series_equal(result, expected, check_dtype=False)
 
 
 @all_ts
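The simplification above leans on grouper.result_index already carrying the bin
index, freq included, so no post-hoc inference is needed. A sketch of that
guarantee from the public side (behavior assumed for pandas ~1.1):

    import numpy as np
    import pandas as pd

    ser = pd.Series(np.arange(6), index=pd.date_range("2020", periods=6, freq="D"))
    out = ser.resample("2D").agg(lambda x: x.sum())
    print(out.index.freq)  # <2 * Days> -- supplied by the grouper, not re-inferred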