From aa02b839ee6dbe36c4552344b847b2b43eee97bb Mon Sep 17 00:00:00 2001
From: Richard <rhshadrach@gmail.com>
Date: Sat, 25 Jul 2020 15:15:59 -0400
Subject: [PATCH 1/3] CLN: Clean/Simplify _wrap_applied_output

---
 pandas/core/groupby/generic.py     | 235 +++++++++++------------------
 pandas/core/indexes/api.py         |   7 +-
 pandas/tests/groupby/test_apply.py |   8 +-
 3 files changed, 97 insertions(+), 153 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index ec7b14f27c5a1..c7659d902fd32 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1213,171 +1213,112 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         if len(keys) == 0:
             return self.obj._constructor(index=keys)
 
-        key_names = self.grouper.names
-
-        # GH12824
+        # GH12824 - If first value is None, can't assume all are None
         first_not_none = next(com.not_none(*values), None)
 
         if first_not_none is None:
-            # GH9684. If all values are None, then this will throw an error.
-            # We'd prefer it return an empty dataframe.
+            # GH9684 - All values are None, return an empty frame.
             return self.obj._constructor()
-        elif isinstance(first_not_none, DataFrame):
+
+        if isinstance(first_not_none, DataFrame):
             return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
-        else:
-            if len(self.grouper.groupings) > 1:
-                key_index = self.grouper.result_index
 
+        if isinstance(first_not_none, NDFrame):
+
+            # this is to silence a DeprecationWarning
+            # TODO: Remove when default dtype of empty Series is object
+            kwargs = first_not_none._construct_axes_dict()
+            if isinstance(first_not_none, Series):
+                backup = create_series_with_explicit_dtype(
+                    **kwargs, dtype_if_empty=object
+                )
             else:
-                ping = self.grouper.groupings[0]
-                if len(keys) == ping.ngroups:
-                    key_index = ping.group_index
-                    key_index.name = key_names[0]
+                backup = first_not_none._constructor(**kwargs)
 
-                    key_lookup = Index(keys)
-                    indexer = key_lookup.get_indexer(key_index)
+            values = [x if (x is not None) else backup for x in values]
 
-                    # reorder the values
-                    values = [values[i] for i in indexer]
+        key_index = self.grouper.result_index if self.as_index else None
+        v = values[0]
 
-                    # update due to the potential reorder
-                    first_not_none = next(com.not_none(*values), None)
-                else:
+        if not isinstance(v, (np.ndarray, Index, Series)):
+            # values are not series or array-like but scalars
+            # self._selection_name not passed through to Series as the
+            # result should not take the name of original selection
+            # of columns
+            if self.as_index:
+                return self.obj._constructor_sliced(values, index=key_index)
+            else:
+                result = DataFrame(values, index=key_index, columns=[self._selection])
+                self._insert_inaxis_grouper_inplace(result)
+                return result
+
+        if not isinstance(v, ABCSeries):
+            # GH1738: values is list of arrays of unequal lengths
+            # TODO: sure this is right?  we used to do this
+            #  after raising AttributeError above
+            return self.obj._constructor_sliced(
+                values, index=key_index, name=self._selection_name
+            )
 
-                    key_index = Index(keys, name=key_names[0])
+        all_indexed_same = all_indexes_same((x.index for x in values))
+
+        # GH3596 - provide a reduction (Frame -> Series) if groups are unique
+        if self.squeeze:
+            # assign the name to this series
+            applied_index = self._selected_obj._get_axis(self.axis)
+            if len(values) == 1 and applied_index.nlevels == 1:
+                values[0].name = keys[0]
+
+                # GH2893
+                # we have series in the values array, we want to
+                # produce a series:
+                # if any of the sub-series are not indexed the same
+                # OR we don't have a multi-index and we have only a
+                # single value
+                return self._concat_objects(
+                    keys, values, not_indexed_same=not_indexed_same
+                )
 
-                # don't use the key indexer
-                if not self.as_index:
-                    key_index = None
+            # still a series
+            # path added as of GH 5545
+            elif all_indexed_same:
+                from pandas.core.reshape.concat import concat
 
-            # make Nones an empty object
-            if first_not_none is None:
-                return self.obj._constructor()
-            elif isinstance(first_not_none, NDFrame):
+                return concat(values)
 
-                # this is to silence a DeprecationWarning
-                # TODO: Remove when default dtype of empty Series is object
-                kwargs = first_not_none._construct_axes_dict()
-                if isinstance(first_not_none, Series):
-                    backup = create_series_with_explicit_dtype(
-                        **kwargs, dtype_if_empty=object
-                    )
-                else:
-                    backup = first_not_none._constructor(**kwargs)
-
-                values = [x if (x is not None) else backup for x in values]
-
-            v = values[0]
-
-            if isinstance(v, (np.ndarray, Index, Series)) or not self.as_index:
-                if isinstance(v, Series):
-                    applied_index = self._selected_obj._get_axis(self.axis)
-                    all_indexed_same = all_indexes_same([x.index for x in values])
-                    singular_series = len(values) == 1 and applied_index.nlevels == 1
-
-                    # GH3596
-                    # provide a reduction (Frame -> Series) if groups are
-                    # unique
-                    if self.squeeze:
-                        # assign the name to this series
-                        if singular_series:
-                            values[0].name = keys[0]
-
-                            # GH2893
-                            # we have series in the values array, we want to
-                            # produce a series:
-                            # if any of the sub-series are not indexed the same
-                            # OR we don't have a multi-index and we have only a
-                            # single values
-                            return self._concat_objects(
-                                keys, values, not_indexed_same=not_indexed_same
-                            )
-
-                        # still a series
-                        # path added as of GH 5545
-                        elif all_indexed_same:
-                            from pandas.core.reshape.concat import concat
-
-                            return concat(values)
-
-                    if not all_indexed_same:
-                        # GH 8467
-                        return self._concat_objects(keys, values, not_indexed_same=True)
-
-                if self.axis == 0 and isinstance(v, ABCSeries):
-                    # GH6124 if the list of Series have a consistent name,
-                    # then propagate that name to the result.
-                    index = v.index.copy()
-                    if index.name is None:
-                        # Only propagate the series name to the result
-                        # if all series have a consistent name.  If the
-                        # series do not have a consistent name, do
-                        # nothing.
-                        names = {v.name for v in values}
-                        if len(names) == 1:
-                            index.name = list(names)[0]
-
-                    # normally use vstack as its faster than concat
-                    # and if we have mi-columns
-                    if (
-                        isinstance(v.index, MultiIndex)
-                        or key_index is None
-                        or isinstance(key_index, MultiIndex)
-                    ):
-                        stacked_values = np.vstack([np.asarray(v) for v in values])
-                        result = self.obj._constructor(
-                            stacked_values, index=key_index, columns=index
-                        )
-                    else:
-                        # GH5788 instead of stacking; concat gets the
-                        # dtypes correct
-                        from pandas.core.reshape.concat import concat
-
-                        result = concat(
-                            values,
-                            keys=key_index,
-                            names=key_index.names,
-                            axis=self.axis,
-                        ).unstack()
-                        result.columns = index
-                elif isinstance(v, ABCSeries):
-                    stacked_values = np.vstack([np.asarray(v) for v in values])
-                    result = self.obj._constructor(
-                        stacked_values.T, index=v.index, columns=key_index
-                    )
-                elif not self.as_index:
-                    # We add grouping column below, so create a frame here
-                    result = DataFrame(
-                        values, index=key_index, columns=[self._selection]
-                    )
-                else:
-                    # GH#1738: values is list of arrays of unequal lengths
-                    #  fall through to the outer else clause
-                    # TODO: sure this is right?  we used to do this
-                    #  after raising AttributeError above
-                    return self.obj._constructor_sliced(
-                        values, index=key_index, name=self._selection_name
-                    )
+        if not all_indexed_same:
+            # GH 8467
+            return self._concat_objects(keys, values, not_indexed_same=True)
 
-                # if we have date/time like in the original, then coerce dates
-                # as we are stacking can easily have object dtypes here
-                so = self._selected_obj
-                if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
-                    result = _recast_datetimelike_result(result)
-                else:
-                    result = result._convert(datetime=True)
+        stacked_values = np.vstack([np.asarray(v) for v in values])
 
-                if not self.as_index:
-                    self._insert_inaxis_grouper_inplace(result)
+        if self.axis == 0:
+            index = key_index
+            columns = v.index.copy()
+            if columns.name is None:
+                # GH6124 - propagate name of Series when it's consistent
+                names = {v.name for v in values}
+                if len(names) == 1:
+                    columns.name = list(names)[0]
+        else:
+            index = v.index
+            columns = key_index
+            stacked_values = stacked_values.T
 
-                return self._reindex_output(result)
+        result = self.obj._constructor(stacked_values, index=index, columns=columns)
 
-            # values are not series or array-like but scalars
-            else:
-                # self._selection_name not passed through to Series as the
-                # result should not take the name of original selection
-                # of columns
-                return self.obj._constructor_sliced(values, index=key_index)
+        # if the original has datetime-like columns, coerce dates here:
+        # stacking the values can easily leave us with object dtypes
+        so = self._selected_obj
+        if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
+            result = _recast_datetimelike_result(result)
+        else:
+            result = result._convert(datetime=True)
+
+        if not self.as_index:
+            self._insert_inaxis_grouper_inplace(result)
+
+        return self._reindex_output(result)
 
     def _transform_general(
         self, func, *args, engine="cython", engine_kwargs=None, **kwargs
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 4c5a70f4088ee..678753f684141 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -298,15 +298,16 @@ def all_indexes_same(indexes):
 
     Parameters
     ----------
-    indexes : list of Index objects
+    indexes : iterable of Index objects
 
     Returns
     -------
     bool
         True if all indexes contain the same elements, False otherwise.
     """
-    first = indexes[0]
-    for index in indexes[1:]:
+    itr = iter(indexes)
+    first = next(itr)
+    for index in itr:
         if not first.equals(index):
             return False
     return True
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 5a1268bfb03db..8e8053d1296b5 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -868,13 +868,15 @@ def test_apply_multi_level_name(category):
     b = [1, 2] * 5
     if category:
         b = pd.Categorical(b, categories=[1, 2, 3])
+        expected_index = pd.CategoricalIndex([1, 2], categories=[1, 2, 3], name="B")
+    else:
+        expected_index = pd.Index([1, 2], name="B")
     df = pd.DataFrame(
         {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
     ).set_index(["A", "B"])
     result = df.groupby("B").apply(lambda x: x.sum())
-    expected = pd.DataFrame(
-        {"C": [20, 25], "D": [20, 25]}, index=pd.Index([1, 2], name="B")
-    )
+
+    expected = pd.DataFrame({"C": [20, 25], "D": [20, 25]}, index=expected_index)
     tm.assert_frame_equal(result, expected)
     assert df.index.names == ["A", "B"]
 

From 21e1fcae71323c001bf07fa46687245e48d65bb9 Mon Sep 17 00:00:00 2001
From: Richard <rhshadrach@gmail.com>
Date: Tue, 4 Aug 2020 18:04:12 -0400
Subject: [PATCH 2/3] Refactored if-else.

---
 pandas/core/groupby/generic.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index c7659d902fd32..8710464769a12 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1223,6 +1223,8 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         if isinstance(first_not_none, DataFrame):
             return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
 
+        key_index = self.grouper.result_index if self.as_index else None
+
         if isinstance(first_not_none, NDFrame):
 
             # this is to silence a DeprecationWarning
@@ -1236,11 +1238,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                 backup = first_not_none._constructor(**kwargs)
 
             values = [x if (x is not None) else backup for x in values]
-
-        key_index = self.grouper.result_index if self.as_index else None
-        v = values[0]
-
-        if not isinstance(v, (np.ndarray, Index, Series)):
+        else:
             # values are not series or array-like but scalars
             # self._selection_name not passed through to Series as the
             # result should not take the name of original selection
@@ -1252,6 +1250,8 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                 self._insert_inaxis_grouper_inplace(result)
                 return result
 
+        v = values[0]
+
         if not isinstance(v, ABCSeries):
             # GH1738: values is list of arrays of unequal lengths
             # TODO: sure this is right?  we used to do this

From 059405cb15189446ed81bbd2f17de7f1704099ae Mon Sep 17 00:00:00 2001
From: Richard <rhshadrach@gmail.com>
Date: Fri, 14 Aug 2020 13:52:40 -0400
Subject: [PATCH 3/3] Reworked logic for non-NDFrame cases

---
 pandas/core/groupby/generic.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 94fee533d98a6..449099e5ce073 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1231,20 +1231,8 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
 
         key_index = self.grouper.result_index if self.as_index else None
 
-        if isinstance(first_not_none, NDFrame):
-
-            # this is to silence a DeprecationWarning
-            # TODO: Remove when default dtype of empty Series is object
-            kwargs = first_not_none._construct_axes_dict()
-            if isinstance(first_not_none, Series):
-                backup = create_series_with_explicit_dtype(
-                    **kwargs, dtype_if_empty=object
-                )
-            else:
-                backup = first_not_none._constructor(**kwargs)
+        if not isinstance(first_not_none, (Series, np.ndarray, Index)):
 
-            values = [x if (x is not None) else backup for x in values]
-        else:
             # values are not series or array-like but scalars
             # self._selection_name not passed through to Series as the
             # result should not take the name of original selection
@@ -1256,9 +1244,8 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                 self._insert_inaxis_grouper_inplace(result)
                 return result
 
-        v = values[0]
+        elif not isinstance(first_not_none, Series):
 
-        if not isinstance(v, ABCSeries):
             # GH1738: values is list of arrays of unequal lengths
             # TODO: sure this is right?  we used to do this
             #  after raising AttributeError above
@@ -1266,6 +1253,14 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                 values, index=key_index, name=self._selection_name
             )
 
+        # this is to silence a DeprecationWarning
+        # TODO: Replace when default dtype of empty Series is object
+        #       with backup = first_not_none._constructor(**kwargs)
+        kwargs = first_not_none._construct_axes_dict()
+        backup = create_series_with_explicit_dtype(**kwargs, dtype_if_empty=object)
+        values = [x if (x is not None) else backup for x in values]
+
+        v = values[0]
         all_indexed_same = all_indexes_same((x.index for x in values))
 
         # GH3596 - provide a reduction (Frame -> Series) if groups are unique
@@ -1273,7 +1268,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
             # assign the name to this series
             applied_index = self._selected_obj._get_axis(self.axis)
             if len(values) == 1 and applied_index.nlevels == 1:
-                values[0].name = keys[0]
+                v.name = keys[0]
 
                 # GH2893
                 # we have series in the values array, we want to