From 5e5059e0e528d859f77bef148baf497800f14cbe Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 8 May 2021 15:05:49 -0700 Subject: [PATCH 1/3] REF: re-use dispatch methods in DataFrameGroupBy.nunique --- pandas/core/groupby/generic.py | 61 ++++++++++++++++------------ pandas/core/groupby/groupby.py | 4 +- pandas/tests/groupby/test_groupby.py | 13 ++++-- 3 files changed, 46 insertions(+), 32 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9287163053cac..7de99a7735a53 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1627,21 +1627,21 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: return self._reindex_output(result)._convert(datetime=True) - def _iterate_column_groupbys(self): - for i, colname in enumerate(self._selected_obj.columns): + def _iterate_column_groupbys(self, obj: FrameOrSeries): + for i, colname in enumerate(obj.columns): yield colname, SeriesGroupBy( - self._selected_obj.iloc[:, i], + obj.iloc[:, i], selection=colname, grouper=self.grouper, exclusions=self.exclusions, ) - def _apply_to_column_groupbys(self, func) -> DataFrame: + def _apply_to_column_groupbys(self, func, obj: FrameOrSeries) -> DataFrame: from pandas.core.reshape.concat import concat - columns = self._selected_obj.columns + columns = obj.columns results = [ - func(col_groupby) for _, col_groupby in self._iterate_column_groupbys() + func(col_groupby) for _, col_groupby in self._iterate_column_groupbys(obj) ] if not len(results): @@ -1730,39 +1730,46 @@ def nunique(self, dropna: bool = True) -> DataFrame: """ from pandas.core.reshape.concat import concat - # TODO: this is duplicative of how GroupBy naturally works - # Try to consolidate with normal wrapping functions - obj = self._obj_with_exclusions + if self.axis == 0: - iter_func = obj.items + results = self._apply_to_column_groupbys( + lambda sgb: sgb.nunique(dropna), obj=obj + ) + results.columns.names = obj.columns.names # TODO: do at higher level? 
else: + # see test_groupby_crash_on_nunique + # TODO: this is duplicative of how GroupBy naturally works + # Try to consolidate with normal wrapping functions + iter_func = obj.iterrows - res_list = [ - SeriesGroupBy(content, selection=label, grouper=self.grouper).nunique( - dropna - ) - for label, content in iter_func() - ] - if res_list: - results = concat(res_list, axis=1) - results = cast(DataFrame, results) - else: - # concat would raise - results = DataFrame( - [], index=self.grouper.result_index, columns=obj.columns[:0] - ) + res_list = [ + SeriesGroupBy(content, selection=label, grouper=self.grouper).nunique( + dropna + ) + for label, content in iter_func() + ] + if res_list: + results = concat(res_list, axis=1) + results = cast(DataFrame, results) + else: + # concat would raise + results = DataFrame( + [], index=self.grouper.result_index, columns=obj.columns[:0] + ) - if self.axis == 1: results = results.T - other_axis = 1 - self.axis - results._get_axis(other_axis).names = obj._get_axis(other_axis).names + results.index.names = obj.index.names + if results.index.equals(obj.index): + # retain freq attribute on DatetimeIndex/TimedeltaIndex + results.index = obj.index.copy() if not self.as_index: results.index = ibase.default_index(len(results)) self._insert_inaxis_grouper_inplace(results) + return results @Appender(DataFrame.idxmax.__doc__) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1105c1bd1d782..d6b0e118cc7ce 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1904,7 +1904,9 @@ def ohlc(self) -> DataFrame: ) return self._reindex_output(result) - return self._apply_to_column_groupbys(lambda x: x.ohlc()) + return self._apply_to_column_groupbys( + lambda x: x.ohlc(), self._obj_with_exclusions + ) @final @doc(DataFrame.describe) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f716a3a44cd54..7e3feae844061 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2060,23 +2060,28 @@ def test_dup_labels_output_shape(groupby_func, idx): def test_groupby_crash_on_nunique(axis): # Fix following 30253 + dti = date_range("2016-01-01", periods=2, name="foo") df = DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) + df.columns.names = ("bar", "baz") + df.index = dti axis_number = df._get_axis_number(axis) if not axis_number: df = df.T - result = df.groupby(axis=axis_number, level=0).nunique() + gb = df.groupby(axis=axis_number, level=0) + result = gb.nunique() - expected = DataFrame({"A": [1, 2], "D": [1, 1]}) + expected = DataFrame({"A": [1, 2], "D": [1, 1]}, index=dti) + expected.columns.name = "bar" if not axis_number: expected = expected.T tm.assert_frame_equal(result, expected) # same thing, but empty columns - gb = df[[]].groupby(axis=axis_number, level=0) - res = gb.nunique() + gb2 = df[[]].groupby(axis=axis_number, level=0) + res = gb2.nunique() exp = expected[[]] tm.assert_frame_equal(res, exp) From 88fa90973a358c1223f738183cd036ced0dacec1 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 8 May 2021 20:27:04 -0700 Subject: [PATCH 2/3] REF: re-use machinery in DataFrameGroupBy.nunique --- pandas/core/groupby/generic.py | 46 +++++++--------------------- pandas/tests/groupby/test_groupby.py | 13 ++++++-- 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7de99a7735a53..76c53f2888a8f 100644 --- a/pandas/core/groupby/generic.py +++ 
b/pandas/core/groupby/generic.py @@ -22,7 +22,6 @@ Mapping, TypeVar, Union, - cast, ) import warnings @@ -1576,6 +1575,10 @@ def _wrap_aggregated_output( if self.axis == 1: result = result.T + if result.index.equals(self.obj.index): + # Retain e.g. DatetimeIndex/TimedeltaIndex freq + result.index = self.obj.index.copy() + # TODO: Do this more systematically return self._reindex_output(result) @@ -1728,43 +1731,16 @@ def nunique(self, dropna: bool = True) -> DataFrame: 4 ham 5 x 5 ham 5 y """ - from pandas.core.reshape.concat import concat - - obj = self._obj_with_exclusions - if self.axis == 0: - results = self._apply_to_column_groupbys( - lambda sgb: sgb.nunique(dropna), obj=obj - ) - results.columns.names = obj.columns.names # TODO: do at higher level? - else: + if self.axis != 0: # see test_groupby_crash_on_nunique - # TODO: this is duplicative of how GroupBy naturally works - # Try to consolidate with normal wrapping functions + return self._python_agg_general(lambda sgb: sgb.nunique(dropna)) - iter_func = obj.iterrows - - res_list = [ - SeriesGroupBy(content, selection=label, grouper=self.grouper).nunique( - dropna - ) - for label, content in iter_func() - ] - if res_list: - results = concat(res_list, axis=1) - results = cast(DataFrame, results) - else: - # concat would raise - results = DataFrame( - [], index=self.grouper.result_index, columns=obj.columns[:0] - ) - - results = results.T - - results.index.names = obj.index.names - if results.index.equals(obj.index): - # retain freq attribute on DatetimeIndex/TimedeltaIndex - results.index = obj.index.copy() + obj = self._obj_with_exclusions + results = self._apply_to_column_groupbys( + lambda sgb: sgb.nunique(dropna), obj=obj + ) + results.columns.names = obj.columns.names # TODO: do at higher level? 
if not self.as_index: results.index = ibase.default_index(len(results)) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7e3feae844061..67d2af46ac8ee 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2079,10 +2079,17 @@ def test_groupby_crash_on_nunique(axis): tm.assert_frame_equal(result, expected) - # same thing, but empty columns - gb2 = df[[]].groupby(axis=axis_number, level=0) + if axis_number == 0: + # same thing, but empty columns + gb2 = df[[]].groupby(axis=axis_number, level=0) + exp = expected[[]] + else: + # same thing, but empty rows + gb2 = df.loc[[]].groupby(axis=axis_number, level=0) + # default for empty when we can't infer a dtype is float64 + exp = expected.loc[[]].astype(np.float64) + res = gb2.nunique() - exp = expected[[]] tm.assert_frame_equal(res, exp) From a8564272a698218a8ed2654b98b1272e43623b33 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 9 May 2021 09:17:45 -0700 Subject: [PATCH 3/3] fix xfail --- pandas/tests/resample/test_time_grouper.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 5dc64a33098f3..7cc2b7f72fb69 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -121,12 +121,8 @@ def test_aaa_group_order(): tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5]) -def test_aggregate_normal(request, resample_method): +def test_aggregate_normal(resample_method): """Check TimeGrouper's aggregation is identical as normal groupby.""" - if resample_method == "ohlc": - request.node.add_marker( - pytest.mark.xfail(reason="DataError: No numeric types to aggregate") - ) data = np.random.randn(20, 4) normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
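
Note (illustrative only, not part of the patch series): a minimal sketch of the
user-facing behavior exercised by the updated test_groupby_crash_on_nunique
above. It assumes a pandas build with these patches applied and a version in
which DataFrame.groupby(..., axis=1) is still supported.

    import pandas as pd

    # Mirrors the test setup: MultiIndex columns named ("bar", "baz") and a
    # DatetimeIndex named "foo" (its freq should survive the groupby).
    dti = pd.date_range("2016-01-01", periods=2, name="foo")
    df = pd.DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]})
    df.columns.names = ("bar", "baz")
    df.index = dti

    # axis=1: group the columns by their first level and count unique values
    # per row.  After these patches this path no longer crashes and the result
    # keeps the original DatetimeIndex, including its name and freq.
    result = df.groupby(level=0, axis=1).nunique()
    # -> columns ["A", "D"] (name "bar"), index == dti, values [[1, 1], [2, 1]]

    # axis=0 on the transposed frame produces the transposed result, as the
    # test asserts.
    result_t = df.T.groupby(level=0, axis=0).nunique()
    # -> index ["A", "D"] (name "bar"), columns == dti, values [[1, 2], [1, 1]]

After patch 2/3, the axis != 0 case is routed through _python_agg_general and the
axis == 0 case through _apply_to_column_groupbys, so nunique reuses the regular
wrapping machinery instead of the hand-rolled concat logic it replaced. Patch 1
also dispatches DataFrameGroupBy.ohlc through the same helper over
_obj_with_exclusions rather than _selected_obj, which appears to be what allows
the ohlc xfail to be dropped from test_aggregate_normal in patch 3.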