From dea76c10b9b6d6c031c22efb918af5b03780b4f6 Mon Sep 17 00:00:00 2001
From: Jeremy Schendel <jschendel@users.noreply.github.com>
Date: Wed, 22 Jan 2020 19:36:00 -0700
Subject: [PATCH 1/4] REGR: Fix GroupBy aggregation with ExtensionArray backed
 index

---
 pandas/_libs/reduction.pyx                       |  6 +++---
 pandas/tests/groupby/aggregate/test_aggregate.py | 11 +++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 8571761f77265..635c0e36d7457 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -158,7 +158,7 @@ cdef class _BaseGrouper:
         if util.is_array(values) and not values.flags.contiguous:
             # e.g. Categorical has no `flags` attribute
             values = values.copy()
-        index = dummy.index.values
+        index = dummy.index.to_numpy()
         if not index.flags.contiguous:
             index = index.copy()
 
@@ -229,7 +229,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
         self.arr = values
         self.typ = series._constructor
         self.ityp = series.index._constructor
-        self.index = series.index.values
+        self.index = series.index.to_numpy()
         self.name = series.name
 
         self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
@@ -326,7 +326,7 @@ cdef class SeriesGrouper(_BaseGrouper):
         self.arr = values
         self.typ = series._constructor
         self.ityp = series.index._constructor
-        self.index = series.index.values
+        self.index = series.index.to_numpy()
         self.name = series.name
 
         self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 0a7272bbc131c..3629b0b52b3aa 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -360,6 +360,17 @@ def test_func_duplicates_raises():
         df.groupby("A").agg(["min", "min"])
 
 
+@pytest.mark.parametrize(
+    "index", [pd.CategoricalIndex(list("abc")), pd.interval_range(0, 3)]
+)
+def test_agg_with_ea_backed_index(index):
+    # GH 31223
+    df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
+    result = df.groupby("group").agg({"value": Series.nunique})
+    expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group")
+    tm.assert_frame_equal(result, expected)
+
+
 class TestNamedAggregationSeries:
     def test_series_named_agg(self):
         df = pd.Series([1, 2, 3, 4])

From ed3a9a340bfff9287351489e5889eac32e774394 Mon Sep 17 00:00:00 2001
From: Jeremy Schendel <jschendel@users.noreply.github.com>
Date: Sun, 26 Jan 2020 13:45:58 -0700
Subject: [PATCH 2/4] add back _has_complex_internals

---
 pandas/_libs/reduction.pyx                       |  6 +++---
 pandas/core/apply.py                             |  7 +++----
 pandas/core/groupby/ops.py                       |  4 ++--
 pandas/core/indexes/base.py                      |  8 ++++++++
 pandas/core/indexes/category.py                  |  5 +++++
 pandas/core/indexes/interval.py                  |  5 +++++
 pandas/core/indexes/multi.py                     |  5 +++++
 pandas/core/indexes/period.py                    |  5 +++++
 pandas/tests/groupby/aggregate/test_aggregate.py | 10 ++++++++--
 9 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 635c0e36d7457..8571761f77265 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -158,7 +158,7 @@ cdef class _BaseGrouper:
         if util.is_array(values) and not values.flags.contiguous:
             # e.g. Categorical has no `flags` attribute
             values = values.copy()
-        index = dummy.index.to_numpy()
+        index = dummy.index.values
         if not index.flags.contiguous:
             index = index.copy()
 
@@ -229,7 +229,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
         self.arr = values
         self.typ = series._constructor
         self.ityp = series.index._constructor
-        self.index = series.index.to_numpy()
+        self.index = series.index.values
         self.name = series.name
 
         self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
@@ -326,7 +326,7 @@ cdef class SeriesGrouper(_BaseGrouper):
         self.arr = values
         self.typ = series._constructor
         self.ityp = series.index._constructor
-        self.index = series.index.to_numpy()
+        self.index = series.index.values
         self.name = series.name
 
         self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index ca1be3154757a..9947866a76e3f 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -14,7 +14,7 @@
     is_list_like,
     is_sequence,
 )
-from pandas.core.dtypes.generic import ABCMultiIndex, ABCSeries
+from pandas.core.dtypes.generic import ABCSeries
 
 from pandas.core.construction import create_series_with_explicit_dtype
 
@@ -278,9 +278,8 @@ def apply_standard(self):
         if (
             self.result_type in ["reduce", None]
             and not self.dtypes.apply(is_extension_array_dtype).any()
-            # Disallow complex_internals since libreduction shortcut
-            #  cannot handle MultiIndex
-            and not isinstance(self.agg_axis, ABCMultiIndex)
+            # Disallow complex_internals since libreduction shortcut raises a TypeError
+            and not self.agg_axis._has_complex_internals
         ):
 
             values = self.values
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 37067a1897a52..844ad9a7068ea 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -616,8 +616,8 @@ def agg_series(self, obj: Series, func):
             # TODO: can we get a performant workaround for EAs backed by ndarray?
             return self._aggregate_series_pure_python(obj, func)
 
-        elif isinstance(obj.index, MultiIndex):
-            # MultiIndex; Pre-empt TypeError in _aggregate_series_fast
+        elif obj.index._has_complex_internals:
+            # Pre-empt TypeError in _aggregate_series_fast
             return self._aggregate_series_pure_python(obj, func)
 
         try:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index c158bdfbac441..7f55f4f3f1357 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4099,6 +4099,14 @@ def _assert_can_do_op(self, value):
         if not is_scalar(value):
             raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
 
+    @property
+    def _has_complex_internals(self):
+        """
+        Indicates if an index is not directly backed by a numpy array
+        """
+        # used to disable groupby tricks
+        return False
+
     def _is_memory_usage_qualified(self) -> bool:
         """
         Return a boolean if we need a qualified .info display.
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 268ab9ba4e4c4..4f02faacad3df 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -378,6 +378,11 @@ def values(self):
         """ return the underlying data, which is a Categorical """
         return self._data
 
+    @property
+    def _has_complex_internals(self):
+        # to disable groupby tricks
+        return True
+
     def _wrap_setop_result(self, other, result):
         name = get_op_result_name(self, other)
         # We use _shallow_copy rather than the Index implementation
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 3108c1a1afd0c..1bdab23d039fe 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -408,6 +408,11 @@ def values(self):
         """
         return self._data
 
+    @property
+    def _has_complex_internals(self):
+        # to disable groupby tricks
+        return True
+
     def __array_wrap__(self, result, context=None):
         # we don't want the superclass implementation
         return result
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index a26a01ab7be21..d9546685e212e 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1346,6 +1346,11 @@ def values(self):
         self._tuples = lib.fast_zip(values)
         return self._tuples
 
+    @property
+    def _has_complex_internals(self):
+        # to disable groupby tricks
+        return True
+
     @cache_readonly
     def is_monotonic_increasing(self) -> bool:
         """
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 2a40f4a6f6239..d2509aac8b5d1 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -255,6 +255,11 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
     def values(self):
         return np.asarray(self)
 
+    @property
+    def _has_complex_internals(self):
+        # to disable groupby tricks
+        return True
+
     def _shallow_copy(self, values=None, **kwargs):
         # TODO: simplify, figure out type of values
         if values is None:
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 3629b0b52b3aa..67bdcc246579e 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -361,9 +361,15 @@ def test_func_duplicates_raises():
 
 
 @pytest.mark.parametrize(
-    "index", [pd.CategoricalIndex(list("abc")), pd.interval_range(0, 3)]
+    "index",
+    [
+        pd.CategoricalIndex(list("abc")),
+        pd.interval_range(0, 3),
+        pd.period_range("2020", periods=3, freq="D"),
+        pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
+    ],
 )
-def test_agg_with_ea_backed_index(index):
+def test_agg_index_has_complex_internals(index):
     # GH 31223
     df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
     result = df.groupby("group").agg({"value": Series.nunique})

From 1018ca9383e672fd6f606d0cc7adb4b1d82cc2bf Mon Sep 17 00:00:00 2001
From: Jeremy Schendel <jschendel@users.noreply.github.com>
Date: Mon, 27 Jan 2020 09:01:32 -0700
Subject: [PATCH 3/4] prevent fast_apply case for _has_complex_internals

---
 pandas/core/groupby/ops.py         |  4 ++--
 pandas/tests/groupby/test_apply.py | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 844ad9a7068ea..679d3668523c2 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -164,8 +164,8 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0):
             com.get_callable_name(f) not in base.plotting_methods
             and isinstance(splitter, FrameSplitter)
             and axis == 0
-            # apply_frame_axis0 doesn't allow MultiIndex
-            and not isinstance(sdata.index, MultiIndex)
+            # fast_apply/libreduction doesn't allow non-numpy backed indexes
+            and not sdata.index._has_complex_internals
         ):
             try:
                 result_values, mutated = splitter.fast_apply(f, group_keys)
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index fc7b9f56002d8..c18ef73203914 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -811,3 +811,19 @@ def test_groupby_apply_datetime_result_dtypes():
         index=["observation", "color", "mood", "intensity", "score"],
     )
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [
+        pd.CategoricalIndex(list("abc")),
+        pd.interval_range(0, 3),
+        pd.period_range("2020", periods=3, freq="D"),
+        pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
+    ],
+)
+def test_apply_index_has_complex_internals(index):
+    # GH 31248
+    df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
+    result = df.groupby("group").apply(lambda x: x)
+    tm.assert_frame_equal(result, df)

From cc400cd6d9a61c5ba0e7c1251cce8aee0b10a1a4 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Mon, 27 Jan 2020 18:07:07 -0700
Subject: [PATCH 4/4] update comment

---
 pandas/core/indexes/base.py     | 2 +-
 pandas/core/indexes/category.py | 2 +-
 pandas/core/indexes/interval.py | 2 +-
 pandas/core/indexes/multi.py    | 2 +-
 pandas/core/indexes/period.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index edc97fccc89cf..00f0984d43578 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4114,7 +4114,7 @@ def _has_complex_internals(self):
         """
         Indicates if an index is not directly backed by a numpy array
         """
-        # used to disable groupby tricks
+        # used to avoid libreduction code paths, which raise or require conversion
         return False
 
     def _is_memory_usage_qualified(self) -> bool:
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 4f02faacad3df..36ba86bdc9f07 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -380,7 +380,7 @@ def values(self):
 
     @property
     def _has_complex_internals(self):
-        # to disable groupby tricks
+        # used to avoid libreduction code paths, which raise or require conversion
         return True
 
     def _wrap_setop_result(self, other, result):
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index af9264393c9b9..89db2b4a7a379 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -413,7 +413,7 @@ def values(self):
 
     @property
     def _has_complex_internals(self):
-        # to disable groupby tricks
+        # used to avoid libreduction code paths, which raise or require conversion
         return True
 
     def __array_wrap__(self, result, context=None):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 8e745a7ee38aa..2aa7a00707f24 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1348,7 +1348,7 @@ def values(self):
 
     @property
     def _has_complex_internals(self):
-        # to disable groupby tricks
+        # used to avoid libreduction code paths, which raise or require conversion
         return True
 
     @cache_readonly
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 4541038d44fd4..106aaeec4f8be 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -257,7 +257,7 @@ def values(self):
 
     @property
     def _has_complex_internals(self):
-        # to disable groupby tricks
+        # used to avoid libreduction code paths, which raise or require conversion
         return True
 
     def _shallow_copy(self, values=None, **kwargs):