pandas-dev · mroeschke · Apr 14, 2023 · Apr 13, 2023
diff --git a/pandas/_libs/reduction.pyi b/pandas/_libs/reduction.pyi
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -30,7 +30,6 @@
 from pandas._libs import (
     Interval,
     lib,
-    reduction as libreduction,
 )
 from pandas.errors import SpecificationError
 from pandas.util._decorators import (
@@ -66,7 +65,10 @@
 )
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
-from pandas.core.groupby import base
+from pandas.core.groupby import (
+    base,
+    ops,
+)
 from pandas.core.groupby.groupby import (
     GroupBy,
     GroupByPlot,
@@ -437,10 +439,10 @@ def _aggregate_named(self, func, *args, **kwargs):
             object.__setattr__(group, "name", name)
 
             output = func(group, *args, **kwargs)
-            output = libreduction.extract_result(output)
+            output = ops.extract_result(output)
             if not initialized:
                 # We only do this validation on the first iteration
-                libreduction.check_result_array(output, group.dtype)
+                ops.check_result_array(output, group.dtype)
                 initialized = True
             result[name] = output
 

@@ -26,7 +26,6 @@
     lib,
 )
 import pandas._libs.groupby as libgroupby
-import pandas._libs.reduction as libreduction
 from pandas._typing import (
     ArrayLike,
     AxisInt,
@@ -75,6 +74,31 @@
     from pandas.core.generic import NDFrame
 
 
+def check_result_array(obj, dtype):
+    # Our operation is supposed to be an aggregation/reduction. If
+    #  it returns an ndarray, this likely means an invalid operation has
+    #  been passed. See test_apply_without_aggregation, test_agg_must_agg
+    if isinstance(obj, np.ndarray):
+        if dtype != object:
+            # If it is object dtype, the function can be a reduction/aggregation
+            #  and still return an ndarray e.g. test_agg_over_numpy_arrays
+            raise ValueError("Must produce aggregated value")
+
+
+def extract_result(res):
+    """
+    Extract the result object, it might be a 0-dim ndarray
+    or a len-1 0-dim, or a scalar
+    """
+    if hasattr(res, "_values"):
+        # Preserve EA
+        res = res._values
+        if res.ndim == 1 and len(res) == 1:
+            # see test_agg_lambda_with_timezone, test_resampler_grouper.py::test_apply
+            res = res[0]
+    return res
+
+
 class WrappedCythonOp:
     """
     Dispatch logic for functions defined in _libs.groupby
@@ -836,11 +860,11 @@ def _aggregate_series_pure_python(
 
         for i, group in enumerate(splitter):
             res = func(group)
-            res = libreduction.extract_result(res)
+            res = extract_result(res)
 
             if not initialized:
                 # We only do this validation on the first iteration
-                libreduction.check_result_array(res, group.dtype)
+                check_result_array(res, group.dtype)
                 initialized = True
 
             result[i] = res
@@ -948,7 +972,7 @@ def __init__(
         self.indexer = indexer
 
         # These lengths must match, otherwise we could call agg_series
-        #  with empty self.bins, which would raise in libreduction.
+        #  with empty self.bins, which would raise later.
         assert len(self.binlabels) == len(self.bins)
 
     @cache_readonly

diff --git a/setup.py b/setup.py
@@ -204,7 +204,6 @@ class CheckSDist(sdist_class):
         "pandas/_libs/interval.pyx",
         "pandas/_libs/hashing.pyx",
         "pandas/_libs/missing.pyx",
-        "pandas/_libs/reduction.pyx",
         "pandas/_libs/testing.pyx",
         "pandas/_libs/sparse.pyx",
         "pandas/_libs/ops.pyx",
@@ -486,7 +485,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
             "pandas/_libs/src/pd_parser.h",
         ],
     },
-    "_libs.reduction": {"pyxfile": "_libs/reduction"},
     "_libs.ops": {"pyxfile": "_libs/ops"},
     "_libs.ops_dispatch": {"pyxfile": "_libs/ops_dispatch"},
     "_libs.properties": {"pyxfile": "_libs/properties"},