diff --git a/pandas/_libs/reduction.pyi b/pandas/_libs/reduction.pyi deleted file mode 100644 index 525546f26c854..0000000000000 --- a/pandas/_libs/reduction.pyi +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Any - -from pandas._typing import DtypeObj - -def check_result_array(obj: object, dtype: DtypeObj) -> None: ... -def extract_result(res: object) -> Any: ... diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx deleted file mode 100644 index 7ff0842678d7f..0000000000000 --- a/pandas/_libs/reduction.pyx +++ /dev/null @@ -1,33 +0,0 @@ -import numpy as np - -cimport numpy as cnp - -cnp.import_array() - -from pandas._libs.util cimport is_array - - -cdef cnp.dtype _dtype_obj = np.dtype("object") - - -cpdef check_result_array(object obj, object dtype): - # Our operation is supposed to be an aggregation/reduction. If - # it returns an ndarray, this likely means an invalid operation has - # been passed. See test_apply_without_aggregation, test_agg_must_agg - if is_array(obj): - if dtype != _dtype_obj: - # If it is object dtype, the function can be a reduction/aggregation - # and still return an ndarray e.g. test_agg_over_numpy_arrays - raise ValueError("Must produce aggregated value") - - -cpdef inline extract_result(object res): - """ extract the result object, it might be a 0-dim ndarray - or a len-1 0-dim, or a scalar """ - if hasattr(res, "_values"): - # Preserve EA - res = res._values - if res.ndim == 1 and len(res) == 1: - # see test_agg_lambda_with_timezone, test_resampler_grouper.py::test_apply - res = res[0] - return res diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 228fb90c02293..c9ce9d4acde8d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -30,7 +30,6 @@ from pandas._libs import ( Interval, lib, - reduction as libreduction, ) from pandas.errors import SpecificationError from pandas.util._decorators import ( @@ -66,7 +65,10 @@ ) import pandas.core.common as com from pandas.core.frame import DataFrame -from pandas.core.groupby import base +from pandas.core.groupby import ( + base, + ops, +) from pandas.core.groupby.groupby import ( GroupBy, GroupByPlot, @@ -437,10 +439,10 @@ def _aggregate_named(self, func, *args, **kwargs): object.__setattr__(group, "name", name) output = func(group, *args, **kwargs) - output = libreduction.extract_result(output) + output = ops.extract_result(output) if not initialized: # We only do this validation on the first iteration - libreduction.check_result_array(output, group.dtype) + ops.check_result_array(output, group.dtype) initialized = True result[name] = output diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 9134ddc2026bc..d177ca324ec54 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -26,7 +26,6 @@ lib, ) import pandas._libs.groupby as libgroupby -import pandas._libs.reduction as libreduction from pandas._typing import ( ArrayLike, AxisInt, @@ -75,6 +74,31 @@ from pandas.core.generic import NDFrame +def check_result_array(obj, dtype): + # Our operation is supposed to be an aggregation/reduction. If + # it returns an ndarray, this likely means an invalid operation has + # been passed. See test_apply_without_aggregation, test_agg_must_agg + if isinstance(obj, np.ndarray): + if dtype != object: + # If it is object dtype, the function can be a reduction/aggregation + # and still return an ndarray e.g. test_agg_over_numpy_arrays + raise ValueError("Must produce aggregated value") + + +def extract_result(res): + """ + Extract the result object, it might be a 0-dim ndarray + or a len-1 0-dim, or a scalar + """ + if hasattr(res, "_values"): + # Preserve EA + res = res._values + if res.ndim == 1 and len(res) == 1: + # see test_agg_lambda_with_timezone, test_resampler_grouper.py::test_apply + res = res[0] + return res + + class WrappedCythonOp: """ Dispatch logic for functions defined in _libs.groupby @@ -836,11 +860,11 @@ def _aggregate_series_pure_python( for i, group in enumerate(splitter): res = func(group) - res = libreduction.extract_result(res) + res = extract_result(res) if not initialized: # We only do this validation on the first iteration - libreduction.check_result_array(res, group.dtype) + check_result_array(res, group.dtype) initialized = True result[i] = res @@ -948,7 +972,7 @@ def __init__( self.indexer = indexer # These lengths must match, otherwise we could call agg_series - # with empty self.bins, which would raise in libreduction. + # with empty self.bins, which would raise later. assert len(self.binlabels) == len(self.bins) @cache_readonly diff --git a/setup.py b/setup.py index 857cc4c71b70c..49f6557e2e250 100755 --- a/setup.py +++ b/setup.py @@ -204,7 +204,6 @@ class CheckSDist(sdist_class): "pandas/_libs/interval.pyx", "pandas/_libs/hashing.pyx", "pandas/_libs/missing.pyx", - "pandas/_libs/reduction.pyx", "pandas/_libs/testing.pyx", "pandas/_libs/sparse.pyx", "pandas/_libs/ops.pyx", @@ -486,7 +485,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/pd_parser.h", ], }, - "_libs.reduction": {"pyxfile": "_libs/reduction"}, "_libs.ops": {"pyxfile": "_libs/ops"}, "_libs.ops_dispatch": {"pyxfile": "_libs/ops_dispatch"}, "_libs.properties": {"pyxfile": "_libs/properties"},