diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index e3a643a38e24c..a603222094bdb 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -918,12 +918,12 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager return pytest.raises(expected_exception, match=None) -cython_table = pd.core.base.SelectionMixin._cython_table.items() +cython_table = pd.core.common._cython_table.items() def get_cython_table_params(ndframe, func_names_and_expected): """ - Combine frame, functions from SelectionMixin._cython_table + Combine frame, functions from com._cython_table keys and expected result. Parameters diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9d6e81ed8dda5..86cde647cc798 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -44,6 +44,7 @@ from pandas.core.algorithms import safe_sort from pandas.core.base import ( DataError, + SelectionMixin, SpecificationError, ) import pandas.core.common as com @@ -173,7 +174,7 @@ def agg(self) -> FrameOrSeriesUnion | None: return self.agg_list_like() if callable(arg): - f = obj._get_cython_func(arg) + f = com.get_cython_func(arg) if f and not args and not kwargs: return getattr(obj, f)() @@ -301,10 +302,10 @@ def transform_str_or_callable(self, func) -> FrameOrSeriesUnion: kwargs = self.kwargs if isinstance(func, str): - return obj._try_aggregate_string_function(func, *args, **kwargs) + return self._try_aggregate_string_function(obj, func, *args, **kwargs) if not args and not kwargs: - f = obj._get_cython_func(func) + f = com.get_cython_func(func) if f: return getattr(obj, f)() @@ -327,7 +328,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion: obj = self.obj arg = cast(List[AggFuncTypeBase], self.f) - if obj._selected_obj.ndim == 1: + if not isinstance(obj, SelectionMixin): + # i.e. 
obj is Series or DataFrame + selected_obj = obj + elif obj._selected_obj.ndim == 1: selected_obj = obj._selected_obj else: selected_obj = obj._obj_with_exclusions @@ -406,13 +410,19 @@ def agg_dict_like(self) -> FrameOrSeriesUnion: obj = self.obj arg = cast(AggFuncTypeDict, self.f) - selected_obj = obj._selected_obj + if not isinstance(obj, SelectionMixin): + # i.e. obj is Series or DataFrame + selected_obj = obj + selection = None + else: + selected_obj = obj._selected_obj + selection = obj._selection arg = self.normalize_dictlike_arg("agg", selected_obj, arg) if selected_obj.ndim == 1: # key only used for output - colg = obj._gotitem(obj._selection, ndim=1) + colg = obj._gotitem(selection, ndim=1) results = {key: colg.agg(how) for key, how in arg.items()} else: # key used for column selection and output @@ -486,7 +496,7 @@ def maybe_apply_str(self) -> FrameOrSeriesUnion | None: self.kwargs["axis"] = self.axis elif self.axis != 0: raise ValueError(f"Operation {f} does not support axis=1") - return obj._try_aggregate_string_function(f, *self.args, **self.kwargs) + return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs) def maybe_apply_multiple(self) -> FrameOrSeriesUnion | None: """ @@ -547,6 +557,35 @@ def normalize_dictlike_arg( func = new_func return func + def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs): + """ + if arg is a string, then try to operate on it: + - try to find a function (or attribute) on obj + - try to find a numpy function + - raise + """ + assert isinstance(arg, str) + + f = getattr(obj, arg, None) + if f is not None: + if callable(f): + return f(*args, **kwargs) + + # people may try to aggregate on a non-callable attribute + # but don't let them think they can pass args to it + assert len(args) == 0 + assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0 + return f + + f = getattr(np, arg, None) + if f is not None and hasattr(obj, "__array__"): + # in particular exclude 
Window + return f(obj, *args, **kwargs) + + raise AttributeError( + f"'{arg}' is not a valid function for '{type(obj).__name__}' object" + ) + class FrameApply(Apply): obj: DataFrame diff --git a/pandas/core/base.py b/pandas/core/base.py index 3b6ff4ac9aee4..05ec9d543976a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -2,12 +2,10 @@ Base and utility classes for pandas objects. """ -import builtins import textwrap from typing import ( TYPE_CHECKING, Any, - Callable, Dict, FrozenSet, Optional, @@ -176,36 +174,6 @@ class SelectionMixin: _internal_names = ["_cache", "__setstate__"] _internal_names_set = set(_internal_names) - _builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min} - - _cython_table = { - builtins.sum: "sum", - builtins.max: "max", - builtins.min: "min", - np.all: "all", - np.any: "any", - np.sum: "sum", - np.nansum: "sum", - np.mean: "mean", - np.nanmean: "mean", - np.prod: "prod", - np.nanprod: "prod", - np.std: "std", - np.nanstd: "std", - np.var: "var", - np.nanvar: "var", - np.median: "median", - np.nanmedian: "median", - np.max: "max", - np.nanmax: "max", - np.min: "min", - np.nanmin: "min", - np.cumprod: "cumprod", - np.nancumprod: "cumprod", - np.cumsum: "cumsum", - np.nancumsum: "cumsum", - } - @property def _selection_name(self): """ @@ -216,6 +184,7 @@ def _selection_name(self): """ return self._selection + @final @property def _selection_list(self): if not isinstance( @@ -240,6 +209,7 @@ def _selected_obj(self): def ndim(self) -> int: return self._selected_obj.ndim + @final @cache_readonly def _obj_with_exclusions(self): # error: "SelectionMixin" has no attribute "obj" @@ -308,48 +278,6 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate - def _try_aggregate_string_function(self, arg: str, *args, **kwargs): - """ - if arg is a string, then try to operate on it: - - try to find a function (or attribute) on ourselves - - try to find a numpy function - - raise - """ - assert isinstance(arg, 
str) - - f = getattr(self, arg, None) - if f is not None: - if callable(f): - return f(*args, **kwargs) - - # people may try to aggregate on a non-callable attribute - # but don't let them think they can pass args to it - assert len(args) == 0 - assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0 - return f - - f = getattr(np, arg, None) - if f is not None and hasattr(self, "__array__"): - # in particular exclude Window - return f(self, *args, **kwargs) - - raise AttributeError( - f"'{arg}' is not a valid function for '{type(self).__name__}' object" - ) - - def _get_cython_func(self, arg: Callable) -> Optional[str]: - """ - if we define an internal function for this argument, return it - """ - return self._cython_table.get(arg) - - def _is_builtin_func(self, arg): - """ - if we define an builtin function for this argument, return it, - otherwise return the arg - """ - return self._builtin_table.get(arg, arg) - class IndexOpsMixin(OpsMixin): """ diff --git a/pandas/core/common.py b/pandas/core/common.py index e0c00fc419bf1..04ff2d2c4618f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -5,6 +5,7 @@ """ from __future__ import annotations +import builtins from collections import ( abc, defaultdict, @@ -532,3 +533,49 @@ def require_length_match(data, index: Index): "does not match length of index " f"({len(index)})" ) + + +_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min} + +_cython_table = { + builtins.sum: "sum", + builtins.max: "max", + builtins.min: "min", + np.all: "all", + np.any: "any", + np.sum: "sum", + np.nansum: "sum", + np.mean: "mean", + np.nanmean: "mean", + np.prod: "prod", + np.nanprod: "prod", + np.std: "std", + np.nanstd: "std", + np.var: "var", + np.nanvar: "var", + np.median: "median", + np.nanmedian: "median", + np.max: "max", + np.nanmax: "max", + np.min: "min", + np.nanmin: "min", + np.cumprod: "cumprod", + np.nancumprod: "cumprod", + np.cumsum: "cumsum", + np.nancumsum: 
"cumsum", +} + + +def get_cython_func(arg: Callable) -> str | None: + """ + if we define an internal function for this argument, return it + """ + return _cython_table.get(arg) + + +def is_builtin_func(arg): + """ + if we define a builtin function for this argument, return it, + otherwise return the arg + """ + return _builtin_table.get(arg, arg) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a8f6443ff4712..4e7c311f39cdb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -109,10 +109,7 @@ ) import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray -from pandas.core.base import ( - PandasObject, - SelectionMixin, -) +from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.construction import ( create_series_with_explicit_dtype, @@ -187,7 +184,7 @@ bool_t = bool # Need alias because NDFrame has def bool: -class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): +class NDFrame(PandasObject, indexing.IndexingMixin): """ N-dimensional analogue of DataFrame. Store multi-dimensional in a size-mutable, labeled data structure @@ -684,18 +681,6 @@ def size(self) -> int: # error: Incompatible return value type (got "number", expected "int") return np.prod(self.shape) # type: ignore[return-value] - @final - @property - def _selected_obj(self: FrameOrSeries) -> FrameOrSeries: - """ internal compat with SelectionMixin """ - return self - - @final - @property - def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries: - """ internal compat with SelectionMixin """ - return self - @overload def set_axis( self: FrameOrSeries, labels, axis: Axis = ..., inplace: Literal[False] = ... 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5aebad84a0a30..bb820574846a6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -268,7 +268,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if relabeling: ret.columns = columns else: - cyfunc = self._get_cython_func(func) + cyfunc = com.get_cython_func(func) if cyfunc and not args and not kwargs: return getattr(self, cyfunc)() @@ -536,7 +536,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): result.ravel(), index=data.index, name=data.name ) - func = self._get_cython_func(func) or func + func = com.get_cython_func(func) or func if not isinstance(func, str): return self._transform_general(func, *args, **kwargs) @@ -1440,7 +1440,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): return self.obj._constructor(result, index=data.index, columns=data.columns) # optimized transforms - func = self._get_cython_func(func) or func + func = com.get_cython_func(func) or func if not isinstance(func, str): return self._transform_general(func, *args, **kwargs) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index de5a65108a5cc..adc01d3dad012 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -909,7 +909,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]: ) def apply(self, func, *args, **kwargs): - func = self._is_builtin_func(func) + func = com.is_builtin_func(func) # this is needed so we don't try and wrap strings. 
If we could # resolve functions to their callable functions prior, this @@ -1205,7 +1205,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs) @final def _python_agg_general(self, func, *args, **kwargs): - func = self._is_builtin_func(func) + func = com.is_builtin_func(func) f = lambda x: func(x, *args, **kwargs) # iterate through "columns" ex exclusions to populate output dict diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5bf9f81e3073d..bc5318a1f367c 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -69,7 +69,6 @@ from pandas.core import algorithms from pandas.core.arrays import ExtensionArray -from pandas.core.base import SelectionMixin import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame @@ -557,14 +556,6 @@ def get_group_levels(self) -> list[Index]: # ------------------------------------------------------------ # Aggregation functions - @final - def _is_builtin_func(self, arg): - """ - if we define a builtin function for this argument, return it, - otherwise return the arg - """ - return SelectionMixin._builtin_table.get(arg, arg) - @final def _ea_wrap_cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs @@ -759,7 +750,7 @@ def _aggregate_series_fast(self, obj: Series, func: F): # - obj is backed by an ndarray, not ExtensionArray # - len(obj) > 0 # - ngroups != 0 - func = self._is_builtin_func(func) + func = com.is_builtin_func(func) group_index, _, ngroups = self.group_info diff --git a/pandas/core/resample.py b/pandas/core/resample.py index c7d71d4531d11..ffe0990a0c8be 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -40,6 +40,7 @@ import pandas.core.algorithms as algos from pandas.core.apply import ResamplerWindowApply from pandas.core.base import DataError +import pandas.core.common as com from pandas.core.generic import ( NDFrame, _shared_docs, @@ 
-1086,7 +1087,7 @@ def _downsample(self, how, **kwargs): **kwargs : kw args passed to how function """ self._set_binner() - how = self._get_cython_func(how) or how + how = com.get_cython_func(how) or how ax = self.ax obj = self._selected_obj @@ -1241,7 +1242,7 @@ def _downsample(self, how, **kwargs): if self.kind == "timestamp": return super()._downsample(how, **kwargs) - how = self._get_cython_func(how) or how + how = com.get_cython_func(how) or how ax = self.ax if is_subperiod(ax.freq, self.freq):