Skip to content

REF: NDFrame don't mix in SelectionMixin #40857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,12 +918,12 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
return pytest.raises(expected_exception, match=None)


cython_table = pd.core.base.SelectionMixin._cython_table.items()
cython_table = pd.core.common._cython_table.items()


def get_cython_table_params(ndframe, func_names_and_expected):
"""
Combine frame, functions from SelectionMixin._cython_table
Combine frame, functions from com._cython_table
keys and expected result.

Parameters
Expand Down
53 changes: 46 additions & 7 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from pandas.core.algorithms import safe_sort
from pandas.core.base import (
DataError,
SelectionMixin,
SpecificationError,
)
import pandas.core.common as com
Expand Down Expand Up @@ -173,7 +174,7 @@ def agg(self) -> FrameOrSeriesUnion | None:
return self.agg_list_like()

if callable(arg):
f = obj._get_cython_func(arg)
f = com.get_cython_func(arg)
if f and not args and not kwargs:
return getattr(obj, f)()

Expand Down Expand Up @@ -301,10 +302,10 @@ def transform_str_or_callable(self, func) -> FrameOrSeriesUnion:
kwargs = self.kwargs

if isinstance(func, str):
return obj._try_aggregate_string_function(func, *args, **kwargs)
return self._try_aggregate_string_function(obj, func, *args, **kwargs)

if not args and not kwargs:
f = obj._get_cython_func(func)
f = com.get_cython_func(func)
if f:
return getattr(obj, f)()

Expand All @@ -327,7 +328,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion:
obj = self.obj
arg = cast(List[AggFuncTypeBase], self.f)

if obj._selected_obj.ndim == 1:
if not isinstance(obj, SelectionMixin):
# i.e. obj is Series or DataFrame
selected_obj = obj
elif obj._selected_obj.ndim == 1:
selected_obj = obj._selected_obj
else:
selected_obj = obj._obj_with_exclusions
Expand Down Expand Up @@ -406,13 +410,19 @@ def agg_dict_like(self) -> FrameOrSeriesUnion:
obj = self.obj
arg = cast(AggFuncTypeDict, self.f)

selected_obj = obj._selected_obj
if not isinstance(obj, SelectionMixin):
# i.e. obj is Series or DataFrame
selected_obj = obj
selection = None
else:
selected_obj = obj._selected_obj
selection = obj._selection

arg = self.normalize_dictlike_arg("agg", selected_obj, arg)

if selected_obj.ndim == 1:
# key only used for output
colg = obj._gotitem(obj._selection, ndim=1)
colg = obj._gotitem(selection, ndim=1)
results = {key: colg.agg(how) for key, how in arg.items()}
else:
# key used for column selection and output
Expand Down Expand Up @@ -486,7 +496,7 @@ def maybe_apply_str(self) -> FrameOrSeriesUnion | None:
self.kwargs["axis"] = self.axis
elif self.axis != 0:
raise ValueError(f"Operation {f} does not support axis=1")
return obj._try_aggregate_string_function(f, *self.args, **self.kwargs)
return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs)

def maybe_apply_multiple(self) -> FrameOrSeriesUnion | None:
"""
Expand Down Expand Up @@ -547,6 +557,35 @@ def normalize_dictlike_arg(
func = new_func
return func

def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs):
"""
if arg is a string, then try to operate on it:
- try to find a function (or attribute) on ourselves
- try to find a numpy function
- raise
"""
assert isinstance(arg, str)

f = getattr(obj, arg, None)
if f is not None:
if callable(f):
return f(*args, **kwargs)

# people may try to aggregate on a non-callable attribute
# but don't let them think they can pass args to it
assert len(args) == 0
assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
return f

f = getattr(np, arg, None)
if f is not None and hasattr(obj, "__array__"):
# in particular exclude Window
return f(obj, *args, **kwargs)

raise AttributeError(
f"'{arg}' is not a valid function for '{type(obj).__name__}' object"
)


class FrameApply(Apply):
obj: DataFrame
Expand Down
76 changes: 2 additions & 74 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
Base and utility classes for pandas objects.
"""

import builtins
import textwrap
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
FrozenSet,
Optional,
Expand Down Expand Up @@ -176,36 +174,6 @@ class SelectionMixin:
_internal_names = ["_cache", "__setstate__"]
_internal_names_set = set(_internal_names)

# Python builtin reductions paired with the numpy function that
# ``_is_builtin_func`` returns in their place.
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}

# Callable -> method name, as returned by ``_get_cython_func``.
# Each numpy function and its nan-variant map to the same name.
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}

@property
def _selection_name(self):
"""
Expand All @@ -216,6 +184,7 @@ def _selection_name(self):
"""
return self._selection

@final
@property
def _selection_list(self):
if not isinstance(
Expand All @@ -240,6 +209,7 @@ def _selected_obj(self):
def ndim(self) -> int:
return self._selected_obj.ndim

@final
@cache_readonly
def _obj_with_exclusions(self):
# error: "SelectionMixin" has no attribute "obj"
Expand Down Expand Up @@ -308,48 +278,6 @@ def aggregate(self, func, *args, **kwargs):

agg = aggregate

def _try_aggregate_string_function(self, arg: str, *args, **kwargs):
"""
if arg is a string, then try to operate on it:
- try to find a function (or attribute) on ourselves
- try to find a numpy function
- raise
"""
assert isinstance(arg, str)

f = getattr(self, arg, None)
if f is not None:
if callable(f):
return f(*args, **kwargs)

# people may try to aggregate on a non-callable attribute
# but don't let them think they can pass args to it
assert len(args) == 0
assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
return f

f = getattr(np, arg, None)
if f is not None and hasattr(self, "__array__"):
# in particular exclude Window
return f(self, *args, **kwargs)

raise AttributeError(
f"'{arg}' is not a valid function for '{type(self).__name__}' object"
)

def _get_cython_func(self, arg: Callable) -> Optional[str]:
"""
if we define an internal function for this argument, return it
"""
return self._cython_table.get(arg)

def _is_builtin_func(self, arg):
"""
if we define an builtin function for this argument, return it,
otherwise return the arg
"""
return self._builtin_table.get(arg, arg)


class IndexOpsMixin(OpsMixin):
"""
Expand Down
47 changes: 47 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
from __future__ import annotations

import builtins
from collections import (
abc,
defaultdict,
Expand Down Expand Up @@ -532,3 +533,49 @@ def require_length_match(data, index: Index):
"does not match length of index "
f"({len(index)})"
)


# Python builtin reductions paired with the numpy function that
# ``is_builtin_func`` returns in their place.
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}

# Callable -> method name, as returned by ``get_cython_func``.
# Each numpy function and its nan-variant map to the same name.
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}


def get_cython_func(arg: Callable) -> str | None:
    """
    Look ``arg`` up in ``_cython_table``.

    Returns the string registered for ``arg``, or None when there is
    no entry for it.
    """
    registered = _cython_table.get(arg, None)
    return registered


def is_builtin_func(arg):
    """
    Return the replacement registered for ``arg`` in ``_builtin_table``;
    when ``arg`` has no entry, return ``arg`` itself unchanged.
    """
    replacement = _builtin_table.get(arg, arg)
    return replacement
19 changes: 2 additions & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,7 @@
)
import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray
from pandas.core.base import (
PandasObject,
SelectionMixin,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.construction import (
create_series_with_explicit_dtype,
Expand Down Expand Up @@ -187,7 +184,7 @@
bool_t = bool # Need alias because NDFrame has def bool:


class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin):
class NDFrame(PandasObject, indexing.IndexingMixin):
"""
N-dimensional analogue of DataFrame. Store multi-dimensional in a
size-mutable, labeled data structure
Expand Down Expand Up @@ -684,18 +681,6 @@ def size(self) -> int:
# error: Incompatible return value type (got "number", expected "int")
return np.prod(self.shape) # type: ignore[return-value]

@final
@property
def _selected_obj(self: FrameOrSeries) -> FrameOrSeries:
""" internal compat with SelectionMixin """
return self

@final
@property
def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries:
""" internal compat with SelectionMixin """
return self

@overload
def set_axis(
self: FrameOrSeries, labels, axis: Axis = ..., inplace: Literal[False] = ...
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
if relabeling:
ret.columns = columns
else:
cyfunc = self._get_cython_func(func)
cyfunc = com.get_cython_func(func)
if cyfunc and not args and not kwargs:
return getattr(self, cyfunc)()

Expand Down Expand Up @@ -536,7 +536,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
result.ravel(), index=data.index, name=data.name
)

func = self._get_cython_func(func) or func
func = com.get_cython_func(func) or func

if not isinstance(func, str):
return self._transform_general(func, *args, **kwargs)
Expand Down Expand Up @@ -1440,7 +1440,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
return self.obj._constructor(result, index=data.index, columns=data.columns)

# optimized transforms
func = self._get_cython_func(func) or func
func = com.get_cython_func(func) or func

if not isinstance(func, str):
return self._transform_general(func, *args, **kwargs)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]:
)
def apply(self, func, *args, **kwargs):

func = self._is_builtin_func(func)
func = com.is_builtin_func(func)

# this is needed so we don't try and wrap strings. If we could
# resolve functions to their callable functions prior, this
Expand Down Expand Up @@ -1205,7 +1205,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)

@final
def _python_agg_general(self, func, *args, **kwargs):
func = self._is_builtin_func(func)
func = com.is_builtin_func(func)
f = lambda x: func(x, *args, **kwargs)

# iterate through "columns" ex exclusions to populate output dict
Expand Down
Loading