Skip to content

Commit 33fec60

Browse files
authored
REF: NDFrame dont mixin SelectionMixin (#40857)
1 parent 9373dbe commit 33fec60

File tree

9 files changed

+108
-117
lines changed

9 files changed

+108
-117
lines changed

pandas/_testing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -918,12 +918,12 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
918918
return pytest.raises(expected_exception, match=None)
919919

920920

921-
cython_table = pd.core.base.SelectionMixin._cython_table.items()
921+
cython_table = pd.core.common._cython_table.items()
922922

923923

924924
def get_cython_table_params(ndframe, func_names_and_expected):
925925
"""
926-
Combine frame, functions from SelectionMixin._cython_table
926+
Combine frame, functions from com._cython_table
927927
keys and expected result.
928928
929929
Parameters

pandas/core/apply.py

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from pandas.core.algorithms import safe_sort
4545
from pandas.core.base import (
4646
DataError,
47+
SelectionMixin,
4748
SpecificationError,
4849
)
4950
import pandas.core.common as com
@@ -173,7 +174,7 @@ def agg(self) -> FrameOrSeriesUnion | None:
173174
return self.agg_list_like()
174175

175176
if callable(arg):
176-
f = obj._get_cython_func(arg)
177+
f = com.get_cython_func(arg)
177178
if f and not args and not kwargs:
178179
return getattr(obj, f)()
179180

@@ -301,10 +302,10 @@ def transform_str_or_callable(self, func) -> FrameOrSeriesUnion:
301302
kwargs = self.kwargs
302303

303304
if isinstance(func, str):
304-
return obj._try_aggregate_string_function(func, *args, **kwargs)
305+
return self._try_aggregate_string_function(obj, func, *args, **kwargs)
305306

306307
if not args and not kwargs:
307-
f = obj._get_cython_func(func)
308+
f = com.get_cython_func(func)
308309
if f:
309310
return getattr(obj, f)()
310311

@@ -327,7 +328,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion:
327328
obj = self.obj
328329
arg = cast(List[AggFuncTypeBase], self.f)
329330

330-
if obj._selected_obj.ndim == 1:
331+
if not isinstance(obj, SelectionMixin):
332+
# i.e. obj is Series or DataFrame
333+
selected_obj = obj
334+
elif obj._selected_obj.ndim == 1:
331335
selected_obj = obj._selected_obj
332336
else:
333337
selected_obj = obj._obj_with_exclusions
@@ -406,13 +410,19 @@ def agg_dict_like(self) -> FrameOrSeriesUnion:
406410
obj = self.obj
407411
arg = cast(AggFuncTypeDict, self.f)
408412

409-
selected_obj = obj._selected_obj
413+
if not isinstance(obj, SelectionMixin):
414+
# i.e. obj is Series or DataFrame
415+
selected_obj = obj
416+
selection = None
417+
else:
418+
selected_obj = obj._selected_obj
419+
selection = obj._selection
410420

411421
arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
412422

413423
if selected_obj.ndim == 1:
414424
# key only used for output
415-
colg = obj._gotitem(obj._selection, ndim=1)
425+
colg = obj._gotitem(selection, ndim=1)
416426
results = {key: colg.agg(how) for key, how in arg.items()}
417427
else:
418428
# key used for column selection and output
@@ -486,7 +496,7 @@ def maybe_apply_str(self) -> FrameOrSeriesUnion | None:
486496
self.kwargs["axis"] = self.axis
487497
elif self.axis != 0:
488498
raise ValueError(f"Operation {f} does not support axis=1")
489-
return obj._try_aggregate_string_function(f, *self.args, **self.kwargs)
499+
return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs)
490500

491501
def maybe_apply_multiple(self) -> FrameOrSeriesUnion | None:
492502
"""
@@ -547,6 +557,35 @@ def normalize_dictlike_arg(
547557
func = new_func
548558
return func
549559

560+
def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs):
561+
"""
562+
if arg is a string, then try to operate on it:
563+
- try to find a function (or attribute) on ourselves
564+
- try to find a numpy function
565+
- raise
566+
"""
567+
assert isinstance(arg, str)
568+
569+
f = getattr(obj, arg, None)
570+
if f is not None:
571+
if callable(f):
572+
return f(*args, **kwargs)
573+
574+
# people may try to aggregate on a non-callable attribute
575+
# but don't let them think they can pass args to it
576+
assert len(args) == 0
577+
assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
578+
return f
579+
580+
f = getattr(np, arg, None)
581+
if f is not None and hasattr(obj, "__array__"):
582+
# in particular exclude Window
583+
return f(obj, *args, **kwargs)
584+
585+
raise AttributeError(
586+
f"'{arg}' is not a valid function for '{type(obj).__name__}' object"
587+
)
588+
550589

551590
class FrameApply(Apply):
552591
obj: DataFrame

pandas/core/base.py

Lines changed: 2 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@
22
Base and utility classes for pandas objects.
33
"""
44

5-
import builtins
65
import textwrap
76
from typing import (
87
TYPE_CHECKING,
98
Any,
10-
Callable,
119
Dict,
1210
FrozenSet,
1311
Optional,
@@ -176,36 +174,6 @@ class SelectionMixin:
176174
_internal_names = ["_cache", "__setstate__"]
177175
_internal_names_set = set(_internal_names)
178176

179-
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}
180-
181-
_cython_table = {
182-
builtins.sum: "sum",
183-
builtins.max: "max",
184-
builtins.min: "min",
185-
np.all: "all",
186-
np.any: "any",
187-
np.sum: "sum",
188-
np.nansum: "sum",
189-
np.mean: "mean",
190-
np.nanmean: "mean",
191-
np.prod: "prod",
192-
np.nanprod: "prod",
193-
np.std: "std",
194-
np.nanstd: "std",
195-
np.var: "var",
196-
np.nanvar: "var",
197-
np.median: "median",
198-
np.nanmedian: "median",
199-
np.max: "max",
200-
np.nanmax: "max",
201-
np.min: "min",
202-
np.nanmin: "min",
203-
np.cumprod: "cumprod",
204-
np.nancumprod: "cumprod",
205-
np.cumsum: "cumsum",
206-
np.nancumsum: "cumsum",
207-
}
208-
209177
@property
210178
def _selection_name(self):
211179
"""
@@ -216,6 +184,7 @@ def _selection_name(self):
216184
"""
217185
return self._selection
218186

187+
@final
219188
@property
220189
def _selection_list(self):
221190
if not isinstance(
@@ -240,6 +209,7 @@ def _selected_obj(self):
240209
def ndim(self) -> int:
241210
return self._selected_obj.ndim
242211

212+
@final
243213
@cache_readonly
244214
def _obj_with_exclusions(self):
245215
# error: "SelectionMixin" has no attribute "obj"
@@ -308,48 +278,6 @@ def aggregate(self, func, *args, **kwargs):
308278

309279
agg = aggregate
310280

311-
def _try_aggregate_string_function(self, arg: str, *args, **kwargs):
312-
"""
313-
if arg is a string, then try to operate on it:
314-
- try to find a function (or attribute) on ourselves
315-
- try to find a numpy function
316-
- raise
317-
"""
318-
assert isinstance(arg, str)
319-
320-
f = getattr(self, arg, None)
321-
if f is not None:
322-
if callable(f):
323-
return f(*args, **kwargs)
324-
325-
# people may try to aggregate on a non-callable attribute
326-
# but don't let them think they can pass args to it
327-
assert len(args) == 0
328-
assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
329-
return f
330-
331-
f = getattr(np, arg, None)
332-
if f is not None and hasattr(self, "__array__"):
333-
# in particular exclude Window
334-
return f(self, *args, **kwargs)
335-
336-
raise AttributeError(
337-
f"'{arg}' is not a valid function for '{type(self).__name__}' object"
338-
)
339-
340-
def _get_cython_func(self, arg: Callable) -> Optional[str]:
341-
"""
342-
if we define an internal function for this argument, return it
343-
"""
344-
return self._cython_table.get(arg)
345-
346-
def _is_builtin_func(self, arg):
347-
"""
348-
if we define a builtin function for this argument, return it,
349-
otherwise return the arg
350-
"""
351-
return self._builtin_table.get(arg, arg)
352-
353281

354282
class IndexOpsMixin(OpsMixin):
355283
"""

pandas/core/common.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66
from __future__ import annotations
77

8+
import builtins
89
from collections import (
910
abc,
1011
defaultdict,
@@ -532,3 +533,49 @@ def require_length_match(data, index: Index):
532533
"does not match length of index "
533534
f"({len(index)})"
534535
)
536+
537+
538+
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}
539+
540+
_cython_table = {
541+
builtins.sum: "sum",
542+
builtins.max: "max",
543+
builtins.min: "min",
544+
np.all: "all",
545+
np.any: "any",
546+
np.sum: "sum",
547+
np.nansum: "sum",
548+
np.mean: "mean",
549+
np.nanmean: "mean",
550+
np.prod: "prod",
551+
np.nanprod: "prod",
552+
np.std: "std",
553+
np.nanstd: "std",
554+
np.var: "var",
555+
np.nanvar: "var",
556+
np.median: "median",
557+
np.nanmedian: "median",
558+
np.max: "max",
559+
np.nanmax: "max",
560+
np.min: "min",
561+
np.nanmin: "min",
562+
np.cumprod: "cumprod",
563+
np.nancumprod: "cumprod",
564+
np.cumsum: "cumsum",
565+
np.nancumsum: "cumsum",
566+
}
567+
568+
569+
def get_cython_func(arg: Callable) -> str | None:
570+
"""
571+
if we define an internal function for this argument, return it
572+
"""
573+
return _cython_table.get(arg)
574+
575+
576+
def is_builtin_func(arg):
577+
"""
578+
if we define a builtin function for this argument, return it,
579+
otherwise return the arg
580+
"""
581+
return _builtin_table.get(arg, arg)

pandas/core/generic.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,7 @@
109109
)
110110
import pandas.core.algorithms as algos
111111
from pandas.core.arrays import ExtensionArray
112-
from pandas.core.base import (
113-
PandasObject,
114-
SelectionMixin,
115-
)
112+
from pandas.core.base import PandasObject
116113
import pandas.core.common as com
117114
from pandas.core.construction import (
118115
create_series_with_explicit_dtype,
@@ -187,7 +184,7 @@
187184
bool_t = bool # Need alias because NDFrame has def bool:
188185

189186

190-
class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin):
187+
class NDFrame(PandasObject, indexing.IndexingMixin):
191188
"""
192189
N-dimensional analogue of DataFrame. Store multi-dimensional in a
193190
size-mutable, labeled data structure
@@ -684,18 +681,6 @@ def size(self) -> int:
684681
# error: Incompatible return value type (got "number", expected "int")
685682
return np.prod(self.shape) # type: ignore[return-value]
686683

687-
@final
688-
@property
689-
def _selected_obj(self: FrameOrSeries) -> FrameOrSeries:
690-
""" internal compat with SelectionMixin """
691-
return self
692-
693-
@final
694-
@property
695-
def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries:
696-
""" internal compat with SelectionMixin """
697-
return self
698-
699684
@overload
700685
def set_axis(
701686
self: FrameOrSeries, labels, axis: Axis = ..., inplace: Literal[False] = ...

pandas/core/groupby/generic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
268268
if relabeling:
269269
ret.columns = columns
270270
else:
271-
cyfunc = self._get_cython_func(func)
271+
cyfunc = com.get_cython_func(func)
272272
if cyfunc and not args and not kwargs:
273273
return getattr(self, cyfunc)()
274274

@@ -536,7 +536,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
536536
result.ravel(), index=data.index, name=data.name
537537
)
538538

539-
func = self._get_cython_func(func) or func
539+
func = com.get_cython_func(func) or func
540540

541541
if not isinstance(func, str):
542542
return self._transform_general(func, *args, **kwargs)
@@ -1440,7 +1440,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
14401440
return self.obj._constructor(result, index=data.index, columns=data.columns)
14411441

14421442
# optimized transforms
1443-
func = self._get_cython_func(func) or func
1443+
func = com.get_cython_func(func) or func
14441444

14451445
if not isinstance(func, str):
14461446
return self._transform_general(func, *args, **kwargs)

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -909,7 +909,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]:
909909
)
910910
def apply(self, func, *args, **kwargs):
911911

912-
func = self._is_builtin_func(func)
912+
func = com.is_builtin_func(func)
913913

914914
# this is needed so we don't try and wrap strings. If we could
915915
# resolve functions to their callable functions prior, this
@@ -1205,7 +1205,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)
12051205

12061206
@final
12071207
def _python_agg_general(self, func, *args, **kwargs):
1208-
func = self._is_builtin_func(func)
1208+
func = com.is_builtin_func(func)
12091209
f = lambda x: func(x, *args, **kwargs)
12101210

12111211
# iterate through "columns" ex exclusions to populate output dict

0 commit comments

Comments
 (0)