-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: let EAs override WrappedCythonOp groupby implementations #51166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9528ffc
1ea4a72
8645cf1
ea40255
a2e7e64
6a2788b
ec58ebb
fcfd76d
a3f32d0
d80c0b2
c78af4d
0248ce3
2206dea
335eb11
2517eae
fab8725
dd5428e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1722,6 +1722,82 @@ def map(self, mapper, na_action=None): | |
""" | ||
return map_array(self, mapper, na_action=na_action) | ||
|
||
# ------------------------------------------------------------------------ | ||
# GroupBy Methods | ||
|
||
def _groupby_op( | ||
self, | ||
*, | ||
how: str, | ||
has_dropped_na: bool, | ||
min_count: int, | ||
ngroups: int, | ||
ids: npt.NDArray[np.intp], | ||
**kwargs, | ||
) -> ArrayLike: | ||
""" | ||
Dispatch GroupBy reduction or transformation operation. | ||
|
||
This is an *experimental* API to allow ExtensionArray authors to implement | ||
reductions and transformations. The API is subject to change. | ||
|
||
Parameters | ||
---------- | ||
how : {'any', 'all', 'sum', 'prod', 'min', 'max', 'mean', 'median', | ||
'median', 'var', 'std', 'sem', 'nth', 'last', 'ohlc', | ||
'cumprod', 'cumsum', 'cummin', 'cummax', 'rank'} | ||
has_dropped_na : bool | ||
min_count : int | ||
ngroups : int | ||
ids : np.ndarray[np.intp] | ||
ids[i] gives the integer label for the group that self[i] belongs to. | ||
**kwargs : operation-specific | ||
'any', 'all' -> ['skipna'] | ||
'var', 'std', 'sem' -> ['ddof'] | ||
'cumprod', 'cumsum', 'cummin', 'cummax' -> ['skipna'] | ||
'rank' -> ['ties_method', 'ascending', 'na_option', 'pct'] | ||
|
||
Returns | ||
------- | ||
np.ndarray or ExtensionArray | ||
""" | ||
from pandas.core.arrays.string_ import StringDtype | ||
from pandas.core.groupby.ops import WrappedCythonOp | ||
|
||
kind = WrappedCythonOp.get_kind_from_how(how) | ||
op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na) | ||
|
||
# GH#43682 | ||
if isinstance(self.dtype, StringDtype): | ||
# StringArray | ||
npvalues = self.to_numpy(object, na_value=np.nan) | ||
else: | ||
raise NotImplementedError( | ||
f"function is not implemented for this dtype: {self.dtype}" | ||
) | ||
|
||
res_values = op._cython_op_ndim_compat( | ||
npvalues, | ||
min_count=min_count, | ||
ngroups=ngroups, | ||
comp_ids=ids, | ||
mask=None, | ||
**kwargs, | ||
) | ||
|
||
if op.how in op.cast_blocklist: | ||
# i.e. how in ["rank"], since other cast_blocklist methods don't go | ||
# through cython_operation | ||
return res_values | ||
|
||
if isinstance(self.dtype, StringDtype): | ||
dtype = self.dtype | ||
string_array_cls = dtype.construct_array_type() | ||
return string_array_cls._from_sequence(res_values, dtype=dtype) | ||
|
||
else: | ||
raise NotImplementedError | ||
Comment on lines
+1793
to
+1799
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hasn't this if-then already been done above? Or is it just to future-proof the code? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "Future-proof" is a nice way of putting it, yes. This is transplanted from its current position in WrappedCythonOp, where the redundant checks are in separate methods. |
||
|
||
|
||
class ExtensionArraySupportsAnyAll(ExtensionArray): | ||
def any(self, *, skipna: bool = True) -> bool: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1382,3 +1382,46 @@ def _accumulate( | |
data, mask = op(data, mask, skipna=skipna, **kwargs) | ||
|
||
return type(self)(data, mask, copy=False) | ||
|
||
# ------------------------------------------------------------------ | ||
# GroupBy Methods | ||
|
||
def _groupby_op( | ||
self, | ||
*, | ||
how: str, | ||
has_dropped_na: bool, | ||
min_count: int, | ||
ngroups: int, | ||
ids: npt.NDArray[np.intp], | ||
**kwargs, | ||
): | ||
from pandas.core.groupby.ops import WrappedCythonOp | ||
|
||
kind = WrappedCythonOp.get_kind_from_how(how) | ||
op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na) | ||
|
||
# libgroupby functions are responsible for NOT altering mask | ||
mask = self._mask | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably add tests covering this at some point. Not sure if we already have them |
||
if op.kind != "aggregate": | ||
result_mask = mask.copy() | ||
else: | ||
result_mask = np.zeros(ngroups, dtype=bool) | ||
|
||
res_values = op._cython_op_ndim_compat( | ||
self._data, | ||
min_count=min_count, | ||
ngroups=ngroups, | ||
comp_ids=ids, | ||
mask=mask, | ||
result_mask=result_mask, | ||
**kwargs, | ||
) | ||
|
||
if op.how == "ohlc": | ||
arity = op._cython_arity.get(op.how, 1) | ||
result_mask = np.tile(result_mask, (arity, 1)).T | ||
|
||
# res_values should already have the correct dtype, we just need to | ||
# wrap in a MaskedArray | ||
return self._maybe_mask_result(res_values, result_mask) |
Uh oh!
There was an error while loading. Please reload this page.