Skip to content

Commit 58fca97

Browse files
authored
REF: avoid try/except in wrapping in cython_agg_blocks (#38164)
1 parent efbcd68 commit 58fca97

File tree

1 file changed

+45
-47
lines changed

1 file changed

+45
-47
lines changed

pandas/core/groupby/generic.py

Lines changed: 45 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@
5858
validate_func_kwargs,
5959
)
6060
import pandas.core.algorithms as algorithms
61-
from pandas.core.arrays import ExtensionArray
61+
from pandas.core.arrays import Categorical, ExtensionArray
6262
from pandas.core.base import DataError, SpecificationError
6363
import pandas.core.common as com
6464
from pandas.core.construction import create_series_with_explicit_dtype
@@ -1026,38 +1026,64 @@ def _cython_agg_blocks(
10261026
if numeric_only:
10271027
data = data.get_numeric_data(copy=False)
10281028

1029-
no_result = object()
1030-
10311029
def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
10321030
# see if we can cast the values to the desired dtype
10331031
# this may not be the original dtype
10341032
assert not isinstance(result, DataFrame)
1035-
assert result is not no_result
10361033

10371034
dtype = maybe_cast_result_dtype(values.dtype, how)
10381035
result = maybe_downcast_numeric(result, dtype)
10391036

1040-
if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
1041-
# e.g. values was an IntegerArray
1042-
# (1, N) case can occur if values was Categorical
1043-
# and result is ndarray[object]
1044-
# TODO(EA2D): special casing not needed with 2D EAs
1045-
assert result.ndim == 1 or result.shape[0] == 1
1046-
try:
1047-
# Cast back if feasible
1048-
result = type(values)._from_sequence(
1049-
result.ravel(), dtype=values.dtype
1050-
)
1051-
except (ValueError, TypeError):
1052-
# reshape to be valid for non-Extension Block
1053-
result = result.reshape(1, -1)
1037+
if isinstance(values, Categorical) and isinstance(result, np.ndarray):
1038+
# If the Categorical op didn't raise, it is dtype-preserving
1039+
result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
1040+
# Note this will have result.dtype == dtype from above
10541041

10551042
elif isinstance(result, np.ndarray) and result.ndim == 1:
10561043
# We went through a SeriesGroupByPath and need to reshape
1044+
# GH#32223 includes case with IntegerArray values
10571045
result = result.reshape(1, -1)
1046+
# test_groupby_duplicate_columns gets here with
1047+
# result.dtype == int64, values.dtype=object, how="min"
10581048

10591049
return result
10601050

1051+
def py_fallback(bvalues: ArrayLike) -> ArrayLike:
1052+
# if self.grouper.aggregate fails, we fall back to a pure-python
1053+
# solution
1054+
1055+
# We get here with a) EADtypes and b) object dtype
1056+
obj: FrameOrSeriesUnion
1057+
1058+
# call our grouper again with only this block
1059+
if isinstance(bvalues, ExtensionArray):
1060+
# TODO(EA2D): special case not needed with 2D EAs
1061+
obj = Series(bvalues)
1062+
else:
1063+
obj = DataFrame(bvalues.T)
1064+
if obj.shape[1] == 1:
1065+
# Avoid call to self.values that can occur in DataFrame
1066+
# reductions; see GH#28949
1067+
obj = obj.iloc[:, 0]
1068+
1069+
# Create SeriesGroupBy with observed=True so that it does
1070+
# not try to add missing categories if grouping over multiple
1071+
# Categoricals. This will be done later by self._reindex_output()
1072+
# Doing it here creates an error. See GH#34951
1073+
sgb = get_groupby(obj, self.grouper, observed=True)
1074+
result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
1075+
1076+
assert isinstance(result, (Series, DataFrame)) # for mypy
1077+
# In the case of object dtype block, it may have been split
1078+
# in the operation. We un-split here.
1079+
result = result._consolidate()
1080+
assert isinstance(result, (Series, DataFrame)) # for mypy
1081+
assert len(result._mgr.blocks) == 1
1082+
1083+
# unwrap DataFrame to get array
1084+
result = result._mgr.blocks[0].values
1085+
return result
1086+
10611087
def blk_func(bvalues: ArrayLike) -> ArrayLike:
10621088

10631089
try:
@@ -1075,35 +1101,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
10751101
assert how == "ohlc"
10761102
raise
10771103

1078-
# We get here with a) EADtypes and b) object dtype
1079-
obj: FrameOrSeriesUnion
1080-
# call our grouper again with only this block
1081-
if isinstance(bvalues, ExtensionArray):
1082-
# TODO(EA2D): special case not needed with 2D EAs
1083-
obj = Series(bvalues)
1084-
else:
1085-
obj = DataFrame(bvalues.T)
1086-
if obj.shape[1] == 1:
1087-
# Avoid call to self.values that can occur in DataFrame
1088-
# reductions; see GH#28949
1089-
obj = obj.iloc[:, 0]
1090-
1091-
# Create SeriesGroupBy with observed=True so that it does
1092-
# not try to add missing categories if grouping over multiple
1093-
# Categoricals. This will be done later by self._reindex_output()
1094-
# Doing it here creates an error. See GH#34951
1095-
sgb = get_groupby(obj, self.grouper, observed=True)
1096-
result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
1097-
1098-
assert isinstance(result, (Series, DataFrame)) # for mypy
1099-
# In the case of object dtype block, it may have been split
1100-
# in the operation. We un-split here.
1101-
result = result._consolidate()
1102-
assert isinstance(result, (Series, DataFrame)) # for mypy
1103-
assert len(result._mgr.blocks) == 1
1104-
1105-
# unwrap DataFrame to get array
1106-
result = result._mgr.blocks[0].values
1104+
result = py_fallback(bvalues)
11071105

11081106
return cast_agg_result(result, bvalues, how)
11091107

0 commit comments

Comments
 (0)