58
58
validate_func_kwargs ,
59
59
)
60
60
import pandas .core .algorithms as algorithms
61
- from pandas .core .arrays import ExtensionArray
61
+ from pandas .core .arrays import Categorical , ExtensionArray
62
62
from pandas .core .base import DataError , SpecificationError
63
63
import pandas .core .common as com
64
64
from pandas .core .construction import create_series_with_explicit_dtype
@@ -1026,38 +1026,64 @@ def _cython_agg_blocks(
1026
1026
if numeric_only :
1027
1027
data = data .get_numeric_data (copy = False )
1028
1028
1029
- no_result = object ()
1030
-
1031
1029
def cast_agg_result (result , values : ArrayLike , how : str ) -> ArrayLike :
1032
1030
# see if we can cast the values to the desired dtype
1033
1031
# this may not be the original dtype
1034
1032
assert not isinstance (result , DataFrame )
1035
- assert result is not no_result
1036
1033
1037
1034
dtype = maybe_cast_result_dtype (values .dtype , how )
1038
1035
result = maybe_downcast_numeric (result , dtype )
1039
1036
1040
- if isinstance (values , ExtensionArray ) and isinstance (result , np .ndarray ):
1041
- # e.g. values was an IntegerArray
1042
- # (1, N) case can occur if values was Categorical
1043
- # and result is ndarray[object]
1044
- # TODO(EA2D): special casing not needed with 2D EAs
1045
- assert result .ndim == 1 or result .shape [0 ] == 1
1046
- try :
1047
- # Cast back if feasible
1048
- result = type (values )._from_sequence (
1049
- result .ravel (), dtype = values .dtype
1050
- )
1051
- except (ValueError , TypeError ):
1052
- # reshape to be valid for non-Extension Block
1053
- result = result .reshape (1 , - 1 )
1037
+ if isinstance (values , Categorical ) and isinstance (result , np .ndarray ):
1038
+ # If the Categorical op didn't raise, it is dtype-preserving
1039
+ result = type (values )._from_sequence (result .ravel (), dtype = values .dtype )
1040
+ # Note this will have result.dtype == dtype from above
1054
1041
1055
1042
elif isinstance (result , np .ndarray ) and result .ndim == 1 :
1056
1043
# We went through a SeriesGroupByPath and need to reshape
1044
+ # GH#32223 includes case with IntegerArray values
1057
1045
result = result .reshape (1 , - 1 )
1046
+ # test_groupby_duplicate_columns gets here with
1047
+ # result.dtype == int64, values.dtype=object, how="min"
1058
1048
1059
1049
return result
1060
1050
1051
+ def py_fallback (bvalues : ArrayLike ) -> ArrayLike :
1052
+ # if self.grouper.aggregate fails, we fall back to a pure-python
1053
+ # solution
1054
+
1055
+ # We get here with a) EADtypes and b) object dtype
1056
+ obj : FrameOrSeriesUnion
1057
+
1058
+ # call our grouper again with only this block
1059
+ if isinstance (bvalues , ExtensionArray ):
1060
+ # TODO(EA2D): special case not needed with 2D EAs
1061
+ obj = Series (bvalues )
1062
+ else :
1063
+ obj = DataFrame (bvalues .T )
1064
+ if obj .shape [1 ] == 1 :
1065
+ # Avoid call to self.values that can occur in DataFrame
1066
+ # reductions; see GH#28949
1067
+ obj = obj .iloc [:, 0 ]
1068
+
1069
+ # Create SeriesGroupBy with observed=True so that it does
1070
+ # not try to add missing categories if grouping over multiple
1071
+ # Categoricals. This will be done later by self._reindex_output()
1072
+ # Doing it here creates an error. See GH#34951
1073
+ sgb = get_groupby (obj , self .grouper , observed = True )
1074
+ result = sgb .aggregate (lambda x : alt (x , axis = self .axis ))
1075
+
1076
+ assert isinstance (result , (Series , DataFrame )) # for mypy
1077
+ # In the case of object dtype block, it may have been split
1078
+ # in the operation. We un-split here.
1079
+ result = result ._consolidate ()
1080
+ assert isinstance (result , (Series , DataFrame )) # for mypy
1081
+ assert len (result ._mgr .blocks ) == 1
1082
+
1083
+ # unwrap DataFrame to get array
1084
+ result = result ._mgr .blocks [0 ].values
1085
+ return result
1086
+
1061
1087
def blk_func (bvalues : ArrayLike ) -> ArrayLike :
1062
1088
1063
1089
try :
@@ -1075,35 +1101,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
1075
1101
assert how == "ohlc"
1076
1102
raise
1077
1103
1078
- # We get here with a) EADtypes and b) object dtype
1079
- obj : FrameOrSeriesUnion
1080
- # call our grouper again with only this block
1081
- if isinstance (bvalues , ExtensionArray ):
1082
- # TODO(EA2D): special case not needed with 2D EAs
1083
- obj = Series (bvalues )
1084
- else :
1085
- obj = DataFrame (bvalues .T )
1086
- if obj .shape [1 ] == 1 :
1087
- # Avoid call to self.values that can occur in DataFrame
1088
- # reductions; see GH#28949
1089
- obj = obj .iloc [:, 0 ]
1090
-
1091
- # Create SeriesGroupBy with observed=True so that it does
1092
- # not try to add missing categories if grouping over multiple
1093
- # Categoricals. This will done by later self._reindex_output()
1094
- # Doing it here creates an error. See GH#34951
1095
- sgb = get_groupby (obj , self .grouper , observed = True )
1096
- result = sgb .aggregate (lambda x : alt (x , axis = self .axis ))
1097
-
1098
- assert isinstance (result , (Series , DataFrame )) # for mypy
1099
- # In the case of object dtype block, it may have been split
1100
- # in the operation. We un-split here.
1101
- result = result ._consolidate ()
1102
- assert isinstance (result , (Series , DataFrame )) # for mypy
1103
- assert len (result ._mgr .blocks ) == 1
1104
-
1105
- # unwrap DataFrame to get array
1106
- result = result ._mgr .blocks [0 ].values
1104
+ result = py_fallback (bvalues )
1107
1105
1108
1106
return cast_agg_result (result , bvalues , how )
1109
1107
0 commit comments