diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6726374dbe30e..e91927d87d318 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -55,7 +55,6 @@ ensure_str, is_bool, is_bool_dtype, - is_categorical_dtype, is_complex, is_complex_dtype, is_datetime64_dtype, @@ -79,6 +78,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype, @@ -359,15 +359,15 @@ def trans(x): return result -def maybe_cast_result( +def maybe_cast_pointwise_result( result: ArrayLike, dtype: DtypeObj, numeric_only: bool = False, - how: str = "", same_dtype: bool = True, ) -> ArrayLike: """ - Try casting result to a different type if appropriate + Try casting result of a pointwise operation back to the original dtype if + appropriate. Parameters ---------- @@ -377,8 +377,6 @@ def maybe_cast_result( Input Series from which result was calculated. numeric_only : bool, default False Whether to cast only numerics or datetimes as well. - how : str, default "" - How the result was computed. same_dtype : bool, default True Specify dtype when calling _from_sequence @@ -387,12 +385,12 @@ def maybe_cast_result( result : array-like result maybe casted to the dtype. """ - dtype = maybe_cast_result_dtype(dtype, how) assert not is_scalar(result) if isinstance(dtype, ExtensionDtype): - if not is_categorical_dtype(dtype) and dtype.kind != "M": + if not isinstance(dtype, (CategoricalDtype, DatetimeTZDtype)): + # TODO: avoid this special-casing # We have to special case categorical so as not to upcast # things like counts back to categorical diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 702d67b198e8d..54a10b9b62ec4 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -36,7 +36,7 @@ from pandas.util._decorators import cache_readonly from pandas.core.dtypes.cast import ( - maybe_cast_result, + maybe_cast_pointwise_result, maybe_cast_result_dtype, maybe_downcast_to_dtype, ) @@ -797,7 +797,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): result[label] = res out = lib.maybe_convert_objects(result, try_float=False) - out = maybe_cast_result(out, obj.dtype, numeric_only=True) + out = maybe_cast_pointwise_result(out, obj.dtype, numeric_only=True) return out, counts diff --git a/pandas/core/series.py b/pandas/core/series.py index cbec0024d5f9e..fd115a550fa77 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -60,7 +60,7 @@ from pandas.core.dtypes.cast import ( convert_dtypes, maybe_box_native, - maybe_cast_result, + maybe_cast_pointwise_result, validate_numeric_casting, ) from pandas.core.dtypes.common import ( @@ -3070,22 +3070,26 @@ def combine(self, other, func, fill_value=None) -> Series: # so do this element by element new_index = self.index.union(other.index) new_name = ops.get_op_result_name(self, other) - new_values = [] - for idx in new_index: + new_values = np.empty(len(new_index), dtype=object) + for i, idx in enumerate(new_index): lv = self.get(idx, fill_value) rv = other.get(idx, fill_value) with np.errstate(all="ignore"): - new_values.append(func(lv, rv)) + new_values[i] = func(lv, rv) else: # Assume that other is a scalar, so apply the function for # each element in the Series new_index = self.index + new_values = np.empty(len(new_index), dtype=object) with np.errstate(all="ignore"): - new_values = [func(lv, other) for lv in self._values] + new_values[:] = [func(lv, other) for lv in self._values] new_name = self.name - res_values = sanitize_array(new_values, None) - res_values = maybe_cast_result(res_values, self.dtype, same_dtype=False) + # try_float=False is to match _aggregate_series_pure_python + res_values = lib.maybe_convert_objects(new_values, try_float=False) + res_values = maybe_cast_pointwise_result( + res_values, self.dtype, same_dtype=False + ) return self._constructor(res_values, index=new_index, name=new_name) def combine_first(self, other) -> Series: