diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 6f0bb3091133f..58d8ec39120e6 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -671,12 +671,8 @@ class String: ["str", "string[python]"], [ "sum", - "prod", "min", "max", - "mean", - "median", - "var", "first", "last", "any", diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 32802f6429c4c..a9ef1fca7e8e7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -863,7 +863,7 @@ def skew( self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True, - numeric_only: bool | None = None, + numeric_only: bool = False, **kwargs, ) -> Series: result = self._op_via_apply( @@ -1357,9 +1357,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: # We could use `mgr.apply` here and not have to set_axis, but # we would have to do shape gymnastics for ArrayManager compat - res_mgr = mgr.grouped_reduce( - arr_func, ignore_failures=numeric_only is lib.no_default - ) + res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=False) res_mgr.set_axis(1, mgr.axes[1]) if len(res_mgr) < orig_mgr_len: @@ -1785,84 +1783,64 @@ def nunique(self, dropna: bool = True) -> DataFrame: @doc( _shared_docs["idxmax"], - numeric_only_default="True for axis=0, False for axis=1", + numeric_only_default="False", ) def idxmax( self, axis: Axis = 0, skipna: bool = True, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, ) -> DataFrame: axis = DataFrame._get_axis_number(axis) - if numeric_only is lib.no_default: - # Cannot use self._resolve_numeric_only; we must pass None to - # DataFrame.idxmax for backwards compatibility - numeric_only_arg = None if axis == 0 else False - else: - numeric_only_arg = numeric_only def func(df): - with warnings.catch_warnings(): - # Suppress numeric_only warnings here, will warn below - warnings.filterwarnings("ignore", ".*numeric_only in DataFrame.argmax") - res = df._reduce( - nanops.nanargmax, - "argmax", - axis=axis, - skipna=skipna, - numeric_only=numeric_only_arg, - ) - indices = res._values - index = df._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] - return df._constructor_sliced(result, index=res.index) + res = df._reduce( + nanops.nanargmax, + "argmax", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmax" result = self._python_apply_general( func, self._obj_with_exclusions, not_indexed_same=True ) - self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only) return result @doc( _shared_docs["idxmin"], - numeric_only_default="True for axis=0, False for axis=1", + numeric_only_default="False", ) def idxmin( self, axis: Axis = 0, skipna: bool = True, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, ) -> DataFrame: axis = DataFrame._get_axis_number(axis) - if numeric_only is lib.no_default: - # Cannot use self._resolve_numeric_only; we must pass None to - # DataFrame.idxmin for backwards compatibility - numeric_only_arg = None if axis == 0 else False - else: - numeric_only_arg = numeric_only def func(df): - with warnings.catch_warnings(): - # Suppress numeric_only warnings here, will warn below - warnings.filterwarnings("ignore", ".*numeric_only in DataFrame.argmin") - res = df._reduce( - nanops.nanargmin, - "argmin", - axis=axis, - skipna=skipna, - numeric_only=numeric_only_arg, - ) - indices = res._values - index = df._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] - return df._constructor_sliced(result, index=res.index) + res = df._reduce( + nanops.nanargmin, + "argmin", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmin" result = self._python_apply_general( func, self._obj_with_exclusions, not_indexed_same=True ) - self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only) return result boxplot = boxplot_frame_groupby diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1d8271a845f9a..b5e904a7d3882 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -324,8 +324,12 @@ class providing the base-class of operations. Parameters ---------- numeric_only : bool, default {no} - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + + numeric_only no longer accepts ``None``. + min_count : int, default {mc} The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. @@ -1654,7 +1658,6 @@ def _agg_general( alt=npfunc, numeric_only=numeric_only, min_count=min_count, - ignore_failures=numeric_only is lib.no_default, ) return result.__finalize__(self.obj, method="groupby") @@ -1705,7 +1708,6 @@ def _cython_agg_general( alt: Callable, numeric_only: bool | lib.NoDefault, min_count: int = -1, - ignore_failures: bool = True, **kwargs, ): # Note: we never get here with how="ohlc" for DataFrameGroupBy; @@ -1749,7 +1751,7 @@ def array_func(values: ArrayLike) -> ArrayLike: # TypeError -> we may have an exception in trying to aggregate # continue and exclude the block - new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures) + new_mgr = data.grouped_reduce(array_func, ignore_failures=False) if not is_ser and len(new_mgr) < orig_len: warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) @@ -2054,8 +2056,11 @@ def mean( Parameters ---------- numeric_only : bool, default True - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + + numeric_only no longer accepts ``None``. engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. @@ -2123,7 +2128,6 @@ def mean( "mean", alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool), numeric_only=numeric_only, - ignore_failures=numeric_only is lib.no_default, ) return result.__finalize__(self.obj, method="groupby") @@ -2139,8 +2143,11 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): Parameters ---------- numeric_only : bool, default True - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + + numeric_only no longer accepts ``None``. Returns ------- @@ -2153,7 +2160,6 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): "median", alt=lambda x: Series(x).median(numeric_only=numeric_only_bool), numeric_only=numeric_only, - ignore_failures=numeric_only is lib.no_default, ) return result.__finalize__(self.obj, method="groupby") @@ -2287,7 +2293,6 @@ def var( "var", alt=lambda x: Series(x).var(ddof=ddof), numeric_only=numeric_only, - ignore_failures=numeric_only is lib.no_default, ddof=ddof, ) @@ -3286,8 +3291,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: is_ser = obj.ndim == 1 mgr = self._get_data_to_aggregate() data = mgr.get_numeric_data() if numeric_only_bool else mgr - ignore_failures = numeric_only_bool - res_mgr = data.grouped_reduce(blk_func, ignore_failures=ignore_failures) + res_mgr = data.grouped_reduce(blk_func, ignore_failures=False) if ( numeric_only is lib.no_default @@ -3765,9 +3769,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: if numeric_only_bool: mgr = mgr.get_numeric_data() - res_mgr = mgr.grouped_reduce( - blk_func, ignore_failures=numeric_only is lib.no_default - ) + res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=False) if not is_ser and len(res_mgr.items) != orig_mgr_len: howstr = how.replace("group_", "") diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 0a3845617b32d..b8d2350cf6267 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -101,8 +101,9 @@ def test_cython_agg_nothing_to_agg(): frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) - with tm.assert_produces_warning(FutureWarning): - result = frame[["b"]].groupby(frame["a"]).mean() + with pytest.raises(TypeError, match="Could not convert"): + frame[["b"]].groupby(frame["a"]).mean() + result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates()) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index ca794d4ae5a3e..6fa5d210b8d15 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -103,9 +103,7 @@ def test_basic(): # TODO: split this test gb = df.groupby("A", observed=False) exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.sum() + result = gb.sum() tm.assert_frame_equal(result, expected) # GH 8623 @@ -857,12 +855,14 @@ def test_preserve_categorical_dtype(): } ) for col in ["C1", "C2"]: - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result1 = df.groupby(by=col, as_index=False, observed=False).mean() - result2 = ( - df.groupby(by=col, as_index=True, observed=False).mean().reset_index() - ) + result1 = df.groupby(by=col, as_index=False, observed=False).mean( + numeric_only=True + ) + result2 = ( + df.groupby(by=col, as_index=True, observed=False) + .mean(numeric_only=True) + .reset_index() + ) expected = exp_full.reindex(columns=result1.columns) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) @@ -1856,10 +1856,7 @@ def test_category_order_reducer( df = df.set_index(keys) args = get_groupby_method_args(reduction_func, df) gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed) - msg = "is deprecated and will be removed in a future version" - warn = FutureWarning if reduction_func == "mad" else None - with tm.assert_produces_warning(warn, match=msg): - op_result = getattr(gb, reduction_func)(*args) + op_result = getattr(gb, reduction_func)(*args) if as_index: result = op_result.index.get_level_values("a").categories else: