From 640f15dc684f58e85d4b5f8fa0e8cbc9c4b371fe Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 10 Apr 2021 23:15:42 +0200 Subject: [PATCH 1/5] Deprecate level keyword for dataframe and series aggregations --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/frame.py | 7 +++ pandas/core/generic.py | 35 +++++++++++++++ pandas/core/series.py | 12 +++++- pandas/tests/frame/methods/test_count.py | 36 ++++++++++------ pandas/tests/frame/test_reductions.py | 43 ++++++++++++++++--- pandas/tests/frame/test_subclass.py | 3 +- pandas/tests/generic/test_finalize.py | 1 - pandas/tests/groupby/test_allowlist.py | 6 ++- pandas/tests/groupby/test_groupby.py | 3 +- pandas/tests/reductions/test_reductions.py | 27 ++++++++---- .../tests/reductions/test_stat_reductions.py | 3 +- pandas/tests/series/methods/test_count.py | 30 ++++++++----- pandas/tests/series/test_reductions.py | 3 +- pandas/tests/test_multilevel.py | 24 +++++++---- 15 files changed, 181 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c9267a756bef3..91cf6ea6ba2bc 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -563,6 +563,7 @@ Deprecations - Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`) - Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`) - Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`) +- Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use groupby instead (:issue:`39983`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 64ec0801b5d8a..46713f1b5d2ad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9479,6 +9479,13 @@ def count( """ axis = self._get_axis_number(axis) if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead.", + FutureWarning, + stacklevel=2, + ) return self._count_level(level, axis=axis, numeric_only=numeric_only) if numeric_only: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4e7c311f39cdb..c2f897fbbaf18 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10260,6 +10260,13 @@ def _logical_func( ): nv.validate_logical_func((), kwargs, fname=name) if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead.", + FutureWarning, + stacklevel=4, + ) if bool_only is not None: raise NotImplementedError( "Option bool_only is not implemented with option level." @@ -10351,6 +10358,13 @@ def _stat_function_ddof( if axis is None: axis = self._stat_axis_number if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead.", + FutureWarning, + stacklevel=4, + ) return self._agg_by_level( name, axis=axis, level=level, skipna=skipna, ddof=ddof ) @@ -10399,6 +10413,13 @@ def _stat_function( if axis is None: axis = self._stat_axis_number if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. 
Use groupby " + "instead.", + FutureWarning, + stacklevel=4, + ) return self._agg_by_level( name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only ) @@ -10461,6 +10482,13 @@ def _min_count_stat_function( if axis is None: axis = self._stat_axis_number if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead.", + FutureWarning, + stacklevel=4, + ) return self._agg_by_level( name, axis=axis, @@ -10538,6 +10566,13 @@ def mad(self, axis=None, skipna=None, level=None): if axis is None: axis = self._stat_axis_number if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead.", + FutureWarning, + stacklevel=3, + ) return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) data = self._get_numeric_data() diff --git a/pandas/core/series.py b/pandas/core/series.py index 968ab27d6d58c..9a8889246847e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1894,8 +1894,16 @@ def count(self, level=None): """ if level is None: return notna(self._values).sum() - elif not isinstance(self.index, MultiIndex): - raise ValueError("Series.count level is only valid with a MultiIndex") + else: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. 
Use groupby " + "instead.", + FutureWarning, + stacklevel=2, + ) + if not isinstance(self.index, MultiIndex): + raise ValueError("Series.count level is only valid with a MultiIndex") index = self.index assert isinstance(index, MultiIndex) # for mypy diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index 4533e46a7aabd..f78720d4190da 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -16,17 +16,22 @@ def test_count_multiindex(self, multiindex_dataframe_random_data): frame = frame.copy() frame.index.names = ["a", "b"] - result = frame.count(level="b") - expected = frame.count(level=1) + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="b") + with tm.assert_produces_warning(FutureWarning): + expected = frame.count(level=1) tm.assert_frame_equal(result, expected, check_names=False) - result = frame.count(level="a") - expected = frame.count(level=0) + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="a") + with tm.assert_produces_warning(FutureWarning): + expected = frame.count(level=0) tm.assert_frame_equal(result, expected, check_names=False) msg = "Level x not found" with pytest.raises(KeyError, match=msg): - frame.count(level="x") + with tm.assert_produces_warning(FutureWarning): + frame.count(level="x") def test_count(self): # corner case @@ -64,12 +69,14 @@ def test_count_level_corner(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data ser = frame["A"][:0] - result = ser.count(level=0) + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=0) expected = Series(0, index=ser.index.levels[0], name="A") tm.assert_series_equal(result, expected) df = frame[:0] - result = df.count(level=0) + with tm.assert_produces_warning(FutureWarning): + result = df.count(level=0) expected = ( DataFrame( index=ser.index.levels[0].set_names(["first"]), columns=df.columns @@ -90,7 
+97,8 @@ def test_count_index_with_nan(self): ) # count on row labels - res = df.set_index(["Person", "Single"]).count(level="Person") + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).count(level="Person") expected = DataFrame( index=Index(["John", "Myla"], name="Person"), columns=Index(["Age"]), @@ -99,7 +107,8 @@ def test_count_index_with_nan(self): tm.assert_frame_equal(res, expected) # count on column labels - res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) expected = DataFrame( columns=Index(["John", "Myla"], name="Person"), index=Index(["Age"]), @@ -118,7 +127,8 @@ def test_count_level( def _check_counts(frame, axis=0): index = frame._get_axis(axis) for i in range(index.nlevels): - result = frame.count(axis=axis, level=i) + with tm.assert_produces_warning(FutureWarning): + result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count() expected = expected.reindex_like(result).astype("i8") tm.assert_frame_equal(result, expected) @@ -136,8 +146,10 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() with pytest.raises(TypeError, match="hierarchical"): - df.count(level=0) + with tm.assert_produces_warning(FutureWarning): + df.count(level=0) frame["D"] = "foo" - result = frame.count(level=0, numeric_only=True) + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level=0, numeric_only=True) tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 72f0787d69b22..ecf2bd469d03e 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -580,7 +580,8 @@ def test_kurt(self): df = DataFrame(np.random.randn(6, 3), index=index) 
kurt = df.kurt() - kurt2 = df.kurt(level=0).xs("bar") + with tm.assert_produces_warning(FutureWarning): + kurt2 = df.kurt(level=0).xs("bar") tm.assert_series_equal(kurt, kurt2, check_names=False) assert kurt.name is None assert kurt2.name == "bar" @@ -1240,7 +1241,8 @@ def test_any_all_level_axis_none_raises(self, method): ) xpr = "Must specify 'axis' when aggregating by level." with pytest.raises(ValueError, match=xpr): - getattr(df, method)(axis=None, level="out") + with tm.assert_produces_warning(FutureWarning): + getattr(df, method)(axis=None, level="out") # --------------------------------------------------------------------- # Unsorted @@ -1365,11 +1367,13 @@ def test_frame_any_all_with_level(self): ], ) - result = df.any(level=0) + with tm.assert_produces_warning(FutureWarning): + result = df.any(level=0) ex = DataFrame({"data": [False, True]}, index=["one", "two"]) tm.assert_frame_equal(result, ex) - result = df.all(level=0) + with tm.assert_produces_warning(FutureWarning): + result = df.all(level=0) ex = DataFrame({"data": [False, False]}, index=["one", "two"]) tm.assert_frame_equal(result, ex) @@ -1390,6 +1394,34 @@ def test_frame_any_with_timedelta(self): expected = Series(data=[False, True]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "func", + [ + "any", + "all", + "count", + "sum", + "prod", + "max", + "min", + "mean", + "median", + "skew", + "kurt", + "sem", + "var", + "std", + "mad", + ], + ) + def test_reductions_deprecation_level_argument(self, frame_or_series, func): + # GH#39983 + obj = frame_or_series( + [1, 2, 3], index=MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) + ) + with tm.assert_produces_warning(FutureWarning): + getattr(obj, func)(level=0) + class TestNuisanceColumns: @pytest.mark.parametrize("method", ["any", "all"]) @@ -1556,7 +1588,8 @@ def test_groupy_regular_arithmetic_equivalent(meth): ) expected = df.copy() - result = getattr(df, meth)(level=0) + with tm.assert_produces_warning(FutureWarning): + 
result = getattr(df, meth)(level=0) tm.assert_frame_equal(result, expected) result = getattr(df.groupby(level=0), meth)(numeric_only=False) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 6be91c9e75188..3214290465832 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -599,7 +599,8 @@ def test_subclassed_count(self): list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] ), ) - result = df.count(level=1) + with tm.assert_produces_warning(FutureWarning): + result = df.count(level=1) assert isinstance(result, tm.SubclassedDataFrame) df = tm.SubclassedDataFrame() diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 15c51e5f3e6e4..dbe2df5238c7e 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -46,7 +46,6 @@ pytest.param( (pd.Series, ([0],), operator.methodcaller("to_frame")), marks=pytest.mark.xfail ), - (pd.Series, (0, mi), operator.methodcaller("count", level="A")), (pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")), (pd.Series, ([0, 0],), operator.methodcaller("duplicated")), (pd.Series, ([0, 0],), operator.methodcaller("round")), diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index cc036bb484ff9..7f88eceb2bd82 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -208,13 +208,15 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): if op in AGG_FUNCTIONS_WITH_SKIPNA: grouped = frame.groupby(level=level, axis=axis, sort=sort) result = getattr(grouped, op)(skipna=skipna) - expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + with tm.assert_produces_warning(FutureWarning): + expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) if sort: expected = expected.sort_index(axis=axis, level=level) tm.assert_frame_equal(result, 
expected) else: grouped = frame.groupby(level=level, axis=axis, sort=sort) - result = getattr(grouped, op)() + with tm.assert_produces_warning(FutureWarning): + result = getattr(grouped, op)() expected = getattr(frame, op)(level=level, axis=axis) if sort: expected = expected.sort_index(axis=axis, level=level) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6c51e32fa9a78..2dab22910a0c9 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -980,7 +980,8 @@ def test_groupby_complex(): result = a.groupby(level=0).sum() tm.assert_series_equal(result, expected) - result = a.sum(level=0) + with tm.assert_produces_warning(FutureWarning): + result = a.sum(level=0) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 2d7dddbc5ea42..6e0422a527639 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -677,17 +677,20 @@ def test_empty_multi(self, method, unit): index=pd.MultiIndex.from_product([("a", "b"), (0, 1)]), ) # 1 / 0 by default - result = getattr(s, method)(level=0) + with tm.assert_produces_warning(FutureWarning): + result = getattr(s, method)(level=0) expected = Series([1, unit], index=["a", "b"]) tm.assert_series_equal(result, expected) # min_count=0 - result = getattr(s, method)(level=0, min_count=0) + with tm.assert_produces_warning(FutureWarning): + result = getattr(s, method)(level=0, min_count=0) expected = Series([1, unit], index=["a", "b"]) tm.assert_series_equal(result, expected) # min_count=1 - result = getattr(s, method)(level=0, min_count=1) + with tm.assert_produces_warning(FutureWarning): + result = getattr(s, method)(level=0, min_count=1) expected = Series([1, np.nan], index=["a", "b"]) tm.assert_series_equal(result, expected) @@ -915,14 +918,18 @@ def test_all_any_params(self): # Check level. 
s = Series([False, False, True, True, False, True], index=[0, 0, 1, 1, 2, 2]) - tm.assert_series_equal(s.all(level=0), Series([False, True, False])) - tm.assert_series_equal(s.any(level=0), Series([False, True, True])) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.all(level=0), Series([False, True, False])) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.any(level=0), Series([False, True, True])) msg = "Option bool_only is not implemented with option level" with pytest.raises(NotImplementedError, match=msg): - s.any(bool_only=True, level=0) + with tm.assert_produces_warning(FutureWarning): + s.any(bool_only=True, level=0) with pytest.raises(NotImplementedError, match=msg): - s.all(bool_only=True, level=0) + with tm.assert_produces_warning(FutureWarning): + s.all(bool_only=True, level=0) # bool_only is not implemented alone. # TODO GH38810 change this error message to: @@ -955,8 +962,10 @@ def test_all_any_boolean(self): index=[0, 0, 1, 1, 2, 2], dtype="boolean", ) - tm.assert_series_equal(s.all(level=0), Series([False, True, False])) - tm.assert_series_equal(s.any(level=0), Series([False, True, True])) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.all(level=0), Series([False, True, False])) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.any(level=0), Series([False, True, True])) def test_any_axis1_bool_only(self): # GH#32432 diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 88f69d00447b1..4eca9af78422d 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -263,7 +263,8 @@ def test_kurt(self): codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], ) s = Series(np.random.randn(6), index=index) - tm.assert_almost_equal(s.kurt(), s.kurt(level=0)["bar"]) + with tm.assert_produces_warning(FutureWarning): + 
tm.assert_almost_equal(s.kurt(), s.kurt(level=0)["bar"]) # test corner cases, kurt() returns NaN unless there's at least 4 # values diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py index 937bb383dd35c..29fb6aa32bc7c 100644 --- a/pandas/tests/series/methods/test_count.py +++ b/pandas/tests/series/methods/test_count.py @@ -19,13 +19,15 @@ def test_count_level_series(self): ser = Series(np.random.randn(len(index)), index=index) - result = ser.count(level=0) + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=0) expected = ser.groupby(level=0).count() tm.assert_series_equal( result.astype("f8"), expected.reindex(result.index).fillna(0) ) - result = ser.count(level=1) + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=1) expected = ser.groupby(level=1).count() tm.assert_series_equal( result.astype("f8"), expected.reindex(result.index).fillna(0) @@ -37,24 +39,30 @@ def test_count_multiindex(self, series_with_multilevel_index): series = ser.copy() series.index.names = ["a", "b"] - result = series.count(level="b") - expect = ser.count(level=1).rename_axis("b") + with tm.assert_produces_warning(FutureWarning): + result = series.count(level="b") + with tm.assert_produces_warning(FutureWarning): + expect = ser.count(level=1).rename_axis("b") tm.assert_series_equal(result, expect) - result = series.count(level="a") - expect = ser.count(level=0).rename_axis("a") + with tm.assert_produces_warning(FutureWarning): + result = series.count(level="a") + with tm.assert_produces_warning(FutureWarning): + expect = ser.count(level=0).rename_axis("a") tm.assert_series_equal(result, expect) msg = "Level x not found" with pytest.raises(KeyError, match=msg): - series.count("x") + with tm.assert_produces_warning(FutureWarning): + series.count("x") def test_count_level_without_multiindex(self): ser = Series(range(3)) msg = "Series.count level is only valid with a MultiIndex" with 
pytest.raises(ValueError, match=msg): - ser.count(level=1) + with tm.assert_produces_warning(FutureWarning): + ser.count(level=1) def test_count(self, datetime_series): assert datetime_series.count() == len(datetime_series) @@ -66,12 +74,14 @@ def test_count(self, datetime_series): mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) - left = ts.count(level=1) + with tm.assert_produces_warning(FutureWarning): + left = ts.count(level=1) right = Series([2, 3, 1], index=[1, 2, np.nan]) tm.assert_series_equal(left, right) ts.iloc[[0, 3, 5]] = np.nan - tm.assert_series_equal(ts.count(level=1), right - 1) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(ts.count(level=1), right - 1) # GH#29478 with pd.option_context("use_inf_as_na", True): diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 12671bbf5ba98..ca30e8f1ee6fd 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -63,7 +63,8 @@ def test_prod_numpy16_bug(): def test_sum_with_level(): obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) - result = obj.sum(level=0) + with tm.assert_produces_warning(FutureWarning): + result = obj.sum(level=0) expected = Series([10.0], index=[2]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 8e6a636a8f602..a0e3399bee49f 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -29,7 +29,8 @@ def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): # axis=0 ymd = multiindex_year_month_day_dataframe_random_data - month_sums = ymd.sum(level="month") + with tm.assert_produces_warning(FutureWarning): + month_sums = ymd.sum(level="month") result = month_sums.reindex(ymd.index, level=1) expected = ymd.groupby(level="month").transform(np.sum) @@ -41,7 +42,8 @@ def 
test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): tm.assert_series_equal(result, expected, check_names=False) # axis=1 - month_sums = ymd.T.sum(axis=1, level="month") + with tm.assert_produces_warning(FutureWarning): + month_sums = ymd.T.sum(axis=1, level="month") result = month_sums.reindex(columns=ymd.index, level=1) expected = ymd.groupby(level="month").transform(np.sum).T tm.assert_frame_equal(result, expected) @@ -51,7 +53,8 @@ def test_binops_level(self, multiindex_year_month_day_dataframe_random_data): def _check_op(opname): op = getattr(DataFrame, opname) - month_sums = ymd.sum(level="month") + with tm.assert_produces_warning(FutureWarning): + month_sums = ymd.sum(level="month") result = op(ymd, month_sums, level="month") broadcasted = ymd.groupby(level="month").transform(np.sum) @@ -182,7 +185,8 @@ def test_series_group_min_max( grouped = ser.groupby(level=level, sort=sort) # skipna=True leftside = grouped.agg(lambda x: getattr(x, op)(skipna=skipna)) - rightside = getattr(ser, op)(level=level, skipna=skipna) + with tm.assert_produces_warning(FutureWarning): + rightside = getattr(ser, op)(level=level, skipna=skipna) if sort: rightside = rightside.sort_index(level=level) tm.assert_series_equal(leftside, rightside) @@ -217,7 +221,8 @@ def aggf(x): return getattr(x, op)(skipna=skipna, axis=axis) leftside = grouped.agg(aggf) - rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + with tm.assert_produces_warning(FutureWarning): + rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) if sort: rightside = rightside.sort_index(level=level, axis=axis) frame = frame.sort_index(level=level, axis=axis) @@ -240,11 +245,13 @@ def test_std_var_pass_ddof(self): ddof = 4 alt = lambda x: getattr(x, meth)(ddof=ddof) - result = getattr(df[0], meth)(level=0, ddof=ddof) + with tm.assert_produces_warning(FutureWarning): + result = getattr(df[0], meth)(level=0, ddof=ddof) expected = df[0].groupby(level=0).agg(alt) 
tm.assert_series_equal(result, expected) - result = getattr(df, meth)(level=0, ddof=ddof) + with tm.assert_produces_warning(FutureWarning): + result = getattr(df, meth)(level=0, ddof=ddof) expected = df.groupby(level=0).agg(alt) tm.assert_frame_equal(result, expected) @@ -255,7 +262,8 @@ def test_agg_multiple_levels( if frame_or_series is Series: ymd = ymd["A"] - result = ymd.sum(level=["year", "month"]) + with tm.assert_produces_warning(FutureWarning): + result = ymd.sum(level=["year", "month"]) expected = ymd.groupby(level=["year", "month"]).sum() tm.assert_equal(result, expected) From 1b9e0467c507271d0fbe8c9cb6cd6c0105bdfcf2 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Apr 2021 00:02:10 +0200 Subject: [PATCH 2/5] Move raise warning line --- pandas/tests/groupby/test_allowlist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 7f88eceb2bd82..8be721c13eea8 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -215,9 +215,9 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): tm.assert_frame_equal(result, expected) else: grouped = frame.groupby(level=level, axis=axis, sort=sort) + result = getattr(grouped, op)() with tm.assert_produces_warning(FutureWarning): - result = getattr(grouped, op)() - expected = getattr(frame, op)(level=level, axis=axis) + expected = getattr(frame, op)(level=level, axis=axis) if sort: expected = expected.sort_index(axis=axis, level=level) tm.assert_frame_equal(result, expected) From e3b1abf5e3fb76d4088805923d1abed5e3bbe216 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Apr 2021 02:48:51 +0200 Subject: [PATCH 3/5] Add warnings to docs --- doc/source/user_guide/advanced.rst | 1 + doc/source/user_guide/categorical.rst | 1 + doc/source/user_guide/groupby.rst | 1 + doc/source/whatsnew/v0.15.2.rst | 1 + 4 files changed, 4 insertions(+) diff --git 
a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 6377aeb0e7c74..7e46fa590441b 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -492,6 +492,7 @@ Using the parameter ``level`` in the :meth:`~DataFrame.reindex` and values across a level. For instance: .. ipython:: python + :okwarning: midx = pd.MultiIndex( levels=[["zero", "one"], ["x", "y"]], codes=[[1, 1, 0, 0], [1, 0, 1, 0]] diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 5c43de05fb5b9..77d3ff35670c6 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -638,6 +638,7 @@ even if some categories are not present in the data: Groupby will also show "unused" categories: .. ipython:: python + :okwarning: cats = pd.Categorical( ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"] diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index d6081155b58db..7baf80689b918 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -325,6 +325,7 @@ directly. Additionally, the resulting index will be named according to the chosen level: .. ipython:: python + :okwarning: s.sum(level="second") diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst index b5b25796fea73..2dae76dd6b461 100644 --- a/doc/source/whatsnew/v0.15.2.rst +++ b/doc/source/whatsnew/v0.15.2.rst @@ -154,6 +154,7 @@ Other enhancements: - ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`): .. 
ipython:: python + :okwarning: s = pd.Series([False, True, False], index=[0, 0, 1]) s.any(level=0) From 9f64d6f900721556c4eeebeff9613d433f556612 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Apr 2021 03:07:19 +0200 Subject: [PATCH 4/5] Move ok warning --- doc/source/user_guide/categorical.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 77d3ff35670c6..c8360bff52f60 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -625,6 +625,7 @@ even if some categories are not present in the data: ``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories. .. ipython:: python + :okwarning: columns = pd.Categorical( ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True @@ -638,7 +639,6 @@ even if some categories are not present in the data: Groupby will also show "unused" categories: .. ipython:: python - :okwarning: cats = pd.Categorical( ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"] From dbe0dfd005ecfdc0a1f7deb1b34c16ff34784822 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 12 Apr 2021 22:57:55 +0200 Subject: [PATCH 5/5] Address review --- doc/source/user_guide/advanced.rst | 3 +- doc/source/user_guide/categorical.rst | 3 +- doc/source/user_guide/groupby.rst | 9 -- pandas/core/frame.py | 2 +- pandas/core/generic.py | 10 +- pandas/core/series.py | 2 +- pandas/tests/frame/methods/test_count.py | 116 ----------------- .../test_count_with_level_deprecated.py | 123 ++++++++++++++++++ 8 files changed, 132 insertions(+), 136 deletions(-) create mode 100644 pandas/tests/frame/methods/test_count_with_level_deprecated.py diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 7e46fa590441b..bd9463c50ab1f 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -492,14 +492,13 @@ Using the parameter
``level`` in the :meth:`~DataFrame.reindex` and values across a level. For instance: .. ipython:: python - :okwarning: midx = pd.MultiIndex( levels=[["zero", "one"], ["x", "y"]], codes=[[1, 1, 0, 0], [1, 0, 1, 0]] ) df = pd.DataFrame(np.random.randn(4, 2), index=midx) df - df2 = df.mean(level=0) + df2 = df.groupby(level=0).mean() df2 df2.reindex(df.index, level=0) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index c8360bff52f60..fba41f73ba819 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -625,7 +625,6 @@ even if some categories are not present in the data: ``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories. .. ipython:: python - :okwarning: columns = pd.Categorical( ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True @@ -634,7 +633,7 @@ even if some categories are not present in the data: data=[[1, 2, 3], [4, 5, 6]], columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]), ) - df.sum(axis=1, level=1) + df.groupby(axis=1, level=1).sum() Groupby will also show "unused" categories: diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 7baf80689b918..afb2e72cbff07 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -320,15 +320,6 @@ number: s.groupby(level="second").sum() -The aggregation functions such as ``sum`` will take the level parameter -directly. Additionally, the resulting index will be named according to the -chosen level: - -.. ipython:: python - :okwarning: - - s.sum(level="second") - Grouping with multiple levels is supported. .. 
ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 46713f1b5d2ad..045776c3f5c50 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9482,7 +9482,7 @@ def count( warnings.warn( "Using the level keyword in DataFrame and Series aggregations is " "deprecated and will be removed in a future version. Use groupby " - "instead.", + "instead. df.count(level=1) should use df.groupby(level=1).count().", FutureWarning, stacklevel=2, ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c2f897fbbaf18..4b08792596676 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10263,7 +10263,7 @@ def _logical_func( warnings.warn( "Using the level keyword in DataFrame and Series aggregations is " "deprecated and will be removed in a future version. Use groupby " - "instead.", + "instead. df.any(level=1) should use df.groupby(level=1).any()", FutureWarning, stacklevel=4, ) @@ -10361,7 +10361,7 @@ def _stat_function_ddof( warnings.warn( "Using the level keyword in DataFrame and Series aggregations is " "deprecated and will be removed in a future version. Use groupby " - "instead.", + "instead. df.var(level=1) should use df.groupby(level=1).var().", FutureWarning, stacklevel=4, ) @@ -10416,7 +10416,7 @@ def _stat_function( warnings.warn( "Using the level keyword in DataFrame and Series aggregations is " "deprecated and will be removed in a future version. Use groupby " - "instead.", + "instead. df.median(level=1) should use df.groupby(level=1).median().", FutureWarning, stacklevel=4, ) @@ -10485,7 +10485,7 @@ def _min_count_stat_function( warnings.warn( "Using the level keyword in DataFrame and Series aggregations is " "deprecated and will be removed in a future version. Use groupby " - "instead.", + "instead. 
df.sum(level=1) should use df.groupby(level=1).sum().", FutureWarning, stacklevel=4, ) @@ -10569,7 +10569,7 @@ def mad(self, axis=None, skipna=None, level=None): warnings.warn( "Using the level keyword in DataFrame and Series aggregations is " "deprecated and will be removed in a future version. Use groupby " - "instead.", + "instead. df.mad(level=1) should use df.groupby(level=1).mad()", FutureWarning, stacklevel=3, ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9a8889246847e..7a04c6d40dd17 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1898,7 +1898,7 @@ def count(self, level=None): warnings.warn( "Using the level keyword in DataFrame and Series aggregations is " "deprecated and will be removed in a future version. Use groupby " - "instead.", + "instead. ser.count(level=1) should use ser.groupby(level=1).count().", FutureWarning, stacklevel=2, ) diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index f78720d4190da..43eb96f7f32d9 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -1,38 +1,11 @@ -import numpy as np -import pytest - from pandas import ( DataFrame, - Index, Series, ) import pandas._testing as tm class TestDataFrameCount: - def test_count_multiindex(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - frame = frame.copy() - frame.index.names = ["a", "b"] - - with tm.assert_produces_warning(FutureWarning): - result = frame.count(level="b") - with tm.assert_produces_warning(FutureWarning): - expected = frame.count(level=1) - tm.assert_frame_equal(result, expected, check_names=False) - - with tm.assert_produces_warning(FutureWarning): - result = frame.count(level="a") - with tm.assert_produces_warning(FutureWarning): - expected = frame.count(level=0) - tm.assert_frame_equal(result, expected, check_names=False) - - msg = "Level x not found" - with pytest.raises(KeyError, match=msg): - 
with tm.assert_produces_warning(FutureWarning): - frame.count(level="x") - def test_count(self): # corner case frame = DataFrame() @@ -64,92 +37,3 @@ def test_count_objects(self, float_string_frame): tm.assert_series_equal(dm.count(), df.count()) tm.assert_series_equal(dm.count(1), df.count(1)) - - def test_count_level_corner(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - ser = frame["A"][:0] - with tm.assert_produces_warning(FutureWarning): - result = ser.count(level=0) - expected = Series(0, index=ser.index.levels[0], name="A") - tm.assert_series_equal(result, expected) - - df = frame[:0] - with tm.assert_produces_warning(FutureWarning): - result = df.count(level=0) - expected = ( - DataFrame( - index=ser.index.levels[0].set_names(["first"]), columns=df.columns - ) - .fillna(0) - .astype(np.int64) - ) - tm.assert_frame_equal(result, expected) - - def test_count_index_with_nan(self): - # https://github.com/pandas-dev/pandas/issues/21824 - df = DataFrame( - { - "Person": ["John", "Myla", None, "John", "Myla"], - "Age": [24.0, 5, 21.0, 33, 26], - "Single": [False, True, True, True, False], - } - ) - - # count on row labels - with tm.assert_produces_warning(FutureWarning): - res = df.set_index(["Person", "Single"]).count(level="Person") - expected = DataFrame( - index=Index(["John", "Myla"], name="Person"), - columns=Index(["Age"]), - data=[2, 2], - ) - tm.assert_frame_equal(res, expected) - - # count on column labels - with tm.assert_produces_warning(FutureWarning): - res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) - expected = DataFrame( - columns=Index(["John", "Myla"], name="Person"), - index=Index(["Age"]), - data=[[2, 2]], - ) - tm.assert_frame_equal(res, expected) - - def test_count_level( - self, - multiindex_year_month_day_dataframe_random_data, - multiindex_dataframe_random_data, - ): - ymd = multiindex_year_month_day_dataframe_random_data - frame = multiindex_dataframe_random_data - - def 
_check_counts(frame, axis=0): - index = frame._get_axis(axis) - for i in range(index.nlevels): - with tm.assert_produces_warning(FutureWarning): - result = frame.count(axis=axis, level=i) - expected = frame.groupby(axis=axis, level=i).count() - expected = expected.reindex_like(result).astype("i8") - tm.assert_frame_equal(result, expected) - - frame.iloc[1, [1, 2]] = np.nan - frame.iloc[7, [0, 1]] = np.nan - ymd.iloc[1, [1, 2]] = np.nan - ymd.iloc[7, [0, 1]] = np.nan - - _check_counts(frame) - _check_counts(ymd) - _check_counts(frame.T, axis=1) - _check_counts(ymd.T, axis=1) - - # can't call with level on regular DataFrame - df = tm.makeTimeDataFrame() - with pytest.raises(TypeError, match="hierarchical"): - with tm.assert_produces_warning(FutureWarning): - df.count(level=0) - - frame["D"] = "foo" - with tm.assert_produces_warning(FutureWarning): - result = frame.count(level=0, numeric_only=True) - tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) diff --git a/pandas/tests/frame/methods/test_count_with_level_deprecated.py b/pandas/tests/frame/methods/test_count_with_level_deprecated.py new file mode 100644 index 0000000000000..f6fbc281c7a8e --- /dev/null +++ b/pandas/tests/frame/methods/test_count_with_level_deprecated.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCount: + def test_count_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + frame = frame.copy() + frame.index.names = ["a", "b"] + + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="b") + with tm.assert_produces_warning(FutureWarning): + expected = frame.count(level=1) + tm.assert_frame_equal(result, expected, check_names=False) + + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="a") + with tm.assert_produces_warning(FutureWarning): + expected = 
frame.count(level=0) + tm.assert_frame_equal(result, expected, check_names=False) + + msg = "Level x not found" + with pytest.raises(KeyError, match=msg): + with tm.assert_produces_warning(FutureWarning): + frame.count(level="x") + + def test_count_level_corner(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + ser = frame["A"][:0] + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=0) + expected = Series(0, index=ser.index.levels[0], name="A") + tm.assert_series_equal(result, expected) + + df = frame[:0] + with tm.assert_produces_warning(FutureWarning): + result = df.count(level=0) + expected = ( + DataFrame( + index=ser.index.levels[0].set_names(["first"]), columns=df.columns + ) + .fillna(0) + .astype(np.int64) + ) + tm.assert_frame_equal(result, expected) + + def test_count_index_with_nan(self): + # https://github.com/pandas-dev/pandas/issues/21824 + df = DataFrame( + { + "Person": ["John", "Myla", None, "John", "Myla"], + "Age": [24.0, 5, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + + # count on row labels + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).count(level="Person") + expected = DataFrame( + index=Index(["John", "Myla"], name="Person"), + columns=Index(["Age"]), + data=[2, 2], + ) + tm.assert_frame_equal(res, expected) + + # count on column labels + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) + expected = DataFrame( + columns=Index(["John", "Myla"], name="Person"), + index=Index(["Age"]), + data=[[2, 2]], + ) + tm.assert_frame_equal(res, expected) + + def test_count_level( + self, + multiindex_year_month_day_dataframe_random_data, + multiindex_dataframe_random_data, + ): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + def _check_counts(frame, axis=0): + index = frame._get_axis(axis) + for 
i in range(index.nlevels): + with tm.assert_produces_warning(FutureWarning): + result = frame.count(axis=axis, level=i) + expected = frame.groupby(axis=axis, level=i).count() + expected = expected.reindex_like(result).astype("i8") + tm.assert_frame_equal(result, expected) + + frame.iloc[1, [1, 2]] = np.nan + frame.iloc[7, [0, 1]] = np.nan + ymd.iloc[1, [1, 2]] = np.nan + ymd.iloc[7, [0, 1]] = np.nan + + _check_counts(frame) + _check_counts(ymd) + _check_counts(frame.T, axis=1) + _check_counts(ymd.T, axis=1) + + # can't call with level on regular DataFrame + df = tm.makeTimeDataFrame() + with pytest.raises(TypeError, match="hierarchical"): + with tm.assert_produces_warning(FutureWarning): + df.count(level=0) + + frame["D"] = "foo" + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level=0, numeric_only=True) + tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp"))