diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
index 6fc53fe09d791..7c98c99fecd5b 100644
--- a/doc/source/user_guide/10min.rst
+++ b/doc/source/user_guide/10min.rst
@@ -702,11 +702,11 @@ Sorting is per order in the categories, not lexical order:
 
     df.sort_values(by="grade")
 
-Grouping by a categorical column also shows empty categories:
+Grouping by a categorical column with ``observed=False`` also shows empty categories:
 
 .. ipython:: python
 
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 
 Plotting
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index ef08d709822d2..3ce54cfebf65a 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -800,8 +800,8 @@ Groupby operations on the index will preserve the index nature as well.
 
 .. ipython:: python
 
-    df2.groupby(level=0).sum()
-    df2.groupby(level=0).sum().index
+    df2.groupby(level=0, observed=True).sum()
+    df2.groupby(level=0, observed=True).sum().index
 
 Reindexing operations will return a resulting index based on the type of the passed
 indexer. Passing a list will return a plain-old ``Index``; indexing with
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 0b2224fe9bb32..e486235f044f5 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -607,7 +607,7 @@ even if some categories are not present in the data:
     s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"]))
     s.value_counts()
 
-``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories.
+``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories when ``observed=False``.
 
 .. ipython:: python
 
@@ -618,9 +618,9 @@ even if some categories are not present in the data:
         data=[[1, 2, 3], [4, 5, 6]],
         columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
     ).T
-    df.groupby(level=1).sum()
+    df.groupby(level=1, observed=False).sum()
 
-Groupby will also show "unused" categories:
+Groupby will also show "unused" categories when ``observed=False``:
 
 .. ipython:: python
 
@@ -628,7 +628,7 @@ Groupby will also show "unused" categories:
         ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"]
     )
     df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]})
-    df.groupby("cats").mean()
+    df.groupby("cats", observed=False).mean()
 
     cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df2 = pd.DataFrame(
@@ -638,7 +638,7 @@ Groupby will also show "unused" categories:
             "values": [1, 2, 3, 4],
         }
     )
-    df2.groupby(["cats", "B"]).mean()
+    df2.groupby(["cats", "B"], observed=False).mean()
 
 Pivot tables:
 
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index 31c4bd1d7c87c..56e62ba20e030 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -1401,7 +1401,7 @@ can be used as group keys. If so, the order of the levels will be preserved:
 
     factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0])
 
-    data.groupby(factor).mean()
+    data.groupby(factor, observed=False).mean()
 
 .. _groupby.specify:
 
diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst
index f52253687ecfd..67e91751e9527 100644
--- a/doc/source/whatsnew/v0.15.0.rst
+++ b/doc/source/whatsnew/v0.15.0.rst
@@ -85,7 +85,7 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
                            "medium", "good", "very good"])
     df["grade"]
     df.sort_values("grade")
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 - ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an
   alternative, construct a dataframe and use ``df.groupby().agg()``.
diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
index feeb7b5ee30ce..ab17cacd830e5 100644
--- a/doc/source/whatsnew/v0.19.0.rst
+++ b/doc/source/whatsnew/v0.19.0.rst
@@ -1134,7 +1134,7 @@ As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes
 .. ipython:: python
 
     df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat})
-    df_grouped = df.groupby(by=["A", "C"]).first()
+    df_grouped = df.groupby(by=["A", "C"], observed=False).first()
     df_set_idx = df.set_index(["A", "C"])
 
 **Previous behavior**:
diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
index b41a469fe0c1f..34a875f59e808 100644
--- a/doc/source/whatsnew/v0.20.0.rst
+++ b/doc/source/whatsnew/v0.20.0.rst
@@ -289,7 +289,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 
 .. code-block:: ipython
 
-    In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+    In [3]: df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
     ---------------------------------------------------------------------------
     ValueError: items in new_categories are not the same as in old categories
 
@@ -297,7 +297,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 
 .. ipython:: python
 
-    df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+    df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
 
 .. _whatsnew_0200.enhancements.table_schema:
 
diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst
index ec9769c22e76b..c494b4f286662 100644
--- a/doc/source/whatsnew/v0.22.0.rst
+++ b/doc/source/whatsnew/v0.22.0.rst
@@ -109,7 +109,7 @@ instead of ``NaN``.
 
     In [8]: grouper = pd.Categorical(['a', 'a'], categories=['a', 'b'])
 
-    In [9]: pd.Series([1, 2]).groupby(grouper).sum()
+    In [9]: pd.Series([1, 2]).groupby(grouper, observed=False).sum()
     Out[9]:
     a    3.0
     b    NaN
@@ -120,14 +120,14 @@ instead of ``NaN``.
 
 .. ipython:: python
 
     grouper = pd.Categorical(["a", "a"], categories=["a", "b"])
-    pd.Series([1, 2]).groupby(grouper).sum()
+    pd.Series([1, 2]).groupby(grouper, observed=False).sum()
 
 To restore the 0.21 behavior of returning ``NaN`` for unobserved groups, use ``min_count>=1``.
 
 .. ipython:: python
 
-    pd.Series([1, 2]).groupby(grouper).sum(min_count=1)
+    pd.Series([1, 2]).groupby(grouper, observed=False).sum(min_count=1)
 
 Resample
 ^^^^^^^^
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 865fa3c6ac949..6cf0d3848b912 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -99,6 +99,7 @@ Deprecations
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
+- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9dd5ee426e37c..f5ed3e8adf976 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8677,7 +8677,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
         if axis is not lib.no_default:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 96f39bb99e544..4f40728449d8a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -68,6 +68,7 @@ class providing the base-class of operations.
     cache_readonly,
     doc,
 )
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
 from pandas.core.dtypes.common import (
@@ -905,7 +906,7 @@ def __init__(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool | lib.NoDefault = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> None:
         self._selection = selection
@@ -922,7 +923,6 @@ def __init__(
         self.keys = keys
         self.sort = sort
         self.group_keys = group_keys
-        self.observed = observed
         self.dropna = dropna
 
         if grouper is None:
@@ -932,10 +932,23 @@ def __init__(
                 axis=axis,
                 level=level,
                 sort=sort,
-                observed=observed,
+                observed=False if observed is lib.no_default else observed,
                 dropna=self.dropna,
             )
 
+        if observed is lib.no_default:
+            if any(ping._passed_categorical for ping in grouper.groupings):
+                warnings.warn(
+                    "The default of observed=False is deprecated and will be changed "
+                    "to True in a future version of pandas. Pass observed=False to "
+                    "retain current behavior or observed=True to adopt the future "
+                    "default and silence this warning.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+            observed = False
+        self.observed = observed
+
         self.obj = obj
         self.axis = obj._get_axis_number(axis)
         self.grouper = grouper
@@ -2125,6 +2138,8 @@ def _value_counts(
                 result_series.index.droplevel(levels),
                 sort=self.sort,
                 dropna=self.dropna,
+                # GH#43999 - deprecation of observed=False
+                observed=False,
             ).transform("sum")
             result_series /= indexed_group_size
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 87fed03a73daf..8fdc3da908c42 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -723,7 +723,11 @@ def _format_duplicate_message(self) -> DataFrame:
         duplicates = self[self.duplicated(keep="first")].unique()
         assert len(duplicates)
 
-        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
+        out = (
+            Series(np.arange(len(self)))
+            .groupby(self, observed=False)
+            .agg(list)[duplicates]
+        )
         if self._is_multi:
             # test_format_duplicate_labels_message_multi
             # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
diff --git a/pandas/core/series.py b/pandas/core/series.py
index b0958869c67f3..f8723b5ebf9c7 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1999,7 +1999,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> SeriesGroupBy:
         from pandas.core.groupby.generic import SeriesGroupBy
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index f3917f539ae3f..09bebf6a92dca 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -154,6 +154,11 @@
     This only applies if any of the groupers are Categoricals.
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
+
+    .. deprecated:: 2.1.0
+
+        The default value will change to True in a future version of pandas.
+
 dropna : bool, default True
     If True, and if group keys contain NA values, NA values together
     with row/column will be dropped.
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index ca08f39b852ee..b39fc93f4f024 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -254,7 +254,7 @@ def _grouped_plot_by_column(
     return_type=None,
     **kwargs,
 ):
-    grouped = data.groupby(by)
+    grouped = data.groupby(by, observed=False)
     if columns is None:
         if not isinstance(by, (list, tuple)):
             by = [by]
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index ad53cf6629adb..200b04e0524f2 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1250,7 +1250,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
 
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
@@ -1289,7 +1289,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
 
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index a7ba1e8e81848..0699b7c1369f2 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -883,7 +883,7 @@ def test_apply_multi_level_name(category):
     df = DataFrame(
         {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
     ).set_index(["A", "B"])
-    result = df.groupby("B").apply(lambda x: x.sum())
+    result = df.groupby("B", observed=False).apply(lambda x: x.sum())
     tm.assert_frame_equal(result, expected)
     assert df.index.names == ["A", "B"]
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index dbbfab14d5c76..e4dd07f790f47 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -739,7 +739,7 @@ def test_categorical_series(series, data):
     # Group the given series by a series with categorical data type such that group A
    # takes indices 0 and 3 and group B indices 1 and 2, obtaining the values mapped in
     # the given data.
-    groupby = series.groupby(Series(list("ABBA"), dtype="category"))
+    groupby = series.groupby(Series(list("ABBA"), dtype="category"), observed=False)
     result = groupby.aggregate(list)
     expected = Series(data, index=CategoricalIndex(data.keys()))
     tm.assert_series_equal(result, expected)
@@ -1115,7 +1115,7 @@ def test_groupby_multiindex_categorical_datetime():
             "values": np.arange(9),
         }
     )
-    result = df.groupby(["key1", "key2"]).mean()
+    result = df.groupby(["key1", "key2"], observed=False).mean()
 
     idx = MultiIndex.from_product(
         [
@@ -1291,8 +1291,8 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
 
 def test_groupby_categorical_series_dataframe_consistent(df_cat):
     # GH 20416
-    expected = df_cat.groupby(["A", "B"])["C"].mean()
-    result = df_cat.groupby(["A", "B"]).mean()["C"]
+    expected = df_cat.groupby(["A", "B"], observed=False)["C"].mean()
+    result = df_cat.groupby(["A", "B"], observed=False).mean()["C"]
     tm.assert_series_equal(result, expected)
 
 
@@ -1303,11 +1303,11 @@ def test_groupby_categorical_axis_1(code):
     cat = Categorical.from_codes(code, categories=list("abc"))
     msg = "DataFrame.groupby with axis=1 is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        gb = df.groupby(cat, axis=1)
+        gb = df.groupby(cat, axis=1, observed=False)
     result = gb.mean()
     msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        gb2 = df.T.groupby(cat, axis=0)
+        gb2 = df.T.groupby(cat, axis=0, observed=False)
     expected = gb2.mean().T
     tm.assert_frame_equal(result, expected)
 
@@ -1478,7 +1478,7 @@ def test_series_groupby_categorical_aggregation_getitem():
     df = DataFrame(d)
     cat = pd.cut(df["foo"], np.linspace(0, 20, 5))
     df["range"] = cat
-    groups = df.groupby(["range", "baz"], as_index=True, sort=True)
+    groups = df.groupby(["range", "baz"], as_index=True, sort=True, observed=False)
     result = groups["foo"].agg("mean")
     expected = groups.agg("mean")["foo"]
     tm.assert_series_equal(result, expected)
@@ -1539,7 +1539,7 @@ def test_read_only_category_no_sort():
         {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))}
     )
     expected = DataFrame(data={"a": [2.0, 6.0]}, index=CategoricalIndex(cats, name="b"))
-    result = df.groupby("b", sort=False).mean()
+    result = df.groupby("b", sort=False, observed=False).mean()
     tm.assert_frame_equal(result, expected)
 
 
@@ -1583,7 +1583,7 @@ def test_sorted_missing_category_values():
         dtype="category",
     )
 
-    result = df.groupby(["bar", "foo"]).size().unstack()
+    result = df.groupby(["bar", "foo"], observed=False).size().unstack()
 
     tm.assert_frame_equal(result, expected)
 
@@ -1748,7 +1748,7 @@ def test_groupby_categorical_indices_unused_categories():
            "col": range(3),
         }
     )
-    grouped = df.groupby("key", sort=False)
+    grouped = df.groupby("key", sort=False, observed=False)
     result = grouped.indices
     expected = {
         "b": np.array([0, 1], dtype="intp"),
@@ -2013,3 +2013,15 @@ def test_many_categories(as_index, sort, index_kind, ordered):
     expected = DataFrame({"a": Series(index), "b": data})
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]])
+@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]])
+def test_groupby_default_depr(cat_columns, keys):
+    # GH#43999
+    df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]})
+    df[cat_columns] = df[cat_columns].astype("category")
+    msg = "The default of observed=False is deprecated"
+    klass = FutureWarning if set(cat_columns) & set(keys) else None
+    with tm.assert_produces_warning(klass, match=msg):
+        df.groupby(keys)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index ea4bb42fb7ee1..f1dad7a22c789 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1926,7 +1926,7 @@ def test_empty_groupby(
 
     df = df.iloc[:0]
 
-    gb = df.groupby(keys, group_keys=False, dropna=dropna)[columns]
+    gb = df.groupby(keys, group_keys=False, dropna=dropna, observed=False)[columns]
 
     def get_result(**kwargs):
         if method == "attr":
@@ -2638,7 +2638,7 @@ def test_datetime_categorical_multikey_groupby_indices():
            "c": Categorical.from_codes([-1, 0, 1], categories=[0, 1]),
        }
    )
-    result = df.groupby(["a", "b"]).indices
+    result = df.groupby(["a", "b"], observed=False).indices
     expected = {
         ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]),
         ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]),
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 95184bfd770d1..a051b30307a28 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -448,7 +448,7 @@ def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index):
             "a": [0, 1, 2, 3],
         }
     )
-    gb = df.groupby("key", dropna=False, sort=False, as_index=as_index)
+    gb = df.groupby("key", dropna=False, sort=False, as_index=as_index, observed=False)
     if test_series:
         gb = gb["a"]
     result = gb.sum()
@@ -665,7 +665,7 @@ def test_categorical_agg():
     df = pd.DataFrame(
         {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))}
     )
-    gb = df.groupby("x", dropna=False)
+    gb = df.groupby("x", dropna=False, observed=False)
     result = gb.agg(lambda x: x.sum())
     expected = gb.sum()
     tm.assert_frame_equal(result, expected)
@@ -677,7 +677,7 @@ def test_categorical_transform():
     df = pd.DataFrame(
         {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))}
     )
-    gb = df.groupby("x", dropna=False)
+    gb = df.groupby("x", dropna=False, observed=False)
     result = gb.transform(lambda x: x.sum())
     expected = gb.transform("sum")
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py
index 11f62c5d03c49..8602f8bdb1aa1 100644
--- a/pandas/tests/groupby/test_min_max.py
+++ b/pandas/tests/groupby/test_min_max.py
@@ -236,7 +236,7 @@ def test_min_max_nullable_uint64_empty_group():
     # don't raise NotImplementedError from libgroupby
     cat = pd.Categorical([0] * 10, categories=[0, 1])
     df = DataFrame({"A": cat, "B": pd.array(np.arange(10, dtype=np.uint64))})
-    gb = df.groupby("A")
+    gb = df.groupby("A", observed=False)
 
     res = gb.min()
 
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 9f42f6ad72591..8c863dc2982ae 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -21,11 +21,11 @@ def test_rank_unordered_categorical_typeerror():
 
     msg = "Cannot perform rank with non-ordered Categorical"
 
-    gb = ser.groupby(cat)
+    gb = ser.groupby(cat, observed=False)
     with pytest.raises(TypeError, match=msg):
         gb.rank()
 
-    gb2 = df.groupby(cat)
+    gb2 = df.groupby(cat, observed=False)
     with pytest.raises(TypeError, match=msg):
         gb2.rank()
 
diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py
index e29f87992f8a1..7da6bc8a32013 100644
--- a/pandas/tests/groupby/test_size.py
+++ b/pandas/tests/groupby/test_size.py
@@ -83,7 +83,7 @@ def test_size_period_index():
 def test_size_on_categorical(as_index):
     df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"])
     df["A"] = df["A"].astype("category")
-    result = df.groupby(["A", "B"], as_index=as_index).size()
+    result = df.groupby(["A", "B"], as_index=as_index, observed=False).size()
     expected = DataFrame(
         [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"]
     )
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 27ffeb9247556..d6d0b03a65ebb 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1078,7 +1078,7 @@ def test_transform_absent_categories(func):
     x_cats = range(2)
     y = [1]
     df = DataFrame({"x": Categorical(x_vals, x_cats), "y": y})
-    result = getattr(df.y.groupby(df.x), func)()
+    result = getattr(df.y.groupby(df.x, observed=False), func)()
     expected = df.y
     tm.assert_series_equal(result, expected)
 