From 7bb4fad21f75e4b00e57651e58c1b18811066b79 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sun, 26 Feb 2023 22:09:37 -0500
Subject: [PATCH 1/6] DEPR: observed=False default in groupby

---
 doc/source/whatsnew/v2.1.0.rst                |  2 +-
 pandas/core/frame.py                          |  2 +-
 pandas/core/groupby/groupby.py                | 17 +++++++++-
 pandas/core/indexes/base.py                   |  6 +++-
 pandas/core/series.py                         |  2 +-
 .../tests/groupby/aggregate/test_aggregate.py |  4 +--
 pandas/tests/groupby/test_apply.py            |  2 +-
 pandas/tests/groupby/test_categorical.py      | 31 +++++++++++++------
 pandas/tests/groupby/test_groupby.py          |  4 +--
 pandas/tests/groupby/test_groupby_dropna.py   |  6 ++--
 pandas/tests/groupby/test_min_max.py          |  2 +-
 pandas/tests/groupby/test_rank.py             |  4 +--
 pandas/tests/groupby/test_size.py             |  2 +-
 .../tests/groupby/transform/test_transform.py |  2 +-
 14 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 45b5c16415f9d..ef6dc69e40e2d 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -93,7 +93,7 @@ Other API changes
 Deprecations
 ~~~~~~~~~~~~
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
--
+- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.performance:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 49416cc2d53c0..de5e7c371138d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8216,7 +8216,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
         from pandas.core.groupby.generic import DataFrameGroupBy
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 55e14bc11246b..5e6b5cc21f50b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -68,6 +68,7 @@ class providing the base-class of operations.
     cache_readonly,
     doc,
 )
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
 from pandas.core.dtypes.common import (
@@ -905,7 +906,7 @@ def __init__(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool | lib.NoDefault = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> None:
         self._selection = selection
@@ -941,6 +942,18 @@ def __init__(
         self.grouper = grouper
         self.exclusions = frozenset(exclusions) if exclusions else frozenset()
 
+        if observed is lib.no_default:
+            if any(ping._passed_categorical for ping in grouper.groupings):
+                warnings.warn(
+                    "The default of observed=False is deprecated and will be changed "
+                    "to True in a future version of pandas. Pass observed=False to "
+                    "retain current behavior or observed=True to adopt the future "
+                    "default and silence this warning.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+            self.observed = False
+
     def __getattr__(self, attr: str):
         if attr in self._internal_names_set:
             return object.__getattribute__(self, attr)
@@ -2125,6 +2138,8 @@ def _value_counts(
                 result_series.index.droplevel(levels),
                 sort=self.sort,
                 dropna=self.dropna,
+                # GH#43999 - deprecation of observed=False
+                observed=False,
             ).transform("sum")
             result_series /= indexed_group_size
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index acebe8a498f03..3f77ea4fed90a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -712,7 +712,11 @@ def _format_duplicate_message(self) -> DataFrame:
         duplicates = self[self.duplicated(keep="first")].unique()
         assert len(duplicates)
 
-        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
+        out = (
+            Series(np.arange(len(self)))
+            .groupby(self, observed=False)
+            .agg(list)[duplicates]
+        )
         if self._is_multi:
             # test_format_duplicate_labels_message_multi
             # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 06e9611c318cd..fd1fd313fc04f 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1970,7 +1970,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> SeriesGroupBy:
         from pandas.core.groupby.generic import SeriesGroupBy
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index d658de4a7d7c3..ac08297912e3c 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1240,7 +1240,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
@@ -1279,7 +1279,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 5fa7ed15a01d4..efa6232a668b1 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -880,7 +880,7 @@ def test_apply_multi_level_name(category):
     df = DataFrame(
         {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
     ).set_index(["A", "B"])
-    result = df.groupby("B").apply(lambda x: x.sum())
+    result = df.groupby("B", observed=False).apply(lambda x: x.sum())
     tm.assert_frame_equal(result, expected)
     assert df.index.names == ["A", "B"]
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index fa8df166d56ac..32a711bd659bf 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -739,7 +739,7 @@ def test_categorical_series(series, data):
     # Group the given series by a series with categorical data type such that group A
     # takes indices 0 and 3 and group B indices 1 and 2, obtaining the values mapped in
     # the given data.
-    groupby = series.groupby(Series(list("ABBA"), dtype="category"))
+    groupby = series.groupby(Series(list("ABBA"), dtype="category"), observed=False)
     result = groupby.aggregate(list)
     expected = Series(data, index=CategoricalIndex(data.keys()))
     tm.assert_series_equal(result, expected)
@@ -1115,7 +1115,7 @@ def test_groupby_multiindex_categorical_datetime():
             "values": np.arange(9),
         }
     )
-    result = df.groupby(["key1", "key2"]).mean()
+    result = df.groupby(["key1", "key2"], observed=False).mean()
 
     idx = MultiIndex.from_product(
         [
@@ -1291,8 +1291,8 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
 
 def test_groupby_categorical_series_dataframe_consistent(df_cat):
     # GH 20416
-    expected = df_cat.groupby(["A", "B"])["C"].mean()
-    result = df_cat.groupby(["A", "B"]).mean()["C"]
+    expected = df_cat.groupby(["A", "B"], observed=False)["C"].mean()
+    result = df_cat.groupby(["A", "B"], observed=False).mean()["C"]
     tm.assert_series_equal(result, expected)
 
 
@@ -1301,8 +1301,8 @@ def test_groupby_categorical_axis_1(code):
     # GH 13420
     df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]})
     cat = Categorical.from_codes(code, categories=list("abc"))
-    result = df.groupby(cat, axis=1).mean()
-    expected = df.T.groupby(cat, axis=0).mean().T
+    result = df.groupby(cat, axis=1, observed=False).mean()
+    expected = df.T.groupby(cat, axis=0, observed=False).mean().T
     tm.assert_frame_equal(result, expected)
 
 
@@ -1472,7 +1472,7 @@ def test_series_groupby_categorical_aggregation_getitem():
     df = DataFrame(d)
     cat = pd.cut(df["foo"], np.linspace(0, 20, 5))
     df["range"] = cat
-    groups = df.groupby(["range", "baz"], as_index=True, sort=True)
+    groups = df.groupby(["range", "baz"], as_index=True, sort=True, observed=False)
     result = groups["foo"].agg("mean")
     expected = groups.agg("mean")["foo"]
     tm.assert_series_equal(result, expected)
@@ -1533,7 +1533,7 @@ def test_read_only_category_no_sort():
         {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))}
     )
     expected = DataFrame(data={"a": [2.0, 6.0]}, index=CategoricalIndex(cats, name="b"))
-    result = df.groupby("b", sort=False).mean()
+    result = df.groupby("b", sort=False, observed=False).mean()
     tm.assert_frame_equal(result, expected)
 
 
@@ -1577,7 +1577,7 @@ def test_sorted_missing_category_values():
         dtype="category",
     )
 
-    result = df.groupby(["bar", "foo"]).size().unstack()
+    result = df.groupby(["bar", "foo"], observed=False).size().unstack()
 
     tm.assert_frame_equal(result, expected)
 
@@ -1742,7 +1742,7 @@ def test_groupby_categorical_indices_unused_categories():
             "col": range(3),
         }
     )
-    grouped = df.groupby("key", sort=False)
+    grouped = df.groupby("key", sort=False, observed=False)
     result = grouped.indices
     expected = {
         "b": np.array([0, 1], dtype="intp"),
@@ -2007,3 +2007,14 @@ def test_many_categories(as_index, sort, index_kind, ordered):
         expected = DataFrame({"a": Series(index), "b": data})
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]])
+@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]])
+def test_groupby_default_depr(cat_columns, keys):
+    df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]})
+    df[cat_columns] = df[cat_columns].astype("category")
+    msg = "The default of observed=False is deprecated"
+    klass = FutureWarning if set(cat_columns) & set(keys) else None
+    with tm.assert_produces_warning(klass, match=msg):
+        df.groupby(keys)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index e225ff5a0fa43..3da802b1766e7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1905,7 +1905,7 @@ def test_empty_groupby(
 
     df = df.iloc[:0]
 
-    gb = df.groupby(keys, group_keys=False, dropna=dropna)[columns]
+    gb = df.groupby(keys, group_keys=False, dropna=dropna, observed=False)[columns]
 
     def get_result(**kwargs):
         if method == "attr":
@@ -2602,7 +2602,7 @@ def test_datetime_categorical_multikey_groupby_indices():
             "c": Categorical.from_codes([-1, 0, 1], categories=[0, 1]),
         }
     )
-    result = df.groupby(["a", "b"]).indices
+    result = df.groupby(["a", "b"], observed=False).indices
     expected = {
         ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]),
         ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]),
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 31a8e7a7d36ac..1fab736453ea4 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -448,7 +448,7 @@ def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index):
             "a": [0, 1, 2, 3],
         }
     )
-    gb = df.groupby("key", dropna=False, sort=False, as_index=as_index)
+    gb = df.groupby("key", dropna=False, sort=False, as_index=as_index, observed=False)
     if test_series:
         gb = gb["a"]
     result = gb.sum()
@@ -666,7 +666,7 @@ def test_categorical_agg():
     df = pd.DataFrame(
         {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))}
     )
-    gb = df.groupby("x", dropna=False)
+    gb = df.groupby("x", dropna=False, observed=False)
     result = gb.agg(lambda x: x.sum())
     expected = gb.sum()
     tm.assert_frame_equal(result, expected)
@@ -678,7 +678,7 @@ def test_categorical_transform():
     df = pd.DataFrame(
         {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))}
     )
-    gb = df.groupby("x", dropna=False)
+    gb = df.groupby("x", dropna=False, observed=False)
     result = gb.transform(lambda x: x.sum())
     expected = gb.transform("sum")
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py
index 11f62c5d03c49..8602f8bdb1aa1 100644
--- a/pandas/tests/groupby/test_min_max.py
+++ b/pandas/tests/groupby/test_min_max.py
@@ -236,7 +236,7 @@ def test_min_max_nullable_uint64_empty_group():
     # don't raise NotImplementedError from libgroupby
     cat = pd.Categorical([0] * 10, categories=[0, 1])
     df = DataFrame({"A": cat, "B": pd.array(np.arange(10, dtype=np.uint64))})
-    gb = df.groupby("A")
+    gb = df.groupby("A", observed=False)
 
     res = gb.min()
 
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index d0b848a567346..becf42ce78798 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -21,11 +21,11 @@ def test_rank_unordered_categorical_typeerror():
 
     msg = "Cannot perform rank with non-ordered Categorical"
 
-    gb = ser.groupby(cat)
+    gb = ser.groupby(cat, observed=False)
     with pytest.raises(TypeError, match=msg):
         gb.rank()
 
-    gb2 = df.groupby(cat)
+    gb2 = df.groupby(cat, observed=False)
     with pytest.raises(TypeError, match=msg):
         gb2.rank()
 
diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py
index c0c98562eda68..cb706dc2341dd 100644
--- a/pandas/tests/groupby/test_size.py
+++ b/pandas/tests/groupby/test_size.py
@@ -81,7 +81,7 @@ def test_size_period_index():
 def test_size_on_categorical(as_index):
     df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"])
     df["A"] = df["A"].astype("category")
-    result = df.groupby(["A", "B"], as_index=as_index).size()
+    result = df.groupby(["A", "B"], as_index=as_index, observed=False).size()
 
     expected = DataFrame(
         [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"]
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 8abcc52db0500..6f7613d5a6958 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1062,7 +1062,7 @@ def test_transform_absent_categories(func):
     x_cats = range(2)
     y = [1]
     df = DataFrame({"x": Categorical(x_vals, x_cats), "y": y})
-    result = getattr(df.y.groupby(df.x), func)()
+    result = getattr(df.y.groupby(df.x, observed=False), func)()
     expected = df.y
     tm.assert_series_equal(result, expected)
 

From 612ca05ae67f27ccd5f74951391b392b5bf70481 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 6 Mar 2023 18:48:27 -0500
Subject: [PATCH 2/6] Fixup docs

---
 doc/source/user_guide/10min.rst        |  4 ++--
 doc/source/user_guide/advanced.rst     |  4 ++--
 doc/source/user_guide/categorical.rst  | 10 +++++-----
 doc/source/user_guide/groupby.rst      |  2 +-
 doc/source/whatsnew/v0.15.0.rst        |  2 +-
 doc/source/whatsnew/v0.19.0.rst        |  2 +-
 doc/source/whatsnew/v0.20.0.rst        |  4 ++--
 doc/source/whatsnew/v0.22.0.rst        |  6 +++---
 pandas/core/shared_docs.py             |  2 +-
 pandas/plotting/_matplotlib/boxplot.py |  2 +-
 10 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
index 6fc53fe09d791..7c98c99fecd5b 100644
--- a/doc/source/user_guide/10min.rst
+++ b/doc/source/user_guide/10min.rst
@@ -702,11 +702,11 @@ Sorting is per order in the categories, not lexical order:
 
     df.sort_values(by="grade")
 
-Grouping by a categorical column also shows empty categories:
+Grouping by a categorical column with ``observed=False`` also shows empty categories:
 
 .. ipython:: python
 
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 
 Plotting
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index ef08d709822d2..3ce54cfebf65a 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -800,8 +800,8 @@ Groupby operations on the index will preserve the index nature as well.
 
 .. ipython:: python
 
-   df2.groupby(level=0).sum()
-   df2.groupby(level=0).sum().index
+   df2.groupby(level=0, observed=True).sum()
+   df2.groupby(level=0, observed=True).sum().index
 
 Reindexing operations will return a resulting index based on the type of the passed
 indexer. Passing a list will return a plain-old ``Index``; indexing with
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 0b2224fe9bb32..e486235f044f5 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -607,7 +607,7 @@ even if some categories are not present in the data:
     s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"]))
     s.value_counts()
 
-``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories.
+``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories when grouping with ``observed=False``.
 
 .. ipython:: python
 
@@ -618,9 +618,9 @@ even if some categories are not present in the data:
         data=[[1, 2, 3], [4, 5, 6]],
         columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
     ).T
-    df.groupby(level=1).sum()
+    df.groupby(level=1, observed=False).sum()
 
-Groupby will also show "unused" categories:
+Groupby will also show "unused" categories when ``observed=False``:
 
 .. ipython:: python
 
@@ -628,7 +628,7 @@ Groupby will also show "unused" categories:
         ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"]
     )
     df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]})
-    df.groupby("cats").mean()
+    df.groupby("cats", observed=False).mean()
 
     cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df2 = pd.DataFrame(
@@ -638,7 +638,7 @@ Groupby will also show "unused" categories:
             "values": [1, 2, 3, 4],
         }
     )
-    df2.groupby(["cats", "B"]).mean()
+    df2.groupby(["cats", "B"], observed=False).mean()
 
 
 Pivot tables:
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index b5bf7ee25a50f..b64b1814e13f7 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -1281,7 +1281,7 @@ can be used as group keys. If so, the order of the levels will be preserved:
 
    factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0])
 
-   data.groupby(factor).mean()
+   data.groupby(factor, observed=False).mean()
 
 .. _groupby.specify:
 
diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst
index f52253687ecfd..67e91751e9527 100644
--- a/doc/source/whatsnew/v0.15.0.rst
+++ b/doc/source/whatsnew/v0.15.0.rst
@@ -85,7 +85,7 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
                                                   "medium", "good", "very good"])
     df["grade"]
     df.sort_values("grade")
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 - ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct
   a dataframe and use ``df.groupby(<group>).agg(<func>)``.
diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
index feeb7b5ee30ce..ab17cacd830e5 100644
--- a/doc/source/whatsnew/v0.19.0.rst
+++ b/doc/source/whatsnew/v0.19.0.rst
@@ -1134,7 +1134,7 @@ As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes
 .. ipython:: python
 
    df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat})
-   df_grouped = df.groupby(by=["A", "C"]).first()
+   df_grouped = df.groupby(by=["A", "C"], observed=False).first()
    df_set_idx = df.set_index(["A", "C"])
 
 **Previous behavior**:
diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
index b41a469fe0c1f..34a875f59e808 100644
--- a/doc/source/whatsnew/v0.20.0.rst
+++ b/doc/source/whatsnew/v0.20.0.rst
@@ -289,7 +289,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 
 .. code-block:: ipython
 
-   In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+   In [3]: df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
    ---------------------------------------------------------------------------
    ValueError: items in new_categories are not the same as in old categories
 
@@ -297,7 +297,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 
 .. ipython:: python
 
-   df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+   df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
 
 .. _whatsnew_0200.enhancements.table_schema:
 
diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst
index ec9769c22e76b..c494b4f286662 100644
--- a/doc/source/whatsnew/v0.22.0.rst
+++ b/doc/source/whatsnew/v0.22.0.rst
@@ -109,7 +109,7 @@ instead of ``NaN``.
 
    In [8]: grouper = pd.Categorical(['a', 'a'], categories=['a', 'b'])
 
-   In [9]: pd.Series([1, 2]).groupby(grouper).sum()
+   In [9]: pd.Series([1, 2]).groupby(grouper, observed=False).sum()
    Out[9]:
    a    3.0
    b    NaN
@@ -120,14 +120,14 @@ instead of ``NaN``.
 .. ipython:: python
 
    grouper = pd.Categorical(["a", "a"], categories=["a", "b"])
-   pd.Series([1, 2]).groupby(grouper).sum()
+   pd.Series([1, 2]).groupby(grouper, observed=False).sum()
 
 To restore the 0.21 behavior of returning ``NaN`` for unobserved groups,
 use ``min_count>=1``.
 
 .. ipython:: python
 
-   pd.Series([1, 2]).groupby(grouper).sum(min_count=1)
+   pd.Series([1, 2]).groupby(grouper, observed=False).sum(min_count=1)
 
 Resample
 ^^^^^^^^
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 8df13200342c1..7f7f55b133f6f 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -145,7 +145,7 @@
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
 
-    .. deprecated:: 2.2.0
+    .. deprecated:: 2.1.0
 
         The default value will change to True in a future version of pandas.
 
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index e2f30da1b839c..1c5f122395fb5 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -253,7 +253,7 @@ def _grouped_plot_by_column(
     return_type=None,
     **kwargs,
 ):
-    grouped = data.groupby(by)
+    grouped = data.groupby(by, observed=False)
     if columns is None:
         if not isinstance(by, (list, tuple)):
             by = [by]

From 32f4003728554ea5779027980d524403dd3cb880 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sun, 26 Feb 2023 22:09:37 -0500
Subject: [PATCH 3/6] DEPR: observed=False default in groupby

---
 doc/source/user_guide/10min.rst               |  4 +--
 doc/source/user_guide/advanced.rst            |  4 +--
 doc/source/user_guide/categorical.rst         | 10 +++---
 doc/source/user_guide/groupby.rst             |  2 +-
 doc/source/whatsnew/v0.15.0.rst               |  2 +-
 doc/source/whatsnew/v0.19.0.rst               |  2 +-
 doc/source/whatsnew/v0.20.0.rst               |  4 +--
 doc/source/whatsnew/v0.22.0.rst               |  6 ++--
 doc/source/whatsnew/v2.1.0.rst                |  2 +-
 pandas/core/frame.py                          |  2 +-
 pandas/core/groupby/groupby.py                | 17 +++++++++-
 pandas/core/indexes/base.py                   |  6 +++-
 pandas/core/series.py                         |  2 +-
 pandas/core/shared_docs.py                    |  5 +++
 pandas/plotting/_matplotlib/boxplot.py        |  2 +-
 .../tests/groupby/aggregate/test_aggregate.py |  4 +--
 pandas/tests/groupby/test_apply.py            |  2 +-
 pandas/tests/groupby/test_categorical.py      | 32 +++++++++++++------
 pandas/tests/groupby/test_groupby.py          |  4 +--
 pandas/tests/groupby/test_groupby_dropna.py   |  6 ++--
 pandas/tests/groupby/test_min_max.py          |  2 +-
 pandas/tests/groupby/test_rank.py             |  4 +--
 pandas/tests/groupby/test_size.py             |  2 +-
 .../tests/groupby/transform/test_transform.py |  2 +-
 24 files changed, 82 insertions(+), 46 deletions(-)

diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
index 6fc53fe09d791..7c98c99fecd5b 100644
--- a/doc/source/user_guide/10min.rst
+++ b/doc/source/user_guide/10min.rst
@@ -702,11 +702,11 @@ Sorting is per order in the categories, not lexical order:
 
     df.sort_values(by="grade")
 
-Grouping by a categorical column also shows empty categories:
+Grouping by a categorical column with ``observed=False`` also shows empty categories:
 
 .. ipython:: python
 
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 
 Plotting
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index ef08d709822d2..3ce54cfebf65a 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -800,8 +800,8 @@ Groupby operations on the index will preserve the index nature as well.
 
 .. ipython:: python
 
-   df2.groupby(level=0).sum()
-   df2.groupby(level=0).sum().index
+   df2.groupby(level=0, observed=True).sum()
+   df2.groupby(level=0, observed=True).sum().index
 
 Reindexing operations will return a resulting index based on the type of the passed
 indexer. Passing a list will return a plain-old ``Index``; indexing with
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 0b2224fe9bb32..e486235f044f5 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -607,7 +607,7 @@ even if some categories are not present in the data:
     s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"]))
     s.value_counts()
 
-``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories.
+``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories when grouping with ``observed=False``.
 
 .. ipython:: python
 
@@ -618,9 +618,9 @@ even if some categories are not present in the data:
         data=[[1, 2, 3], [4, 5, 6]],
         columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
     ).T
-    df.groupby(level=1).sum()
+    df.groupby(level=1, observed=False).sum()
 
-Groupby will also show "unused" categories:
+Groupby will also show "unused" categories when ``observed=False``:
 
 .. ipython:: python
 
@@ -628,7 +628,7 @@ Groupby will also show "unused" categories:
         ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"]
     )
     df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]})
-    df.groupby("cats").mean()
+    df.groupby("cats", observed=False).mean()
 
     cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df2 = pd.DataFrame(
@@ -638,7 +638,7 @@ Groupby will also show "unused" categories:
             "values": [1, 2, 3, 4],
         }
     )
-    df2.groupby(["cats", "B"]).mean()
+    df2.groupby(["cats", "B"], observed=False).mean()
 
 
 Pivot tables:
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index b5bf7ee25a50f..b64b1814e13f7 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -1281,7 +1281,7 @@ can be used as group keys. If so, the order of the levels will be preserved:
 
    factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0])
 
-   data.groupby(factor).mean()
+   data.groupby(factor, observed=False).mean()
 
 .. _groupby.specify:
 
diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst
index f52253687ecfd..67e91751e9527 100644
--- a/doc/source/whatsnew/v0.15.0.rst
+++ b/doc/source/whatsnew/v0.15.0.rst
@@ -85,7 +85,7 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
                                                   "medium", "good", "very good"])
     df["grade"]
     df.sort_values("grade")
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 - ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct
   a dataframe and use ``df.groupby(<group>).agg(<func>)``.
diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
index feeb7b5ee30ce..ab17cacd830e5 100644
--- a/doc/source/whatsnew/v0.19.0.rst
+++ b/doc/source/whatsnew/v0.19.0.rst
@@ -1134,7 +1134,7 @@ As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes
 .. ipython:: python
 
    df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat})
-   df_grouped = df.groupby(by=["A", "C"]).first()
+   df_grouped = df.groupby(by=["A", "C"], observed=False).first()
    df_set_idx = df.set_index(["A", "C"])
 
 **Previous behavior**:
diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
index b41a469fe0c1f..34a875f59e808 100644
--- a/doc/source/whatsnew/v0.20.0.rst
+++ b/doc/source/whatsnew/v0.20.0.rst
@@ -289,7 +289,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 
 .. code-block:: ipython
 
-   In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+   In [3]: df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
    ---------------------------------------------------------------------------
    ValueError: items in new_categories are not the same as in old categories
 
@@ -297,7 +297,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 
 .. ipython:: python
 
-   df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+   df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
 
 .. _whatsnew_0200.enhancements.table_schema:
 
diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst
index ec9769c22e76b..c494b4f286662 100644
--- a/doc/source/whatsnew/v0.22.0.rst
+++ b/doc/source/whatsnew/v0.22.0.rst
@@ -109,7 +109,7 @@ instead of ``NaN``.
 
    In [8]: grouper = pd.Categorical(['a', 'a'], categories=['a', 'b'])
 
-   In [9]: pd.Series([1, 2]).groupby(grouper).sum()
+   In [9]: pd.Series([1, 2]).groupby(grouper, observed=False).sum()
    Out[9]:
    a    3.0
    b    NaN
@@ -120,14 +120,14 @@ instead of ``NaN``.
 .. ipython:: python
 
    grouper = pd.Categorical(["a", "a"], categories=["a", "b"])
-   pd.Series([1, 2]).groupby(grouper).sum()
+   pd.Series([1, 2]).groupby(grouper, observed=False).sum()
 
 To restore the 0.21 behavior of returning ``NaN`` for unobserved groups,
 use ``min_count>=1``.
 
 .. ipython:: python
 
-   pd.Series([1, 2]).groupby(grouper).sum(min_count=1)
+   pd.Series([1, 2]).groupby(grouper, observed=False).sum(min_count=1)
 
 Resample
 ^^^^^^^^
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index cb2ca6d16ec0a..9c62f34b02779 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -93,10 +93,10 @@ Other API changes
 Deprecations
 ~~~~~~~~~~~~
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
+- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`43999`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.performance:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 98acab52e62f0..b997cc5ba4371 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8254,7 +8254,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
         if axis is not lib.no_default:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 457352564f255..537435b326f11 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -68,6 +68,7 @@ class providing the base-class of operations.
     cache_readonly,
     doc,
 )
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
 from pandas.core.dtypes.common import (
@@ -905,7 +906,7 @@ def __init__(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool | lib.NoDefault = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> None:
         self._selection = selection
@@ -941,6 +942,18 @@ def __init__(
         self.grouper = grouper
         self.exclusions = frozenset(exclusions) if exclusions else frozenset()
 
+        if observed is lib.no_default:
+            if any(ping._passed_categorical for ping in grouper.groupings):
+                warnings.warn(
+                    "The default of observed=False is deprecated and will be changed "
+                    "to True in a future version of pandas. Pass observed=False to "
+                    "retain current behavior or observed=True to adopt the future "
+                    "default and silence this warning.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+            self.observed = False
+
     def __getattr__(self, attr: str):
         if attr in self._internal_names_set:
             return object.__getattribute__(self, attr)
@@ -2125,6 +2138,8 @@ def _value_counts(
                 result_series.index.droplevel(levels),
                 sort=self.sort,
                 dropna=self.dropna,
+                # GH#43999 - deprecation of observed=False
+                observed=False,
             ).transform("sum")
             result_series /= indexed_group_size
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index acebe8a498f03..3f77ea4fed90a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -712,7 +712,11 @@ def _format_duplicate_message(self) -> DataFrame:
         duplicates = self[self.duplicated(keep="first")].unique()
         assert len(duplicates)
 
-        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
+        out = (
+            Series(np.arange(len(self)))
+            .groupby(self, observed=False)
+            .agg(list)[duplicates]
+        )
         if self._is_multi:
             # test_format_duplicate_labels_message_multi
             # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 95ee3f1af58f1..9f06d7ad6d02c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1975,7 +1975,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> SeriesGroupBy:
         from pandas.core.groupby.generic import SeriesGroupBy
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 184b77c880238..7f7f55b133f6f 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -144,6 +144,11 @@
     This only applies if any of the groupers are Categoricals.
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
+
+    .. deprecated:: 2.1.0
+
+        The default value will change to True in a future version of pandas.
+
 dropna : bool, default True
     If True, and if group keys contain NA values, NA values together
     with row/column will be dropped.
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index e2f30da1b839c..1c5f122395fb5 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -253,7 +253,7 @@ def _grouped_plot_by_column(
     return_type=None,
     **kwargs,
 ):
-    grouped = data.groupby(by)
+    grouped = data.groupby(by, observed=False)
     if columns is None:
         if not isinstance(by, (list, tuple)):
             by = [by]
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 14bd466b052bf..205846ad694b2 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1250,7 +1250,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
@@ -1289,7 +1289,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index a7ba1e8e81848..0699b7c1369f2 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -883,7 +883,7 @@ def test_apply_multi_level_name(category):
     df = DataFrame(
         {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
     ).set_index(["A", "B"])
-    result = df.groupby("B").apply(lambda x: x.sum())
+    result = df.groupby("B", observed=False).apply(lambda x: x.sum())
     tm.assert_frame_equal(result, expected)
     assert df.index.names == ["A", "B"]
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index dbbfab14d5c76..e4dd07f790f47 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -739,7 +739,7 @@ def test_categorical_series(series, data):
     # Group the given series by a series with categorical data type such that group A
     # takes indices 0 and 3 and group B indices 1 and 2, obtaining the values mapped in
     # the given data.
-    groupby = series.groupby(Series(list("ABBA"), dtype="category"))
+    groupby = series.groupby(Series(list("ABBA"), dtype="category"), observed=False)
     result = groupby.aggregate(list)
     expected = Series(data, index=CategoricalIndex(data.keys()))
     tm.assert_series_equal(result, expected)
@@ -1115,7 +1115,7 @@ def test_groupby_multiindex_categorical_datetime():
             "values": np.arange(9),
         }
     )
-    result = df.groupby(["key1", "key2"]).mean()
+    result = df.groupby(["key1", "key2"], observed=False).mean()
 
     idx = MultiIndex.from_product(
         [
@@ -1291,8 +1291,8 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
 
 def test_groupby_categorical_series_dataframe_consistent(df_cat):
     # GH 20416
-    expected = df_cat.groupby(["A", "B"])["C"].mean()
-    result = df_cat.groupby(["A", "B"]).mean()["C"]
+    expected = df_cat.groupby(["A", "B"], observed=False)["C"].mean()
+    result = df_cat.groupby(["A", "B"], observed=False).mean()["C"]
     tm.assert_series_equal(result, expected)
 
 
@@ -1303,11 +1303,11 @@ def test_groupby_categorical_axis_1(code):
     cat = Categorical.from_codes(code, categories=list("abc"))
     msg = "DataFrame.groupby with axis=1 is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        gb = df.groupby(cat, axis=1)
+        gb = df.groupby(cat, axis=1, observed=False)
     result = gb.mean()
     msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        gb2 = df.T.groupby(cat, axis=0)
+        gb2 = df.T.groupby(cat, axis=0, observed=False)
     expected = gb2.mean().T
     tm.assert_frame_equal(result, expected)
 
@@ -1478,7 +1478,7 @@ def test_series_groupby_categorical_aggregation_getitem():
     df = DataFrame(d)
     cat = pd.cut(df["foo"], np.linspace(0, 20, 5))
     df["range"] = cat
-    groups = df.groupby(["range", "baz"], as_index=True, sort=True)
+    groups = df.groupby(["range", "baz"], as_index=True, sort=True, observed=False)
     result = groups["foo"].agg("mean")
     expected = groups.agg("mean")["foo"]
     tm.assert_series_equal(result, expected)
@@ -1539,7 +1539,7 @@ def test_read_only_category_no_sort():
         {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))}
     )
     expected = DataFrame(data={"a": [2.0, 6.0]}, index=CategoricalIndex(cats, name="b"))
-    result = df.groupby("b", sort=False).mean()
+    result = df.groupby("b", sort=False, observed=False).mean()
     tm.assert_frame_equal(result, expected)
 
 
@@ -1583,7 +1583,7 @@ def test_sorted_missing_category_values():
         dtype="category",
     )
 
-    result = df.groupby(["bar", "foo"]).size().unstack()
+    result = df.groupby(["bar", "foo"], observed=False).size().unstack()
 
     tm.assert_frame_equal(result, expected)
 
@@ -1748,7 +1748,7 @@ def test_groupby_categorical_indices_unused_categories():
             "col": range(3),
         }
     )
-    grouped = df.groupby("key", sort=False)
+    grouped = df.groupby("key", sort=False, observed=False)
     result = grouped.indices
     expected = {
         "b": np.array([0, 1], dtype="intp"),
@@ -2013,3 +2013,15 @@ def test_many_categories(as_index, sort, index_kind, ordered):
         expected = DataFrame({"a": Series(index), "b": data})
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]])
+@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]])
+def test_groupby_default_depr(cat_columns, keys):
+    # GH#43999 - warn only when at least one grouping key is categorical
+    df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]})
+    df[cat_columns] = df[cat_columns].astype("category")
+    msg = "The default of observed=False is deprecated"
+    klass = FutureWarning if set(cat_columns) & set(keys) else None
+    with tm.assert_produces_warning(klass, match=msg):
+        df.groupby(keys)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index ea4bb42fb7ee1..f1dad7a22c789 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1926,7 +1926,7 @@ def test_empty_groupby(
 
     df = df.iloc[:0]
 
-    gb = df.groupby(keys, group_keys=False, dropna=dropna)[columns]
+    gb = df.groupby(keys, group_keys=False, dropna=dropna, observed=False)[columns]
 
     def get_result(**kwargs):
         if method == "attr":
@@ -2638,7 +2638,7 @@ def test_datetime_categorical_multikey_groupby_indices():
             "c": Categorical.from_codes([-1, 0, 1], categories=[0, 1]),
         }
     )
-    result = df.groupby(["a", "b"]).indices
+    result = df.groupby(["a", "b"], observed=False).indices
     expected = {
         ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]),
         ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]),
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 31a8e7a7d36ac..1fab736453ea4 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -448,7 +448,7 @@ def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index):
             "a": [0, 1, 2, 3],
         }
     )
-    gb = df.groupby("key", dropna=False, sort=False, as_index=as_index)
+    gb = df.groupby("key", dropna=False, sort=False, as_index=as_index, observed=False)
     if test_series:
         gb = gb["a"]
     result = gb.sum()
@@ -666,7 +666,7 @@ def test_categorical_agg():
     df = pd.DataFrame(
         {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))}
     )
-    gb = df.groupby("x", dropna=False)
+    gb = df.groupby("x", dropna=False, observed=False)
     result = gb.agg(lambda x: x.sum())
     expected = gb.sum()
     tm.assert_frame_equal(result, expected)
@@ -678,7 +678,7 @@ def test_categorical_transform():
     df = pd.DataFrame(
         {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))}
     )
-    gb = df.groupby("x", dropna=False)
+    gb = df.groupby("x", dropna=False, observed=False)
     result = gb.transform(lambda x: x.sum())
     expected = gb.transform("sum")
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py
index 11f62c5d03c49..8602f8bdb1aa1 100644
--- a/pandas/tests/groupby/test_min_max.py
+++ b/pandas/tests/groupby/test_min_max.py
@@ -236,7 +236,7 @@ def test_min_max_nullable_uint64_empty_group():
     # don't raise NotImplementedError from libgroupby
     cat = pd.Categorical([0] * 10, categories=[0, 1])
     df = DataFrame({"A": cat, "B": pd.array(np.arange(10, dtype=np.uint64))})
-    gb = df.groupby("A")
+    gb = df.groupby("A", observed=False)
 
     res = gb.min()
 
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 9f42f6ad72591..8c863dc2982ae 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -21,11 +21,11 @@ def test_rank_unordered_categorical_typeerror():
 
     msg = "Cannot perform rank with non-ordered Categorical"
 
-    gb = ser.groupby(cat)
+    gb = ser.groupby(cat, observed=False)
     with pytest.raises(TypeError, match=msg):
         gb.rank()
 
-    gb2 = df.groupby(cat)
+    gb2 = df.groupby(cat, observed=False)
     with pytest.raises(TypeError, match=msg):
         gb2.rank()
 
diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py
index e29f87992f8a1..7da6bc8a32013 100644
--- a/pandas/tests/groupby/test_size.py
+++ b/pandas/tests/groupby/test_size.py
@@ -83,7 +83,7 @@ def test_size_period_index():
 def test_size_on_categorical(as_index):
     df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"])
     df["A"] = df["A"].astype("category")
-    result = df.groupby(["A", "B"], as_index=as_index).size()
+    result = df.groupby(["A", "B"], as_index=as_index, observed=False).size()
 
     expected = DataFrame(
         [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"]
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 27ffeb9247556..d6d0b03a65ebb 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1078,7 +1078,7 @@ def test_transform_absent_categories(func):
     x_cats = range(2)
     y = [1]
     df = DataFrame({"x": Categorical(x_vals, x_cats), "y": y})
-    result = getattr(df.y.groupby(df.x), func)()
+    result = getattr(df.y.groupby(df.x, observed=False), func)()
     expected = df.y
     tm.assert_series_equal(result, expected)
 

From 12f93c61b48d855e75f98f0cf02544a2589249b0 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 6 Mar 2023 20:32:00 -0500
Subject: [PATCH 4/6] Resolve observed=no_default before building the grouper

---
 pandas/core/groupby/groupby.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 537435b326f11..27884eb5a6502 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -923,7 +923,6 @@ def __init__(
         self.keys = keys
         self.sort = sort
         self.group_keys = group_keys
-        self.observed = observed
         self.dropna = dropna
 
         if grouper is None:
@@ -933,15 +932,10 @@ def __init__(
                 axis=axis,
                 level=level,
                 sort=sort,
-                observed=observed,
+                observed=False if observed is lib.no_default else observed,
                 dropna=self.dropna,
             )
 
-        self.obj = obj
-        self.axis = obj._get_axis_number(axis)
-        self.grouper = grouper
-        self.exclusions = frozenset(exclusions) if exclusions else frozenset()
-
         if observed is lib.no_default:
             if any(ping._passed_categorical for ping in grouper.groupings):
                 warnings.warn(
@@ -952,7 +946,13 @@ def __init__(
                     FutureWarning,
                     stacklevel=find_stack_level(),
                 )
-            self.observed = False
+            observed = False
+        self.observed = observed
+
+        self.obj = obj
+        self.axis = obj._get_axis_number(axis)
+        self.grouper = grouper
+        self.exclusions = frozenset(exclusions) if exclusions else frozenset()
 
     def __getattr__(self, attr: str):
         if attr in self._internal_names_set:

From de4aecece112acfd08572978a631171f89af2c96 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Wed, 15 Mar 2023 23:14:35 -0400
Subject: [PATCH 5/6] Mention defaulting to True

---
 doc/source/whatsnew/v2.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 9c62f34b02779..35032c2d60644 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -93,7 +93,7 @@ Other API changes
 Deprecations
 ~~~~~~~~~~~~
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
-- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`43999`)
+- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)

From 9d12c4e46d6e71884883587b47f765d25353c91d Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 16 Mar 2023 22:12:22 -0400
Subject: [PATCH 6/6] Remove trailing observed deprecation block in groupby __init__

---
 pandas/core/groupby/groupby.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index a6006cc9c2077..4f40728449d8a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -954,18 +954,6 @@ def __init__(
         self.grouper = grouper
         self.exclusions = frozenset(exclusions) if exclusions else frozenset()
 
-        if observed is lib.no_default:
-            if any(ping._passed_categorical for ping in grouper.groupings):
-                warnings.warn(
-                    "The default of observed=False is deprecated and will be changed "
-                    "to True in a future version of pandas. Pass observed=False to "
-                    "retain current behavior or observed=True to adopt the future "
-                    "default and silence this warning.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-            self.observed = False
-
     def __getattr__(self, attr: str):
         if attr in self._internal_names_set:
             return object.__getattribute__(self, attr)