pandas-dev · Dr-Irv · Jun 2, 2023 · May 24, 2023 · May 24, 2023 · May 24, 2023
diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
@@ -427,12 +427,30 @@ ByT = TypeVar(
     Interval[Timedelta],
     tuple,
 )
+# SeriesByT is a distinct TypeVar for groupby() keyed by a Series of known dtype.
+# Its constraints are essentially the intersection of the Series S1 and ByT TypeVars.
+SeriesByT = TypeVar(
+    "SeriesByT",
+    str,
+    bytes,
+    datetime.date,
+    bool,
+    int,
+    float,
+    complex,
+    Timestamp,
+    Timedelta,
+    Period,
+    Interval[int],
+    Interval[float],
+    Interval[Timestamp],
+    Interval[Timedelta],
+)
 GroupByObjectNonScalar: TypeAlias = (
     tuple
     | list[_HashableTa]
     | Function
     | list[Function]
-    | Series
     | list[Series]
     | np.ndarray
     | list[np.ndarray]
@@ -442,7 +460,7 @@ GroupByObjectNonScalar: TypeAlias = (
     | Grouper
     | list[Grouper]
 )
-GroupByObject: TypeAlias = Scalar | Index | GroupByObjectNonScalar
+GroupByObject: TypeAlias = Scalar | Index | GroupByObjectNonScalar | Series
 
 StataDateFormat: TypeAlias = Literal[
     "tc",

diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -106,6 +106,7 @@ from pandas._typing import (
     ReplaceMethod,
     Scalar,
     ScalarT,
+    SeriesByT,
     SortKind,
     StataDateFormat,
     StorageOptions,
@@ -1085,7 +1086,20 @@ class DataFrame(NDFrame, OpsMixin):
     @overload
     def groupby(
         self,
-        by: CategoricalIndex | Index,
+        by: Series[SeriesByT],
+        axis: Axis = ...,
+        level: Level | None = ...,
+        as_index: _bool = ...,
+        sort: _bool = ...,
+        group_keys: _bool = ...,
+        squeeze: _bool = ...,
+        observed: _bool = ...,
+        dropna: _bool = ...,
+    ) -> DataFrameGroupBy[SeriesByT]: ...
+    @overload
+    def groupby(
+        self,
+        by: CategoricalIndex | Index | Series,
         axis: Axis = ...,
         level: Level | None = ...,
         as_index: _bool = ...,

diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
@@ -129,6 +129,7 @@ from pandas._typing import (
     Renamer,
     ReplaceMethod,
     Scalar,
+    SeriesByT,
     SortKind,
     StrDtypeArg,
     TimedeltaDtypeArg,
@@ -639,7 +640,20 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     @overload
     def groupby(
         self,
-        by: CategoricalIndex | Index,
+        by: Series[SeriesByT],
+        axis: AxisIndex = ...,
+        level: Level | None = ...,
+        as_index: _bool = ...,
+        sort: _bool = ...,
+        group_keys: _bool = ...,
+        squeeze: _bool = ...,
+        observed: _bool = ...,
+        dropna: _bool = ...,
+    ) -> SeriesGroupBy[S1, SeriesByT]: ...
+    @overload
+    def groupby(
+        self,
+        by: CategoricalIndex | Index | Series,
         axis: AxisIndex = ...,
         level: Level | None = ...,
         as_index: _bool = ...,

diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -985,6 +985,20 @@ def test_types_groupby_any() -> None:
     )
 
 
+def test_types_groupby_iter() -> None:
+    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
+    series_groupby = pd.Series([True, True, False], dtype=bool)
+    first_group = next(iter(df.groupby(series_groupby)))
+    check(
+        assert_type(first_group[0], bool),
+        bool,
+    )
+    check(
+        assert_type(first_group[1], pd.DataFrame),
+        pd.DataFrame,
+    )
+
+
 def test_types_merge() -> None:
     df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
     df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]})

diff --git a/tests/test_series.py b/tests/test_series.py
@@ -731,6 +731,17 @@ def test_types_group_by_with_dropna_keyword() -> None:
     s.groupby(level=0).sum()
 
 
+def test_types_groupby_iter() -> None:
+    s = pd.Series([1, 1, 2], dtype=int)
+    series_groupby = pd.Series([True, True, False], dtype=bool)
+    first_group = next(iter(s.groupby(series_groupby)))
+    check(
+        assert_type(first_group[0], bool),
+        bool,
+    )
+    check(assert_type(first_group[1], "pd.Series[int]"), pd.Series, np.integer)
+
+
 def test_types_plot() -> None:
     s = pd.Series([0, 1, 1, 0, -10])
     if TYPE_CHECKING:  # skip pytest