diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 598750475f3e8..4aac2630feb2c 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -446,15 +446,14 @@ def __init__( index: Index, grouper=None, obj: FrameOrSeries | None = None, - name: Hashable = None, level=None, sort: bool = True, observed: bool = False, in_axis: bool = False, dropna: bool = True, ): - self.name = name self.level = level + self._orig_grouper = grouper self.grouper = _convert_grouper(index, grouper) self.all_grouper = None self.index = index @@ -466,18 +465,11 @@ def __init__( self._passed_categorical = False - # right place for this? - if isinstance(grouper, (Series, Index)) and name is None: - self.name = grouper.name - # we have a single grouper which may be a myriad of things, # some of which are dependent on the passing in level ilevel = self._ilevel if ilevel is not None: - if self.name is None: - self.name = index.names[ilevel] - ( self.grouper, # Index self._codes, @@ -491,16 +483,22 @@ def __init__( # what key/level refer to exactly, don't need to # check again as we have by this point converted these # to an actual value (rather than a pd.Grouper) - _, grouper, _ = self.grouper._get_grouper( + _, newgrouper, newobj = self.grouper._get_grouper( # error: Value of type variable "FrameOrSeries" of "_get_grouper" # of "Grouper" cannot be "Optional[FrameOrSeries]" self.obj, # type: ignore[type-var] validate=False, ) - if self.name is None: - self.name = grouper.result_index.name - self.obj = self.grouper.obj - self.grouper = grouper._get_grouper() + self.obj = newobj + + ng = newgrouper._get_grouper() + if isinstance(newgrouper, ops.BinGrouper): + # in this case we have `ng is newgrouper` + self.grouper = ng + else: + # ops.BaseGrouper + # use Index instead of ndarray so we can recover the name + self.grouper = Index(ng, name=newgrouper.result_index.name) else: # a passed Categorical @@ -511,10 +509,6 @@ def __init__( self.grouper, self.sort, observed ) - # we are done - elif isinstance(self.grouper, Grouping): - self.grouper = self.grouper.grouper - # no level passed elif not isinstance( self.grouper, (Series, Index, ExtensionArray, np.ndarray) @@ -546,6 +540,23 @@ def __repr__(self) -> str: def __iter__(self): return iter(self.indices) + @cache_readonly + def name(self) -> Hashable: + ilevel = self._ilevel + if ilevel is not None: + return self.index.names[ilevel] + + if isinstance(self._orig_grouper, (Index, Series)): + return self._orig_grouper.name + + elif isinstance(self.grouper, ops.BaseGrouper): + return self.grouper.result_index.name + + elif isinstance(self.grouper, Index): + return self.grouper.name + + return None + @cache_readonly def _ilevel(self) -> int | None: """ @@ -814,25 +825,29 @@ def is_in_obj(gpr) -> bool: for gpr, level in zip(keys, levels): if is_in_obj(gpr): # df.groupby(df['name']) - in_axis, name = True, gpr.name - exclusions.add(name) + in_axis = True + exclusions.add(gpr.name) elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: if validate: obj._check_label_or_level_ambiguity(gpr, axis=axis) in_axis, name, gpr = True, gpr, obj[gpr] + if gpr.ndim != 1: + # non-unique columns; raise here to get the name in the + # exception message + raise ValueError(f"Grouper for '{name}' not 1-dimensional") exclusions.add(name) elif obj._is_level_reference(gpr, axis=axis): - in_axis, name, level, gpr = False, None, gpr, None + in_axis, level, gpr = False, gpr, None else: raise KeyError(gpr) elif isinstance(gpr, Grouper) and gpr.key is not None: # Add key to exclusions exclusions.add(gpr.key) - in_axis, name = False, None + in_axis = False else: - in_axis, name = False, None + in_axis = False if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: raise ValueError( @@ -847,7 +862,6 @@ def is_in_obj(gpr) -> bool: group_axis, gpr, obj=obj, - name=name, level=level, sort=sort, observed=observed, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 3045451974ee7..4226158f53ffa 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1200,7 +1200,7 @@ def names(self) -> list[Hashable]: @property def groupings(self) -> list[grouper.Grouping]: lev = self.binlabels - ping = grouper.Grouping(lev, lev, in_axis=False, level=None, name=lev.name) + ping = grouper.Grouping(lev, lev, in_axis=False, level=None) return [ping] def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray: