REF: dont pass keys through wrap_applied_output #43479

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged · 2 commits · Sep 9, 2021
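
The change is mechanical but worth spelling out: `BaseGrouper.apply` previously returned the group keys alongside the per-group results, and every `_wrap_applied_output` / `_concat_objects` signature had to thread that `keys` argument through. After this PR, the keys are exposed as a cached property (`grouper.group_keys_seq`), and each consumer reads them from the grouper directly. A minimal sketch of the pattern, with hypothetical names standing in for the real pandas classes, and `functools.cached_property` standing in for pandas' `cache_readonly`:

```python
from functools import cached_property  # stand-in for pandas' cache_readonly


class Grouper:
    """Toy stand-in for pandas' BaseGrouper; names are illustrative only."""

    def __init__(self, levels):
        self.levels = levels

    # Before this PR: a private method whose result had to be threaded
    # through apply() -> _wrap_applied_output() -> _concat_objects().
    # After: a cached attribute that any consumer reads directly.
    @cached_property
    def group_keys_seq(self):
        if len(self.levels) == 1:
            return self.levels[0]
        # multi-key case: one flattened tuple per group
        return list(zip(*self.levels))

    def apply(self, func, groups):
        # no longer returns keys; callers use self.group_keys_seq instead
        return [func(group) for group in groups]
```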
pandas/core/groupby/generic.py: 11 additions & 12 deletions
@@ -399,7 +399,6 @@ def _wrap_transformed_output(
     def _wrap_applied_output(
         self,
         data: Series,
-        keys: Index,
         values: list[Any] | None,
         not_indexed_same: bool = False,
     ) -> DataFrame | Series:
@@ -410,8 +409,6 @@ def _wrap_applied_output(
         ----------
         data : Series
             Input data for groupby operation.
-        keys : Index
-            Keys of groups that Series was grouped by.
         values : Optional[List[Any]]
             Applied output for each group.
         not_indexed_same : bool, default False
@@ -421,6 +418,8 @@ def _wrap_applied_output(
         -------
         DataFrame or Series
         """
+        keys = self.grouper.group_keys_seq
+
         if len(keys) == 0:
             # GH #6265
             return self.obj._constructor(
@@ -442,7 +441,7 @@ def _wrap_applied_output(
             res_ser.name = self.obj.name
             return res_ser
         elif isinstance(values[0], (Series, DataFrame)):
-            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
+            return self._concat_objects(values, not_indexed_same=not_indexed_same)
         else:
             # GH #6265 #24880
             result = self.obj._constructor(
@@ -1130,7 +1129,9 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame:
             res_df.columns = obj.columns
             return res_df

-    def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
+    def _wrap_applied_output(self, data, values, not_indexed_same=False):
+        keys = self.grouper.group_keys_seq
+
         if len(keys) == 0:
             result = self.obj._constructor(
                 index=self.grouper.result_index, columns=data.columns
@@ -1145,7 +1146,7 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
             # GH9684 - All values are None, return an empty frame.
             return self.obj._constructor()
         elif isinstance(first_not_none, DataFrame):
-            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
+            return self._concat_objects(values, not_indexed_same=not_indexed_same)

         key_index = self.grouper.result_index if self.as_index else None

@@ -1173,12 +1174,11 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
         else:
             # values are Series
             return self._wrap_applied_output_series(
-                keys, values, not_indexed_same, first_not_none, key_index
+                values, not_indexed_same, first_not_none, key_index
             )

     def _wrap_applied_output_series(
         self,
-        keys,
         values: list[Series],
         not_indexed_same: bool,
         first_not_none,
@@ -1201,6 +1201,7 @@ def _wrap_applied_output_series(

         # assign the name to this series
         if singular_series:
+            keys = self.grouper.group_keys_seq
             values[0].name = keys[0]

             # GH2893
@@ -1209,9 +1210,7 @@ def _wrap_applied_output_series(
             # if any of the sub-series are not indexed the same
             # OR we don't have a multi-index and we have only a
             # single values
-            return self._concat_objects(
-                keys, values, not_indexed_same=not_indexed_same
-            )
+            return self._concat_objects(values, not_indexed_same=not_indexed_same)

         # still a series
         # path added as of GH 5545
@@ -1222,7 +1221,7 @@ def _wrap_applied_output_series(

         if not all_indexed_same:
             # GH 8467
-            return self._concat_objects(keys, values, not_indexed_same=True)
+            return self._concat_objects(values, not_indexed_same=True)

         # Combine values
         # vstack+constructor is faster than concat and handles MI-columns
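
These wrappers assemble the user-visible result of `groupby(...).apply(...)`; the refactor does not change behavior, only where the keys come from. For reference, the `isinstance(values[0], (Series, DataFrame))` branch is the path where group keys become the outer level of the result index. A small demonstration with made-up data (output shown approximately):

```python
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})

# Each group maps to a new Series (different index per group), so the
# wrapper takes the _concat_objects path and the group keys form the
# outer index level -- now sourced from grouper.group_keys_seq.
res = df.groupby("key")["val"].apply(
    lambda s: pd.Series({"lo": s.min(), "hi": s.max()})
)
print(res)
# key
# a    lo    1
#      hi    2
# b    lo    3
#      hi    3
```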
pandas/core/groupby/groupby.py: 7 additions & 7 deletions
@@ -998,7 +998,7 @@ def _iterate_slices(self) -> Iterable[Series]:
     # Dispatch/Wrapping

     @final
-    def _concat_objects(self, keys, values, not_indexed_same: bool = False):
+    def _concat_objects(self, values, not_indexed_same: bool = False):
         from pandas.core.reshape.concat import concat

         def reset_identity(values):
@@ -1035,7 +1035,7 @@ def reset_identity(values):
         if self.as_index:

             # possible MI return case
-            group_keys = keys
+            group_keys = self.grouper.group_keys_seq
             group_levels = self.grouper.levels
             group_names = self.grouper.names

@@ -1146,7 +1146,7 @@ def _wrap_aggregated_output(
     def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]):
         raise AbstractMethodError(self)

-    def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = False):
+    def _wrap_applied_output(self, data, values, not_indexed_same: bool = False):
         raise AbstractMethodError(self)

     def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
@@ -1182,7 +1182,7 @@ def _group_keys_index(self) -> Index:
         # The index to use for the result of Groupby Aggregations.
         # This _may_ be redundant with self.grouper.result_index, but that
         # has not been conclusively proven yet.
-        keys = self.grouper._get_group_keys()
+        keys = self.grouper.group_keys_seq
         if self.grouper.nkeys > 1:
             index = MultiIndex.from_tuples(keys, names=self.grouper.names)
         else:
@@ -1223,7 +1223,7 @@ def _transform_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)
         data and indices into a Numba jitted function.
         """
         starts, ends, sorted_index, sorted_data = self._numba_prep(func, data)
-        group_keys = self.grouper._get_group_keys()
+        group_keys = self.grouper.group_keys_seq

         numba_transform_func = numba_.generate_numba_transform_func(
             kwargs, func, engine_kwargs
@@ -1360,13 +1360,13 @@ def _python_apply_general(
         Series or DataFrame
             data after applying f
         """
-        keys, values, mutated = self.grouper.apply(f, data, self.axis)
+        values, mutated = self.grouper.apply(f, data, self.axis)

         if not_indexed_same is None:
             not_indexed_same = mutated or self.mutated

         return self._wrap_applied_output(
-            data, keys, values, not_indexed_same=not_indexed_same
+            data, values, not_indexed_same=not_indexed_same
         )

     @final
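
Inside `_concat_objects`, the `as_index` branch hands `group_keys`, `group_levels`, and `group_names` to `concat` to rebuild the key level of the result; the only change here is that `group_keys` now comes from `self.grouper.group_keys_seq` rather than a parameter. The underlying concat-with-keys mechanism, shown standalone with made-up data:

```python
import pandas as pd

# Per-group results, in group order, as _concat_objects receives them.
pieces = [pd.Series([1, 2]), pd.Series([3])]
group_keys = ["a", "b"]  # what self.grouper.group_keys_seq supplies

# concat stacks the pieces and prepends each group's key as the outer
# index level -- the same mechanism _concat_objects uses when as_index=True.
out = pd.concat(pieces, keys=group_keys)
print(out)
# a  0    1
#    1    2
# b  0    3
# dtype: int64
```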
pandas/core/groupby/ops.py: 6 additions & 5 deletions
@@ -690,7 +690,7 @@ def get_iterator(
         for each group
         """
         splitter = self._get_splitter(data, axis=axis)
-        keys = self._get_group_keys()
+        keys = self.group_keys_seq
         for key, group in zip(keys, splitter):
             yield key, group.__finalize__(data, method="groupby")

@@ -716,7 +716,8 @@ def _get_grouper(self):
         return self.groupings[0].grouping_vector

     @final
-    def _get_group_keys(self):
+    @cache_readonly
+    def group_keys_seq(self):
         if len(self.groupings) == 1:
             return self.levels[0]
         else:
@@ -726,10 +727,10 @@ def group_keys_seq(self):
             return get_flattened_list(ids, ngroups, self.levels, self.codes)

     @final
-    def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
+    def apply(self, f: F, data: FrameOrSeries, axis: int = 0) -> tuple[list, bool]:
         mutated = self.mutated
         splitter = self._get_splitter(data, axis=axis)
-        group_keys = self._get_group_keys()
+        group_keys = self.group_keys_seq
         result_values = []

         # This calls DataSplitter.__iter__
@@ -745,7 +746,7 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
                 mutated = True
             result_values.append(res)

-        return group_keys, result_values, mutated
+        return result_values, mutated

     @cache_readonly
     def indices(self):
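
Turning the `_get_group_keys()` method into the `group_keys_seq` property under `@cache_readonly` is what makes it cheap for several consumers (`get_iterator`, `apply`, and the wrap methods above) to each ask the grouper for keys independently: the sequence is computed once per grouper and then reused. pandas' `cache_readonly` is implemented in `pandas._libs.properties`; a simplified pure-Python sketch of the same idea:

```python
class cache_readonly:
    """Simplified, illustrative sketch of a cache_readonly-style descriptor.

    pandas' real implementation lives in pandas._libs.properties; this toy
    version only demonstrates the idea: compute on first access, then serve
    the cached value from the instance.
    """

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class, not an instance
        cache = obj.__dict__.setdefault("_cache", {})
        if self.name not in cache:
            cache[self.name] = self.func(obj)  # first access: compute
        return cache[self.name]  # later accesses: served from the cache


class DemoGrouper:
    @cache_readonly
    def group_keys_seq(self):
        print("computing group keys once")
        return ["a", "b"]


g = DemoGrouper()
g.group_keys_seq  # prints "computing group keys once"
g.group_keys_seq  # cached; no recompute
```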