From 4c5eddd63e94bacddb96bf61f81a6a8fcd9c33f0 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 20 Aug 2020 21:19:10 -0700
Subject: [PATCH 1/9] REF: remove unnecessary try/except

---
 pandas/core/groupby/generic.py | 69 ++++++++++++++++------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 166631e69f523..51532a75d2d4a 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -31,7 +31,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -60,6 +60,7 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
+from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -1034,32 +1035,31 @@ def _cython_agg_blocks(
 
         no_result = object()
 
-        def cast_result_block(result, block: "Block", how: str) -> "Block":
-            # see if we can cast the block to the desired dtype
+        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
+            # see if we can cast the values to the desired dtype
             # this may not be the original dtype
             assert not isinstance(result, DataFrame)
             assert result is not no_result
 
-            dtype = maybe_cast_result_dtype(block.dtype, how)
+            dtype = maybe_cast_result_dtype(values.dtype, how)
             result = maybe_downcast_numeric(result, dtype)
 
-            if block.is_extension and isinstance(result, np.ndarray):
-                # e.g. block.values was an IntegerArray
-                # (1, N) case can occur if block.values was Categorical
+            if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
+                # e.g. values was an IntegerArray
+                # (1, N) case can occur if values was Categorical
                 # and result is ndarray[object]
                 # TODO(EA2D): special casing not needed with 2D EAs
                 assert result.ndim == 1 or result.shape[0] == 1
                 try:
                     # Cast back if feasible
-                    result = type(block.values)._from_sequence(
-                        result.ravel(), dtype=block.values.dtype
+                    result = type(values)._from_sequence(
+                        result.ravel(), dtype=values.dtype
                     )
                 except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)
 
-            agg_block: "Block" = block.make_block(result)
-            return agg_block
+            return result
 
         def blk_func(block: "Block") -> List["Block"]:
             new_blocks: List["Block"] = []
@@ -1093,33 +1093,30 @@ def blk_func(block: "Block") -> List["Block"]:
                 # Categoricals. This will be done later by self._reindex_output()
                 # Doing it here creates an error. See GH#34951
                 sgb = get_groupby(obj, self.grouper, observed=True)
-                try:
-                    result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
-                except TypeError:
-                    # we may have an exception in trying to aggregate
-                    # continue and exclude the block
-                    raise
+                result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+
+                result = cast(DataFrame, result)
+                # unwrap DataFrame to get array
+                if len(result._mgr.blocks) != 1:
+                    # We've split an object block! Everything we've assumed
+                    # about a single block input returning a single block output
+                    # is a lie. To keep the code-path for the typical non-split case
+                    # clean, we choose to clean up this mess later on.
+                    assert len(locs) == result.shape[1]
+                    for i, loc in enumerate(locs):
+                        agg_block = result.iloc[:, [i]]._mgr.blocks[0]
+                        agg_block.mgr_locs = [loc]
+                        new_blocks.append(agg_block)
                 else:
-                    result = cast(DataFrame, result)
-                    # unwrap DataFrame to get array
-                    if len(result._mgr.blocks) != 1:
-                        # We've split an object block! Everything we've assumed
-                        # about a single block input returning a single block output
-                        # is a lie. To keep the code-path for the typical non-split case
-                        # clean, we choose to clean up this mess later on.
-                        assert len(locs) == result.shape[1]
-                        for i, loc in enumerate(locs):
-                            agg_block = result.iloc[:, [i]]._mgr.blocks[0]
-                            agg_block.mgr_locs = [loc]
-                            new_blocks.append(agg_block)
-                    else:
-                        result = result._mgr.blocks[0].values
-                        if isinstance(result, np.ndarray) and result.ndim == 1:
-                            result = result.reshape(1, -1)
-                        agg_block = cast_result_block(result, block, how)
-                        new_blocks = [agg_block]
+                    result = result._mgr.blocks[0].values
+                    if isinstance(result, np.ndarray) and result.ndim == 1:
+                        result = result.reshape(1, -1)
+                    res_values = cast_agg_result(result, block.values, how)
+                    agg_block = block.make_block(res_values)
+                    new_blocks = [agg_block]
             else:
-                agg_block = cast_result_block(result, block, how)
+                res_values = cast_agg_result(result, block.values, how)
+                agg_block = block.make_block(res_values)
                 new_blocks = [agg_block]
             return new_blocks
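A minimal sketch (not pandas-internal code) of the cast-back round-trip the new
cast_agg_result helper performs: a cython aggregation hands back a plain ndarray,
and we try to rebuild the input's ExtensionArray type from it, keeping the ndarray
when the values don't fit the dtype. _from_sequence is a private EA constructor;
behavior assumed for pandas ~1.1.

    import numpy as np
    import pandas as pd

    values = pd.array([1, 2, 3, 4], dtype="Int64")  # ExtensionArray input
    result = np.array([3.0, 7.0])  # float ndarray, as a cython agg would return
    try:
        # Cast back if feasible, mirroring cast_agg_result above
        result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
    except (ValueError, TypeError):
        # keep the ndarray when the round-trip is lossy (e.g. true floats)
        pass
    print(result.dtype)  # Int64 -- the extension dtype survived the agg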
From 42649fbb855a895ee5818d7dc80bdbd0ce0e9f5a Mon Sep 17 00:00:00 2001
From: Karthik Mathur <22126205+mathurk1@users.noreply.github.com>
Date: Fri, 21 Aug 2020 17:34:51 -0500
Subject: [PATCH 2/9] TST: add test for agg on ordered categorical cols (#35630)

---
 .../tests/groupby/aggregate/test_aggregate.py | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index ce9d4b892d775..8fe450fe6abfc 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1063,6 +1063,85 @@ def test_groupby_get_by_index():
     pd.testing.assert_frame_equal(res, expected)
 
 
+@pytest.mark.parametrize(
+    "grp_col_dict, exp_data",
+    [
+        ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}),
+        ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}),
+        ({"nr": "min"}, {"nr": [1, 5]}),
+    ],
+)
+def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
+    # test single aggregations on ordered categorical cols GH27800
+
+    # create the input dataframe
+    input_df = pd.DataFrame(
+        {
+            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
+            "cat_ord": list("aabbccdd"),
+            "cat": list("aaaabbbb"),
+        }
+    )
+
+    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
+    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
+    result_df = input_df.groupby("cat").agg(grp_col_dict)
+
+    # create expected dataframe
+    cat_index = pd.CategoricalIndex(
+        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
+    )
+
+    expected_df = pd.DataFrame(data=exp_data, index=cat_index)
+
+    tm.assert_frame_equal(result_df, expected_df)
+
+
+@pytest.mark.parametrize(
+    "grp_col_dict, exp_data",
+    [
+        ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]),
+        ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]),
+        ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]),
+    ],
+)
+def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
+    # test combined aggregations on ordered categorical cols GH27800
+
+    # create the input dataframe
+    input_df = pd.DataFrame(
+        {
+            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
+            "cat_ord": list("aabbccdd"),
+            "cat": list("aaaabbbb"),
+        }
+    )
+
+    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
+    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
+    result_df = input_df.groupby("cat").agg(grp_col_dict)
+
+    # create expected dataframe
+    cat_index = pd.CategoricalIndex(
+        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
+    )
+
+    # unpack the grp_col_dict to create the multi-index tuple
+    # this tuple will be used to create the expected dataframe index
+    multi_index_list = []
+    for k, v in grp_col_dict.items():
+        if isinstance(v, list):
+            for value in v:
+                multi_index_list.append([k, value])
+        else:
+            multi_index_list.append([k, v])
+    multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list))
+
+    expected_df = pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index)
+
+    tm.assert_frame_equal(result_df, expected_df)
+
+
 def test_nonagg_agg():
     # GH 35490 - Single/Multiple agg of non-agg function give same results
     # TODO: agg should raise for functions that don't aggregate
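For reference, a small usage sketch of the behavior these tests pin down (assuming
a pandas build that includes the GH27800 fix): dict-based agg over a categorical
group key, where min/max on an ordered categorical column aggregate without raising.

    import pandas as pd

    df = pd.DataFrame(
        {"nr": [1, 2, 3, 4], "cat_ord": list("abab"), "cat": list("aabb")}
    )
    df = df.astype({"cat": "category", "cat_ord": "category"})
    df["cat_ord"] = df["cat_ord"].cat.as_ordered()
    # min/max are well-defined for ordered categoricals, so this aggregates
    print(df.groupby("cat").agg({"nr": "min", "cat_ord": "max"}))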
From 47121ddc1c655f428c6c3fcea8fbf02eba85600a Mon Sep 17 00:00:00 2001
From: tkmz-n <60312218+tkmz-n@users.noreply.github.com>
Date: Sat, 22 Aug 2020 07:42:50 +0900
Subject: [PATCH 3/9] TST: resample does not yield empty groups (#10603)
 (#35799)

---
 pandas/tests/resample/test_timedelta.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py
index 0fbb60c176b30..3fa85e62d028c 100644
--- a/pandas/tests/resample/test_timedelta.py
+++ b/pandas/tests/resample/test_timedelta.py
@@ -150,3 +150,18 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
     tm.assert_index_equal(result.index, expected_index)
     assert result.index.freq == expected_index.freq
     assert not np.isnan(result[-1])
+
+
+def test_resample_with_timedelta_yields_no_empty_groups():
+    # GH 10603
+    df = pd.DataFrame(
+        np.random.normal(size=(10000, 4)),
+        index=pd.timedelta_range(start="0s", periods=10000, freq="3906250n"),
+    )
+    result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))
+
+    expected = pd.DataFrame(
+        [[768.0] * 4] * 12 + [[528.0] * 4],
+        index=pd.timedelta_range(start="1s", periods=13, freq="3s"),
+    )
+    tm.assert_frame_equal(result, expected)
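The expected counts follow from the sampling rate: one row every 3906250 ns = 1/256 s,
so a 3 s bin holds 768 rows, and slicing from "1s" drops the first 256 rows, leaving
9744 = 12 * 768 + 528, i.e. twelve full bins plus a 528-row tail. A quick check of
that arithmetic:

    samples_per_sec = 10**9 // 3906250   # 256 rows per second
    per_bin = 3 * samples_per_sec        # 768 rows per 3s bin
    remaining = 10000 - samples_per_sec  # slicing from "1s" drops 256 rows
    print(divmod(remaining, per_bin))    # (12, 528), matching `expected`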
From 1decb3e0ee1923a29b8eded7507bcb783b3870d0 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 21 Aug 2020 18:48:02 -0700
Subject: [PATCH 4/9] revert accidental rebase

---
 pandas/core/groupby/generic.py | 61 ++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 4b1f6cfe0a662..60e23b14eaf09 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -30,7 +30,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -59,7 +59,6 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
-from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -1034,31 +1033,32 @@ def _cython_agg_blocks(
 
         no_result = object()
 
-        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
-            # see if we can cast the values to the desired dtype
+        def cast_result_block(result, block: "Block", how: str) -> "Block":
+            # see if we can cast the block to the desired dtype
             # this may not be the original dtype
             assert not isinstance(result, DataFrame)
             assert result is not no_result
 
-            dtype = maybe_cast_result_dtype(values.dtype, how)
+            dtype = maybe_cast_result_dtype(block.dtype, how)
             result = maybe_downcast_numeric(result, dtype)
 
-            if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
-                # e.g. values was an IntegerArray
-                # (1, N) case can occur if values was Categorical
+            if block.is_extension and isinstance(result, np.ndarray):
+                # e.g. block.values was an IntegerArray
+                # (1, N) case can occur if block.values was Categorical
                 # and result is ndarray[object]
                 # TODO(EA2D): special casing not needed with 2D EAs
                 assert result.ndim == 1 or result.shape[0] == 1
                 try:
                     # Cast back if feasible
-                    result = type(values)._from_sequence(
-                        result.ravel(), dtype=values.dtype
+                    result = type(block.values)._from_sequence(
+                        result.ravel(), dtype=block.values.dtype
                     )
                 except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)
 
-            return result
+            agg_block: "Block" = block.make_block(result)
+            return agg_block
 
         def blk_func(block: "Block") -> List["Block"]:
             new_blocks: List["Block"] = []
@@ -1092,25 +1092,28 @@ def blk_func(block: "Block") -> List["Block"]:
                 # Categoricals. This will be done later by self._reindex_output()
                 # Doing it here creates an error. See GH#34951
                 sgb = get_groupby(obj, self.grouper, observed=True)
-                result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
-
-                assert isinstance(result, (Series, DataFrame))  # for mypy
-                # In the case of object dtype block, it may have been split
-                # in the operation. We un-split here.
-                result = result._consolidate()
-                assert isinstance(result, (Series, DataFrame))  # for mypy
-                assert len(result._mgr.blocks) == 1
-
-                # unwrap DataFrame to get array
-                result = result._mgr.blocks[0].values
-                if isinstance(result, np.ndarray) and result.ndim == 1:
-                    result = result.reshape(1, -1)
-                res_values = cast_agg_result(result, block.values, how)
-                agg_block = block.make_block(res_values)
-                new_blocks = [agg_block]
+                try:
+                    result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+                except TypeError:
+                    # we may have an exception in trying to aggregate
+                    # continue and exclude the block
+                    raise
+                else:
+                    assert isinstance(result, (Series, DataFrame))  # for mypy
+                    # In the case of object dtype block, it may have been split
+                    # in the operation. We un-split here.
+                    result = result._consolidate()
+                    assert isinstance(result, (Series, DataFrame))  # for mypy
+                    assert len(result._mgr.blocks) == 1
+
+                    # unwrap DataFrame to get array
+                    result = result._mgr.blocks[0].values
+                    if isinstance(result, np.ndarray) and result.ndim == 1:
+                        result = result.reshape(1, -1)
+                    agg_block = cast_result_block(result, block, how)
+                    new_blocks = [agg_block]
             else:
-                res_values = cast_agg_result(result, block.values, how)
-                agg_block = block.make_block(res_values)
+                agg_block = cast_result_block(result, block, how)
                 new_blocks = [agg_block]
             return new_blocks
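Background on the un-split step restored above, as a hedged sketch (pandas block
internals, so the details vary by version): applying a function column-wise to an
object block can leave the result spread over several same-dtype blocks, and
_consolidate() merges them back so the single-block assumption holds.

    import pandas as pd

    df = pd.DataFrame({"a": ["x", "y"]})
    df["b"] = ["z", "w"]  # setitem typically appends a second object block
    print(len(df._mgr.blocks))                 # 2 on pandas ~1.1
    print(len(df._consolidate()._mgr.blocks))  # 1: same-dtype blocks merged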
From 51205a51dd75c791848c353e9af3d8b46aa4afd6 Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 26 Aug 2020 18:51:45 -0700
Subject: [PATCH 5/9] REF/BUG: don't go through cython for EA indexes

---
 pandas/core/groupby/generic.py | 50 +++++++++++++++++++++++++++++-----
 pandas/core/groupby/ops.py     |  5 ++++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2afa56b50c3c7..36db78a77c511 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -74,7 +74,14 @@
     get_groupby,
 )
 from pandas.core.groupby.numba_ import generate_numba_func, split_for_numba
-from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same
+from pandas.core.indexes.api import (
+    DatetimeIndex,
+    Index,
+    MultiIndex,
+    PeriodIndex,
+    TimedeltaIndex,
+    all_indexes_same,
+)
 import pandas.core.indexes.base as ibase
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
@@ -262,17 +269,46 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
         if self.grouper.nkeys > 1:
             return self._python_agg_general(func, *args, **kwargs)
 
-        try:
-            return self._python_agg_general(func, *args, **kwargs)
-        except (ValueError, KeyError):
-            # TODO: KeyError is raised in _python_agg_general,
-            #  see test_groupby.test_basic
+        if isinstance(
+            self._selected_obj.index, (DatetimeIndex, TimedeltaIndex, PeriodIndex)
+        ):
+            # using _python_agg_general would end up incorrectly patching
+            #  _index_data in reduction.pyx
             result = self._aggregate_named(func, *args, **kwargs)
+        else:
+            try:
+                return self._python_agg_general(func, *args, **kwargs)
+            except (ValueError, KeyError):
+                # TODO: KeyError is raised in _python_agg_general,
+                #  see test_groupby.test_basic
+                result = self._aggregate_named(func, *args, **kwargs)
 
         index = Index(sorted(result), name=self.grouper.names[0])
+        if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
+            # TODO: do we _always_ want to do this?
+            #  shouldn't this be done later in e.g. _wrap_aggregated_output?
+            index = index._with_freq("infer")
+
+        result_index = self.grouper.result_index
+
+        if (
+            result_index.dtype == index.dtype
+            and result_index.freq is not None
+            and index.freq is None
+        ):
+            # TODO: will dtype equality always hold?
+            if len(index) == 1:
+                index.freq = result_index.freq
+
+            elif len(index) == 2:
+                if index[0] + result_index.freq == index[1]:
+                    # infer_freq doesn't handle length-2 indexes
+                    index.freq = result_index.freq
+
         ret = create_series_with_explicit_dtype(
             result, index=index, dtype_if_empty=object
         )
+        ret.name = self._selected_obj.name  # test_metadata_propagation_indiv
 
         if not self.as_index:  # pragma: no cover
             print("Warning, ignoring as_index=True")
@@ -478,7 +514,7 @@ def _get_index() -> Index:
     def _aggregate_named(self, func, *args, **kwargs):
         result = {}
 
-        for name, group in self:
+        for name, group in self:  # TODO: could we have duplicate names?
             group.name = name
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index c6171a55359fe..66a9f1353d3c5 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -672,6 +672,11 @@ def _aggregate_series_pure_python(
                 #  e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                 #  FIXME: are we potentially losing important res.index info?
                 res = res.item()
+            elif group.dtype == object:
+                # TODO: is this at all right?
+                #  e.g. test_agg_over_numpy_arrays where we have entries
+                #  that are each ndarrays
+                pass
             else:
                 raise ValueError("Function does not reduce")
         result = np.empty(ngroups, dtype="O")
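The freq bookkeeping added above exists because rebuilding the index from sorted
group keys discards freq, and inference needs at least three points. A sketch of
both facts, using the same private _with_freq helper the patch itself calls
(pandas ~1.1 assumed):

    import pandas as pd

    idx = pd.date_range("2020-01-01", periods=4, freq="D")
    rebuilt = pd.Index(sorted(idx))  # freq is lost in the round-trip
    print(rebuilt.freq)                      # None
    print(rebuilt._with_freq("infer").freq)  # <Day>; a length-2 index stays None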
From f453c5b3c74a86d4012b9478a3b64204f7cd81dc Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 26 Aug 2020 20:46:43 -0700
Subject: [PATCH 6/9] Implement _aggregate_maybe_named

---
 pandas/core/groupby/generic.py | 30 +++++++++++++++++++++++++++---
 pandas/core/groupby/ops.py     |  5 -----
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 36db78a77c511..9b72157ddd087 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -274,14 +274,14 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
         ):
             # using _python_agg_general would end up incorrectly patching
             #  _index_data in reduction.pyx
-            result = self._aggregate_named(func, *args, **kwargs)
+            result = self._aggregate_maybe_named(func, *args, **kwargs)
         else:
             try:
                 return self._python_agg_general(func, *args, **kwargs)
             except (ValueError, KeyError):
                 # TODO: KeyError is raised in _python_agg_general,
                 #  see test_groupby.test_basic
-                result = self._aggregate_named(func, *args, **kwargs)
+                result = self._aggregate_maybe_named(func, *args, **kwargs)
 
         index = Index(sorted(result), name=self.grouper.names[0])
         if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
@@ -511,11 +511,35 @@ def _get_index() -> Index:
         )
         return self._reindex_output(result)
 
+    def _aggregate_maybe_named(self, func, *args, **kwargs):
+        """
+        Try the named-aggregator first, then unnamed, which better matches
+        what libreduction does.
+        """
+        try:
+            return self._aggregate_named(func, *args, **kwargs)
+        except KeyError:
+            return self._aggregate_unnamed(func, *args, **kwargs)
+
     def _aggregate_named(self, func, *args, **kwargs):
         result = {}
 
         for name, group in self:  # TODO: could we have duplicate names?
-            group.name = name
+            group.name = name  # only difference vs _aggregate_unnamed
+            output = func(group, *args, **kwargs)
+            if isinstance(output, (Series, Index, np.ndarray)):
+                raise ValueError("Must produce aggregated value")
+            result[name] = output
+
+        return result
+
+    def _aggregate_unnamed(self, func, *args, **kwargs):
+        """
+        Pure-python analogue of what _python_agg_general does.
+        """
+        result = {}
+
+        for name, group in self:  # TODO: could we have duplicate names?
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
                 raise ValueError("Must produce aggregated value")
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 66a9f1353d3c5..c6171a55359fe 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -672,11 +672,6 @@ def _aggregate_series_pure_python(
                 #  e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                 #  FIXME: are we potentially losing important res.index info?
                 res = res.item()
-            elif group.dtype == object:
-                # TODO: is this at all right?
-                #  e.g. test_agg_over_numpy_arrays where we have entries
-                #  that are each ndarrays
-                pass
             else:
                 raise ValueError("Function does not reduce")
         result = np.empty(ngroups, dtype="O")
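Why a "named" path at all: user functions passed to SeriesGroupBy.agg may read
group.name, which _aggregate_named sets before each call; the unnamed fallback
covers functions that break when a name is attached. A small demonstration of the
name-dependent case:

    import pandas as pd

    ser = pd.Series([1, 2, 3, 4], index=["a", "a", "b", "b"])
    out = ser.groupby(level=0).agg(lambda g: g.sum() if g.name == "a" else g.max())
    print(out)  # a -> 3 (sum), b -> 4 (max)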
From 2ae2124fab275218268b680f5d5ce9e4bbefebe9 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 27 Aug 2020 09:01:00 -0700
Subject: [PATCH 7/9] de-duplicate

---
 pandas/core/groupby/generic.py | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9b72157ddd087..7927a77141b3d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -517,32 +517,28 @@ def _aggregate_maybe_named(self, func, *args, **kwargs):
         what libreduction does.
         """
         try:
-            return self._aggregate_named(func, *args, **kwargs)
+            return self._aggregate_named(func, *args, named=True, **kwargs)
         except KeyError:
-            return self._aggregate_unnamed(func, *args, **kwargs)
+            return self._aggregate_named(func, *args, named=False, **kwargs)
 
-    def _aggregate_named(self, func, *args, **kwargs):
+    def _aggregate_named(self, func, *args, named: bool = True, **kwargs):
         result = {}
 
         for name, group in self:  # TODO: could we have duplicate names?
-            group.name = name  # only difference vs _aggregate_unnamed
-            output = func(group, *args, **kwargs)
-            if isinstance(output, (Series, Index, np.ndarray)):
-                raise ValueError("Must produce aggregated value")
-            result[name] = output
-
-        return result
-
-    def _aggregate_unnamed(self, func, *args, **kwargs):
-        """
-        Pure-python analogue of what _python_agg_general does.
-        """
-        result = {}
-
-        for name, group in self:  # TODO: could we have duplicate names?
+            if named:
+                group.name = name
+
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
-                raise ValueError("Must produce aggregated value")
+                if (
+                    isinstance(output, Series)
+                    and len(output) == 1
+                    and name in output.index
+                ):
+                    # FIXME: kludge for test_resampler_grouper.test_apply
+                    output = output.iloc[0]
+                else:
+                    raise ValueError("Must produce aggregated value")
             result[name] = output
 
         return result

From 98a91a321ff758682bff573ecee1b0bf2e0e6d2e Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 27 Aug 2020 14:48:49 -0700
Subject: [PATCH 8/9] avoid passing RangeIndex to libreduction

---
 pandas/core/groupby/ops.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index c6171a55359fe..98e4539adbe24 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -45,7 +45,7 @@
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import base, grouper
-from pandas.core.indexes.api import Index, MultiIndex, ensure_index
+from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index
 from pandas.core.series import Series
 from pandas.core.sorting import (
     compress_group_index,
@@ -620,8 +620,10 @@ def agg_series(
             # TODO: can we get a performant workaround for EAs backed by ndarray?
             return self._aggregate_series_pure_python(obj, func)
 
-        elif obj.index._has_complex_internals:
+        elif obj.index._has_complex_internals or isinstance(obj.index, RangeIndex):
             # Preempt TypeError in _aggregate_series_fast
+            #  exclude RangeIndex because patching it in libreduction would
+            #  silently be incorrect
             return self._aggregate_series_pure_python(obj, func)
 
         try:
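For context on the RangeIndex exclusion (a sketch of internals; private attributes
shown for illustration only): a RangeIndex stores start/stop/step rather than an
ndarray buffer, so the ndarray that libreduction would slice and patch is only a
lazily materialized copy, and mutating it would silently fail to round-trip.

    import pandas as pd

    idx = pd.RangeIndex(5)
    print(idx.start, idx.stop, idx.step)  # 0 5 1 -- the actual stored state
    print(idx._data)                      # int64 ndarray materialized on demand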
From c230f72b502446ac3c4a65fe7c79c7314b158bb0 Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 1 Sep 2020 19:29:18 -0700
Subject: [PATCH 9/9] simplify

---
 pandas/core/groupby/generic.py     | 23 ++---------------------
 pandas/tests/resample/test_base.py | 13 ++++++++-----
 2 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 45833a882fc0f..20dfb3e8fddd8 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -282,27 +282,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 #  see test_groupby.test_basic
                 result = self._aggregate_maybe_named(func, *args, **kwargs)
 
-        index = Index(sorted(result), name=self.grouper.names[0])
-        if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
-            # TODO: do we _always_ want to do this?
-            #  shouldn't this be done later in e.g. _wrap_aggregated_output?
-            index = index._with_freq("infer")
-
-        result_index = self.grouper.result_index
-
-        if (
-            result_index.dtype == index.dtype
-            and result_index.freq is not None
-            and index.freq is None
-        ):
-            # TODO: will dtype equality always hold?
-            if len(index) == 1:
-                index.freq = result_index.freq
-
-            elif len(index) == 2:
-                if index[0] + result_index.freq == index[1]:
-                    # infer_freq doesn't handle length-2 indexes
-                    index.freq = result_index.freq
+        index = self.grouper.result_index
+        assert index.name == self.grouper.names[0]
 
         ret = create_series_with_explicit_dtype(
             result, index=index, dtype_if_empty=object
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index 28d33ebb23c20..5827b1f456bd7 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -195,14 +195,17 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
 
 
 @all_ts
-def test_apply_to_empty_series(empty_series_dti):
+@pytest.mark.parametrize("freq", ["M", "D", "H"])
+def test_apply_to_empty_series(empty_series_dti, freq):
     # GH 14313
     s = empty_series_dti
 
-    for freq in ["M", "D", "H"]:
-        result = s.resample(freq).apply(lambda x: 1)
-        expected = s.resample(freq).apply(np.sum)
+    result = s.resample(freq).apply(lambda x: 1)
+    expected = s.resample(freq).apply(np.sum)
+
+    assert result.index.dtype == expected.index.dtype
 
-        tm.assert_series_equal(result, expected, check_dtype=False)
+    tm.assert_series_equal(result, expected, check_dtype=False)
 
 
 @all_ts
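The simplification above leans on grouper.result_index already carrying the bin
index, freq included, so no post-hoc inference is needed. A sketch of that
guarantee from the public side (behavior assumed for pandas ~1.1):

    import numpy as np
    import pandas as pd

    ser = pd.Series(np.arange(6), index=pd.date_range("2020", periods=6, freq="D"))
    out = ser.resample("2D").agg(lambda x: x.sum())
    print(out.index.freq)  # <2 * Days> -- supplied by the grouper, not re-inferred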