From 66c23aa6aa20bd729bc3b36bc1a2a4515bd3f6e6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 15 Feb 2016 09:49:18 -0500 Subject: [PATCH 1/2] BUG: addtl fix for compat summary of groupby/resample with dicts closes #9052 --- pandas/core/groupby.py | 23 ++++++++++++-------- pandas/tests/test_groupby.py | 7 ++++++ pandas/tseries/tests/test_resample.py | 31 ++++++++++++++++----------- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 698bbcb2538b9..1b43d3bd76f59 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2526,7 +2526,8 @@ def aggregate(self, func_or_funcs, *args, **kwargs): return getattr(self, func_or_funcs)(*args, **kwargs) if hasattr(func_or_funcs, '__iter__'): - ret = self._aggregate_multiple_funcs(func_or_funcs, _level) + ret = self._aggregate_multiple_funcs(func_or_funcs, + (_level or 0) + 1) else: cyfunc = self._is_cython_func(func_or_funcs) if cyfunc and not args and not kwargs: @@ -2546,6 +2547,18 @@ def aggregate(self, func_or_funcs, *args, **kwargs): if not self.as_index: # pragma: no cover print('Warning, ignoring as_index=True') + # _level handled at higher + if not _level and isinstance(ret, dict): + from pandas import concat + + # our result is a Series-like + if len(ret) == 1: + ret = concat([r for r in ret.values()], + axis=1) + + # our result is a DataFrame like + else: + ret = concat(ret, axis=1) return ret agg = aggregate @@ -2571,14 +2584,6 @@ def _aggregate_multiple_funcs(self, arg, _level): columns.append(com._get_callable_name(f)) arg = lzip(columns, arg) - # for a ndim=1, disallow a nested dict for an aggregator as - # this is a mis-specification of the aggregations, via a - # specificiation error - # e.g. 
g['A'].agg({'A': ..., 'B': ...}) - if self.name in columns and len(columns) > 1: - raise SpecificationError('invalid aggregation names specified ' - 'for selected objects') - results = {} for name, func in arg: obj = self diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 405589f501532..cc619a998cdd8 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1558,6 +1558,13 @@ def f(): 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) assert_frame_equal(result, expected, check_like=True) + # same name as the original column + # GH9052 + expected = g['D'].agg({'result1': np.sum, 'result2': np.mean}) + expected = expected.rename(columns={'result1': 'D'}) + result = g['D'].agg({'D': np.sum, 'result2': np.mean}) + assert_frame_equal(result, expected, check_like=True) + def test_multi_iter(self): s = Series(np.arange(6)) k1 = np.array(['a', 'a', 'a', 'b', 'b', 'b']) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 4e2d596681942..d6d3c40359c79 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -419,25 +419,32 @@ def test_agg_misc(self): assert_frame_equal(result, expected, check_like=True) # series like aggs - expected = pd.concat([t['A'].sum(), - t['A'].std()], - axis=1) - expected.columns = ['sum', 'std'] - for t in [r, g]: - result = r['A'].agg({'A': ['sum', 'std']}) + result = t['A'].agg({'A': ['sum', 'std']}) + expected = pd.concat([t['A'].sum(), + t['A'].std()], + axis=1) + expected.columns = ['sum', 'std'] + + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([t['A'].agg(['sum', 'std']), + t['A'].agg(['mean', 'std'])], + axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), + ('A', 'std'), + ('B', 'mean'), + ('B', 'std')]) + result = t['A'].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) assert_frame_equal(result, expected, check_like=True) # errors + # invalid names in the agg 
specification for t in [r, g]: - # invalid names in the agg specification - def f(): - r['A'].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) - self.assertRaises(SpecificationError, f) - def f(): - r[['A']].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) + r[['A']].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) self.assertRaises(SpecificationError, f) def test_agg_nested_dicts(self): From 0d8561e79a730913ef5c5de9100d5898a5cf7a56 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 15 Feb 2016 14:16:24 -0500 Subject: [PATCH 2/2] BUG: resampling with a how could trigger asfreq instead closes #12332 --- doc/source/whatsnew/v0.18.0.txt | 2 +- pandas/tseries/resample.py | 2 +- pandas/tseries/tests/test_resample.py | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index c6d02acf75477..b1cb94d131b5b 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -581,7 +581,7 @@ other anchored offsets like ``MonthBegin`` and ``YearBegin``. Resample API ^^^^^^^^^^^^ -Like the change in the window functions API :ref:`above <whatsnew_0180.enhancements.moments>`, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`). +Like the change in the window functions API :ref:`above <whatsnew_0180.enhancements.moments>`, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`). .. 
ipython:: python diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index a22f87cb90420..ba2eb3463d169 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -500,7 +500,7 @@ def _downsample(self, how, **kwargs): # do we have a regular frequency if ax.freq is not None or ax.inferred_freq is not None: - if len(self.grouper.binlabels) > len(ax): + if len(self.grouper.binlabels) > len(ax) and how is None: # let's do an asfreq return self.asfreq() diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index d6d3c40359c79..262526f3f4c7c 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -925,6 +925,19 @@ def test_resample_ohlc(self): self.assertEqual(xs['low'], s[:5].min()) self.assertEqual(xs['close'], s[4]) + def test_resample_ohlc_result(self): + + # GH 12332 + index = pd.date_range('1-1-2000', '2-15-2000', freq='h') + index = index.union(pd.date_range('4-15-2000', '5-15-2000', freq='h')) + s = Series(range(len(index)), index=index) + + a = s.loc[:'4-15-2000'].resample('30T').ohlc() + self.assertIsInstance(a, DataFrame) + + b = s.loc[:'4-14-2000'].resample('30T').ohlc() + self.assertIsInstance(b, DataFrame) + def test_resample_ohlc_dataframe(self): df = ( pd.DataFrame({