diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index 6e9ef4b10273c..dfe3f0ef87c11 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -292,7 +292,10 @@ def setup(self):
         self.rng3 = date_range(start='1/1/2000', periods=1500000, freq='S')
         self.ts3 = Series(1, index=self.rng3)
 
-    def time_sort_index(self):
+    def time_sort_index_monotonic(self):
+        self.ts2.sort_index()
+
+    def time_sort_index_non_monotonic(self):
         self.ts.sort_index()
 
     def time_timeseries_slice_minutely(self):
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 37a70435ed6ff..d518d85836123 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -656,6 +656,78 @@ If indicated, a deprecation warning will be issued if you reference that module.
     "pandas._hash", "pandas.tools.libhash", ""
     "pandas._window", "pandas.core.libwindow", ""
 
+.. _whatsnew_0200.api_breaking.sort_index:
+
+DataFrame.sort_index changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In certain cases, calling ``.sort_index()`` on a MultiIndexed DataFrame would return the *same* DataFrame without appearing to sort.
+This would happen with ``lexsorted``, but non-monotonic ``levels``. (:issue:`15622`, :issue:`15687`, :issue:`14015`, :issue:`13431`)
+
+The following is *unchanged* between versions, and is shown for illustration purposes:
+
+.. ipython:: python
+
+   df = DataFrame(np.arange(6), columns=['value'],
+                  index=MultiIndex.from_product([list('BA'), range(3)]))
+   df
+
+.. ipython:: python
+
+   df.index.is_lexsorted()
+   df.index.is_monotonic
+
+Sorting works as expected:
+
+.. ipython:: python
+
+   df.sort_index()
+
+.. ipython:: python
+
+   df.sort_index().index.is_lexsorted()
+   df.sort_index().index.is_monotonic
+
+However, this example, where the index is lexsorted but one of the levels is not monotonic, doesn't behave as desired.
+
+.. ipython:: python
+
+   df = pd.DataFrame({'value': [1, 2, 3, 4]},
+                     index=pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
+                                         labels=[[0, 0, 1, 1], [0, 1, 0, 1]]))
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [11]: df.sort_index()
+   Out[11]:
+          value
+   a bb       1
+     aa       2
+   b bb       3
+     aa       4
+
+   In [14]: df.sort_index().index.is_lexsorted()
+   Out[14]: True
+
+   In [15]: df.sort_index().index.is_monotonic
+   Out[15]: False
+
+New Behavior:
+
+.. ipython:: python
+
+   df.sort_index()
+   df.sort_index().index.is_lexsorted()
+   df.sort_index().index.is_monotonic
+
 
 .. _whatsnew_0200.api_breaking.groupby_describe:
 
@@ -830,7 +902,7 @@ Performance Improvements
 - Improved performance when using ``.unstack()`` (:issue:`15503`)
 - Improved performance of merge/join on ``category`` columns (:issue:`10409`)
 - Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`)
-
+- Improved performance of ``Series.sort_index()`` with a monotonic index (:issue:`15694`)
 
 .. _whatsnew_0200.bug_fixes:
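To make the change above concrete, here is a small, self-contained sketch that simply re-runs the whatsnew / doc-example case on a build that includes this patch (nothing below is part of the patch itself):

.. code-block:: python

   import pandas as pd

   # lexsorted labels, but the second level is stored out of order ('bb' before 'aa')
   df = pd.DataFrame({'value': [1, 2, 3, 4]},
                     index=pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
                                         labels=[[0, 0, 1, 1], [0, 1, 0, 1]]))

   df.index.is_lexsorted()    # True
   df.index.is_monotonic      # False

   # with this patch, sort_index() physically reorders the rows, so the
   # result is monotonic even though the stored levels stay unsorted
   result = df.sort_index()
   result.index.is_monotonic  # True
   list(result['value'])      # [2, 1, 4, 3]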
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6b5e8e0799421..c9987053150ff 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3308,6 +3308,10 @@ def trans(v):
     def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
                    kind='quicksort', na_position='last', sort_remaining=True,
                    by=None):
+
+        # TODO: this can be combined with the Series.sort_index impl, as
+        # the two are almost identical
+
         inplace = validate_bool_kwarg(inplace, 'inplace')
         # 10726
         if by is not None:
@@ -3321,8 +3325,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
         axis = self._get_axis_number(axis)
         labels = self._get_axis(axis)
 
-        # sort by the index
-        if level is not None:
+        if level:
             new_axis, indexer = labels.sortlevel(level, ascending=ascending,
                                                  sort_remaining=sort_remaining)
 
@@ -3332,17 +3335,15 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
 
             # make sure that the axis is lexsorted to start
             # if not we need to reconstruct to get the correct indexer
-            if not labels.is_lexsorted():
-                labels = MultiIndex.from_tuples(labels.values)
+            labels = labels._reconstruct(sort=True)
 
             indexer = lexsort_indexer(labels.labels, orders=ascending,
                                       na_position=na_position)
         else:
             from pandas.core.sorting import nargsort
 
-            # GH11080 - Check monotonic-ness before sort an index
-            # if monotonic (already sorted), return None or copy() according
-            # to 'inplace'
+            # check monotonicity before sorting the index
+            # GH11080
             if ((ascending and labels.is_monotonic_increasing) or
                     (not ascending and labels.is_monotonic_decreasing)):
                 if inplace:
@@ -3353,8 +3354,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
             indexer = nargsort(labels, kind=kind, ascending=ascending,
                                na_position=na_position)
 
+        baxis = self._get_block_manager_axis(axis)
         new_data = self._data.take(indexer,
-                                   axis=self._get_block_manager_axis(axis),
+                                   axis=baxis,
                                    convert=False, verify=False)
 
         if inplace:
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 727af8b8cd3eb..00300ccaa1b75 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1808,6 +1808,13 @@ def get_group_levels(self):
         'ohlc': lambda *args: ['open', 'high', 'low', 'close']
     }
 
+    def _is_builtin_func(self, arg):
+        """
+        if we define a builtin function for this argument, return it,
+        otherwise return the arg
+        """
+        return SelectionMixin._builtin_table.get(arg, arg)
+
     def _get_cython_function(self, kind, how, values, is_numeric):
 
         dtype_str = values.dtype.name
@@ -2033,7 +2040,7 @@ def _aggregate_series_fast(self, obj, func):
         # avoids object / Series creation overhead
         dummy = obj._get_values(slice(None, 0)).to_dense()
         indexer = get_group_index_sorter(group_index, ngroups)
-        obj = obj.take(indexer, convert=False)
+        obj = obj.take(indexer, convert=False).to_dense()
         group_index = algorithms.take_nd(
             group_index, indexer, allow_fill=False)
         grouper = lib.SeriesGrouper(obj, func, group_index, ngroups,
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index 2822d98b7c906..8d6b6e17396eb 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -22,8 +22,8 @@
 from pandas.sparse.libsparse import IntIndex
 
 from pandas.core.categorical import Categorical, _factorize_from_iterable
-from pandas.core.sorting import (get_group_index, compress_group_index,
-                                 decons_obs_group_ids)
+from pandas.core.sorting import (get_group_index, get_compressed_ids,
+                                 compress_group_index, decons_obs_group_ids)
 
 import pandas.core.algorithms as algos
 from pandas._libs import algos as _algos, reshape as _reshape
@@ -494,11 +494,6 @@ def _unstack_frame(obj, level, fill_value=None):
     return unstacker.get_result()
 
 
-def get_compressed_ids(labels, sizes):
-    ids = get_group_index(labels, sizes, sort=True, xnull=False)
-    return compress_group_index(ids, sort=True)
-
-
 def stack(frame, level=-1, dropna=True):
     """
     Convert DataFrame to Series with multi-level Index. Columns become the
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 0913592e055cd..6fb1b66708369 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1752,17 +1752,31 @@ def _try_kind_sort(arr):
 
     def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
                    kind='quicksort', na_position='last', sort_remaining=True):
+        # TODO: this can be combined with the DataFrame.sort_index impl, as
+        # the two are almost identical
         inplace = validate_bool_kwarg(inplace, 'inplace')
         axis = self._get_axis_number(axis)
         index = self.index
-        if level is not None:
+
+        if level:
             new_index, indexer = index.sortlevel(level, ascending=ascending,
                                                  sort_remaining=sort_remaining)
         elif isinstance(index, MultiIndex):
             from pandas.core.sorting import lexsort_indexer
-            indexer = lexsort_indexer(index.labels, orders=ascending)
+            labels = index._reconstruct(sort=True)
+            indexer = lexsort_indexer(labels.labels, orders=ascending)
         else:
             from pandas.core.sorting import nargsort
+
+            # check monotonicity before sorting the index
+            # GH11080
+            if ((ascending and index.is_monotonic_increasing) or
+                    (not ascending and index.is_monotonic_decreasing)):
+                if inplace:
+                    return
+                else:
+                    return self.copy()
+
             indexer = nargsort(index, kind=kind, ascending=ascending,
                                na_position=na_position)
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 205d0d94d2ec3..ea131e66cb833 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -93,6 +93,11 @@ def maybe_lift(lab, size):  # pormote nan values
     return loop(list(labels), list(shape))
 
 
+def get_compressed_ids(labels, sizes):
+    ids = get_group_index(labels, sizes, sort=True, xnull=False)
+    return compress_group_index(ids, sort=True)
+
+
 def is_int64_overflow_possible(shape):
     the_prod = long(1)
     for x in shape:
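The ``Series.sort_index`` hunk above adds an early exit when the index is already monotonic in the requested direction; this is what the renamed asv benchmarks (``time_sort_index_monotonic`` vs ``time_sort_index_non_monotonic``) measure. A rough sketch of the user-visible effect, assuming a build with this patch applied (variable names are illustrative only):

.. code-block:: python

   import numpy as np
   import pandas as pd

   s = pd.Series(np.arange(5), index=pd.date_range('2000-01-01', periods=5))

   # already monotonic increasing, so sort_index() can return a copy
   # immediately instead of computing an argsort over the index
   s.index.is_monotonic_increasing   # True
   s.sort_index() is not s           # True -- a copy, not the same object
   s.sort_index(inplace=True)        # returns None and leaves s untouched

   # the descending case only short-circuits for a monotonic *decreasing*
   # index; here it still goes through nargsort
   s.sort_index(ascending=False).index.is_monotonic_decreasing   # True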
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index e6ae0605d4758..16019dd2d860b 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -1173,9 +1173,100 @@ def from_product(cls, iterables, sortorder=None, names=None):
         labels, levels = _factorize_from_iterables(iterables)
         labels = cartesian_product(labels)
+        return MultiIndex(levels, labels, sortorder=sortorder, names=names)
 
-        return MultiIndex(levels=levels, labels=labels, sortorder=sortorder,
-                          names=names)
+    def _reconstruct(self, sort=False, remove_unused=False):
+        """
+        reconstruct the MultiIndex
+
+        The MultiIndex will have the same outward appearance (e.g. values)
+        and will also .equals() the original.
+
+        Parameters
+        ----------
+        sort : boolean, default False
+            monotonically sort the levels
+        remove_unused : boolean, default False
+            remove unused levels
+
+        Returns
+        -------
+        MultiIndex
+
+        """
+
+        if sort and remove_unused:
+            raise ValueError("only support one of sort / remove_unused")
+
+        if not (sort or remove_unused):
+            raise ValueError("must supply one of sort / remove_unused")
+
+        levels = self.levels
+        labels = self.labels
+
+        new_levels = []
+        new_labels = []
+
+        if sort:
+
+            if self.is_lexsorted() and self.is_monotonic:
+                return self
+
+            for lev, lab in zip(levels, labels):
+
+                if lev.is_monotonic:
+                    new_levels.append(lev)
+                    new_labels.append(lab)
+                    continue
+
+                # indexer to reorder the levels
+                indexer = lev.argsort()
+                lev = lev.take(indexer)
+
+                # indexer to reorder the labels
+                ri = lib.get_reverse_indexer(indexer, len(indexer))
+                lab = algos.take_1d(ri, lab)
+
+                new_levels.append(lev)
+                new_labels.append(lab)
+
+        elif remove_unused:
+
+            changed = np.zeros(self.nlevels, dtype=bool)
+            for i, (lev, lab) in enumerate(zip(levels, labels)):
+
+                uniques = np.sort(algos.unique(lab))
+
+                # nothing unused
+                if len(uniques) == len(lev):
+                    new_levels.append(lev)
+                    new_labels.append(lab)
+                    continue
+
+                changed[i] = True
+
+                unused = list(reversed(sorted(set(
+                    np.arange(len(lev))) - set(uniques))))
+
+                # new levels are simple
+                lev = lev.take(uniques)
+
+                # new labels: we remove the unused positions
+                # by decrementing the labels above each one
+                # prob a better way
+                for u in unused:
+                    lab = np.where(lab > u, lab - 1, lab)
+
+                new_levels.append(lev)
+                new_labels.append(lab)
+
+            # nothing changed
+            if not changed.any():
+                return self
+
+        return MultiIndex(new_levels, new_labels,
+                          names=self.names, sortorder=self.sortorder,
+                          verify_integrity=False)
 
     @property
     def nlevels(self):
@@ -1746,9 +1837,10 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
 
     def _partial_tup_index(self, tup, side='left'):
         if len(tup) > self.lexsort_depth:
-            raise KeyError('Key length (%d) was greater than MultiIndex'
-                           ' lexsort depth (%d)' %
-                           (len(tup), self.lexsort_depth))
+            raise UnsortedIndexError(
+                'Key length (%d) was greater than MultiIndex'
+                ' lexsort depth (%d)' %
+                (len(tup), self.lexsort_depth))
 
         n = len(tup)
         start, end = 0, len(self)
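The label-decrementing loop in ``_reconstruct(remove_unused=True)`` above is fairly terse. The following standalone numpy sketch (hypothetical arrays, not part of the patch) walks a single level through the same steps:

.. code-block:: python

   import numpy as np

   lev = np.array(['deleteMe', 'keepMe', 'keepMeToo'])  # full level
   lab = np.array([1, 2, 1, 2])                         # label 0 is never used

   uniques = np.sort(np.unique(lab))                    # array([1, 2])
   new_lev = lev.take(uniques)                          # ['keepMe', 'keepMeToo']

   # walk the unused positions from highest to lowest and shift the larger
   # labels down so they keep pointing at the same level values
   unused = reversed(sorted(set(range(len(lev))) - set(uniques)))
   for u in unused:
       lab = np.where(lab > u, lab - 1, lab)

   lab                                                  # array([0, 1, 0, 1])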
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 0c274b2f6c4ff..f153fc6d7b815 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2411,6 +2411,92 @@ def test_is_monotonic(self):
         self.assertFalse(i.is_monotonic)
 
+    def test_reconstruct_api(self):
+
+        mi = MultiIndex.from_arrays([
+            ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
+        ])
+
+        with pytest.raises(ValueError):
+            mi._reconstruct()
+
+        with pytest.raises(ValueError):
+            mi._reconstruct(sort=True, remove_unused=True)
+
+    def test_reconstruct_sort(self):
+
+        # starts off lexsorted & monotonic
+        mi = MultiIndex.from_arrays([
+            ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
+        ])
+        assert mi.is_lexsorted()
+        assert mi.is_monotonic
+
+        recons = mi._reconstruct(sort=True)
+        assert recons.is_lexsorted()
+        assert recons.is_monotonic
+        assert mi is recons
+
+        assert mi.equals(recons)
+        assert Index(mi.values).equals(Index(recons.values))
+
+        # cannot convert to lexsorted
+        mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
+                                        ('x', 'b'), ('y', 'a'), ('z', 'b')],
+                                       names=['one', 'two'])
+        assert not mi.is_lexsorted()
+        assert not mi.is_monotonic
+
+        recons = mi._reconstruct(sort=True)
+        assert not recons.is_lexsorted()
+        assert not recons.is_monotonic
+
+        assert mi.equals(recons)
+        assert Index(mi.values).equals(Index(recons.values))
+
+        # cannot convert to lexsorted
+        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
+                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
+                        names=['col1', 'col2'])
+        assert not mi.is_lexsorted()
+        assert not mi.is_monotonic
+
+        recons = mi._reconstruct(sort=True)
+        assert not recons.is_lexsorted()
+        assert not recons.is_monotonic
+
+        assert mi.equals(recons)
+        assert Index(mi.values).equals(Index(recons.values))
+
+    def test_reconstruct_remove_unused(self):
+        # xref to GH 2770
+        df = DataFrame([['deleteMe', 1, 9],
+                        ['keepMe', 2, 9],
+                        ['keepMeToo', 3, 9]],
+                       columns=['first', 'second', 'third'])
+        df2 = df.set_index(['first', 'second'], drop=False)
+        df2 = df2[df2['first'] != 'deleteMe']
+
+        # removed levels are there
+        expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
+                                      [1, 2, 3]],
+                              labels=[[1, 2], [1, 2]],
+                              names=['first', 'second'])
+        result = df2.index
+        tm.assert_index_equal(result, expected)
+
+        expected = MultiIndex(levels=[['keepMe', 'keepMeToo'],
+                                      [2, 3]],
+                              labels=[[0, 1], [0, 1]],
+                              names=['first', 'second'])
+        result = df2.index._reconstruct(remove_unused=True)
+        tm.assert_index_equal(result, expected)
+
+        # idempotent
+        result2 = result._reconstruct(remove_unused=True)
+        tm.assert_index_equal(result2, expected)
+        assert result2 is result
+
     def test_isin(self):
         values = [('foo', 2), ('bar', 3), ('quux', 4)]
 
@@ -2699,6 +2785,30 @@ def test_unsortedindex(self):
         with assertRaises(KeyError):
             df.loc(axis=0)['q', :]
 
+    def test_unsortedindex_doc_examples(self):
+        # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex  # noqa
+        dfm = DataFrame({'jim': [0, 0, 1, 1],
+                         'joe': ['x', 'x', 'z', 'y'],
+                         'jolie': np.random.rand(4)})
+
+        dfm = dfm.set_index(['jim', 'joe'])
+        with tm.assert_produces_warning(PerformanceWarning):
+            dfm.loc[(1, 'z')]
+
+        with pytest.raises(UnsortedIndexError):
+            dfm.loc[(0, 'y'):(1, 'z')]
+
+        assert not dfm.index.is_lexsorted()
+        assert dfm.index.lexsort_depth == 1
+
+        # sort it
+        dfm = dfm.sort_index()
+        dfm.loc[(1, 'z')]
+        dfm.loc[(0, 'y'):(1, 'z')]
+
+        assert dfm.index.is_lexsorted()
+        assert dfm.index.lexsort_depth == 2
+
     def test_tuples_with_name_string(self):
         # GH 15110 and GH 14848
 
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index dc71fafb1094f..1e197f10cb9c6 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1599,7 +1599,7 @@ def test_unstack(self):
                               labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]])
         expected = DataFrame({'bar': s.values},
                              index=exp_index).sort_index(level=0)
-        unstacked = s.unstack(0)
+        unstacked = s.unstack(0).sort_index()
         assert_frame_equal(unstacked, expected)
 
         # GH5873
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index fd5421abc89ad..ef3dab19b81b6 100755
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -2438,6 +2438,30 @@ def test_getitem_slice_not_sorted(self):
         expected = df.reindex(columns=df.columns[:3])
         tm.assert_frame_equal(result, expected)
 
+    def test_frame_getitem_not_sorted2(self):
+        # 13431
+        df = DataFrame({'col1': ['b', 'd', 'b', 'a'],
+                        'col2': [3, 1, 1, 2],
+                        'data': ['one', 'two', 'three', 'four']})
+
+        df2 = df.set_index(['col1', 'col2'])
+        df2_original = df2.copy()
+
+        df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True)
+        df2.index.set_labels([0, 1, 0, 2], level='col1', inplace=True)
+        assert not df2.index.is_lexsorted()
+        assert not df2.index.is_monotonic
+
+        assert df2_original.index.equals(df2.index)
+        expected = df2.sort_index()
+        assert not expected.index.is_lexsorted()
+        assert expected.index.is_monotonic
+
+        result = df2.sort_index(level=0)
+        assert not result.index.is_lexsorted()
+        assert result.index.is_monotonic
+        tm.assert_frame_equal(result, expected)
+
     def test_frame_getitem_not_sorted(self):
         df = self.frame.T
         df['foo', 'four'] = 'foo'
@@ -2474,3 +2498,117 @@ def test_series_getitem_not_sorted(self):
         expected.index = expected.index.droplevel(0)
         tm.assert_series_equal(result, expected)
         tm.assert_series_equal(result2, expected)
+
+    def test_sort_index_and_reconstruction(self):
+
+        # 15622
+        # lexsortedness should be identical
+        # across MultiIndex construction methods
+
+        df = DataFrame([[1, 1], [2, 2]], index=list('ab'))
+        expected = DataFrame([[1, 1], [2, 2], [1, 1], [2, 2]],
+                             index=MultiIndex.from_tuples([(0.5, 'a'),
+                                                           (0.5, 'b'),
+                                                           (0.8, 'a'),
+                                                           (0.8, 'b')]))
+        assert expected.index.is_lexsorted()
+
+        result = DataFrame(
+            [[1, 1], [2, 2], [1, 1], [2, 2]],
+            index=MultiIndex.from_product([[0.5, 0.8], list('ab')]))
+        result = result.sort_index()
+        assert result.index.is_lexsorted()
+        assert result.index.is_monotonic
+
+        tm.assert_frame_equal(result, expected)
+
+        result = DataFrame(
+            [[1, 1], [2, 2], [1, 1], [2, 2]],
+            index=MultiIndex(levels=[[0.5, 0.8], ['a', 'b']],
+                             labels=[[0, 0, 1, 1], [0, 1, 0, 1]]))
+        result = result.sort_index()
+        assert result.index.is_lexsorted()
+
+        tm.assert_frame_equal(result, expected)
+
+        concatted = pd.concat([df, df], keys=[0.8, 0.5])
+        result = concatted.sort_index()
+
+        # this will be monotonic, but not lexsorted!
+        assert not result.index.is_lexsorted()
+        assert result.index.is_monotonic
+
+        tm.assert_frame_equal(result, expected)
+
+        # 14015
+        df = DataFrame([[1, 2], [6, 7]],
+                       columns=MultiIndex.from_tuples(
+                           [(0, '20160811 12:00:00'),
+                            (0, '20160809 12:00:00')],
+                           names=['l1', 'Date']))
+
+        df.columns.set_levels(pd.to_datetime(df.columns.levels[1]),
+                              level=1,
+                              inplace=True)
+        assert not df.columns.is_lexsorted()
+        assert not df.columns.is_monotonic
+        result = df.sort_index(axis=1)
+        assert result.columns.is_lexsorted()
+        assert result.columns.is_monotonic
+        result = df.sort_index(axis=1, level=1)
+        assert result.columns.is_lexsorted()
+        assert result.columns.is_monotonic
+
+    def test_sort_index_and_reconstruction_doc_example(self):
+        # doc example
+        df = DataFrame({'value': [1, 2, 3, 4]},
+                       index=MultiIndex(
+                           levels=[['a', 'b'], ['bb', 'aa']],
+                           labels=[[0, 0, 1, 1], [0, 1, 0, 1]]))
+        assert df.index.is_lexsorted()
+        assert not df.index.is_monotonic
+
+        # sort it
+        expected = DataFrame({'value': [2, 1, 4, 3]},
+                             index=MultiIndex(
+                                 levels=[['a', 'b'], ['aa', 'bb']],
+                                 labels=[[0, 0, 1, 1], [0, 1, 0, 1]]))
+        result = df.sort_index()
+        assert not result.index.is_lexsorted()
+        assert result.index.is_monotonic
+
+        tm.assert_frame_equal(result, expected)
+
+        # reconstruct
+        result = df.sort_index().copy()
+        result.index = result.index._reconstruct(sort=True)
+        assert result.index.is_lexsorted()
+        assert result.index.is_monotonic
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_sort_index_reorder_on_ops(self):
+        # 15687
+        df = pd.DataFrame(
+            np.random.randn(8, 2),
+            index=MultiIndex.from_product(
+                [['a', 'b'],
+                 ['big', 'small'],
+                 ['red', 'blu']],
+                names=['letter', 'size', 'color']),
+            columns=['near', 'far'])
+        df = df.sort_index()
+
+        def my_func(group):
+            group.index = ['newz', 'newa']
+            return group
+
+        result = df.groupby(level=['letter', 'size']).apply(
+            my_func).sort_index()
+        expected = MultiIndex.from_product(
+            [['a', 'b'],
+             ['big', 'small'],
+             ['newa', 'newz']],
+            names=['letter', 'size', None])
+
+        tm.assert_index_equal(result.index, expected)
diff --git a/pandas/tests/tools/test_hashing.py b/pandas/tests/tools/test_hashing.py
index 9bed0d428bc41..17a1fb1a7d525 100644
--- a/pandas/tests/tools/test_hashing.py
+++ b/pandas/tests/tools/test_hashing.py
@@ -87,6 +87,35 @@ def test_multiindex_unique(self):
         result = hash_pandas_object(mi)
         self.assertTrue(result.is_unique)
 
+    def test_multiindex_objects(self):
+        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
+                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
+                        names=['col1', 'col2'])
+        recons = mi._reconstruct(sort=True)
+
+        # these are equal
+        assert mi.equals(recons)
+        assert Index(mi.values).equals(Index(recons.values))
+
+        # _hashed_values and hash_pandas_object(..., index=False)
+        # equivalency
+        expected = hash_pandas_object(
+            mi, index=False).values
+        result = mi._hashed_values
+        tm.assert_numpy_array_equal(result, expected)
+
+        expected = hash_pandas_object(
+            recons, index=False).values
+        result = recons._hashed_values
+        tm.assert_numpy_array_equal(result, expected)
+
+        expected = mi._hashed_values
+        result = recons._hashed_values
+
+        # values should match, though possibly in a different order
+        tm.assert_numpy_array_equal(np.sort(result),
+                                    np.sort(expected))
+
     def test_hash_pandas_object(self):
 
         for obj in [Series([1, 2, 3]),
diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/tools/test_pivot.py
index 4502f232c6d9c..c8dfaf5e29bc6 100644
--- a/pandas/tests/tools/test_pivot.py
+++ b/pandas/tests/tools/test_pivot.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 
+from collections import OrderedDict
 import pandas as pd
 from pandas import (DataFrame, Series, Index, MultiIndex, Grouper,
                     date_range, concat)
@@ -513,7 +514,7 @@ def test_pivot_columns_lexsorted(self):
         self.assertTrue(pivoted.columns.is_monotonic)
 
     def test_pivot_complex_aggfunc(self):
-        f = {'D': ['std'], 'E': ['sum']}
+        f = OrderedDict([('D', ['std']), ('E', ['sum'])])
         expected = self.data.groupby(['A', 'B']).agg(f).unstack('B')
 
         result = self.data.pivot_table(index='A', columns='B', aggfunc=f)
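A note on the last hunk: ``test_pivot_complex_aggfunc`` switches the aggregation spec from a plain ``dict`` to an ``OrderedDict`` so that the column order of the expected and actual results is deterministic (plain dicts had no guaranteed iteration order on the Python versions supported at the time). A minimal sketch of the idea, with a made-up frame standing in for ``self.data``:

.. code-block:: python

   from collections import OrderedDict
   import numpy as np
   import pandas as pd

   data = pd.DataFrame({'A': list('aabb'),
                        'B': list('xyxy'),
                        'D': np.arange(4.0),
                        'E': np.arange(4.0) * 10})

   # pin the order of the ('D', 'std') and ('E', 'sum') column blocks so the
   # groupby-based expectation and the pivot_table result line up column-for-column
   f = OrderedDict([('D', ['std']), ('E', ['sum'])])
   expected = data.groupby(['A', 'B']).agg(f).unstack('B')
   result = data.pivot_table(index='A', columns='B', aggfunc=f)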