From 8e9fb6274dba12d201761b916a4e3655d8f27955 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Tue, 21 Jun 2016 23:26:09 +0200 Subject: [PATCH 01/11] TST: Clean up tests of DataFrame.sort_{index,values} --- pandas/tests/frame/test_sorting.py | 47 +++++++++++------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index ff2159f8b6f40..64521e6a6335d 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -21,7 +21,7 @@ class TestDataFrameSorting(tm.TestCase, TestData): _multiprocess_can_split_ = True - def test_sort_values(self): + def test_sort_index(self): # API for 9816 # sort_index @@ -34,63 +34,50 @@ def test_sort_values(self): with tm.assert_produces_warning(FutureWarning): frame.sort() + # axis=0 : sort rows by index labels unordered = frame.ix[[3, 2, 4, 1]] - expected = unordered.sort_index() - result = unordered.sort_index(axis=0) + expected = frame assert_frame_equal(result, expected) - unordered = frame.ix[:, [2, 1, 3, 0]] - expected = unordered.sort_index(axis=1) + result = unordered.sort_index(ascending=False) + expected = frame[::-1] + assert_frame_equal(result, expected) + # axis=1 : sort columns by column names + unordered = frame.ix[:, [2, 1, 3, 0]] result = unordered.sort_index(axis=1) - assert_frame_equal(result, expected) + assert_frame_equal(result, frame) + + result = unordered.sort_index(axis=1, ascending=False) + expected = frame.ix[:, ::-1] assert_frame_equal(result, expected) - # sortlevel - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + # sort rows by specified level of multi-index + mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC')) df = DataFrame([[1, 2], [3, 4]], mi) result = df.sort_index(level='A', sort_remaining=False) expected = df.sortlevel('A', sort_remaining=False) assert_frame_equal(result, expected) + # sort columns by specified level of multi-index df = df.T result = df.sort_index(level='A', axis=1, sort_remaining=False) expected = df.sortlevel('A', axis=1, sort_remaining=False) assert_frame_equal(result, expected) - # MI sort, but no by + # MI sort, but no level mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) df = DataFrame([[1, 2], [3, 4]], mi) result = df.sort_index(sort_remaining=False) expected = df.sort_index() assert_frame_equal(result, expected) - def test_sort_index(self): + def test_sort_values(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) - # axis=0 - unordered = frame.ix[[3, 2, 4, 1]] - sorted_df = unordered.sort_index(axis=0) - expected = frame - assert_frame_equal(sorted_df, expected) - - sorted_df = unordered.sort_index(ascending=False) - expected = frame[::-1] - assert_frame_equal(sorted_df, expected) - - # axis=1 - unordered = frame.ix[:, ['D', 'B', 'C', 'A']] - sorted_df = unordered.sort_index(axis=1) - expected = frame - assert_frame_equal(sorted_df, expected) - - sorted_df = unordered.sort_index(axis=1, ascending=False) - expected = frame.ix[:, ::-1] - assert_frame_equal(sorted_df, expected) - # by column sorted_df = frame.sort_values(by='A') indexer = frame['A'].argsort().values From 70e06d1757ada0e80f4df98922be5f972ba5ab4b Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Thu, 23 Jun 2016 12:54:41 +0200 Subject: [PATCH 02/11] Factor out Series sorting tests to own file. --- pandas/tests/series/test_analytics.py | 136 ----------------------- pandas/tests/series/test_sorting.py | 151 ++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 136 deletions(-) create mode 100644 pandas/tests/series/test_sorting.py diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 433f0f4bc67f5..4b2e5d251f7eb 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -5,7 +5,6 @@ from distutils.version import LooseVersion import nose -import random from numpy import nan import numpy as np @@ -1414,141 +1413,6 @@ def test_is_monotonic(self): self.assertFalse(s.is_monotonic) self.assertTrue(s.is_monotonic_decreasing) - def test_sort_values(self): - - ts = self.ts.copy() - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ts.sort() - - self.assert_series_equal(ts, self.ts.sort_values()) - self.assert_index_equal(ts.index, self.ts.sort_values().index) - - ts.sort_values(ascending=False, inplace=True) - self.assert_series_equal(ts, self.ts.sort_values(ascending=False)) - self.assert_index_equal(ts.index, - self.ts.sort_values(ascending=False).index) - - # GH 5856/5853 - # Series.sort_values operating on a view - df = DataFrame(np.random.randn(10, 4)) - s = df.iloc[:, 0] - - def f(): - s.sort_values(inplace=True) - - self.assertRaises(ValueError, f) - - # test order/sort inplace - # GH6859 - ts1 = self.ts.copy() - ts1.sort_values(ascending=False, inplace=True) - ts2 = self.ts.copy() - ts2.sort_values(ascending=False, inplace=True) - assert_series_equal(ts1, ts2) - - ts1 = self.ts.copy() - ts1 = ts1.sort_values(ascending=False, inplace=False) - ts2 = self.ts.copy() - ts2 = ts.sort_values(ascending=False) - assert_series_equal(ts1, ts2) - - def test_sort_index(self): - rindex = list(self.ts.index) - random.shuffle(rindex) - - random_order = self.ts.reindex(rindex) - sorted_series = random_order.sort_index() - assert_series_equal(sorted_series, self.ts) - - # descending - sorted_series = random_order.sort_index(ascending=False) - assert_series_equal(sorted_series, - self.ts.reindex(self.ts.index[::-1])) - - def test_sort_index_inplace(self): - - # For #11402 - rindex = list(self.ts.index) - random.shuffle(rindex) - - # descending - random_order = self.ts.reindex(rindex) - result = random_order.sort_index(ascending=False, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1])) - - # ascending - random_order = self.ts.reindex(rindex) - result = random_order.sort_index(ascending=True, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts) - - def test_sort_API(self): - - # API for 9816 - - # sortlevel - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) - s = Series([1, 2], mi) - backwards = s.iloc[[1, 0]] - - res = s.sort_index(level='A') - assert_series_equal(backwards, res) - - # sort_index - rindex = list(self.ts.index) - random.shuffle(rindex) - - random_order = self.ts.reindex(rindex) - sorted_series = random_order.sort_index(level=0) - assert_series_equal(sorted_series, self.ts) - - # compat on axis - sorted_series = random_order.sort_index(axis=0) - assert_series_equal(sorted_series, self.ts) - - self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) - - sorted_series = random_order.sort_index(level=0, axis=0) - assert_series_equal(sorted_series, self.ts) - - self.assertRaises(ValueError, - lambda: random_order.sort_index(level=0, axis=1)) - - def test_order(self): - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - self.ts.order() - - ts = self.ts.copy() - ts[:5] = np.NaN - vals = ts.values - - result = ts.sort_values() - self.assertTrue(np.isnan(result[-5:]).all()) - self.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) - - result = ts.sort_values(na_position='first') - self.assertTrue(np.isnan(result[:5]).all()) - self.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) - - # something object-type - ser = Series(['A', 'B'], [1, 2]) - # no failure - ser.sort_values() - - # ascending=False - ordered = ts.sort_values(ascending=False) - expected = np.sort(ts.valid().values)[::-1] - assert_almost_equal(expected, ordered.valid().values) - ordered = ts.sort_values(ascending=False, na_position='first') - assert_almost_equal(expected, ordered.valid().values) - def test_nsmallest_nlargest(self): # float, int, datetime64 (use i8), timedelts64 (same), # object that are numbers, object that are strings diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py new file mode 100644 index 0000000000000..1d298af72dfff --- /dev/null +++ b/pandas/tests/series/test_sorting.py @@ -0,0 +1,151 @@ +# coding=utf-8 + +import numpy as np +import random + +from pandas import (DataFrame, Series, MultiIndex) + +from pandas.util.testing import (assert_series_equal, assert_almost_equal) +import pandas.util.testing as tm + +from .common import TestData + + +class TestSeriesSorting(TestData, tm.TestCase): + + _multiprocess_can_split_ = True + + def test_sort_values(self): + + ts = self.ts.copy() + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + ts.sort() + + self.assert_series_equal(ts, self.ts.sort_values()) + self.assert_index_equal(ts.index, self.ts.sort_values().index) + + ts.sort_values(ascending=False, inplace=True) + self.assert_series_equal(ts, self.ts.sort_values(ascending=False)) + self.assert_index_equal(ts.index, + self.ts.sort_values(ascending=False).index) + + # GH 5856/5853 + # Series.sort_values operating on a view + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0] + + def f(): + s.sort_values(inplace=True) + + self.assertRaises(ValueError, f) + + # test order/sort inplace + # GH6859 + ts1 = self.ts.copy() + ts1.sort_values(ascending=False, inplace=True) + ts2 = self.ts.copy() + ts2.sort_values(ascending=False, inplace=True) + assert_series_equal(ts1, ts2) + + ts1 = self.ts.copy() + ts1 = ts1.sort_values(ascending=False, inplace=False) + ts2 = self.ts.copy() + ts2 = ts.sort_values(ascending=False) + assert_series_equal(ts1, ts2) + + def test_sort_index(self): + rindex = list(self.ts.index) + random.shuffle(rindex) + + random_order = self.ts.reindex(rindex) + sorted_series = random_order.sort_index() + assert_series_equal(sorted_series, self.ts) + + # descending + sorted_series = random_order.sort_index(ascending=False) + assert_series_equal(sorted_series, + self.ts.reindex(self.ts.index[::-1])) + + def test_sort_index_inplace(self): + + # For #11402 + rindex = list(self.ts.index) + random.shuffle(rindex) + + # descending + random_order = self.ts.reindex(rindex) + result = random_order.sort_index(ascending=False, inplace=True) + self.assertIs(result, None, + msg='sort_index() inplace should return None') + assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1])) + + # ascending + random_order = self.ts.reindex(rindex) + result = random_order.sort_index(ascending=True, inplace=True) + self.assertIs(result, None, + msg='sort_index() inplace should return None') + assert_series_equal(random_order, self.ts) + + def test_sort_API(self): + + # API for 9816 + + # sortlevel + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + res = s.sort_index(level='A') + assert_series_equal(backwards, res) + + # sort_index + rindex = list(self.ts.index) + random.shuffle(rindex) + + random_order = self.ts.reindex(rindex) + sorted_series = random_order.sort_index(level=0) + assert_series_equal(sorted_series, self.ts) + + # compat on axis + sorted_series = random_order.sort_index(axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) + + sorted_series = random_order.sort_index(level=0, axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, + lambda: random_order.sort_index(level=0, axis=1)) + + def test_order(self): + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + self.ts.order() + + ts = self.ts.copy() + ts[:5] = np.NaN + vals = ts.values + + result = ts.sort_values() + self.assertTrue(np.isnan(result[-5:]).all()) + self.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) + + result = ts.sort_values(na_position='first') + self.assertTrue(np.isnan(result[:5]).all()) + self.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) + + # something object-type + ser = Series(['A', 'B'], [1, 2]) + # no failure + ser.sort_values() + + # ascending=False + ordered = ts.sort_values(ascending=False) + expected = np.sort(ts.valid().values)[::-1] + assert_almost_equal(expected, ordered.valid().values) + ordered = ts.sort_values(ascending=False, na_position='first') + assert_almost_equal(expected, ordered.valid().values) From 67f0ab09b347000d843b4e0b0a7577218ef265e3 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Thu, 23 Jun 2016 13:43:43 +0200 Subject: [PATCH 03/11] Delegate deprecated sort() and order() to their own tests. Before this commit, the `Series.sort_values()` tests relied on deprecated `Series.sort()` and `Series.order()` as the source of truth. However they both merely called `Series.sort_values()` under the hood. This commit consolidates the core test logic against `.sort_values()` directly, while `.sort()` and `.order()` merely check for equivalence with `.sort_values()`. Also removes some no-op assertions that had rotted from the old days of `sort()`/`order()`. --- pandas/tests/frame/test_sorting.py | 21 +++---- pandas/tests/series/test_sorting.py | 92 ++++++++++++++--------------- 2 files changed, 55 insertions(+), 58 deletions(-) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 64521e6a6335d..aff08cd66b9ae 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -22,18 +22,9 @@ class TestDataFrameSorting(tm.TestCase, TestData): _multiprocess_can_split_ = True def test_sort_index(self): - # API for 9816 - - # sort_index frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - frame.sort(columns='A') - with tm.assert_produces_warning(FutureWarning): - frame.sort() - # axis=0 : sort rows by index labels unordered = frame.ix[[3, 2, 4, 1]] result = unordered.sort_index(axis=0) @@ -67,13 +58,23 @@ def test_sort_index(self): expected = df.sortlevel('A', axis=1, sort_remaining=False) assert_frame_equal(result, expected) - # MI sort, but no level + # MI sort, but no level: sort_level has no effect mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) df = DataFrame([[1, 2], [3, 4]], mi) result = df.sort_index(sort_remaining=False) expected = df.sort_index() assert_frame_equal(result, expected) + def test_sort(self): + frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], + columns=['A', 'B', 'C', 'D']) + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + frame.sort(columns='A') + with tm.assert_produces_warning(FutureWarning): + frame.sort() + def test_sort_values(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 1d298af72dfff..bc0e2671c2619 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -15,17 +15,57 @@ class TestSeriesSorting(TestData, tm.TestCase): _multiprocess_can_split_ = True - def test_sort_values(self): + def test_sort(self): ts = self.ts.copy() # 9816 deprecated with tm.assert_produces_warning(FutureWarning): - ts.sort() - + ts.sort() # sorts inplace self.assert_series_equal(ts, self.ts.sort_values()) - self.assert_index_equal(ts.index, self.ts.sort_values().index) + def test_order(self): + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + result = self.ts.order() + self.assert_series_equal(result, self.ts.sort_values()) + + def test_sort_values(self): + + # check indexes are reordered corresponding with the values + ser = Series([3, 2, 4, 1], ['A', 'B', 'C', 'D']) + expected = Series([1, 2, 3, 4], ['D', 'B', 'A', 'C']) + result = ser.sort_values() + self.assert_series_equal(expected, result) + + ts = self.ts.copy() + ts[:5] = np.NaN + vals = ts.values + + result = ts.sort_values() + self.assertTrue(np.isnan(result[-5:]).all()) + self.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) + + # na_position + result = ts.sort_values(na_position='first') + self.assertTrue(np.isnan(result[:5]).all()) + self.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) + + # something object-type + ser = Series(['A', 'B'], [1, 2]) + # no failure + ser.sort_values() + + # ascending=False + ordered = ts.sort_values(ascending=False) + expected = np.sort(ts.valid().values)[::-1] + assert_almost_equal(expected, ordered.valid().values) + ordered = ts.sort_values(ascending=False, na_position='first') + assert_almost_equal(expected, ordered.valid().values) + + # inplace=True + ts = self.ts.copy() ts.sort_values(ascending=False, inplace=True) self.assert_series_equal(ts, self.ts.sort_values(ascending=False)) self.assert_index_equal(ts.index, @@ -41,20 +81,6 @@ def f(): self.assertRaises(ValueError, f) - # test order/sort inplace - # GH6859 - ts1 = self.ts.copy() - ts1.sort_values(ascending=False, inplace=True) - ts2 = self.ts.copy() - ts2.sort_values(ascending=False, inplace=True) - assert_series_equal(ts1, ts2) - - ts1 = self.ts.copy() - ts1 = ts1.sort_values(ascending=False, inplace=False) - ts2 = self.ts.copy() - ts2 = ts.sort_values(ascending=False) - assert_series_equal(ts1, ts2) - def test_sort_index(self): rindex = list(self.ts.index) random.shuffle(rindex) @@ -119,33 +145,3 @@ def test_sort_API(self): self.assertRaises(ValueError, lambda: random_order.sort_index(level=0, axis=1)) - - def test_order(self): - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - self.ts.order() - - ts = self.ts.copy() - ts[:5] = np.NaN - vals = ts.values - - result = ts.sort_values() - self.assertTrue(np.isnan(result[-5:]).all()) - self.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) - - result = ts.sort_values(na_position='first') - self.assertTrue(np.isnan(result[:5]).all()) - self.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) - - # something object-type - ser = Series(['A', 'B'], [1, 2]) - # no failure - ser.sort_values() - - # ascending=False - ordered = ts.sort_values(ascending=False) - expected = np.sort(ts.valid().values)[::-1] - assert_almost_equal(expected, ordered.valid().values) - ordered = ts.sort_values(ascending=False, na_position='first') - assert_almost_equal(expected, ordered.valid().values) From a12cea532c8db68641173150e6b984d359395ea3 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Tue, 28 Jun 2016 10:40:01 -0500 Subject: [PATCH 04/11] Remove 'by' docstring from Series.sort_values --- pandas/core/frame.py | 7 +++++-- pandas/core/generic.py | 13 +++++++------ pandas/core/series.py | 3 ++- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4b35953b4282..c3fd7f9e87378 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -68,8 +68,11 @@ # --------------------------------------------------------------------- # Docstring templates -_shared_doc_kwargs = dict(axes='index, columns', klass='DataFrame', - axes_single_arg="{0, 1, 'index', 'columns'}") +_shared_doc_kwargs = dict( + axes='index, columns', klass='DataFrame', + axes_single_arg="{0, 1, 'index', 'columns'}", + optional_by="""by : str or list of str + Name or list of names which refer to the axis items.""") _numeric_only_doc = """numeric_only : boolean, default None Include only float, int, boolean data. If None, will attempt to use diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 348281d1a7e30..2364b9f4392d2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -37,10 +37,12 @@ # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = dict() -_shared_doc_kwargs = dict(axes='keywords for axes', klass='NDFrame', - axes_single_arg='int or labels for object', - args_transpose='axes to permute (int or label for' - ' object)') +_shared_doc_kwargs = dict( + axes='keywords for axes', klass='NDFrame', + axes_single_arg='int or labels for object', + args_transpose='axes to permute (int or label for object)', + optional_by="""by : str or list of str + Name or list of names which refer to the axis items.""") def is_dictlike(x): @@ -1956,8 +1958,7 @@ def add_suffix(self, suffix): .. versionadded:: 0.17.0 Parameters - ---------- - by : string name or list of names which refer to the axis items + ----------%(optional_by)s axis : %(axes)s to direct sorting ascending : bool or list of bool Sort ascending vs. descending. Specify list for multiple sort diff --git a/pandas/core/series.py b/pandas/core/series.py index cf1639bacc3be..6f3190c288e94 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -62,7 +62,8 @@ axes='index', klass='Series', axes_single_arg="{0, 'index'}", inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", - duplicated='Series') + duplicated='Series', + optional_by='') def _coerce_method(converter): From 600e2dede6ca466de4896185796d7c6aea061708 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Tue, 28 Jun 2016 10:56:08 -0500 Subject: [PATCH 05/11] Document defaults for optional sorting args --- pandas/core/generic.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2364b9f4392d2..e5f8b435ca93f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1959,19 +1959,19 @@ def add_suffix(self, suffix): Parameters ----------%(optional_by)s - axis : %(axes)s to direct sorting - ascending : bool or list of bool + axis : %(axes)s to direct sorting, default 0 + ascending : bool or list of bool, default True Sort ascending vs. descending. Specify list for multiple sort orders. If this is a list of bools, must match the length of the by. - inplace : bool + inplace : bool, default False if True, perform operation in-place - kind : {`quicksort`, `mergesort`, `heapsort`} + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' Choice of sorting algorithm. See also ndarray.np.sort for more information. `mergesort` is the only stable algorithm. For DataFrames, this option is only applied when sorting on a single column or label. - na_position : {'first', 'last'} + na_position : {'first', 'last'}, default 'last' `first` puts NaNs at the beginning, `last` puts NaNs at the end Returns @@ -1993,16 +1993,16 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, if not None, sort on values in specified index level(s) ascending : boolean, default True Sort ascending vs. descending - inplace : bool + inplace : bool, default False if True, perform operation in-place - kind : {`quicksort`, `mergesort`, `heapsort`} + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' Choice of sorting algorithm. See also ndarray.np.sort for more information. `mergesort` is the only stable algorithm. For DataFrames, this option is only applied when sorting on a single column or label. - na_position : {'first', 'last'} + na_position : {'first', 'last'}, default 'last' `first` puts NaNs at the beginning, `last` puts NaNs at the end - sort_remaining : bool + sort_remaining : bool, default True if true and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level From 698399d70d42a892093bae0404aad1d3b069be0f Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Thu, 7 Jul 2016 19:00:57 +0200 Subject: [PATCH 06/11] Move more sort_values, sort_index tests to be together. --- pandas/tests/frame/test_sorting.py | 38 +++++++++++++------------- pandas/tests/series/test_sorting.py | 41 +++++++++++++---------------- 2 files changed, 37 insertions(+), 42 deletions(-) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index aff08cd66b9ae..2e70f695e370d 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -118,6 +118,25 @@ def test_sort_values(self): with assertRaisesRegexp(ValueError, msg): frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5) + def test_sort_values_inplace(self): + frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4], + columns=['A', 'B', 'C', 'D']) + + sorted_df = frame.copy() + sorted_df.sort_values(by='A', inplace=True) + expected = frame.sort_values(by='A') + assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values(by='A', ascending=False, inplace=True) + expected = frame.sort_values(by='A', ascending=False) + assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True) + expected = frame.sort_values(by=['A', 'B'], ascending=False) + assert_frame_equal(sorted_df, expected) + def test_sort_index_categorical_index(self): df = (DataFrame({'A': np.arange(6, dtype='int64'), @@ -349,25 +368,6 @@ def test_sort_index_different_sortorder(self): result = idf['C'].sort_index(ascending=[1, 0]) assert_series_equal(result, expected['C']) - def test_sort_inplace(self): - frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4], - columns=['A', 'B', 'C', 'D']) - - sorted_df = frame.copy() - sorted_df.sort_values(by='A', inplace=True) - expected = frame.sort_values(by='A') - assert_frame_equal(sorted_df, expected) - - sorted_df = frame.copy() - sorted_df.sort_values(by='A', ascending=False, inplace=True) - expected = frame.sort_values(by='A', ascending=False) - assert_frame_equal(sorted_df, expected) - - sorted_df = frame.copy() - sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True) - expected = frame.sort_values(by=['A', 'B'], ascending=False) - assert_frame_equal(sorted_df, expected) - def test_sort_index_duplicates(self): # with 9816, these are all translated to .sort_values diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index bc0e2671c2619..e3d711fc7a7fb 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -94,6 +94,23 @@ def test_sort_index(self): assert_series_equal(sorted_series, self.ts.reindex(self.ts.index[::-1])) + # compat on level + sorted_series = random_order.sort_index(level=0) + assert_series_equal(sorted_series, self.ts) + + # compat on axis + sorted_series = random_order.sort_index(axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) + + sorted_series = random_order.sort_index(level=0, axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, + lambda: random_order.sort_index(level=0, axis=1)) + + def test_sort_index_inplace(self): # For #11402 @@ -114,11 +131,8 @@ def test_sort_index_inplace(self): msg='sort_index() inplace should return None') assert_series_equal(random_order, self.ts) - def test_sort_API(self): + def test_sort_index_multiindex(self): - # API for 9816 - - # sortlevel mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) s = Series([1, 2], mi) backwards = s.iloc[[1, 0]] @@ -126,22 +140,3 @@ def test_sort_API(self): res = s.sort_index(level='A') assert_series_equal(backwards, res) - # sort_index - rindex = list(self.ts.index) - random.shuffle(rindex) - - random_order = self.ts.reindex(rindex) - sorted_series = random_order.sort_index(level=0) - assert_series_equal(sorted_series, self.ts) - - # compat on axis - sorted_series = random_order.sort_index(axis=0) - assert_series_equal(sorted_series, self.ts) - - self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) - - sorted_series = random_order.sort_index(level=0, axis=0) - assert_series_equal(sorted_series, self.ts) - - self.assertRaises(ValueError, - lambda: random_order.sort_index(level=0, axis=1)) From f0b6127f17bc22f9e34124c99781f50050559543 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Thu, 7 Jul 2016 19:16:45 +0200 Subject: [PATCH 07/11] Add test for Series.sort_index(sort_remaining=True) --- pandas/tests/series/test_sorting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index e3d711fc7a7fb..6c993805a2a62 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -137,6 +137,10 @@ def test_sort_index_multiindex(self): s = Series([1, 2], mi) backwards = s.iloc[[1, 0]] + # implicit sort_remaining=True res = s.sort_index(level='A') assert_series_equal(backwards, res) + # rows share same level='A': sort has no effect without remaining lvls + res = s.sort_index(level='A', sort_remaining=False) + assert_series_equal(s, res) From 44dd7067e60fe5daa9e635c7c99a896d62e4cc48 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Fri, 8 Jul 2016 17:51:34 +0200 Subject: [PATCH 08/11] Improve `sort_values` tests when multiple `by`s Duplicates values in the test DataFrame are necessary to fully test this feature. --- pandas/tests/frame/test_sorting.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 2e70f695e370d..ceea59c98637b 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -76,8 +76,8 @@ def test_sort(self): frame.sort() def test_sort_values(self): - frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], - columns=['A', 'B', 'C', 'D']) + frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]], + index=[1, 2, 3], columns=list('ABC')) # by column sorted_df = frame.sort_values(by='A') @@ -97,16 +97,17 @@ def test_sort_values(self): sorted_df = frame.sort_values(by=['A'], ascending=[False]) assert_frame_equal(sorted_df, expected) - # check for now - sorted_df = frame.sort_values(by='A') - assert_frame_equal(sorted_df, expected[::-1]) - expected = frame.sort_values(by='A') + # multiple bys + sorted_df = frame.sort_values(by=['B', 'C']) + expected = frame.loc[[2, 1, 3]] assert_frame_equal(sorted_df, expected) - expected = frame.sort_values(by=['A', 'B'], ascending=False) - sorted_df = frame.sort_values(by=['A', 'B']) + sorted_df = frame.sort_values(by=['B', 'C'], ascending=False) assert_frame_equal(sorted_df, expected[::-1]) + sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False]) + assert_frame_equal(sorted_df, expected) + self.assertRaises(ValueError, lambda: frame.sort_values( by=['A', 'B'], axis=2, inplace=True)) From 5988277d71df88814b65a1e1af2dc77d86f4e0fe Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Sat, 9 Jul 2016 21:30:22 +0200 Subject: [PATCH 09/11] PEP8 cleanup --- pandas/tests/series/test_sorting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 6c993805a2a62..dab35120ea887 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -110,7 +110,6 @@ def test_sort_index(self): self.assertRaises(ValueError, lambda: random_order.sort_index(level=0, axis=1)) - def test_sort_index_inplace(self): # For #11402 From f76197219e0cb62ae595f58130d6ace501d0f522 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Sun, 10 Jul 2016 17:05:12 +0200 Subject: [PATCH 10/11] Annotate tests with GH issue --- pandas/tests/frame/test_sorting.py | 5 +++++ pandas/tests/series/test_sorting.py | 1 + 2 files changed, 6 insertions(+) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index ceea59c98637b..4d57216c8f870 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -22,6 +22,8 @@ class TestDataFrameSorting(tm.TestCase, TestData): _multiprocess_can_split_ = True def test_sort_index(self): + # GH13496 + frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) @@ -44,6 +46,9 @@ def test_sort_index(self): expected = frame.ix[:, ::-1] assert_frame_equal(result, expected) + def test_sort_index_multiindex(self): + # GH13496 + # sort rows by specified level of multi-index mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC')) df = DataFrame([[1, 2], [3, 4]], mi) diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index dab35120ea887..826201adbdb50 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -140,6 +140,7 @@ def test_sort_index_multiindex(self): res = s.sort_index(level='A') assert_series_equal(backwards, res) + # GH13496 # rows share same level='A': sort has no effect without remaining lvls res = s.sort_index(level='A', sort_remaining=False) assert_series_equal(s, res) From a333162cd10080fee441bacc7c3012fc7ab61719 Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Mon, 11 Jul 2016 14:09:36 +0200 Subject: [PATCH 11/11] Fix indentation - docstring string replacement --- pandas/core/frame.py | 5 +++-- pandas/core/generic.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c3fd7f9e87378..15d4bb6b5a537 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -71,8 +71,9 @@ _shared_doc_kwargs = dict( axes='index, columns', klass='DataFrame', axes_single_arg="{0, 1, 'index', 'columns'}", - optional_by="""by : str or list of str - Name or list of names which refer to the axis items.""") + optional_by=""" + by : str or list of str + Name or list of names which refer to the axis items.""") _numeric_only_doc = """numeric_only : boolean, default None Include only float, int, boolean data. If None, will attempt to use diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e5f8b435ca93f..0512afa402692 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -41,8 +41,9 @@ axes='keywords for axes', klass='NDFrame', axes_single_arg='int or labels for object', args_transpose='axes to permute (int or label for object)', - optional_by="""by : str or list of str - Name or list of names which refer to the axis items.""") + optional_by=""" + by : str or list of str + Name or list of names which refer to the axis items.""") def is_dictlike(x):