From dd771e6555eebac944ee4182243629222d2f2a43 Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 3 Apr 2017 16:20:15 +0100 Subject: [PATCH 1/6] Add More Error Tests --- pandas/tests/frame/test_analytics.py | 35 +++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 4d13cd1d18914..35a48d678366e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1966,12 +1966,35 @@ def test_n(self, df_strings, n, order): tm.assert_frame_equal(result, expected) def test_n_error(self, df_strings): - # b alone raises a TypeError - df = df_strings - with pytest.raises(TypeError): - df.nsmallest(1, 'b') - with pytest.raises(TypeError): - df.nlargest(1, 'b') + df = pd.DataFrame( + {'group': [1, 1, 2], + 'int': [1, 2, 3], + 'float': [4., 5., 6.], + 'string': list('abc'), + 'category_string': pd.Series(list('abc')).astype('category'), + 'category_int': [7, 8, 9], + 'datetime': pd.date_range('20130101', periods=3), + 'datetimetz': pd.date_range('20130101', + periods=3, + tz='US/Eastern'), + 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, + columns=['group', 'int', 'float', 'string', + 'category_string', 'category_int', + 'datetime', 'datetimetz', + 'timedelta']) + columns_with_errors = {'category_string', 'string'} + columns_without_errors = list(set(df) - columns_with_errors) + for column in columns_with_errors: + with pytest.raises(TypeError): + df.nsmallest(2, column) + with pytest.raises(TypeError): + df.nsmallest(2, ['group', column]) + with pytest.raises(TypeError): + df.nlargest(2, column) + with pytest.raises(TypeError): + df.nlargest(2, ['group', column]) + df.nsmallest(2, columns_without_errors) + df.nsmallest(2, ['int', 'string']) # int column is unique => OK def test_n_identical_values(self): # GH15297 From 6ecc9848a7020a47272057471c00cf7ef73f4ebe Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 3 Apr 2017 16:22:45 +0100 Subject: [PATCH 2/6] Refactor --- pandas/tests/frame/test_analytics.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 35a48d678366e..91c6fd601b951 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1985,14 +1985,11 @@ def test_n_error(self, df_strings): columns_with_errors = {'category_string', 'string'} columns_without_errors = list(set(df) - columns_with_errors) for column in columns_with_errors: - with pytest.raises(TypeError): - df.nsmallest(2, column) - with pytest.raises(TypeError): - df.nsmallest(2, ['group', column]) - with pytest.raises(TypeError): - df.nlargest(2, column) - with pytest.raises(TypeError): - df.nlargest(2, ['group', column]) + for columns in (column, ['group', column]): + with pytest.raises(TypeError): + df.nsmallest(2, columns) + with pytest.raises(TypeError): + df.nlargest(2, columns) df.nsmallest(2, columns_without_errors) df.nsmallest(2, ['int', 'string']) # int column is unique => OK From ded42ea86725caa2502396ab87bf4d42710e6b5f Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 3 Apr 2017 16:25:50 +0100 Subject: [PATCH 3/6] Update --- pandas/tests/frame/test_analytics.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 91c6fd601b951..087fba67b108b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1985,11 +1985,15 @@ def test_n_error(self, df_strings): columns_with_errors = {'category_string', 'string'} columns_without_errors = list(set(df) - columns_with_errors) for column in columns_with_errors: + dtype = df[column].dtype for columns in (column, ['group', column]): - with pytest.raises(TypeError): + msg_template = "Cannot use method '%s' with dtype %s" + with pytest.raises(TypeError) as exc_info: df.nsmallest(2, columns) - with pytest.raises(TypeError): + assert exc_info.value, msg_template % ('nsmallest', dtype) + with pytest.raises(TypeError) as exc_info: df.nlargest(2, columns) + assert exc_info.value, msg_template % ('nlargest', dtype) df.nsmallest(2, columns_without_errors) df.nsmallest(2, ['int', 'string']) # int column is unique => OK From aa5339f24f653b06a63c9f01adcd391a2c22d29b Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 3 Apr 2017 16:34:32 +0100 Subject: [PATCH 4/6] Refactor --- pandas/tests/frame/test_analytics.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 087fba67b108b..a69c1b82d62fb 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1984,16 +1984,15 @@ def test_n_error(self, df_strings): 'timedelta']) columns_with_errors = {'category_string', 'string'} columns_without_errors = list(set(df) - columns_with_errors) - for column in columns_with_errors: - dtype = df[column].dtype - for columns in (column, ['group', column]): - msg_template = "Cannot use method '%s' with dtype %s" + methods = 'nsmallest', 'nlargest' + for col in columns_with_errors: + for method, cols in product(methods, (col, ['group', col])): with pytest.raises(TypeError) as exc_info: - df.nsmallest(2, columns) - assert exc_info.value, msg_template % ('nsmallest', dtype) - with pytest.raises(TypeError) as exc_info: - df.nlargest(2, columns) - assert exc_info.value, msg_template % ('nlargest', dtype) + getattr(df, method)(2, cols) + msg = "Cannot use method '%s' with dtype %s" % ( + method, df[col].dtype + ) + assert exc_info.value, msg df.nsmallest(2, columns_without_errors) df.nsmallest(2, ['int', 'string']) # int column is unique => OK From 4d7a7c2ef08b53ad652f90c15fdbd66454f0160b Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 3 Apr 2017 16:57:52 +0100 Subject: [PATCH 5/6] Add Error Tests --- pandas/tests/series/test_analytics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 2a1e6415940d3..4c1c5ad1d5b47 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1667,7 +1667,9 @@ class TestNLargestNSmallest(object): Series([3., 2, 1, 2, 5], dtype='object'), # not supported on some archs # Series([3., 2, 1, 2, 5], dtype='complex256'), - Series([3., 2, 1, 2, 5], dtype='complex128')]) + Series([3., 2, 1, 2, 5], dtype='complex128'), + Series(list('abcde'), dtype='category'), + Series(list('abcde'))]) def test_error(self, r): dt = r.dtype msg = "Cannot use method 'n(larg|small)est' with dtype %s" % dt From af493a74451854344fe35ad13c7734d3678b2ef9 Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Wed, 5 Apr 2017 10:10:12 +0100 Subject: [PATCH 6/6] Update Tests --- pandas/tests/frame/test_analytics.py | 108 ++++++++++++++------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index a69c1b82d62fb..535bd320838b4 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1916,13 +1916,33 @@ def df_strings(): 'c': np.random.permutation(10).astype('float64')}) +@pytest.fixture +def df_main_dtypes(): + return pd.DataFrame( + {'group': [1, 1, 2], + 'int': [1, 2, 3], + 'float': [4., 5., 6.], + 'string': list('abc'), + 'category_string': pd.Series(list('abc')).astype('category'), + 'category_int': [7, 8, 9], + 'datetime': pd.date_range('20130101', periods=3), + 'datetimetz': pd.date_range('20130101', + periods=3, + tz='US/Eastern'), + 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, + columns=['group', 'int', 'float', 'string', + 'category_string', 'category_int', + 'datetime', 'datetimetz', + 'timedelta']) + + class TestNLargestNSmallest(object): # ---------------------------------------------------------------------- # Top / bottom @pytest.mark.parametrize( - 'n, order', - product(range(1, 11), + 'method, n, order', + product(['nsmallest', 'nlargest'], range(1, 11), [['a'], ['c'], ['a', 'b'], @@ -1939,62 +1959,46 @@ class TestNLargestNSmallest(object): ['b', 'c', 'c'], ])) - def test_n(self, df_strings, n, order): + def test_n(self, df_strings, method, n, order): # GH10393 df = df_strings - - error_msg = ( - "'b' has dtype: object, cannot use method 'nsmallest' " - "with this dtype" - ) - if 'b' in order: - with pytest.raises(TypeError) as exception: - df.nsmallest(n, order) - assert exception.value, error_msg + if order[0] == 'b': + + # Only expect error when 'b' is first in order, as 'a' and 'c' are + # unique + error_msg = ( + "'b' has dtype: object, cannot use method 'nsmallest' " + "with this dtype" + ) + with pytest.raises(TypeError) as exc_info: + getattr(df, method)(n, order) + assert exc_info.value, error_msg else: - result = df.nsmallest(n, order) - expected = df.sort_values(order).head(n) + ascending = method == 'nsmallest' + result = getattr(df, method)(n, order) + expected = df.sort_values(order, ascending=ascending).head(n) tm.assert_frame_equal(result, expected) - if 'b' in order: - with pytest.raises(TypeError) as exception: - df.nsmallest(n, order) - assert exception.value, error_msg - else: - result = df.nlargest(n, order) - expected = df.sort_values(order, ascending=False).head(n) - tm.assert_frame_equal(result, expected) - - def test_n_error(self, df_strings): - df = pd.DataFrame( - {'group': [1, 1, 2], - 'int': [1, 2, 3], - 'float': [4., 5., 6.], - 'string': list('abc'), - 'category_string': pd.Series(list('abc')).astype('category'), - 'category_int': [7, 8, 9], - 'datetime': pd.date_range('20130101', periods=3), - 'datetimetz': pd.date_range('20130101', - periods=3, - tz='US/Eastern'), - 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, - columns=['group', 'int', 'float', 'string', - 'category_string', 'category_int', - 'datetime', 'datetimetz', - 'timedelta']) - columns_with_errors = {'category_string', 'string'} - columns_without_errors = list(set(df) - columns_with_errors) - methods = 'nsmallest', 'nlargest' - for col in columns_with_errors: - for method, cols in product(methods, (col, ['group', col])): - with pytest.raises(TypeError) as exc_info: - getattr(df, method)(2, cols) - msg = "Cannot use method '%s' with dtype %s" % ( - method, df[col].dtype - ) - assert exc_info.value, msg - df.nsmallest(2, columns_without_errors) + @pytest.mark.parametrize( + 'method, columns', + product(['nsmallest', 'nlargest'], + product(['group'], ['category_string', 'string']) + )) + def test_n_error(self, df_main_dtypes, method, columns): + df = df_main_dtypes + with pytest.raises(TypeError) as exc_info: + getattr(df, method)(2, columns) + msg = "Cannot use method '%s' with dtype %s" % ( + method, df[columns[1]].dtype + ) + assert exc_info.value, msg + + def test_n_all_dtypes(self, df_main_dtypes): + df = df_main_dtypes + df.nsmallest(2, list(set(df) - {'category_string', 'string'})) df.nsmallest(2, ['int', 'string']) # int column is unique => OK + df.nlargest(2, list(set(df) - {'category_string', 'string'})) + df.nlargest(2, ['int', 'string']) # int column is unique => OK def test_n_identical_values(self): # GH15297