From 35596c61d40bc36594215bb3afc6cda02363a2e4 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 22 Jun 2018 18:37:18 -0600 Subject: [PATCH] TST: Use multiple instances of parametrize instead of product --- pandas/tests/dtypes/test_dtypes.py | 14 +-- pandas/tests/frame/test_rank.py | 124 +++++++++++----------- pandas/tests/groupby/test_function.py | 7 +- pandas/tests/groupby/test_whitelist.py | 12 +-- pandas/tests/reshape/test_concat.py | 11 +- pandas/tests/sparse/series/test_series.py | 20 ++-- pandas/tests/test_multilevel.py | 80 +++++++------- pandas/tests/test_resample.py | 98 +++++++++-------- pandas/tests/test_window.py | 7 +- 9 files changed, 185 insertions(+), 188 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index cc833af03ae66..eee53a2fcac6a 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -2,8 +2,6 @@ import re import pytest -from itertools import product - import numpy as np import pandas as pd from pandas import ( @@ -233,12 +231,14 @@ def test_dst(self): assert is_datetimetz(s2) assert s1.dtype == s2.dtype - def test_parser(self): + @pytest.mark.parametrize('tz', ['UTC', 'US/Eastern']) + @pytest.mark.parametrize('constructor', ['M8', 'datetime64']) + def test_parser(self, tz, constructor): # pr #11245 - for tz, constructor in product(('UTC', 'US/Eastern'), - ('M8', 'datetime64')): - assert (DatetimeTZDtype('%s[ns, %s]' % (constructor, tz)) == - DatetimeTZDtype('ns', tz)) + dtz_str = '{con}[ns, {tz}]'.format(con=constructor, tz=tz) + result = DatetimeTZDtype(dtz_str) + expected = DatetimeTZDtype('ns', tz) + assert result == expected def test_empty(self): dt = DatetimeTZDtype() diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index b8ba408b54715..a1210f1ed54e4 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -10,7 +10,6 @@ from pandas.util.testing import assert_frame_equal from pandas.tests.frame.common import TestData from pandas import Series, DataFrame -from pandas.compat import product class TestRank(TestData): @@ -26,6 +25,13 @@ class TestRank(TestData): 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), } + @pytest.fixture(params=['average', 'min', 'max', 'first', 'dense']) + def method(self, request): + """ + Fixture for trying all rank methods + """ + return request.param + def test_rank(self): rankdata = pytest.importorskip('scipy.stats.rankdata') @@ -217,34 +223,35 @@ def test_rank_methods_frame(self): expected = expected.astype('float64') tm.assert_frame_equal(result, expected) - def test_rank_descending(self): - dtypes = ['O', 'f8', 'i8'] + @pytest.mark.parametrize('dtype', ['O', 'f8', 'i8']) + def test_rank_descending(self, method, dtype): - for dtype, method in product(dtypes, self.results): - if 'i' in dtype: - df = self.df.dropna() - else: - df = self.df.astype(dtype) + if 'i' in dtype: + df = self.df.dropna() + else: + df = self.df.astype(dtype) - res = df.rank(ascending=False) - expected = (df.max() - df).rank() - assert_frame_equal(res, expected) + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + assert_frame_equal(res, expected) - if method == 'first' and dtype == 'O': - continue + if method == 'first' and dtype == 'O': + return - expected = (df.max() - df).rank(method=method) + expected = (df.max() - df).rank(method=method) - if dtype != 'O': - res2 = df.rank(method=method, ascending=False, - numeric_only=True) - assert_frame_equal(res2, expected) + if dtype != 'O': + res2 = df.rank(method=method, ascending=False, + numeric_only=True) + assert_frame_equal(res2, expected) - res3 = df.rank(method=method, ascending=False, - numeric_only=False) - assert_frame_equal(res3, expected) + res3 = df.rank(method=method, ascending=False, + numeric_only=False) + assert_frame_equal(res3, expected) - def test_rank_2d_tie_methods(self): + @pytest.mark.parametrize('axis', [0, 1]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_rank_2d_tie_methods(self, method, axis, dtype): df = self.df def _check2d(df, expected, method='average', axis=0): @@ -257,43 +264,38 @@ def _check2d(df, expected, method='average', axis=0): result = df.rank(method=method, axis=axis) assert_frame_equal(result, exp_df) - dtypes = [None, object] disabled = set([(object, 'first')]) - results = self.results - - for method, axis, dtype in product(results, [0, 1], dtypes): - if (dtype, method) in disabled: - continue - frame = df if dtype is None else df.astype(dtype) - _check2d(frame, results[method], method=method, axis=axis) - - -@pytest.mark.parametrize( - "method,exp", [("dense", - [[1., 1., 1.], - [1., 0.5, 2. / 3], - [1., 0.5, 1. / 3]]), - ("min", - [[1. / 3, 1., 1.], - [1. / 3, 1. / 3, 2. / 3], - [1. / 3, 1. / 3, 1. / 3]]), - ("max", - [[1., 1., 1.], - [1., 2. / 3, 2. / 3], - [1., 2. / 3, 1. / 3]]), - ("average", - [[2. / 3, 1., 1.], - [2. / 3, 0.5, 2. / 3], - [2. / 3, 0.5, 1. / 3]]), - ("first", - [[1. / 3, 1., 1.], - [2. / 3, 1. / 3, 2. / 3], - [3. / 3, 2. / 3, 1. / 3]])]) -def test_rank_pct_true(method, exp): - # see gh-15630. - - df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) - result = df.rank(method=method, pct=True) - - expected = DataFrame(exp) - tm.assert_frame_equal(result, expected) + if (dtype, method) in disabled: + return + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, self.results[method], method=method, axis=axis) + + @pytest.mark.parametrize( + "method,exp", [("dense", + [[1., 1., 1.], + [1., 0.5, 2. / 3], + [1., 0.5, 1. / 3]]), + ("min", + [[1. / 3, 1., 1.], + [1. / 3, 1. / 3, 2. / 3], + [1. / 3, 1. / 3, 1. / 3]]), + ("max", + [[1., 1., 1.], + [1., 2. / 3, 2. / 3], + [1., 2. / 3, 1. / 3]]), + ("average", + [[2. / 3, 1., 1.], + [2. / 3, 0.5, 2. / 3], + [2. / 3, 0.5, 1. / 3]]), + ("first", + [[1. / 3, 1., 1.], + [2. / 3, 1. / 3, 2. / 3], + [3. / 3, 2. / 3, 1. / 3]])]) + def test_rank_pct_true(self, method, exp): + # see gh-15630. + + df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) + result = df.rank(method=method, pct=True) + + expected = DataFrame(exp) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index f1d678db4ff7f..9df362a8e132f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -778,9 +778,10 @@ def test_frame_describe_unstacked_format(): # nunique # -------------------------------- -@pytest.mark.parametrize("n, m", cart_product(10 ** np.arange(2, 6), - (10, 100, 1000))) -@pytest.mark.parametrize("sort, dropna", cart_product((False, True), repeat=2)) +@pytest.mark.parametrize('n', 10 ** np.arange(2, 6)) +@pytest.mark.parametrize('m', [10, 100, 1000]) +@pytest.mark.parametrize('sort', [False, True]) +@pytest.mark.parametrize('dropna', [False, True]) def test_series_groupby_nunique(n, m, sort, dropna): def check_nunique(df, keys, as_index=True): diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 8d6e074881cbb..f4a58b9cbe61b 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -8,7 +8,6 @@ import numpy as np from pandas import DataFrame, Series, compat, date_range, Index, MultiIndex from pandas.util import testing as tm -from pandas.compat import lrange, product AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var', 'sem'] @@ -175,12 +174,11 @@ def raw_frame(): return raw_frame -@pytest.mark.parametrize( - "op, level, axis, skipna, sort", - product(AGG_FUNCTIONS, - lrange(2), lrange(2), - [True, False], - [True, False])) +@pytest.mark.parametrize('op', AGG_FUNCTIONS) +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('axis', [0, 1]) +@pytest.mark.parametrize('skipna', [True, False]) +@pytest.mark.parametrize('sort', [True, False]) def test_regression_whitelist_methods( raw_frame, op, level, axis, skipna, sort): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index dea305d4b3fee..8d819f9926abb 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,5 +1,5 @@ from warnings import catch_warnings -from itertools import combinations, product +from itertools import combinations import datetime as dt import dateutil @@ -941,10 +941,11 @@ def test_append_different_columns_types(self, df_columns, series_index): columns=combined_columns) assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "index_can_append, index_cannot_append_with_other", - product(indexes_can_append, indexes_cannot_append_with_other), - ids=lambda x: x.__class__.__name__) + @pytest.mark.parametrize('index_can_append', indexes_can_append, + ids=lambda x: x.__class__.__name__) + @pytest.mark.parametrize('index_cannot_append_with_other', + indexes_cannot_append_with_other, + ids=lambda x: x.__class__.__name__) def test_append_different_columns_types_raises( self, index_can_append, index_cannot_append_with_other): # GH18359 diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index eb63c87820070..921c30234660f 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -23,8 +23,6 @@ from pandas.core.sparse.api import SparseSeries from pandas.tests.series.test_api import SharedWithSparse -from itertools import product - def _test_data1(): # nan-based @@ -985,16 +983,16 @@ def test_combine_first(self): tm.assert_sp_series_equal(result, result2) tm.assert_sp_series_equal(result, expected) - @pytest.mark.parametrize('deep,fill_values', [([True, False], - [0, 1, np.nan, None])]) - def test_memory_usage_deep(self, deep, fill_values): - for deep, fill_value in product(deep, fill_values): - sparse_series = SparseSeries(fill_values, fill_value=fill_value) - dense_series = Series(fill_values) - sparse_usage = sparse_series.memory_usage(deep=deep) - dense_usage = dense_series.memory_usage(deep=deep) + @pytest.mark.parametrize('deep', [True, False]) + @pytest.mark.parametrize('fill_value', [0, 1, np.nan, None]) + def test_memory_usage_deep(self, deep, fill_value): + values = [0, 1, np.nan, None] + sparse_series = SparseSeries(values, fill_value=fill_value) + dense_series = Series(values) + sparse_usage = sparse_series.memory_usage(deep=deep) + dense_usage = dense_series.memory_usage(deep=deep) - assert sparse_usage < dense_usage + assert sparse_usage < dense_usage class TestSparseHandlingMultiIndexes(object): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 79e05c90a21b0..3caee2b44c579 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -20,6 +20,9 @@ import pandas as pd import pandas._libs.index as _index +AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', + 'std', 'var', 'sem'] + class Base(object): @@ -1389,60 +1392,57 @@ def test_count(self): pytest.raises(KeyError, series.count, 'x') pytest.raises(KeyError, frame.count, level='x') - AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', - 'mad', 'std', 'var', 'sem'] - + @pytest.mark.parametrize('op', AGG_FUNCTIONS) + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('skipna', [True, False]) @pytest.mark.parametrize('sort', [True, False]) - def test_series_group_min_max(self, sort): + def test_series_group_min_max(self, op, level, skipna, sort): # GH 17537 - for op, level, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), - [False, True]): - grouped = self.series.groupby(level=level, sort=sort) - aggf = lambda x: getattr(x, op)(skipna=skipna) - # skipna=True - leftside = grouped.agg(aggf) - rightside = getattr(self.series, op)(level=level, skipna=skipna) - if sort: - rightside = rightside.sort_index(level=level) - tm.assert_series_equal(leftside, rightside) - + grouped = self.series.groupby(level=level, sort=sort) + # skipna=True + leftside = grouped.agg(lambda x: getattr(x, op)(skipna=skipna)) + rightside = getattr(self.series, op)(level=level, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level) + tm.assert_series_equal(leftside, rightside) + + @pytest.mark.parametrize('op', AGG_FUNCTIONS) + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('axis', [0, 1]) + @pytest.mark.parametrize('skipna', [True, False]) @pytest.mark.parametrize('sort', [True, False]) - def test_frame_group_ops(self, sort): + def test_frame_group_ops(self, op, level, axis, skipna, sort): # GH 17537 self.frame.iloc[1, [1, 2]] = np.nan self.frame.iloc[7, [0, 1]] = np.nan - for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - lrange(2), lrange(2), - [False, True]): - - if axis == 0: - frame = self.frame - else: - frame = self.frame.T + if axis == 0: + frame = self.frame + else: + frame = self.frame.T - grouped = frame.groupby(level=level, axis=axis, sort=sort) + grouped = frame.groupby(level=level, axis=axis, sort=sort) - pieces = [] + pieces = [] - def aggf(x): - pieces.append(x) - return getattr(x, op)(skipna=skipna, axis=axis) + def aggf(x): + pieces.append(x) + return getattr(x, op)(skipna=skipna, axis=axis) - leftside = grouped.agg(aggf) - rightside = getattr(frame, op)(level=level, axis=axis, - skipna=skipna) - if sort: - rightside = rightside.sort_index(level=level, axis=axis) - frame = frame.sort_index(level=level, axis=axis) + leftside = grouped.agg(aggf) + rightside = getattr(frame, op)(level=level, axis=axis, + skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level, axis=axis) + frame = frame.sort_index(level=level, axis=axis) - # for good measure, groupby detail - level_index = frame._get_axis(axis).levels[level] + # for good measure, groupby detail + level_index = frame._get_axis(axis).levels[level] - tm.assert_index_equal(leftside._get_axis(axis), level_index) - tm.assert_index_equal(rightside._get_axis(axis), level_index) + tm.assert_index_equal(leftside._get_axis(axis), level_index) + tm.assert_index_equal(rightside._get_axis(axis), level_index) - tm.assert_frame_equal(leftside, rightside) + tm.assert_frame_equal(leftside, rightside) def test_stat_op_corner(self): obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 6f0ad0535c6b4..60f23309b11d9 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -17,7 +17,7 @@ from pandas import (Series, DataFrame, Panel, Index, isna, notna, Timestamp) -from pandas.compat import range, lrange, zip, product, OrderedDict +from pandas.compat import range, lrange, zip, OrderedDict from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby.groupby import DataError import pandas.core.common as com @@ -1951,30 +1951,32 @@ def test_resample_nunique_with_date_gap(self): assert_series_equal(results[0], results[2]) assert_series_equal(results[0], results[3]) - def test_resample_group_info(self): # GH10914 - for n, k in product((10000, 100000), (10, 100, 1000)): - dr = date_range(start='2015-08-27', periods=n // 10, freq='T') - ts = Series(np.random.randint(0, n // k, n).astype('int64'), - index=np.random.choice(dr, n)) + @pytest.mark.parametrize('n', [10000, 100000]) + @pytest.mark.parametrize('k', [10, 100, 1000]) + def test_resample_group_info(self, n, k): + # GH10914 + dr = date_range(start='2015-08-27', periods=n // 10, freq='T') + ts = Series(np.random.randint(0, n // k, n).astype('int64'), + index=np.random.choice(dr, n)) - left = ts.resample('30T').nunique() - ix = date_range(start=ts.index.min(), end=ts.index.max(), - freq='30T') + left = ts.resample('30T').nunique() + ix = date_range(start=ts.index.min(), end=ts.index.max(), + freq='30T') - vals = ts.values - bins = np.searchsorted(ix.values, ts.index, side='right') + vals = ts.values + bins = np.searchsorted(ix.values, ts.index, side='right') - sorter = np.lexsort((vals, bins)) - vals, bins = vals[sorter], bins[sorter] + sorter = np.lexsort((vals, bins)) + vals, bins = vals[sorter], bins[sorter] - mask = np.r_[True, vals[1:] != vals[:-1]] - mask |= np.r_[True, bins[1:] != bins[:-1]] + mask = np.r_[True, vals[1:] != vals[:-1]] + mask |= np.r_[True, bins[1:] != bins[:-1]] - arr = np.bincount(bins[mask] - 1, - minlength=len(ix)).astype('int64', copy=False) - right = Series(arr, index=ix) + arr = np.bincount(bins[mask] - 1, + minlength=len(ix)).astype('int64', copy=False) + right = Series(arr, index=ix) - assert_series_equal(left, right) + assert_series_equal(left, right) def test_resample_size(self): n = 10000 @@ -2323,28 +2325,25 @@ def test_annual_upsample(self): method='ffill') assert_series_equal(result, expected) - def test_quarterly_upsample(self): - targets = ['D', 'B', 'M'] - - for month in MONTHS: - ts = _simple_pts('1/1/1990', '12/31/1995', freq='Q-%s' % month) - - for targ, conv in product(targets, ['start', 'end']): - result = ts.resample(targ, convention=conv).ffill() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, 'ffill').to_period() - assert_series_equal(result, expected) - - def test_monthly_upsample(self): - targets = ['D', 'B'] + @pytest.mark.parametrize('month', MONTHS) + @pytest.mark.parametrize('target', ['D', 'B', 'M']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_quarterly_upsample(self, month, target, convention): + freq = 'Q-{month}'.format(month=month) + ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) + @pytest.mark.parametrize('target', ['D', 'B']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_monthly_upsample(self, target, convention): ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') - - for targ, conv in product(targets, ['start', 'end']): - result = ts.resample(targ, convention=conv).ffill() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, 'ffill').to_period() - assert_series_equal(result, expected) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) def test_resample_basic(self): # GH3609 @@ -2455,17 +2454,16 @@ def test_fill_method_and_how_upsample(self): both = s.resample('M').ffill().resample('M').last().astype('int64') assert_series_equal(last, both) - def test_weekly_upsample(self): - targets = ['D', 'B'] - - for day in DAYS: - ts = _simple_pts('1/1/1990', '12/31/1995', freq='W-%s' % day) - - for targ, conv in product(targets, ['start', 'end']): - result = ts.resample(targ, convention=conv).ffill() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, 'ffill').to_period() - assert_series_equal(result, expected) + @pytest.mark.parametrize('day', DAYS) + @pytest.mark.parametrize('target', ['D', 'B']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_weekly_upsample(self, day, target, convention): + freq = 'W-{day}'.format(day=day) + ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) def test_resample_to_timestamps(self): ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index cfd88f41f855e..78d1fa84cc5db 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2105,10 +2105,9 @@ def _non_null_values(x): (mean_x * mean_y)) @pytest.mark.slow - @pytest.mark.parametrize( - 'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4], - [True, False], - [False, True])) + @pytest.mark.parametrize('min_periods', [0, 1, 2, 3, 4]) + @pytest.mark.parametrize('adjust', [True, False]) + @pytest.mark.parametrize('ignore_na', [True, False]) def test_ewm_consistency(self, min_periods, adjust, ignore_na): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame):