From c54ef94d5cfe661ff9c69ec6521b11b20130ce7f Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 26 Feb 2013 20:41:46 +0200 Subject: [PATCH 1/4] ENH: add display.max_info_rows option --- pandas/core/config_init.py | 13 +++++++++++++ pandas/core/frame.py | 23 ++++++++++++++++++++--- pandas/sparse/frame.py | 1 - pandas/tests/test_format.py | 24 ++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 114210d75959b..43dd529446f5a 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -123,10 +123,23 @@ will be displayed as exactly 0 by repr and friends. """ +pc_max_info_rows_doc = """ +: int or None + max_info_rows is the maximum number of rows for which a frame will + perform a null check on its columns when repr'ing to a console. + The default is 1,000,000 rows. So, if a DataFrame has more than + 1,000,000 rows there will be no null check performed on the + columns and thus the representation will take much less time to + display in an interactive session. A value of None means always + perform a null check when repr'ing. 
+""" + with cf.config_prefix('display'): cf.register_option('precision', 7, pc_precision_doc, validator=is_int) cf.register_option('float_format', None, float_format_doc) cf.register_option('column_space', 12, validator=is_int) + cf.register_option('max_info_rows', 1000000, pc_max_info_rows_doc, + validator=lambda x: is_int(x) or x is None) cf.register_option('max_rows', 100, pc_max_rows_doc, validator=is_int) cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) cf.register_option('max_columns', 20, pc_max_cols_doc, validator=is_int) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f4a077c61e9cc..ea79236989b96 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -51,7 +51,7 @@ import pandas.tslib as tslib import pandas.algos as _algos -from pandas.core.config import get_option +from pandas.core.config import get_option, set_option #---------------------------------------------------------------------- @@ -331,7 +331,6 @@ def f(self, other): class DataFrame(NDFrame): _auto_consolidate = True - _verbose_info = True _het_axis = 1 _info_axis = 'columns' _col_klass = Series @@ -560,6 +559,22 @@ def _wrap_array(self, arr, axes, copy=False): index, columns = axes return self._constructor(arr, index=index, columns=columns, copy=copy) + @property + def _verbose_info(self): + import warnings + warnings.warn('The _verbose_info property will be removed in version ' + '0.12', FutureWarning) + return get_option('display.max_info_rows') is None + + @_verbose_info.setter + def _verbose_info(self, value): + import warnings + warnings.warn('The _verbose_info property will be removed in version ' + '0.12', FutureWarning) + + value = None if value else 1000000 + set_option('display.max_info_rows', value) + @property def axes(self): return [self.index, self.columns] @@ -652,7 +667,9 @@ def __unicode__(self): """ buf = StringIO(u"") if self._need_info_repr_(): - self.info(buf=buf, verbose=self._verbose_info) + max_info_rows = 
get_option('display.max_info_rows') + verbose = max_info_rows is None or self.shape[0] <= max_info_rows + self.info(buf=buf, verbose=verbose) else: is_wide = self._need_wide_repr() line_width = None diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index f142b36534e22..6e003d5a032db 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -66,7 +66,6 @@ class SparseDataFrame(DataFrame): Default fill_value for converting Series to SparseSeries. Will not override SparseSeries passed in """ - _verbose_info = False _columns = None _series = None _is_mixed_type = False diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index c31f4e3b8061d..2f30f7817f3e1 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -470,6 +470,30 @@ def test_frame_info_encoding(self): repr(df.T) fmt.set_printoptions(max_rows=200) + def test_large_frame_repr(self): + old_max_rows = pd.get_option('display.max_rows') + old_max_info_rows = pd.get_option('display.max_info_rows') + + nrows, ncols = 3, 2 + + # need to set max rows so that we get an info-style repr + pd.set_option('display.max_rows', nrows - 1) + pd.set_option('display.max_info_rows', nrows) + + smallx = DataFrame(np.random.rand(nrows, ncols)) + repr_small = repr(smallx) + + bigx = DataFrame(np.random.rand(nrows + 1, ncols)) + repr_big = repr(bigx) + + diff = len(repr_small.splitlines()) - len(repr_big.splitlines()) + + # the difference in line count is the number of columns + self.assertEqual(diff, ncols) + + pd.set_option('display.max_rows', old_max_rows) + pd.set_option('display.max_info_rows', old_max_info_rows) + def test_wide_repr(self): with option_context('mode.sim_interactive', True): col = lambda l, k: [tm.rands(k) for _ in xrange(l)] From f3a882e9408af5bc93f1569c2e567a0f5229870c Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 26 Feb 2013 14:51:07 -0500 Subject: [PATCH 2/4] remove inconsistent validator --- pandas/core/config_init.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 43dd529446f5a..97bf7e73a5374 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -134,12 +134,12 @@ perform a null check when repr'ing. """ + with cf.config_prefix('display'): cf.register_option('precision', 7, pc_precision_doc, validator=is_int) cf.register_option('float_format', None, float_format_doc) cf.register_option('column_space', 12, validator=is_int) - cf.register_option('max_info_rows', 1000000, pc_max_info_rows_doc, - validator=lambda x: is_int(x) or x is None) + cf.register_option('max_info_rows', 1000000, pc_max_info_rows_doc) cf.register_option('max_rows', 100, pc_max_rows_doc, validator=is_int) cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) cf.register_option('max_columns', 20, pc_max_cols_doc, validator=is_int) From 7e17d91148bb50972fef253449fd00faf93e7d8b Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 26 Feb 2013 14:51:43 -0500 Subject: [PATCH 3/4] add a test for setting display.max_info_rows to None --- pandas/tests/test_format.py | 46 ++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 2f30f7817f3e1..1499a2fdce1fb 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -471,28 +471,42 @@ def test_frame_info_encoding(self): fmt.set_printoptions(max_rows=200) def test_large_frame_repr(self): - old_max_rows = pd.get_option('display.max_rows') - old_max_info_rows = pd.get_option('display.max_info_rows') - - nrows, ncols = 3, 2 + def wrap_rows_options(f): + def _f(*args, **kwargs): + old_max_rows = pd.get_option('display.max_rows') + old_max_info_rows = pd.get_option('display.max_info_rows') + o = f(*args, **kwargs) + pd.set_option('display.max_rows', old_max_rows) + pd.set_option('display.max_info_rows', old_max_info_rows) + return o + return 
_f + + @wrap_rows_options + def test_setting(value, nrows=3, ncols=2): + if value is None: + expected_difference = 0 + elif isinstance(value, int): + expected_difference = ncols + else: + raise ValueError("'value' must be int or None") - # need to set max rows so that we get an info-style repr - pd.set_option('display.max_rows', nrows - 1) - pd.set_option('display.max_info_rows', nrows) + pd.set_option('display.max_rows', nrows - 1) + pd.set_option('display.max_info_rows', value) - smallx = DataFrame(np.random.rand(nrows, ncols)) - repr_small = repr(smallx) + smallx = DataFrame(np.random.rand(nrows, ncols)) + repr_small = repr(smallx) - bigx = DataFrame(np.random.rand(nrows + 1, ncols)) - repr_big = repr(bigx) + bigx = DataFrame(np.random.rand(nrows + 1, ncols)) + repr_big = repr(bigx) - diff = len(repr_small.splitlines()) - len(repr_big.splitlines()) + diff = len(repr_small.splitlines()) - len(repr_big.splitlines()) - # the difference in line count is the number of columns - self.assertEqual(diff, ncols) + # the difference in line count is the number of columns + self.assertEqual(diff, expected_difference) - pd.set_option('display.max_rows', old_max_rows) - pd.set_option('display.max_info_rows', old_max_info_rows) + test_setting(None) + test_setting(3) + self.assertRaises(ValueError, test_setting, 'string') def test_wide_repr(self): with option_context('mode.sim_interactive', True): From 525d770a67a297b382f0c136b9fb90feb7fb1c56 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 7 Mar 2013 11:34:55 -0500 Subject: [PATCH 4/4] add validator for max_info_rows option --- pandas/core/config_init.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 97bf7e73a5374..44edc0e4e9c4a 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -1,5 +1,6 @@ import pandas.core.config as cf -from pandas.core.config import is_int, is_bool, is_text, is_float +from 
pandas.core.config import (is_int, is_bool, is_text, is_float, + is_instance_factory) from pandas.core.format import detect_console_encoding """ @@ -139,7 +140,8 @@ cf.register_option('precision', 7, pc_precision_doc, validator=is_int) cf.register_option('float_format', None, float_format_doc) cf.register_option('column_space', 12, validator=is_int) - cf.register_option('max_info_rows', 1000000, pc_max_info_rows_doc) + cf.register_option('max_info_rows', 1000000, pc_max_info_rows_doc, + validator=is_instance_factory((int, type(None)))) cf.register_option('max_rows', 100, pc_max_rows_doc, validator=is_int) cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) cf.register_option('max_columns', 20, pc_max_cols_doc, validator=is_int)