Change Index repr to adjust to string length

jorisvandenbossche · jreback · commit b190a9d0e0be · 2015-05-09T14:23:28.000-04:00
Conflicts:
	pandas/tseries/base.py

use new format_data

updates

Fix detection of good width

more fixes

Change [

Conflicts:
	pandas/core/index.py

more fixes

revised according to comments
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -13,7 +13,10 @@ Highlights include:
 - New section on how-to-contribute to *pandas*, see :ref:`here <contributing>`
 - Revised "Merge, join, and concatenate" documentation, including graphical examples to make it easier to understand each operations, see :ref:`here <merging>`
 - New method ``sample`` for drawing random samples from Series, DataFrames and Panels. See :ref:`here <whatsnew_0161.enhancements.sample>`
-- ``BusinessHour`` date-offset is now supported, see :ref:`here <timeseries.businesshour>`
+- The default ``Index`` printing has changed to a more uniform format, see :ref:`here <whatsnew_0161.index_repr>`
+- ``BusinessHour`` datetime-offset is now supported, see :ref:`here <timeseries.businesshour>`
+
+
 -  Further enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here <whatsnew_0161.enhancements.string>`
 
 .. contents:: What's new in v0.16.1
@@ -273,8 +276,7 @@ API changes
 Index Representation
 ~~~~~~~~~~~~~~~~~~~~
 
-The string representation of ``Index`` and its sub-classes have now been unified. ``Index, Int64Index, Float64Index, CategoricalIndex`` are single-line display. The datetimelikes ``DatetimeIndex, PeriodIndex, TimedeltaIndex`` & ``MultiIndex`` will display in a multi-line format showing much more of the index values. The display width responds to the option ``display.max_seq_items``,
-which is now defaulted to 20 (previously was 100). (:issue:`6482`)
+The string representation of ``Index`` and its sub-classes has now been unified. These will show a single-line display if there are few values; a wrapped multi-line display for a lot of values (but fewer than ``display.max_seq_items``); and a truncated display (the head and tail of the data) if there are lots of items (> ``display.max_seq_items``). The formatting for ``MultiIndex`` is unchanged (a multi-line wrapped display). The display width responds to the option ``display.max_seq_items``, which defaults to 100. (:issue:`6482`)
 
 Previous Behavior
 
@@ -307,8 +309,15 @@ New Behavior
 
    pd.get_option('max_seq_items')
    pd.Index(range(4),name='foo')
+   pd.Index(range(25),name='foo')
    pd.Index(range(104),name='foo')
+   pd.CategoricalIndex(['a','bb','ccc','dddd'],ordered=True,name='foobar')
+   pd.CategoricalIndex(['a','bb','ccc','dddd']*10,ordered=True,name='foobar')
+   pd.CategoricalIndex(['a','bb','ccc','dddd']*100,ordered=True,name='foobar')
+   pd.CategoricalIndex(np.arange(1000),ordered=True,name='foobar')
+   pd.Index(['a','bb','ccc','dddd']*100)
    pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
+   pd.date_range('20130101',periods=25,name='foo',tz='US/Eastern')
    pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
 
 .. _whatsnew_0161.deprecations:
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
     cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
                        validator=is_one_of_factory([True, False, 'truncate']))
     cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
-    cf.register_option('max_seq_items', 20, pc_max_seq_items)
+    cf.register_option('max_seq_items', 100, pc_max_seq_items)
     cf.register_option('mpl_style', None, pc_mpl_style_doc,
                        validator=is_one_of_factory([None, False, 'default']),
                        cb=mpl_style_cb)
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -8,6 +8,7 @@
 from pandas import compat
 import numpy as np
 
+from math import ceil
 from sys import getsizeof
 import pandas.tslib as tslib
 import pandas.lib as lib
@@ -405,8 +406,6 @@ def __unicode__(self):
         # no data provided, just attributes
         if data is None:
             data = ''
-        else:
-            data = "%s,%s" % (data, space)
 
         res = u("%s(%s%s)") % (klass,
                                data,
@@ -435,59 +434,97 @@ def _format_data(self):
         """
         Return the formatted data as a unicode string
         """
-        space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
-        space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
-        sep = ',%s' % space1
+        space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
+        space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
+
+        sep = ','
         max_seq_items = get_option('display.max_seq_items')
         formatter = self._formatter_func
+        needs_justify = self.inferred_type in ['string','categorical']
+
+        def best_len(values):
+            return max([len(x) for x in values]) + 2
+
+        def best_rows(values, max_len):
+            from pandas.core.format import get_console_size
+            display_width, _ = get_console_size()
+            if display_width is None:
+                display_width = get_option('display.width')
+            n_per_row = (display_width - len(self.__class__.__name__) - 2) // max_len
+            n_rows = int(ceil(len(values) / float(n_per_row)))
+            return n_per_row, n_rows
+
+        def best_fit(values, max_len, n_rows=None, justify=False):
+
+            # number of rows to generate
+            if n_rows is None:
+                n_per_row, n_rows = best_rows(values, max_len)
+            else:
+                n_per_row = len(values)
+
+            # adjust all values to max length if we have multi-lines
+            if justify:
+                values = [values[0].rjust(max_len-2)] + [x.rjust(max_len-1) for x in values[1:]]
+                multi_line_space = space1
+            else:
+                multi_line_space = space2
+
+            sep_elements = sep + ' '
+            summary = ''
+            for i in range(n_rows - 1):
+                summary += sep_elements.join(values[i*n_per_row:(i+1)*n_per_row])
+                summary += sep
+                summary += multi_line_space
+            summary += sep_elements.join(values[(n_rows - 1)*n_per_row:n_rows*n_per_row])
+
+            return summary
+
         n = len(self)
         if n == 0:
-            summary = '[]'
+            summary = '[], '
         elif n == 1:
             first = formatter(self[0])
-            summary = '[%s]' % first
+            summary = '[%s], ' % first
         elif n == 2:
             first = formatter(self[0])
             last = formatter(self[-1])
-            summary = '[%s%s%s]' % (first, sep, last)
+            summary = '[%s, %s], ' % (first, last)
         elif n > max_seq_items:
             n = min(max_seq_items//2,10)
 
-            head = sep.join([ formatter(x) for x in self[:n] ])
-            tail = sep.join([ formatter(x) for x in self[-n:] ])
-            summary = '[%s%s...%s%s]' % (head, space1, space1, tail)
-        else:
-            values = sep.join([ formatter(x) for x in self ])
-            summary = '[%s]' % (values)
+            head = [ formatter(x) for x in self[:n] ]
+            tail = [ formatter(x) for x in self[-n:] ]
+            max_len = max(best_len(head),best_len(tail))
 
-        return summary
+            if needs_justify:
+                n_rows = 1
+                justify = False
+            else:
+                n_rows = None
+                justify = True
+
+            summary = '['
+            summary += best_fit(head, max_len, n_rows=n_rows, justify=justify)
+            summary += ',' + space1 + ' ...' + space2
+            summary += best_fit(tail, max_len, n_rows=n_rows, justify=justify)
+            summary += '],'
+            summary += space1
 
-    def _format_data2(self):
-        """
-        Return the formatted data as a unicode string
-        """
-        max_seq_items = get_option('display.max_seq_items')
-        formatter = self._formatter_func
-        n = len(self)
-        if n == 0:
-            summary = '[]'
-        elif n == 1:
-            first = formatter(self[0])
-            summary = '[%s]' % first
-        elif n == 2:
-            first = formatter(self[0])
-            last = formatter(self[-1])
-            summary = '[%s, %s]' % (first, last)
-        elif n > max_seq_items:
-            n = min(max_seq_items//2,5)
-            head = ', '.join([ formatter(x) for x in self[:n] ])
-            tail = ', '.join([ formatter(x) for x in self[-n:] ])
-            summary = '[%s, ..., %s]' % (head, tail)
         else:
-            summary = "[%s]" % ', '.join([ formatter(x) for x in self ])
+            values = [ formatter(x) for x in self ]
 
-        return summary
+            max_len = best_len(values)
+            n_per_row, n_rows = best_rows(values, max_len)
+
+            summary = '['
+            summary += best_fit(values, max_len)
+            summary += '],'
+            if n_rows > 1:
+                summary += space1
+            else:
+                summary += ' '
 
+        return summary
 
     def _format_attrs(self):
         """
@@ -2911,7 +2948,9 @@ def _format_attrs(self):
         """
         Return a list of tuples of the (attr,formatted_value)
         """
-        attrs = [('categories', default_pprint(self.categories)),
+        max_categories = (10 if get_option("display.max_categories") == 0
+                    else get_option("display.max_categories"))
+        attrs = [('categories', default_pprint(self.categories, max_seq_items=max_categories)),
                  ('ordered',self.ordered)]
         if self.name is not None:
             attrs.append(('name',default_pprint(self.name)))
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -3220,8 +3220,8 @@ def test_dates(self):
 
     def test_mixed(self):
         text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
-        self.assertTrue("['2013-01-01 00:00:00'," in text)
-        self.assertTrue(", '2014-01-01 00:00:00']" in text)
+        self.assertTrue("'2013-01-01 00:00:00'," in text)
+        self.assertTrue("'2014-01-01 00:00:00']" in text)
 
 
 class TestStringRepTimestamp(tm.TestCase):
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -2464,7 +2464,7 @@ def test_print_unicode_columns(self):
     def test_repr_summary(self):
         with cf.option_context('display.max_seq_items', 10):
             r = repr(pd.Index(np.arange(1000)))
-            self.assertTrue(len(r) < 100)
+            self.assertTrue(len(r) < 200)
             self.assertTrue("..." in r)
 
     def test_repr_roundtrip(self):
diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py
@@ -123,26 +123,20 @@ def test_representation(self):
 
         exp2 = """DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D', tz=None)"""
 
-        exp3 = """DatetimeIndex(['2011-01-01'
-               '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
+        exp3 = """DatetimeIndex(['2011-01-01', '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
 
-        exp4 = """DatetimeIndex(['2011-01-01',
-               '2011-01-02',
-               '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
+        exp4 = """DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
 
-        exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00',
-               '2011-01-01 10:00:00+09:00',
-               '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
+        exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00', '2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
 
-        exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00',
-               '2011-01-01 10:00:00-05:00',
-               'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
+        exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', 'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
 
-        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
-                                 [exp1, exp2, exp3, exp4, exp5, exp6]):
-            for func in ['__repr__', '__unicode__', '__str__']:
-                result = getattr(idx, func)()
-                self.assertEqual(result, expected)
+        with pd.option_context('display.width', 300):
+            for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
+                                     [exp1, exp2, exp3, exp4, exp5, exp6]):
+                for func in ['__repr__', '__unicode__', '__str__']:
+                    result = getattr(idx, func)()
+                    self.assertEqual(result, expected)
 
     def test_summary(self):
         # GH9116
@@ -377,22 +371,18 @@ def test_representation(self):
 
         exp2 = """TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')"""
 
-        exp3 = """TimedeltaIndex(['1 days'
-                '2 days'], dtype='timedelta64[ns]', freq='D')"""
+        exp3 = """TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')"""
 
-        exp4 = """TimedeltaIndex(['1 days',
-                '2 days',
-                '3 days'], dtype='timedelta64[ns]', freq='D')"""
+        exp4 = """TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq='D')"""
 
-        exp5 = """TimedeltaIndex(['1 days 00:00:01',
-                '2 days 00:00:00',
-                '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
+        exp5 = """TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
 
-        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
-                                 [exp1, exp2, exp3, exp4, exp5]):
-            for func in ['__repr__', '__unicode__', '__str__']:
-                result = getattr(idx, func)()
-                self.assertEqual(result, expected)
+        with pd.option_context('display.width',300):
+            for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
+                                     [exp1, exp2, exp3, exp4, exp5]):
+                for func in ['__repr__', '__unicode__', '__str__']:
+                    result = getattr(idx, func)()
+                    self.assertEqual(result, expected)
 
     def test_summary(self):
         # GH9116
@@ -846,29 +836,19 @@ def test_representation(self):
 
         exp2 = """PeriodIndex(['2011-01-01'], dtype='int64', freq='D')"""
 
-        exp3 = """PeriodIndex(['2011-01-01'
-             '2011-01-02'], dtype='int64', freq='D')"""
+        exp3 = """PeriodIndex(['2011-01-01', '2011-01-02'], dtype='int64', freq='D')"""
 
-        exp4 = """PeriodIndex(['2011-01-01',
-             '2011-01-02',
-             '2011-01-03'], dtype='int64', freq='D')"""
+        exp4 = """PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='int64', freq='D')"""
 
-        exp5 = """PeriodIndex(['2011',
-             '2012',
-             '2013'], dtype='int64', freq='A-DEC')"""
+        exp5 = """PeriodIndex(['2011', '2012', '2013'], dtype='int64', freq='A-DEC')"""
 
-        exp6 = """PeriodIndex(['2011-01-01 09:00',
-             '2012-02-01 10:00',
-             'NaT'], dtype='int64', freq='H')"""
+        exp6 = """PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], dtype='int64', freq='H')"""
 
         exp7 = """PeriodIndex(['2013Q1'], dtype='int64', freq='Q-DEC')"""
 
-        exp8 = """PeriodIndex(['2013Q1'
-             '2013Q2'], dtype='int64', freq='Q-DEC')"""
+        exp8 = """PeriodIndex(['2013Q1', '2013Q2'], dtype='int64', freq='Q-DEC')"""
 
-        exp9 = """PeriodIndex(['2013Q1',
-             '2013Q2',
-             '2013Q3'], dtype='int64', freq='Q-DEC')"""
+        exp9 = """PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='int64', freq='Q-DEC')"""
 
         for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
                                  [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]):