Skip to content

Commit b190a9d

Browse files
jorisvandenbossche authored and
jreback committed
Change Index repr to adjust to string length
Conflicts: pandas/tseries/base.py use new format_data updates Fix detection of good width more fixes Change [ Conflicts: pandas/core/index.py more fixes revised according to comments
1 parent a3c52d1 commit b190a9d

File tree

6 files changed

+119
-91
lines changed

6 files changed

+119
-91
lines changed

doc/source/whatsnew/v0.16.1.txt

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@ Highlights include:
1313
- New section on how-to-contribute to *pandas*, see :ref:`here <contributing>`
1414
- Revised "Merge, join, and concatenate" documentation, including graphical examples to make it easier to understand each operation, see :ref:`here <merging>`
1515
- New method ``sample`` for drawing random samples from Series, DataFrames and Panels. See :ref:`here <whatsnew_0161.enhancements.sample>`
16-
- ``BusinessHour`` date-offset is now supported, see :ref:`here <timeseries.businesshour>`
16+
- The default ``Index`` printing has changed to a more uniform format, see :ref:`here <whatsnew_0161.index_repr>`
17+
- ``BusinessHour`` datetime-offset is now supported, see :ref:`here <timeseries.businesshour>`
18+
19+
>>>>>>> more fixes
1720
- Further enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here <whatsnew_0161.enhancements.string>`
1821

1922
.. contents:: What's new in v0.16.1
@@ -273,8 +276,7 @@ API changes
273276
Index Representation
274277
~~~~~~~~~~~~~~~~~~~~
275278

276-
The string representation of ``Index`` and its sub-classes have now been unified. ``Index, Int64Index, Float64Index, CategoricalIndex`` are single-line display. The datetimelikes ``DatetimeIndex, PeriodIndex, TimedeltaIndex`` & ``MultiIndex`` will display in a multi-line format showing much more of the index values. The display width responds to the option ``display.max_seq_items``,
277-
which is now defaulted to 20 (previously was 100). (:issue:`6482`)
279+
The string representation of ``Index`` and its sub-classes has now been unified. These will show a single-line display if there are few values; a wrapped multi-line display if there are a lot of values (but fewer than ``display.max_seq_items``); and a truncated display (the head and tail of the data) if there are more than ``display.max_seq_items`` values. The formatting for ``MultiIndex`` is unchanged (a multi-line wrapped display). The display width responds to the option ``display.max_seq_items``, which defaults to 100. (:issue:`6482`)
278280

279281
Previous Behavior
280282

@@ -307,8 +309,15 @@ New Behavior
307309

308310
pd.get_option('max_seq_items')
309311
pd.Index(range(4),name='foo')
312+
pd.Index(range(25),name='foo')
310313
pd.Index(range(104),name='foo')
314+
pd.CategoricalIndex(['a','bb','ccc','dddd'],ordered=True,name='foobar')
315+
pd.CategoricalIndex(['a','bb','ccc','dddd']*10,ordered=True,name='foobar')
316+
pd.CategoricalIndex(['a','bb','ccc','dddd']*100,ordered=True,name='foobar')
317+
pd.CategoricalIndex(np.arange(1000),ordered=True,name='foobar')
318+
pd.Index(['a','bb','ccc','dddd']*100)
311319
pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
320+
pd.date_range('20130101',periods=25,name='foo',tz='US/Eastern')
312321
pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
313322

314323
.. _whatsnew_0161.deprecations:

pandas/core/config_init.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
269269
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
270270
validator=is_one_of_factory([True, False, 'truncate']))
271271
cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
272-
cf.register_option('max_seq_items', 20, pc_max_seq_items)
272+
cf.register_option('max_seq_items', 100, pc_max_seq_items)
273273
cf.register_option('mpl_style', None, pc_mpl_style_doc,
274274
validator=is_one_of_factory([None, False, 'default']),
275275
cb=mpl_style_cb)

pandas/core/index.py

Lines changed: 78 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas import compat
99
import numpy as np
1010

11+
from math import ceil
1112
from sys import getsizeof
1213
import pandas.tslib as tslib
1314
import pandas.lib as lib
@@ -405,8 +406,6 @@ def __unicode__(self):
405406
# no data provided, just attributes
406407
if data is None:
407408
data = ''
408-
else:
409-
data = "%s,%s" % (data, space)
410409

411410
res = u("%s(%s%s)") % (klass,
412411
data,
@@ -435,59 +434,97 @@ def _format_data(self):
435434
"""
436435
Return the formatted data as a unicode string
437436
"""
438-
space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
439-
space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
440-
sep = ',%s' % space1
437+
space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
438+
space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
439+
440+
sep = ','
441441
max_seq_items = get_option('display.max_seq_items')
442442
formatter = self._formatter_func
443+
needs_justify = self.inferred_type in ['string','categorical']
444+
445+
def best_len(values):
446+
return max([len(x) for x in values]) + 2
447+
448+
def best_rows(values, max_len):
449+
from pandas.core.format import get_console_size
450+
display_width, _ = get_console_size()
451+
if display_width is None:
452+
display_width = get_option('display.width')
453+
n_per_row = (display_width - len(self.__class__.__name__) - 2) // max_len
454+
n_rows = int(ceil(len(values) / float(n_per_row)))
455+
return n_per_row, n_rows
456+
457+
def best_fit(values, max_len, n_rows=None, justify=False):
458+
459+
# number of rows to generate
460+
if n_rows is None:
461+
n_per_row, n_rows = best_rows(values, max_len)
462+
else:
463+
n_per_row = len(values)
464+
465+
# adjust all values to max length if we have multi-lines
466+
if justify:
467+
values = [values[0].rjust(max_len-2)] + [x.rjust(max_len-1) for x in values[1:]]
468+
multi_line_space = space1
469+
else:
470+
multi_line_space = space2
471+
472+
sep_elements = sep + ' '
473+
summary = ''
474+
for i in range(n_rows - 1):
475+
summary += sep_elements.join(values[i*n_per_row:(i+1)*n_per_row])
476+
summary += sep
477+
summary += multi_line_space
478+
summary += sep_elements.join(values[(n_rows - 1)*n_per_row:n_rows*n_per_row])
479+
480+
return summary
481+
443482
n = len(self)
444483
if n == 0:
445-
summary = '[]'
484+
summary = '[], '
446485
elif n == 1:
447486
first = formatter(self[0])
448-
summary = '[%s]' % first
487+
summary = '[%s], ' % first
449488
elif n == 2:
450489
first = formatter(self[0])
451490
last = formatter(self[-1])
452-
summary = '[%s%s%s]' % (first, sep, last)
491+
summary = '[%s, %s], ' % (first, last)
453492
elif n > max_seq_items:
454493
n = min(max_seq_items//2,10)
455494

456-
head = sep.join([ formatter(x) for x in self[:n] ])
457-
tail = sep.join([ formatter(x) for x in self[-n:] ])
458-
summary = '[%s%s...%s%s]' % (head, space1, space1, tail)
459-
else:
460-
values = sep.join([ formatter(x) for x in self ])
461-
summary = '[%s]' % (values)
495+
head = [ formatter(x) for x in self[:n] ]
496+
tail = [ formatter(x) for x in self[-n:] ]
497+
max_len = max(best_len(head),best_len(tail))
462498

463-
return summary
499+
if needs_justify:
500+
n_rows = 1
501+
justify = False
502+
else:
503+
n_rows = None
504+
justify = True
505+
506+
summary = '['
507+
summary += best_fit(head, max_len, n_rows=n_rows, justify=justify)
508+
summary += ',' + space1 + ' ...' + space2
509+
summary += best_fit(tail, max_len, n_rows=n_rows, justify=justify)
510+
summary += '],'
511+
summary += space1
464512

465-
def _format_data2(self):
466-
"""
467-
Return the formatted data as a unicode string
468-
"""
469-
max_seq_items = get_option('display.max_seq_items')
470-
formatter = self._formatter_func
471-
n = len(self)
472-
if n == 0:
473-
summary = '[]'
474-
elif n == 1:
475-
first = formatter(self[0])
476-
summary = '[%s]' % first
477-
elif n == 2:
478-
first = formatter(self[0])
479-
last = formatter(self[-1])
480-
summary = '[%s, %s]' % (first, last)
481-
elif n > max_seq_items:
482-
n = min(max_seq_items//2,5)
483-
head = ', '.join([ formatter(x) for x in self[:n] ])
484-
tail = ', '.join([ formatter(x) for x in self[-n:] ])
485-
summary = '[%s, ..., %s]' % (head, tail)
486513
else:
487-
summary = "[%s]" % ', '.join([ formatter(x) for x in self ])
514+
values = [ formatter(x) for x in self ]
488515

489-
return summary
516+
max_len = best_len(values)
517+
n_per_row, n_rows = best_rows(values, max_len)
518+
519+
summary = '['
520+
summary += best_fit(values, max_len)
521+
summary += '],'
522+
if n_rows > 1:
523+
summary += space1
524+
else:
525+
summary += ' '
490526

527+
return summary
491528

492529
def _format_attrs(self):
493530
"""
@@ -2911,7 +2948,9 @@ def _format_attrs(self):
29112948
"""
29122949
Return a list of tuples of the (attr,formatted_value)
29132950
"""
2914-
attrs = [('categories', default_pprint(self.categories)),
2951+
max_categories = (10 if get_option("display.max_categories") == 0
2952+
else get_option("display.max_categories"))
2953+
attrs = [('categories', default_pprint(self.categories, max_seq_items=max_categories)),
29152954
('ordered',self.ordered)]
29162955
if self.name is not None:
29172956
attrs.append(('name',default_pprint(self.name)))

pandas/tests/test_format.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3220,8 +3220,8 @@ def test_dates(self):
32203220

32213221
def test_mixed(self):
32223222
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
3223-
self.assertTrue("['2013-01-01 00:00:00'," in text)
3224-
self.assertTrue(", '2014-01-01 00:00:00']" in text)
3223+
self.assertTrue("'2013-01-01 00:00:00'," in text)
3224+
self.assertTrue("'2014-01-01 00:00:00']" in text)
32253225

32263226

32273227
class TestStringRepTimestamp(tm.TestCase):

pandas/tests/test_index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2464,7 +2464,7 @@ def test_print_unicode_columns(self):
24642464
def test_repr_summary(self):
24652465
with cf.option_context('display.max_seq_items', 10):
24662466
r = repr(pd.Index(np.arange(1000)))
2467-
self.assertTrue(len(r) < 100)
2467+
self.assertTrue(len(r) < 200)
24682468
self.assertTrue("..." in r)
24692469

24702470
def test_repr_roundtrip(self):

pandas/tseries/tests/test_base.py

Lines changed: 25 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -123,26 +123,20 @@ def test_representation(self):
123123

124124
exp2 = """DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D', tz=None)"""
125125

126-
exp3 = """DatetimeIndex(['2011-01-01'
127-
'2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
126+
exp3 = """DatetimeIndex(['2011-01-01', '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
128127

129-
exp4 = """DatetimeIndex(['2011-01-01',
130-
'2011-01-02',
131-
'2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
128+
exp4 = """DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
132129

133-
exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00',
134-
'2011-01-01 10:00:00+09:00',
135-
'2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
130+
exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00', '2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
136131

137-
exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00',
138-
'2011-01-01 10:00:00-05:00',
139-
'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
132+
exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', 'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
140133

141-
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
142-
[exp1, exp2, exp3, exp4, exp5, exp6]):
143-
for func in ['__repr__', '__unicode__', '__str__']:
144-
result = getattr(idx, func)()
145-
self.assertEqual(result, expected)
134+
with pd.option_context('display.width', 300):
135+
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
136+
[exp1, exp2, exp3, exp4, exp5, exp6]):
137+
for func in ['__repr__', '__unicode__', '__str__']:
138+
result = getattr(idx, func)()
139+
self.assertEqual(result, expected)
146140

147141
def test_summary(self):
148142
# GH9116
@@ -377,22 +371,18 @@ def test_representation(self):
377371

378372
exp2 = """TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')"""
379373

380-
exp3 = """TimedeltaIndex(['1 days'
381-
'2 days'], dtype='timedelta64[ns]', freq='D')"""
374+
exp3 = """TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')"""
382375

383-
exp4 = """TimedeltaIndex(['1 days',
384-
'2 days',
385-
'3 days'], dtype='timedelta64[ns]', freq='D')"""
376+
exp4 = """TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq='D')"""
386377

387-
exp5 = """TimedeltaIndex(['1 days 00:00:01',
388-
'2 days 00:00:00',
389-
'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
378+
exp5 = """TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
390379

391-
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
392-
[exp1, exp2, exp3, exp4, exp5]):
393-
for func in ['__repr__', '__unicode__', '__str__']:
394-
result = getattr(idx, func)()
395-
self.assertEqual(result, expected)
380+
with pd.option_context('display.width',300):
381+
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
382+
[exp1, exp2, exp3, exp4, exp5]):
383+
for func in ['__repr__', '__unicode__', '__str__']:
384+
result = getattr(idx, func)()
385+
self.assertEqual(result, expected)
396386

397387
def test_summary(self):
398388
# GH9116
@@ -846,29 +836,19 @@ def test_representation(self):
846836

847837
exp2 = """PeriodIndex(['2011-01-01'], dtype='int64', freq='D')"""
848838

849-
exp3 = """PeriodIndex(['2011-01-01'
850-
'2011-01-02'], dtype='int64', freq='D')"""
839+
exp3 = """PeriodIndex(['2011-01-01', '2011-01-02'], dtype='int64', freq='D')"""
851840

852-
exp4 = """PeriodIndex(['2011-01-01',
853-
'2011-01-02',
854-
'2011-01-03'], dtype='int64', freq='D')"""
841+
exp4 = """PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='int64', freq='D')"""
855842

856-
exp5 = """PeriodIndex(['2011',
857-
'2012',
858-
'2013'], dtype='int64', freq='A-DEC')"""
843+
exp5 = """PeriodIndex(['2011', '2012', '2013'], dtype='int64', freq='A-DEC')"""
859844

860-
exp6 = """PeriodIndex(['2011-01-01 09:00',
861-
'2012-02-01 10:00',
862-
'NaT'], dtype='int64', freq='H')"""
845+
exp6 = """PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], dtype='int64', freq='H')"""
863846

864847
exp7 = """PeriodIndex(['2013Q1'], dtype='int64', freq='Q-DEC')"""
865848

866-
exp8 = """PeriodIndex(['2013Q1'
867-
'2013Q2'], dtype='int64', freq='Q-DEC')"""
849+
exp8 = """PeriodIndex(['2013Q1', '2013Q2'], dtype='int64', freq='Q-DEC')"""
868850

869-
exp9 = """PeriodIndex(['2013Q1',
870-
'2013Q2',
871-
'2013Q3'], dtype='int64', freq='Q-DEC')"""
851+
exp9 = """PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='int64', freq='Q-DEC')"""
872852

873853
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
874854
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]):

0 commit comments

Comments
 (0)