diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f79288c167356..145d116261a82 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -57,17 +57,11 @@ import pandas.core.missing as missing import pandas.core.algorithms as algos import pandas.core.sorting as sorting -from pandas.io.formats.printing import pprint_thing +from pandas.io.formats.printing import ( + pprint_thing, default_pprint, format_object_summary, format_object_attrs) from pandas.core.ops import make_invalid_op -from pandas.core.config import get_option from pandas.core.strings import StringMethods - -# simplify -default_pprint = lambda x, max_seq_items=None: \ - pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, - max_seq_items=max_seq_items) - __all__ = ['Index'] _unsortable_types = frozenset(('mixed', 'mixed-integer')) @@ -1034,7 +1028,7 @@ def _format_space(self): @property def _formatter_func(self): """ - Return the formatted data as a unicode string + Return the formatter function """ return default_pprint @@ -1042,125 +1036,20 @@ def _format_data(self, name=None): """ Return the formatted data as a unicode string """ - from pandas.io.formats.console import get_console_size - from pandas.io.formats.format import _get_adjustment - display_width, _ = get_console_size() - if display_width is None: - display_width = get_option('display.width') or 80 - if name is None: - name = self.__class__.__name__ - - space1 = "\n%s" % (' ' * (len(name) + 1)) - space2 = "\n%s" % (' ' * (len(name) + 2)) - - n = len(self) - sep = ',' - max_seq_items = get_option('display.max_seq_items') or n - formatter = self._formatter_func # do we want to justify (only do so for non-objects) is_justify = not (self.inferred_type in ('string', 'unicode') or (self.inferred_type == 'categorical' and is_object_dtype(self.categories))) - # are we a truncated display - is_truncated = n > max_seq_items - - # adj can optionally handle unicode eastern asian width - adj = 
_get_adjustment() - - def _extend_line(s, line, value, display_width, next_line_prefix): - - if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= - display_width): - s += line.rstrip() - line = next_line_prefix - line += value - return s, line - - def best_len(values): - if values: - return max(adj.len(x) for x in values) - else: - return 0 - - if n == 0: - summary = '[], ' - elif n == 1: - first = formatter(self[0]) - summary = '[%s], ' % first - elif n == 2: - first = formatter(self[0]) - last = formatter(self[-1]) - summary = '[%s, %s], ' % (first, last) - else: - - if n > max_seq_items: - n = min(max_seq_items // 2, 10) - head = [formatter(x) for x in self[:n]] - tail = [formatter(x) for x in self[-n:]] - else: - head = [] - tail = [formatter(x) for x in self] - - # adjust all values to max length if needed - if is_justify: - - # however, if we are not truncated and we are only a single - # line, then don't justify - if (is_truncated or - not (len(', '.join(head)) < display_width and - len(', '.join(tail)) < display_width)): - max_len = max(best_len(head), best_len(tail)) - head = [x.rjust(max_len) for x in head] - tail = [x.rjust(max_len) for x in tail] - - summary = "" - line = space2 - - for i in range(len(head)): - word = head[i] + sep + ' ' - summary, line = _extend_line(summary, line, word, - display_width, space2) - - if is_truncated: - # remove trailing space of last line - summary += line.rstrip() + space2 + '...' 
- line = space2 - - for i in range(len(tail) - 1): - word = tail[i] + sep + ' ' - summary, line = _extend_line(summary, line, word, - display_width, space2) - - # last value: no sep added + 1 space of width used for trailing ',' - summary, line = _extend_line(summary, line, tail[-1], - display_width - 2, space2) - summary += line - summary += '],' - - if len(summary) > (display_width): - summary += space1 - else: # one row - summary += ' ' - - # remove initial space - summary = '[' + summary[len(space2):] - - return summary + return format_object_summary(self, self._formatter_func, + is_justify=is_justify, name=name) def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value) """ - attrs = [] - attrs.append(('dtype', "'%s'" % self.dtype)) - if self.name is not None: - attrs.append(('name', default_pprint(self.name))) - max_seq_items = get_option('display.max_seq_items') or len(self) - if len(self) > max_seq_items: - attrs.append(('length', len(self))) - return attrs + return format_object_attrs(self) def to_series(self, index=None, name=None): """ diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index a101113da23ba..e22d7bce42841 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -261,3 +261,157 @@ class TableSchemaFormatter(BaseFormatter): # unregister tableschema mime-type if mimetype in formatters: formatters[mimetype].enabled = False + + +default_pprint = lambda x, max_seq_items=None: \ + pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, + max_seq_items=max_seq_items) + + +def format_object_summary(obj, formatter, is_justify=True, name=None): + """ + Return the formatted obj as a unicode string + + Parameters + ---------- + obj : object + must be iterable and support __getitem__ + formatter : callable + string formatter for an element + is_justify : boolean + should justify the display + name : name, optional + defaults to the class name of the obj + + Returns 
------- + summary string + + """ + from pandas.io.formats.console import get_console_size + from pandas.io.formats.format import _get_adjustment + + display_width, _ = get_console_size() + if display_width is None: + display_width = get_option('display.width') or 80 + if name is None: + name = obj.__class__.__name__ + + space1 = "\n%s" % (' ' * (len(name) + 1)) + space2 = "\n%s" % (' ' * (len(name) + 2)) + + n = len(obj) + sep = ',' + max_seq_items = get_option('display.max_seq_items') or n + + # are we a truncated display + is_truncated = n > max_seq_items + + # adj can optionally handle unicode eastern asian width + adj = _get_adjustment() + + def _extend_line(s, line, value, display_width, next_line_prefix): + + if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= + display_width): + s += line.rstrip() + line = next_line_prefix + line += value + return s, line + + def best_len(values): + if values: + return max(adj.len(x) for x in values) + else: + return 0 + + if n == 0: + summary = '[], ' + elif n == 1: + first = formatter(obj[0]) + summary = '[%s], ' % first + elif n == 2: + first = formatter(obj[0]) + last = formatter(obj[-1]) + summary = '[%s, %s], ' % (first, last) + else: + + if n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in obj[:n]] + tail = [formatter(x) for x in obj[-n:]] + else: + head = [] + tail = [formatter(x) for x in obj] + + # adjust all values to max length if needed + if is_justify: + + # however, if we are not truncated and we are only a single + # line, then don't justify + if (is_truncated or + not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + max_len = max(best_len(head), best_len(tail)) + head = [x.rjust(max_len) for x in head] + tail = [x.rjust(max_len) for x in tail] + + summary = "" + line = space2 + + for i in range(len(head)): + word = head[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + + if is_truncated: + 
# remove trailing space of last line + summary += line.rstrip() + space2 + '...' + line = space2 + + for i in range(len(tail) - 1): + word = tail[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + + # last value: no sep added + 1 space of width used for trailing ',' + summary, line = _extend_line(summary, line, tail[-1], + display_width - 2, space2) + summary += line + summary += '],' + + if len(summary) > (display_width): + summary += space1 + else: # one row + summary += ' ' + + # remove initial space + summary = '[' + summary[len(space2):] + + return summary + + +def format_object_attrs(obj): + """ + Return a list of tuples of the (attr, formatted_value) + for common attrs, including dtype, name, length + + Parameters + ---------- + obj : object + must be iterable + + Returns + ------- + list + + """ + attrs = [] + if hasattr(obj, 'dtype'): + attrs.append(('dtype', "'{}'".format(obj.dtype))) + if getattr(obj, 'name', None) is not None: + attrs.append(('name', default_pprint(obj.name))) + max_seq_items = get_option('display.max_seq_items') or len(obj) + if len(obj) > max_seq_items: + attrs.append(('length', len(obj))) + return attrs