Skip to content

CLN: move common printing utilities to pandas.io.formats.printing #21234

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 6 additions & 117 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,11 @@
import pandas.core.missing as missing
import pandas.core.algorithms as algos
import pandas.core.sorting as sorting
from pandas.io.formats.printing import pprint_thing
from pandas.io.formats.printing import (
pprint_thing, default_pprint, format_object_summary, format_object_attrs)
from pandas.core.ops import make_invalid_op
from pandas.core.config import get_option
from pandas.core.strings import StringMethods


# NOTE: ``default_pprint`` is imported from pandas.io.formats.printing
# above; it is intentionally not redefined here so the two modules share
# a single implementation.

# public names exported by this module
__all__ = ['Index']

# inferred types whose values cannot be ordered against one another
_unsortable_types = frozenset(('mixed', 'mixed-integer'))
Expand Down Expand Up @@ -1034,133 +1028,28 @@ def _format_space(self):
@property
def _formatter_func(self):
    """
    Return the formatter function
    """
    # default_pprint is the shared element formatter imported from
    # pandas.io.formats.printing
    return default_pprint

def _format_data(self, name=None):
    """
    Return the formatted data as a unicode string

    Parameters
    ----------
    name : str, optional
        passed through to ``format_object_summary`` as ``name``

    Returns
    -------
    summary string
    """
    # do we want to justify (only do so for non-objects)
    is_justify = not (self.inferred_type in ('string', 'unicode') or
                      (self.inferred_type == 'categorical' and
                       is_object_dtype(self.categories)))

    # the line-wrapping / truncation logic lives in the shared helper
    return format_object_summary(self, self._formatter_func,
                                 is_justify=is_justify, name=name)

def _format_attrs(self):
    """
    Return a list of tuples of the (attr,formatted_value)
    """
    # the shared helper builds the list of attribute tuples
    return format_object_attrs(self)

def to_series(self, index=None, name=None):
"""
Expand Down
154 changes: 154 additions & 0 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,157 @@ class TableSchemaFormatter(BaseFormatter):
# unregister tableschema mime-type
if mimetype in formatters:
formatters[mimetype].enabled = False


def default_pprint(x, max_seq_items=None):
    """
    Pretty-print ``x`` with the settings shared by object reprs.

    Calls ``pprint_thing`` with tab, carriage return and newline
    escaped and with string quoting enabled.

    Parameters
    ----------
    x : object
        the value to format
    max_seq_items : int, optional
        forwarded unchanged to ``pprint_thing``

    Returns
    -------
    the result of ``pprint_thing``
    """
    return pprint_thing(x, escape_chars=('\t', '\r', '\n'),
                        quote_strings=True, max_seq_items=max_seq_items)


def format_object_summary(obj, formatter, is_justify=True, name=None):
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj

    Returns
    -------
    summary string

    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option('display.width') or 80
    if name is None:
        name = obj.__class__.__name__

    # continuation-line prefixes: space1 is used after the closing
    # bracket, space2 before wrapped values
    space1 = "\n%s" % (' ' * (len(name) + 1))
    space2 = "\n%s" % (' ' * (len(name) + 2))

    n = len(obj)
    sep = ','
    max_seq_items = get_option('display.max_seq_items') or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(s, line, value, display_width, next_line_prefix):
        # flush the current line into ``s`` and start a new one when
        # appending ``value`` would reach the display width
        if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
                display_width):
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values):
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    if n == 0:
        summary = '[], '
    elif n == 1:
        first = formatter(obj[0])
        summary = '[%s], ' % first
    elif n == 2:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = '[%s, %s], ' % (first, last)
    else:

        if max_seq_items == 1:
            # max_seq_items // 2 would be 0 and ``obj[-0:]`` is the whole
            # object, so show only the last element instead
            head = []
            tail = [formatter(x) for x in obj[-1:]]
        elif is_truncated:
            n_show = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n_show]]
            tail = [formatter(x) for x in obj[-n_show:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:

            # however, if we are not truncated and we are only a single
            # line, then don't justify
            if (is_truncated or
                    not (len(', '.join(head)) < display_width and
                         len(', '.join(tail)) < display_width)):
                max_len = max(best_len(head), best_len(tail))
                head = [x.rjust(max_len) for x in head]
                tail = [x.rjust(max_len) for x in tail]

        summary = ""
        line = space2

        for value in head:
            word = value + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + '...'
            line = space2

        for value in tail[:-1]:
            word = value + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1],
                                     display_width - 2, space2)
        summary += line
        summary += '],'

        if len(summary) > (display_width):
            summary += space1
        else:  # one row
            summary += ' '

        # remove initial space
        summary = '[' + summary[len(space2):]

    return summary


def format_object_attrs(obj):
    """
    Collect the common (attr, formatted_value) pairs of an object.

    The pairs are emitted in the order dtype, name, length; each one is
    only present when it applies to ``obj``.

    Parameters
    ----------
    obj : object
        must be iterable

    Returns
    -------
    list

    """
    pairs = []

    # dtype is reported only for objects that expose one
    if hasattr(obj, 'dtype'):
        pairs.append(('dtype', "'{}'".format(obj.dtype)))

    # a missing or None name is left out
    obj_name = getattr(obj, 'name', None)
    if obj_name is not None:
        pairs.append(('name', default_pprint(obj_name)))

    # length is reported only when it exceeds display.max_seq_items
    size = len(obj)
    limit = get_option('display.max_seq_items') or size
    if size > limit:
        pairs.append(('length', size))

    return pairs