From bc32b733c35b3bb029b6f6ec6b1c94e934d10c36 Mon Sep 17 00:00:00 2001 From: Lunran Date: Thu, 14 Sep 2017 22:54:15 +0900 Subject: [PATCH 1/3] just copied _format_data(self) from Index to CategoricalFormatter --- pandas/io/formats/format.py | 109 ++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 547b9676717c9..ccb6721f838ab 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -125,6 +125,115 @@ def _get_formatted_values(self): return format_array(self.categorical.get_values(), None, float_format=None, na_rep=self.na_rep) + def _format_data(self): + """ + Return the formatted data as a unicode string + """ + from pandas.io.formats.console import get_console_size + from pandas.io.formats.format import _get_adjustment + display_width, _ = get_console_size() + if display_width is None: + display_width = get_option('display.width') or 80 + + space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) + + n = len(self) + sep = ',' + max_seq_items = get_option('display.max_seq_items') or n + formatter = self._formatter_func + + # do we want to justify (only do so for non-objects) + is_justify = not (self.inferred_type in ('string', 'unicode') or + (self.inferred_type == 'categorical' and + is_object_dtype(self.categories))) + + # are we a truncated display + is_truncated = n > max_seq_items + + # adj can optionally handle unicode East Asian width + adj = _get_adjustment() + + def _extend_line(s, line, value, display_width, next_line_prefix): + + if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= + display_width): + s += line.rstrip() + line = next_line_prefix + line += value + return s, line + + def best_len(values): + if values: + return max([adj.len(x) for x in values]) + else: + return 0 + + if n == 0: + summary = '[], ' + elif n == 1: + first = formatter(self[0]) + summary = 
'[%s], ' % first + elif n == 2: + first = formatter(self[0]) + last = formatter(self[-1]) + summary = '[%s, %s], ' % (first, last) + else: + + if n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in self[:n]] + tail = [formatter(x) for x in self[-n:]] + else: + head = [] + tail = [formatter(x) for x in self] + + # adjust all values to max length if needed + if is_justify: + + # however, if we are not truncated and we are only a single + # line, then don't justify + if (is_truncated or + not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + max_len = max(best_len(head), best_len(tail)) + head = [x.rjust(max_len) for x in head] + tail = [x.rjust(max_len) for x in tail] + + summary = "" + line = space2 + + for i in range(len(head)): + word = head[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + + if is_truncated: + # remove trailing space of last line + summary += line.rstrip() + space2 + '...' 
+ line = space2 + + for i in range(len(tail) - 1): + word = tail[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + + # last value: no sep added + 1 space of width used for trailing ',' + summary, line = _extend_line(summary, line, tail[-1], + display_width - 2, space2) + summary += line + summary += '],' + + if len(summary) > (display_width): + summary += space1 + else: # one row + summary += ' ' + + # remove initial space + summary = '[' + summary[len(space2):] + + return summary + def to_string(self): categorical = self.categorical From a337ae2c1eddca00174965d27509005e9154a5bd Mon Sep 17 00:00:00 2001 From: Lunran Date: Sun, 17 Sep 2017 17:45:15 +0900 Subject: [PATCH 2/3] fixed testcases and passed them --- pandas/core/categorical.py | 2 +- pandas/io/formats/format.py | 77 +++---- pandas/tests/test_categorical.py | 356 ++++++++++++++++++++++--------- 3 files changed, 288 insertions(+), 147 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 6f7eafe43dbbb..87d1f4f4a5da4 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1624,7 +1624,7 @@ def _tidy_repr(self, max_vals=10, footer=True): head = self[:num]._get_repr(length=False, footer=False) tail = self[-(max_vals - num):]._get_repr(length=False, footer=False) - result = '%s, ..., %s' % (head[:-1], tail[1:]) + result = '%s,\n ...\n %s' % (head[:-1], tail[1:]) if footer: result = '%s\n%s' % (result, self._repr_footer()) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ccb6721f838ab..73f2165a993fb 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -125,7 +125,7 @@ def _get_formatted_values(self): return format_array(self.categorical.get_values(), None, float_format=None, na_rep=self.na_rep) - def _format_data(self): + def _format_data(self, values): """ Return the formatted data as a unicode string """ @@ -135,18 +135,11 @@ def _format_data(self): if 
display_width is None: display_width = get_option('display.width') or 80 - space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) - space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) + space = "\n " - n = len(self) + n = len(values) sep = ',' max_seq_items = get_option('display.max_seq_items') or n - formatter = self._formatter_func - - # do we want to justify (only do so for non-objects) - is_justify = not (self.inferred_type in ('string', 'unicode') or - (self.inferred_type == 'categorical' and - is_object_dtype(self.categories))) # are we a truncated display is_truncated = n > max_seq_items @@ -170,67 +163,59 @@ def best_len(values): return 0 if n == 0: - summary = '[], ' + summary = '[]' elif n == 1: - first = formatter(self[0]) - summary = '[%s], ' % first + first = values[0].strip('\'') + summary = '[%s]' % first elif n == 2: - first = formatter(self[0]) - last = formatter(self[-1]) - summary = '[%s, %s], ' % (first, last) + first = values[0].strip('\'') + last = values[-1].strip('\'') + summary = '[%s, %s]' % (first, last) else: if n > max_seq_items: n = min(max_seq_items // 2, 10) - head = [formatter(x) for x in self[:n]] - tail = [formatter(x) for x in self[-n:]] + head = [x.strip('\'') for x in values[:n]] + tail = [x.strip('\'') for x in values[-n:]] else: head = [] - tail = [formatter(x) for x in self] - - # adjust all values to max length if needed - if is_justify: + tail = [x.strip('\'') for x in values] - # however, if we are not truncated and we are only a single - # line, then don't justify - if (is_truncated or - not (len(', '.join(head)) < display_width and - len(', '.join(tail)) < display_width)): - max_len = max(best_len(head), best_len(tail)) - head = [x.rjust(max_len) for x in head] - tail = [x.rjust(max_len) for x in tail] + # however, if we are not truncated and we are only a single + # line, then don't justify + if (is_truncated or + not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + 
max_len = max(best_len(head), best_len(tail)) + head = [x.rjust(max_len) for x in head] + tail = [x.rjust(max_len) for x in tail] summary = "" - line = space2 + line = space for i in range(len(head)): word = head[i] + sep + ' ' summary, line = _extend_line(summary, line, word, - display_width, space2) + display_width, space) if is_truncated: # remove trailing space of last line - summary += line.rstrip() + space2 + '...' - line = space2 + summary += line.rstrip() + space + '...' + line = space for i in range(len(tail) - 1): word = tail[i] + sep + ' ' summary, line = _extend_line(summary, line, word, - display_width, space2) + display_width, space) # last value: no sep added + 1 space of width used for trailing ',' summary, line = _extend_line(summary, line, tail[-1], - display_width - 2, space2) + display_width - 2, space) summary += line - summary += '],' - - if len(summary) > (display_width): - summary += space1 - else: # one row - summary += ' ' + summary += ']' # remove initial space - summary = '[' + summary[len(space2):] + summary = '[' + summary[len(space):] return summary @@ -245,10 +230,8 @@ def to_string(self): fmt_values = self._get_formatted_values() - result = [u('{i}').format(i=i) for i in fmt_values] - result = [i.strip() for i in result] - result = u(', ').join(result) - result = [u('[') + result + u(']')] + fmt_values = [i.strip() for i in fmt_values] + result = [self._format_data(fmt_values)] if self.footer: footer = self._get_footer() if footer: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index e6fa5d1af55be..2307f688cbe64 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -698,8 +698,8 @@ def test_print(self): def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'], fastpath=True) - expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600", - "Categories (3, object): [a, b, c]"] + expected = ["[a, b, c, a, b,", " ...", " b, c, a, b, 
c]", + "Length: 600", "Categories (3, object): [a, b, c]"] expected = "\n".join(expected) actual = repr(factor) @@ -740,7 +740,9 @@ def test_unicode_print(self): c = pd.Categorical(['aaaaa', 'bb', 'cccc'] * 20) expected = u"""\ -[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] +[aaaaa, bb, cccc, aaaaa, bb, + ... + bb, cccc, aaaaa, bb, cccc] Length: 60 Categories (3, object): [aaaaa, bb, cccc]""" @@ -748,7 +750,9 @@ def test_unicode_print(self): c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""\ -[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +[ああああ, いいいいい, ううううううう, ああああ, いいいいい, + ... + いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa @@ -759,7 +763,9 @@ def test_unicode_print(self): with option_context('display.unicode.east_asian_width', True): c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) - expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] + expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, + ... + いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa @@ -2232,6 +2238,23 @@ def test_repr(self): assert exp == a.__unicode__() def test_categorical_repr(self): + c = pd.Categorical([]) + exp = "[], Categories (0, object): []" + + assert repr(c) == exp + + c = pd.Categorical([1]) + exp = """[1] +Categories (1, int64): [1]""" + + assert repr(c) == exp + + c = pd.Categorical([1, 2]) + exp = """[1, 2] +Categories (2, int64): [1, 2]""" + + assert repr(c) == exp + c = pd.Categorical([1, 2, 3]) exp = """[1, 2, 3] Categories (3, int64): [1, 2, 3]""" @@ -2245,14 +2268,18 @@ def test_categorical_repr(self): assert repr(c) == exp c = pd.Categorical([1, 2, 3, 4, 5] * 10) - exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] + exp = """[1, 2, 3, 4, 5, + ... 
+ 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1, 2, 3, 4, 5]""" assert repr(c) == exp c = pd.Categorical(np.arange(20)) - exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] + exp = """[0, 1, 2, 3, 4, + ... + 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" @@ -2273,44 +2300,65 @@ def test_categorical_repr_ordered(self): assert repr(c) == exp c = pd.Categorical([1, 2, 3, 4, 5] * 10, ordered=True) - exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] + exp = """[1, 2, 3, 4, 5, + ... + 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" assert repr(c) == exp c = pd.Categorical(np.arange(20), ordered=True) - exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] + exp = """[0, 1, 2, 3, 4, + ... + 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]""" assert repr(c) == exp def test_categorical_repr_datetime(self): + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=1) + c = pd.Categorical(idx) + + exp = ( + "[2011-01-01 09:00:00]\n" + "Categories (1, datetime64[ns]): [2011-01-01 09:00:00]") + + assert repr(c) == exp + + idx = pd.date_range('20110101 09:00:00', freq='H', periods=2) + c = pd.Categorical(idx) + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00]\n" + "Categories (2, datetime64[ns]): " + "[2011-01-01 09:00:00, 2011-01-01 10:00:00]") + + assert repr(c) == exp + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) c = pd.Categorical(idx) - # TODO(wesm): exceeding 80 characters in the console is not good - # behavior exp = ( - "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " - "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" - "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " - "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" - " 2011-01-01 12:00:00, " - "2011-01-01 13:00:00]""") + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" + "Categories (5, 
datetime64[ns]): " + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " " + " 2011-01-01 12:00:00, 2011-01-01 13:00:00]""") + assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = ( - "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " - "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " - "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " - "2011-01-01 13:00:00]\n" - "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " - "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" - " 2011-01-01 12:00:00, " - "2011-01-01 13:00:00]") + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00,\n" + " 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00,\n" + " 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): " + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " " + " 2011-01-01 12:00:00, 2011-01-01 13:00:00]") assert repr(c) == exp @@ -2318,25 +2366,25 @@ def test_categorical_repr_datetime(self): tz='US/Eastern') c = pd.Categorical(idx) exp = ( - "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " - "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " - "2011-01-01 13:00:00-05:00]\n" + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " 2011-01-01 13:00:00-05:00]\n" "Categories (5, datetime64[ns, US/Eastern]): " "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" - " " - "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" - " " - "2011-01-01 13:00:00-05:00]") + " " + " 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + " 2011-01-01 13:00:00-05:00]") assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) exp = ( - "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " - "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " - "2011-01-01 
13:00:00-05:00, 2011-01-01 09:00:00-05:00, " - "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " - "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00,\n" + " 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00,\n" + " 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" "Categories (5, datetime64[ns, US/Eastern]): " "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" " " @@ -2349,158 +2397,268 @@ def test_categorical_repr_datetime(self): def test_categorical_repr_datetime_ordered(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) c = pd.Categorical(idx, ordered=True) - exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] -Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): " + "[2011-01-01 09:00:00 < 2011-01-01 10:00:00 <" + " 2011-01-01 11:00:00 <\n" + " " + " 2011-01-01 12:00:00 < 2011-01-01 13:00:00]") # noqa assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] -Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00,\n" + " 
2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00,\n" + " 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): " + "[2011-01-01 09:00:00 < 2011-01-01 10:00:00 <" + " 2011-01-01 11:00:00 <\n" + " " + " 2011-01-01 12:00:00 < 2011-01-01 13:00:00]") # noqa assert repr(c) == exp idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') c = pd.Categorical(idx, ordered=True) - exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < - 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" # noqa + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " 2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <\n" + " " + " 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <\n" + " " + " 2011-01-01 13:00:00-05:00]") # noqa assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < - 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" # noqa + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00,\n" + " 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00,\n" + " 2011-01-01 
12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <\n" + " " + " 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <\n" + " " + " 2011-01-01 13:00:00-05:00]") # noqa assert repr(c) == exp def test_categorical_repr_period(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) c = pd.Categorical(idx) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" # noqa + exp = ( + "[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00," + " 2011-01-01 12:00,\n" + " 2011-01-01 13:00]\n" + "Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00," + " 2011-01-01 11:00, 2011-01-01 12:00,\n" + " 2011-01-01 13:00]") # noqa assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" # noqa + exp = ( + "[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00," + " 2011-01-01 12:00,\n" + " 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00," + " 2011-01-01 11:00,\n" + " 2011-01-01 12:00, 2011-01-01 13:00]\n" + "Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00," + " 2011-01-01 11:00, 2011-01-01 12:00,\n" + " 2011-01-01 13:00]") # noqa assert repr(c) == exp idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + exp = ( + "[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]\n" + "Categories (5, 
period[M]): [2011-01, 2011-02, 2011-03," + " 2011-04, 2011-05]") assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa + exp = ( + "[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01," + " 2011-02, 2011-03,\n" + " 2011-04, 2011-05]\n" + "Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04," + " 2011-05]") # noqa assert repr(c) == exp def test_categorical_repr_period_ordered(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) c = pd.Categorical(idx, ordered=True) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" # noqa + exp = ( + "[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00," + " 2011-01-01 12:00,\n" + " 2011-01-01 13:00]\n" + "Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 <" + " 2011-01-01 11:00 < 2011-01-01 12:00 <\n" + " 2011-01-01 13:00]") # noqa assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" # noqa + exp = ( + "[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00," + " 2011-01-01 12:00,\n" + " 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00," + " 2011-01-01 11:00,\n" + " 2011-01-01 12:00, 2011-01-01 13:00]\n" + "Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 <" + " 2011-01-01 11:00 < 2011-01-01 12:00 <\n" + " 2011-01-01 13:00]") # noqa assert 
repr(c) == exp idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx, ordered=True) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + exp = ( + "[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]\n" + "Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 <" + " 2011-04 < 2011-05]") assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa + exp = ( + "[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02," + " 2011-03,\n" + " 2011-04, 2011-05]\n" + "Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 <" + " 2011-04 < 2011-05]") # noqa assert repr(c) == exp def test_categorical_repr_timedelta(self): idx = pd.timedelta_range('1 days', periods=5) c = pd.Categorical(idx) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + exp = ( + "[1 days, 2 days, 3 days, 4 days, 5 days]\n" + "Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days," + " 5 days]") assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa + exp = ( + "[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days," + " 4 days,\n" + " 5 days]\n" + "Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days," + " 5 days]") # noqa assert repr(c) == exp idx = pd.timedelta_range('1 hours', periods=20) c = pd.Categorical(idx) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 
01:00:00, 19 days 01:00:00] -Length: 20 -Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, - 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 days 01:00:00, 19 days 01:00:00]""" # noqa + exp = ( + "[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00," + " 3 days 01:00:00,\n" + " 4 days 01:00:00,\n" + " ...\n" + " 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00," + " 18 days 01:00:00,\n" + " 19 days 01:00:00]\n" + "Length: 20\n" + "Categories (20, timedelta64[ns]): " + "[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,\n" + " " + " 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,\n" + " " + " 18 days 01:00:00, 19 days 01:00:00]") # noqa assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] -Length: 40 -Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, - 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 days 01:00:00, 19 days 01:00:00]""" # noqa + exp = ( + "[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00," + " 3 days 01:00:00,\n" + " 4 days 01:00:00,\n" + " ...\n" + " 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00," + " 18 days 01:00:00,\n" + " 19 days 01:00:00]\n" + "Length: 40\n" + "Categories (20, timedelta64[ns]): " + "[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,\n" + " " + " 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,\n" + " " + " 18 days 01:00:00, 19 days 01:00:00]") # noqa assert repr(c) == exp def test_categorical_repr_timedelta_ordered(self): idx = pd.timedelta_range('1 days', periods=5) c = pd.Categorical(idx, ordered=True) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + exp = ( + "[1 days, 2 days, 3 days, 4 days, 
5 days]\n" + "Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days <" + " 4 days < 5 days]") # noqa assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + exp = ( + "[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days," + " 4 days,\n" + " 5 days]\n" + "Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days <" + " 4 days < 5 days]") # noqa assert repr(c) == exp idx = pd.timedelta_range('1 hours', periods=20) c = pd.Categorical(idx, ordered=True) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] -Length: 20 -Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < - 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" # noqa + exp = ( + "[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00," + " 3 days 01:00:00,\n" + " 4 days 01:00:00,\n" + " ...\n" + " 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00," + " 18 days 01:00:00,\n" + " 19 days 01:00:00]\n" + "Length: 20\n" + "Categories (20, timedelta64[ns]): " + "[0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <\n" + " " + " 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <\n" + " " + " 18 days 01:00:00 < 19 days 01:00:00]") # noqa assert repr(c) == exp c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] -Length: 40 -Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < - 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" # noqa + exp = ( + "[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00," + " 3 days 01:00:00,\n" + " 4 days 01:00:00,\n" + " ...\n" + " 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00," + " 18 days 01:00:00,\n" + " 19 days 01:00:00]\n" + "Length: 40\n" + "Categories (20, timedelta64[ns]): " + "[0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <\n" + " " + " 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <\n" + " " + " 18 days 01:00:00 < 19 days 01:00:00]") # noqa assert repr(c) == exp From 640149e2d3c9c588d1d4212006a2a3668bfefe10 Mon Sep 17 00:00:00 2001 From: Lunran Date: Sat, 23 Sep 2017 09:56:47 +0900 Subject: [PATCH 3/3] added comment in release note --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5003aa0d97c1c..444b7f4da16bf 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -585,6 +585,7 @@ Categorical - Bug in :func:`Series.isin` when called with a categorical (:issue`16639`) - Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) - Bug in categorical operations with :ref:`Series.cat ' not preserving the original Series' name (:issue:`17509`) +- Bug in categorical formatter not folding the output when it exceeds display width (:issue:`12066`) PyPy ^^^^