Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1118,5 +1118,31 @@ def test_shorten_placeholder(self):
text_len=self.text_len)


class CustomWidthTestCase(BaseTestCase):
    """Wrapping tests that pass a custom text_len to the wrapper.

    The custom measure gives some characters a width other than one, so
    the index at which a line is cut no longer equals the line width.
    """

    def text_len(self, text):
        """Return the total width of text under the test's width table.

        'A' counts as 4, 'B' as 2 and 'Q' as 0; every other character
        counts as 1.
        """
        special_widths = {'A': 4, 'B': 2, 'Q': 0}
        return sum(special_widths.get(char, 1) for char in text)

    def test_zero_width_text_len(self):
        # 'Q' has no width, so the first line is expected to hold ten
        # characters even though the requested width is only 6.
        self.check_wrap(
            "0QQ1234QQ56789",
            6,
            ["0QQ1234QQ5", "6789"],
            text_len=self.text_len,
        )

    def test_char_longer_than_width(self):
        # A single 'A' (width 4) is wider than the requested width of 3;
        # each one is expected to end up alone on its own line.
        self.check_wrap(
            "AA0123",
            3,
            ["A", "A", "012", "3"],
            text_len=self.text_len,
        )

    def test_next_char_overflow(self):
        # One 'B' (width 2) fits in width 3 but a second one would not,
        # so the expected break falls between the two.
        self.check_wrap(
            "BB0123",
            3,
            ["B", "B0", "123"],
            text_len=self.text_len,
        )


# Run the tests via unittest when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
23 changes: 21 additions & 2 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,25 @@ def _fix_sentence_endings(self, chunks):
else:
i += 1

def _find_width_index(self, text, width):
    """_find_width_index(text : string, width : int) -> int

    Return the index at which 'text' should be cut so that text[:index]
    fits in 'width', where width is measured by self.text_len rather
    than by counting characters.  With a custom text_len this index is
    generally not equal to 'width'.

    If no leading slice of 'text' is wider than 'width', len(text) is
    returned.  When the text has to be scanned, the result is never
    less than 1, even if the first character alone is wider than
    'width' (presumably so that the caller always consumes something).
    """
    # Shortcut: if the first 'width' characters measure exactly 'width',
    # the cut index is 'width' itself and no scan is needed.  With the
    # default text_len of len this holds whenever the text has at least
    # 'width' characters.  The result is capped at len(text) because,
    # for characters wider than one, 'width' can exceed the number of
    # characters.
    # NOTE: the shortcut does not look past index 'width', so any
    # zero-width characters directly after it are not included.
    if self.text_len(text[:width]) == width:
        return min(width, len(text))

    # Measure successively longer prefixes and stop at the first one
    # that is too wide.  A linear scan is used instead of a binary
    # search because a custom text_len is not guaranteed to grow with
    # every character (widths may be zero, or even negative).
    for end in range(1, len(text) + 1):
        if self.text_len(text[:end]) > width:
            # text[:end] overflows, so cut just before its last
            # character, but never return an empty slice.
            return max(end - 1, 1)

    # Nothing overflowed: the whole text fits.
    return len(text)

def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
"""_handle_long_word(chunks : [string],
cur_line : [string],
Expand All @@ -217,12 +236,12 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
# If we're allowed to break long words, then do so: put as much
# of the next chunk onto the current line as will fit.
if self.break_long_words:
end = space_left
chunk = reversed_chunks[-1]
end = self._find_width_index(chunk, space_left)
if self.break_on_hyphens and self.text_len(chunk) > space_left:
# break after last hyphen, but only if there are
# non-hyphens before it
hyphen = chunk.rfind('-', 0, space_left)
hyphen = chunk.rfind('-', 0, end)
if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
end = hyphen + 1
cur_line.append(chunk[:end])
Expand Down