From df3f367ca583f55d7bf47d45629d0b26895d21cb Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Fri, 3 Sep 2021 10:06:17 +0200
Subject: [PATCH 01/10] bpo-12499: textwrap.wrap: add control for fonts with
 different character widths

This also provides a generic solution for bpo-24665
---
 Doc/library/textwrap.rst  |  6 ++++++
 Lib/test/test_textwrap.py | 20 ++++++++++++++++++++
 Lib/textwrap.py           | 22 +++++++++++++---------
 3 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index 7780e241769657..59958e2bfe143f 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -281,6 +281,12 @@ hyphenated words; only then will long words be broken if necessary, unless
       .. versionadded:: 3.4
 
 
+   .. attribute:: text_len
+
+      (default: ``len``) Used to determine the length of a string. You can
+      provide a custom function, e.g. to account for wide characters.
+
+
    .. index:: single: ...; placeholder
 
    .. attribute:: placeholder
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index dfbc2b93dfc0d6..7ebb4a49534ec1 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -9,6 +9,7 @@
 #
 
 import unittest
+import unicodedata
 
 from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten
 
@@ -1076,5 +1077,24 @@ def test_first_word_too_long_but_placeholder_fits(self):
         self.check_shorten("Helloo", 5, "[...]")
 
 
+class WideCharacterTestCase(BaseTestCase):
+    def setUp(self):
+        def text_len(text):
+            n = 0
+            for c in text:
+                if unicodedata.east_asian_width(c) in ['F', 'W']:
+                    n += 2
+                else:
+                    n += 1
+            return n
+
+        self.wrapper = TextWrapper(width=5, text_len=text_len)
+
+    def test_wide_character(self):
+        text = "123 🔧"
+        result = self.wrapper.wrap(text, **kwargs)
+        self.check(result, ["123", "🔧"])
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 841de9baecf5d8..c304571a672850 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -125,7 +125,8 @@ def __init__(self,
                  tabsize=8,
                  *,
                  max_lines=None,
-                 placeholder=' [...]'):
+                 placeholder=' [...]',
+                 text_len=len):
         self.width = width
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
@@ -138,6 +139,7 @@ def __init__(self,
         self.tabsize = tabsize
         self.max_lines = max_lines
         self.placeholder = placeholder
+        self.text_len = text_len
 
 
     # -- Private methods -----------------------------------------------
@@ -217,7 +219,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         if self.break_long_words:
             end = space_left
             chunk = reversed_chunks[-1]
-            if self.break_on_hyphens and len(chunk) > space_left:
+            if self.break_on_hyphens and self.text_len(chunk) > space_left:
                 # break after last hyphen, but only if there are
                 # non-hyphens before it
                 hyphen = chunk.rfind('-', 0, space_left)
@@ -259,7 +261,8 @@ def _wrap_chunks(self, chunks):
                 indent = self.subsequent_indent
             else:
                 indent = self.initial_indent
-            if len(indent) + len(self.placeholder.lstrip()) > self.width:
+            if self.text_len(indent) +
+                    self.text_len(self.placeholder.lstrip()) > self.width:
                 raise ValueError("placeholder too large for max width")
 
         # Arrange in reverse order so items can be efficiently popped
@@ -280,7 +283,7 @@ def _wrap_chunks(self, chunks):
                 indent = self.initial_indent
 
             # Maximum width for this line.
-            width = self.width - len(indent)
+            width = self.width - self.text_len(indent)
 
             # First chunk on line is whitespace -- drop it, unless this
             # is the very beginning of the text (ie. no lines started yet).
@@ -303,11 +306,11 @@ def _wrap_chunks(self, chunks):
             # fit on *any* line (not just this one).
             if chunks and len(chunks[-1]) > width:
                 self._handle_long_word(chunks, cur_line, cur_len, width)
-                cur_len = sum(map(len, cur_line))
+                cur_len = sum(map(self.text_len, cur_line))
 
             # If the last chunk on this line is all whitespace, drop it.
             if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
-                cur_len -= len(cur_line[-1])
+                cur_len -= self.text_len(cur_line[-1])
                 del cur_line[-1]
 
             if cur_line:
@@ -323,16 +326,17 @@ def _wrap_chunks(self, chunks):
                 else:
                     while cur_line:
                         if (cur_line[-1].strip() and
-                            cur_len + len(self.placeholder) <= width):
+                            cur_len + self.text_len(self.placeholder) <= width):
                             cur_line.append(self.placeholder)
                             lines.append(indent + ''.join(cur_line))
                             break
-                        cur_len -= len(cur_line[-1])
+                        cur_len -= self.text_len(cur_line[-1])
                         del cur_line[-1]
                     else:
                         if lines:
                             prev_line = lines[-1].rstrip()
-                            if (len(prev_line) + len(self.placeholder) <=
+                            if (self.text_len(prev_line) +
+                                    self.text_len(self.placeholder) <=
                                     self.width):
                                 lines[-1] = prev_line + self.placeholder
                                 break

From 97a2ec81f13e87ca5a9f89488bf633c3dfa86f91 Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Fri, 3 Sep 2021 18:09:00 +0200
Subject: [PATCH 02/10] react to feedback

---
 Doc/library/textwrap.rst  |  2 ++
 Lib/test/test_textwrap.py | 11 ++++-------
 Lib/textwrap.py           |  4 ++--
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index 59958e2bfe143f..e7c153877e7a18 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -286,6 +286,8 @@ hyphenated words; only then will long words be broken if necessary, unless
       (default: ``len``) Used to determine the length of a string. You can
       provide a custom function, e.g. to account for wide characters.
 
+      .. versionadded:: 3.11
+
 
    .. index:: single: ...; placeholder
 
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 7ebb4a49534ec1..21167008e4ae39 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1078,22 +1078,19 @@ def test_first_word_too_long_but_placeholder_fits(self):
 
 
 class WideCharacterTestCase(BaseTestCase):
-    def setUp(self):
+    def test_wide_character(self):
         def text_len(text):
             n = 0
             for c in text:
-                if unicodedata.east_asian_width(c) in ['F', 'W']:
+                if unicodedata.east_asian_width(c) in {'F', 'W'}:
                     n += 2
                 else:
                     n += 1
             return n
 
-        self.wrapper = TextWrapper(width=5, text_len=text_len)
-
-    def test_wide_character(self):
         text = "123 🔧"
-        result = self.wrapper.wrap(text, **kwargs)
-        self.check(result, ["123", "🔧"])
+        expected = ["123", "🔧"]
+        self.check_wrap(text, 6, expected, text_len=text_len)
 
 
 if __name__ == '__main__':
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index c304571a672850..9dd7d940552510 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -261,8 +261,8 @@ def _wrap_chunks(self, chunks):
                 indent = self.subsequent_indent
             else:
                 indent = self.initial_indent
-            if self.text_len(indent) +
-                    self.text_len(self.placeholder.lstrip()) > self.width:
+            if (self.text_len(indent) +
+                    self.text_len(self.placeholder.lstrip()) > self.width):
                 raise ValueError("placeholder too large for max width")
 
         # Arrange in reverse order so items can be efficiently popped

From db82b6cdc04e3ed7f4628b6840742060d7c933c4 Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Fri, 3 Sep 2021 18:15:40 +0200
Subject: [PATCH 03/10] typo

---
 Lib/test/test_textwrap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 21167008e4ae39..bb3586073d9e88 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1090,7 +1090,7 @@ def text_len(text):
 
         text = "123 🔧"
         expected = ["123", "🔧"]
-        self.check_wrap(text, 6, expected, text_len=text_len)
+        self.check_wrap(text, 5, expected, text_len=text_len)
 
 
 if __name__ == '__main__':

From f457e20d835bc5099da512fb621b64ea887306ef Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Fri, 3 Sep 2021 20:18:38 +0200
Subject: [PATCH 04/10] fix missing len occurrences

---
 Lib/textwrap.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 9dd7d940552510..76358b235c4a32 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -291,7 +291,7 @@ def _wrap_chunks(self, chunks):
                 del chunks[-1]
 
             while chunks:
-                l = len(chunks[-1])
+                l = self.text_len(chunks[-1])
 
                 # Can at least squeeze this chunk onto the current line.
                 if cur_len + l <= width:
@@ -304,7 +304,7 @@ def _wrap_chunks(self, chunks):
 
             # The current line is full, and the next chunk is too big to
             # fit on *any* line (not just this one).
-            if chunks and len(chunks[-1]) > width:
+            if chunks and self.text_len(chunks[-1]) > width:
                 self._handle_long_word(chunks, cur_line, cur_len, width)
                 cur_len = sum(map(self.text_len, cur_line))
 

From e164780bf4ebf5713c10c088929e3876626699be Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Sat, 4 Sep 2021 22:43:34 +0200
Subject: [PATCH 05/10] add more tests

---
 Lib/test/test_textwrap.py | 50 ++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index bb3586073d9e88..3e07ea9f9e3999 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1078,19 +1078,47 @@ def test_first_word_too_long_but_placeholder_fits(self):
 
 
 class WideCharacterTestCase(BaseTestCase):
-    def test_wide_character(self):
-        def text_len(text):
-            n = 0
-            for c in text:
-                if unicodedata.east_asian_width(c) in {'F', 'W'}:
-                    n += 2
-                else:
-                    n += 1
-            return n
+    def text_len(self, text):
+        n = 0
+        for c in text:
+            if unicodedata.east_asian_width(c) in {'F', 'W'}:
+                n += 2
+            else:
+                n += 1
+        return n
 
+    def check_shorten(self, text, width, expect, **kwargs):
+        result = shorten(text, width, **kwargs)
+        self.check(result, expect)
+
+    def test_wrap(self):
         text = "123 🔧"
-        expected = ["123", "🔧"]
-        self.check_wrap(text, 5, expected, text_len=text_len)
+        self.check_wrap(text, 5, ["123 🔧"])
+        self.check_wrap(text, 5, ["123", "🔧"], text_len=self.text_len)
+
+    def test_wrap_initial_indent(self):
+        text = "12 12"
+        self.check_wrap(text, 6, ["🔧12 12"], initial_indent="🔧")
+        self.check_wrap(text, 6, ["🔧12", "12"], initial_indent="🔧",
+                        text_len=self.text_len)
+
+    def test_wrap_subsequent_indent(self):
+        text = "12 12 12 12"
+        self.check_wrap(text, 6, ["12 12", "🔧12 12"], subsequent_indent="🔧")
+        self.check_wrap(text, 6, ["12 12", "🔧12", "🔧12"],
+                        subsequent_indent="🔧", text_len=self.text_len)
+
+    def test_shorten(self):
+        text = "123 1234🔧"
+        expected = "123 [...]"
+        self.check_shorten(text, 9, "123 1234🔧")
+        self.check_shorten(text, 9, "123 [...]", text_len=self.text_len)
+
+    def test_shorten_placeholder(self):
+        text = "123 1 123"
+        self.check_shorten(text, 7, "123 1 🔧", placeholder=" 🔧")
+        self.check_shorten(text, 7, "123 🔧", placeholder=" 🔧",
+                           text_len=self.text_len)
 
 
 if __name__ == '__main__':

From 09ce4cee17454ce8067c8dd1f82ac170c1054e06 Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Sun, 5 Sep 2021 07:50:29 +0200
Subject: [PATCH 06/10] fix idle test

---
 Lib/idlelib/idle_test/test_calltip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/idlelib/idle_test/test_calltip.py b/Lib/idlelib/idle_test/test_calltip.py
index b23915c5ab7849..0900d7fca6c4a8 100644
--- a/Lib/idlelib/idle_test/test_calltip.py
+++ b/Lib/idlelib/idle_test/test_calltip.py
@@ -99,7 +99,7 @@ def test_signature_wrap(self):
 (width=70, initial_indent='', subsequent_indent='', expand_tabs=True,
     replace_whitespace=True, fix_sentence_endings=False, break_long_words=True,
     drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None,
-    placeholder=' [...]')
+    placeholder=' [...]', text_len=<built-in function len>)
 Object for wrapping/filling text.  The public interface consists of
 the wrap() and fill() methods; the other methods are just there for
 subclasses to override in order to tweak the default behaviour.

From bf8dad5755cd86ba60af8deb6805e2244b400650 Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Sun, 5 Sep 2021 07:50:35 +0200
Subject: [PATCH 07/10] optimize text_len function

---
 Lib/test/test_textwrap.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 3e07ea9f9e3999..831d826d100ea1 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1079,13 +1079,10 @@ def test_first_word_too_long_but_placeholder_fits(self):
 
 class WideCharacterTestCase(BaseTestCase):
     def text_len(self, text):
-        n = 0
-        for c in text:
-            if unicodedata.east_asian_width(c) in {'F', 'W'}:
-                n += 2
-            else:
-                n += 1
-        return n
+        sum(
+            2 if unicodedata.east_asian_width(c) in {'F', 'W'} else 1
+            for c in text
+        )
 
     def check_shorten(self, text, width, expect, **kwargs):
         result = shorten(text, width, **kwargs)

From 68e8098a68a952ddca55981abefb8b159c4b2bcf Mon Sep 17 00:00:00 2001
From: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: Wed, 13 Oct 2021 07:38:00 +0200
Subject: [PATCH 08/10] fixup

---
 Lib/test/test_textwrap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 831d826d100ea1..b14fe366aeb7ae 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1079,7 +1079,7 @@ def test_first_word_too_long_but_placeholder_fits(self):
 
 class WideCharacterTestCase(BaseTestCase):
     def text_len(self, text):
-        sum(
+        return sum(
             2 if unicodedata.east_asian_width(c) in {'F', 'W'} else 1
             for c in text
         )

From e5d6d88c57615a9aa70ccf763a546917c836187e Mon Sep 17 00:00:00 2001
From: Tip ten Brink <75669206+tiptenbrink@users.noreply.github.com>
Date: Tue, 9 Nov 2021 21:00:49 +0100
Subject: [PATCH 09/10] _find_width_index and _handle_long_word change

---
 Lib/test/test_textwrap.py | 13 +++++++++++++
 Lib/textwrap.py           | 21 +++++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index b14fe366aeb7ae..d983d1ab3f01ac 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1118,5 +1118,18 @@ def test_shorten_placeholder(self):
                            text_len=self.text_len)
 
 
+class ZeroWidthTestCase(BaseTestCase):
+    def text_len(self, text):
+        return sum(
+            0 if c == 'Q' else 1
+            for c in text
+        )
+
+    def test_zero_width_text_len(self):
+
+        text = "0QQ1234QQ56789"
+        self.check_wrap(text, 6, ["0QQ1234QQ5", "6789"], text_len=self.text_len)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 76358b235c4a32..5dfec9e1ca746b 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -199,6 +199,23 @@ def _fix_sentence_endings(self, chunks):
             else:
                 i += 1
 
+    def _find_width_index(self, text, width):
+        """_find_width_index(text : string, width: int)
+
+        Find at which index the text has the required width.
+        """
+        # In most cases text_len will just use the number of characters, so this heuristic prevents calculating width
+        # for each character
+        if self.text_len(text[:width]) == width:
+            # For character widths greater than one, width can be more than the number of characters
+            return min(width, len(text))
+        cur_text = ''
+        for i, c in enumerate(text):
+            cur_text += c
+            cur_width = self.text_len(cur_text)
+            if cur_width >= width:
+                return i+1
+
     def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         """_handle_long_word(chunks : [string],
                              cur_line : [string],
@@ -217,12 +234,12 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         # If we're allowed to break long words, then do so: put as much
         # of the next chunk onto the current line as will fit.
         if self.break_long_words:
-            end = space_left
             chunk = reversed_chunks[-1]
+            end = self._find_width_index(chunk, space_left)
             if self.break_on_hyphens and self.text_len(chunk) > space_left:
                 # break after last hyphen, but only if there are
                 # non-hyphens before it
-                hyphen = chunk.rfind('-', 0, space_left)
+                hyphen = chunk.rfind('-', 0, end)
                 if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
                     end = hyphen + 1
             cur_line.append(chunk[:end])

From 7b32d0b1228987e2e13fcd324988b854c754f51b Mon Sep 17 00:00:00 2001
From: Tip ten Brink <75669206+tiptenbrink@users.noreply.github.com>
Date: Wed, 10 Nov 2021 17:38:55 +0100
Subject: [PATCH 10/10] Apply changes, ensure min 1 char on line

---
 Lib/test/test_textwrap.py | 19 ++++++++++++++++---
 Lib/textwrap.py           | 16 +++++++++-------
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index d983d1ab3f01ac..665418e7a072fe 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1118,18 +1118,31 @@ def test_shorten_placeholder(self):
                            text_len=self.text_len)
 
 
-class ZeroWidthTestCase(BaseTestCase):
+class CustomWidthTestCase(BaseTestCase):
     def text_len(self, text):
+        lengths = {
+            'A': 4,
+            'B': 2,
+            'Q': 0,
+        }
+
         return sum(
-            0 if c == 'Q' else 1
+            lengths[c] if c in lengths else 1
             for c in text
         )
 
     def test_zero_width_text_len(self):
-
         text = "0QQ1234QQ56789"
         self.check_wrap(text, 6, ["0QQ1234QQ5", "6789"], text_len=self.text_len)
 
+    def test_char_longer_than_width(self):
+        text = "AA0123"
+        self.check_wrap(text, 3, ["A", "A", "012", "3"], text_len=self.text_len)
+
+    def test_next_char_overflow(self):
+        text = "BB0123"
+        self.check_wrap(text, 3, ["B", "B0", "123"], text_len=self.text_len)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 5dfec9e1ca746b..d4334154d9d9cb 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -200,21 +200,23 @@ def _fix_sentence_endings(self, chunks):
                 i += 1
 
     def _find_width_index(self, text, width):
-        """_find_width_index(text : string, width: int)
+        """_find_width_index(text : string, width : int)
 
-        Find at which index the text has the required width.
+        Find the index at which the text reaches the required width. With a
+        custom text_len, this index may differ from the required width.
         """
-        # In most cases text_len will just use the number of characters, so this heuristic prevents calculating width
-        # for each character
+        # With the default len as self.text_len, the index equals the width;
+        # this shortcut avoids measuring the text one character at a time.
         if self.text_len(text[:width]) == width:
-            # For character widths greater than one, width can be more than the number of characters
+            # For character widths greater than one, width can be more than the
+            # number of characters
             return min(width, len(text))
         cur_text = ''
         for i, c in enumerate(text):
             cur_text += c
             cur_width = self.text_len(cur_text)
-            if cur_width >= width:
-                return i+1
+            if cur_width > width:
+                return max(i, 1)
 
     def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         """_handle_long_word(chunks : [string],