From b3ccc450122840e038d1b650d9c4909da6ee54b7 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Fri, 28 Feb 2025 19:28:10 +0000
Subject: [PATCH 01/24] Add logic to wrap and test

---
 Lib/test/test_tools/test_i18n.py              |  5 +++++
 ...-28-19-30-00-00.gh-issue-130703.ajhd21.rst |  1 +
 Tools/i18n/pygettext.py                       | 22 +++++++++++++++++--
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index d73fcff4c9cb11..7de3afaafb9203 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -161,6 +161,11 @@ def test_POT_Creation_Date(self):
             # This will raise if the date format does not exactly match.
             datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
 
+    def test_wrap_to_width(self):
+        msgid = self.extract_docstrings_from_str(
+            '''_("thisisaveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverlongstring")''')
+        self.assertIn('\nlongstring', msgid[1])
+
     def test_funcdocstring(self):
         for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
             with self.subTest(doc):
diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst
new file mode 100644
index 00000000000000..0aec1e94fdcbe7
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst
@@ -0,0 +1 @@
+Wrap msgids to specified ``width`` and not just comments in :program:`pygettext`.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 0f5f32c7d6c18f..2970a570279c17 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -619,9 +619,27 @@ def write_pot_file(messages, options, fp):
             print('#, docstring', file=fp)
         if msg.msgctxt is not None:
             print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
-        print('msgid', normalize(msg.msgid, encoding), file=fp)
+
+        # If msgid is longer than width wrap
+        msgid = normalize(msg.msgid, encoding)[1:-1] # normalize returns "msg"
+        if len(msgid) > options.width:
+            print('msgid ""', file=fp)
+            while msgid:
+                print(f'"{msgid[:options.width]}"', file=fp)
+                msgid = msgid[options.width:]
+        else:
+            print(f'msgid "{msgid}"', file=fp)
+
+        # If msgid_plural is longer than width wrap
         if msg.msgid_plural is not None:
-            print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
+            msgid_plural = normalize(msg.msgid_plural, encoding)[1:-1]  # normalize returns "msg"
+            if len(msgid_plural) > options.width:
+                print('msgid_plural ""', file=fp)
+                while msgid_plural:
+                    print(f'"{msgid_plural[:options.width]}"', file=fp)
+                    msgid_plural = msgid_plural[options.width:]
+            else:
+                print(f'msgid_plural "{msgid_plural}"', file=fp)
             print('msgstr[0] ""', file=fp)
             print('msgstr[1] ""\n', file=fp)
         else:

From 33149ed698b6893920228a5098c35ff3ae8767e7 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Fri, 28 Feb 2025 19:31:47 +0000
Subject: [PATCH 02/24] Fix NEWS name -- We don't want milliseconds

---
 ....ajhd21.rst => 2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename Misc/NEWS.d/next/Tools-Demos/{2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst => 2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst} (100%)

diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst
similarity index 100%
rename from Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst
rename to Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst

From 0e35e36eb3ce4151522ec24c5c8f9317e0e7b79a Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Fri, 28 Feb 2025 19:37:56 +0000
Subject: [PATCH 03/24] Change extract func in test

---
 Lib/test/test_tools/test_i18n.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 7de3afaafb9203..1226aece55c8e7 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -162,7 +162,7 @@ def test_POT_Creation_Date(self):
             datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
 
     def test_wrap_to_width(self):
-        msgid = self.extract_docstrings_from_str(
+        msgid = self.extract_from_str(
             '''_("thisisaveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverlongstring")''')
         self.assertIn('\nlongstring', msgid[1])
 

From 92f227f305728b4e409c2d5e882524832289f9ec Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sat, 1 Mar 2025 09:51:35 +0000
Subject: [PATCH 04/24] Use a modified version of pybabel's code in normalize

---
 Lib/test/test_tools/i18n_data/messages.pot | 38 +++++++-----
 Lib/test/test_tools/i18n_data/messages.py  |  3 +
 Lib/test/test_tools/test_i18n.py           |  5 --
 Tools/i18n/pygettext.py                    | 70 +++++++++++-----------
 4 files changed, 60 insertions(+), 56 deletions(-)

diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
index e8167acfc0742b..03f8dcb942a0ad 100644
--- a/Lib/test/test_tools/i18n_data/messages.pot
+++ b/Lib/test/test_tools/i18n_data/messages.pot
@@ -5,7 +5,7 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\n"
-"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"POT-Creation-Date: 2025-03-01 09:36+0000\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -33,65 +33,71 @@ msgid ""
 "    multiline!\n"
 msgstr ""
 
-#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
-#: messages.py:99 messages.py:100 messages.py:101
+#: messages.py:32
+msgid ""
+"this is a very very very very very very very very very very very very very"
+"long string!"
+msgstr ""
+
+#: messages.py:49 messages.py:92 messages.py:93 messages.py:96 messages.py:97
+#: messages.py:102 messages.py:103 messages.py:104
 msgid "foo"
 msgid_plural "foos"
 msgstr[0] ""
 msgstr[1] ""
 
-#: messages.py:47
+#: messages.py:50
 msgid "something"
 msgstr ""
 
-#: messages.py:50
+#: messages.py:53
 msgid "Hello, {}!"
 msgstr ""
 
-#: messages.py:54
+#: messages.py:57
 msgid "1"
 msgstr ""
 
-#: messages.py:54
+#: messages.py:57
 msgid "2"
 msgstr ""
 
-#: messages.py:55 messages.py:56
+#: messages.py:58 messages.py:59
 msgid "A"
 msgstr ""
 
-#: messages.py:55 messages.py:56
+#: messages.py:58 messages.py:59
 msgid "B"
 msgstr ""
 
-#: messages.py:57
+#: messages.py:60
 msgid "set"
 msgstr ""
 
-#: messages.py:62 messages.py:63
+#: messages.py:65 messages.py:66
 msgid "nested string"
 msgstr ""
 
-#: messages.py:68
+#: messages.py:71
 msgid "baz"
 msgstr ""
 
-#: messages.py:71 messages.py:75
+#: messages.py:74 messages.py:78
 msgid "default value"
 msgstr ""
 
-#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
+#: messages.py:94 messages.py:95 messages.py:98 messages.py:99
 msgctxt "context"
 msgid "foo"
 msgid_plural "foos"
 msgstr[0] ""
 msgstr[1] ""
 
-#: messages.py:102
+#: messages.py:105
 msgid "domain foo"
 msgstr ""
 
-#: messages.py:118 messages.py:119
+#: messages.py:121 messages.py:122
 msgid "world"
 msgid_plural "worlds"
 msgstr[0] ""
diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py
index 9457bcb8611020..5578334df8d19b 100644
--- a/Lib/test/test_tools/i18n_data/messages.py
+++ b/Lib/test/test_tools/i18n_data/messages.py
@@ -28,6 +28,9 @@
     multiline!
 """)
 
+# very long string that should be wrapped
+_("this is a very very very very very very very very very very very very very long string!")
+
 # Invalid arguments
 _()
 _(None)
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 1226aece55c8e7..d73fcff4c9cb11 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -161,11 +161,6 @@ def test_POT_Creation_Date(self):
             # This will raise if the date format does not exactly match.
             datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
 
-    def test_wrap_to_width(self):
-        msgid = self.extract_from_str(
-            '''_("thisisaveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverlongstring")''')
-        self.assertIn('\nlongstring', msgid[1])
-
     def test_funcdocstring(self):
         for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
             with self.subTest(doc):
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 2970a570279c17..0fc612a2f330b7 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -213,21 +213,39 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
-def normalize(s, encoding):
+def normalize(s, encoding, options):
     # This converts the various Python string types into a format that is
-    # appropriate for .po files, namely much closer to C style.
-    lines = s.split('\n')
-    if len(lines) == 1:
-        s = '"' + escape(s, encoding) + '"'
-    else:
-        if not lines[-1]:
-            del lines[-1]
-            lines[-1] = lines[-1] + '\n'
-        for i in range(len(lines)):
-            lines[i] = escape(lines[i], encoding)
-        lineterm = '\\n"\n"'
-        s = '""\n"' + lineterm.join(lines) + '"'
-    return s
+    # appropriate for .po files, namely much closer to C style. While wrapping
+    # to options.width.
+    lines = []
+    for line in s.splitlines(True):
+        if len(escape(line, encoding)) > options.width:
+            words = line.split()
+            words.reverse()
+            while words:
+                buf = []
+                size = 2
+                while words:
+                    word = words[-1]
+                    escaped_word = escape(word, encoding)
+                    add_space = 1 if buf else 0
+                    if size + len(escaped_word) + add_space <= options.width:
+                        buf.append(words.pop())
+                        size += len(escaped_word) + add_space
+                    else:
+                        if not buf:
+                            buf.append(words.pop())
+                        break
+                lines.append(' '.join(buf))
+        else:
+            lines.append(line)
+    if len(lines) <= 1:
+        return '"' + escape(s, encoding) + '"'
+    if lines and not lines[-1]:
+        del lines[-1]
+        lines[-1] += '\n'
+    return '""\n' + '\n'.join(
+        [f'"{escape(line, encoding)}"' for line in lines])
 
 
 def containsAny(str, set):
@@ -618,28 +636,10 @@ def write_pot_file(messages, options, fp):
             # to skip translating some unimportant docstrings.
             print('#, docstring', file=fp)
         if msg.msgctxt is not None:
-            print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
-
-        # If msgid is longer than width wrap
-        msgid = normalize(msg.msgid, encoding)[1:-1] # normalize returns "msg"
-        if len(msgid) > options.width:
-            print('msgid ""', file=fp)
-            while msgid:
-                print(f'"{msgid[:options.width]}"', file=fp)
-                msgid = msgid[options.width:]
-        else:
-            print(f'msgid "{msgid}"', file=fp)
-
-        # If msgid_plural is longer than width wrap
+            print('msgctxt', normalize(msg.msgctxt, encoding, options), file=fp)
+        print('msgid', normalize(msg.msgid, encoding, options), file=fp)
         if msg.msgid_plural is not None:
-            msgid_plural = normalize(msg.msgid_plural, encoding)[1:-1]  # normalize returns "msg"
-            if len(msgid_plural) > options.width:
-                print('msgid_plural ""', file=fp)
-                while msgid_plural:
-                    print(f'"{msgid_plural[:options.width]}"', file=fp)
-                    msgid_plural = msgid_plural[options.width:]
-            else:
-                print(f'msgid_plural "{msgid_plural}"', file=fp)
+            print('msgid_plural', normalize(msg.msgid_plural, encoding, options), file=fp)
             print('msgstr[0] ""', file=fp)
             print('msgstr[1] ""\n', file=fp)
         else:

From f0ee9c47f11948548fa70e1271924ccae5b9e9fa Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sat, 1 Mar 2025 09:53:07 +0000
Subject: [PATCH 05/24] Minor tweak

---
 Lib/test/test_tools/i18n_data/messages.pot | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
index 03f8dcb942a0ad..886d5714735637 100644
--- a/Lib/test/test_tools/i18n_data/messages.pot
+++ b/Lib/test/test_tools/i18n_data/messages.pot
@@ -5,7 +5,7 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\n"
-"POT-Creation-Date: 2025-03-01 09:36+0000\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@li.org>\n"

From 843e3fa364acacda4b0500748a314f5ed7f89f22 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sat, 1 Mar 2025 10:17:29 +0000
Subject: [PATCH 06/24] Update argparse snapshot

---
 Lib/test/translationdata/argparse/msgids.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/translationdata/argparse/msgids.txt b/Lib/test/translationdata/argparse/msgids.txt
index ae89ac74726ecf..2fafeae8353e3a 100644
--- a/Lib/test/translationdata/argparse/msgids.txt
+++ b/Lib/test/translationdata/argparse/msgids.txt
@@ -16,7 +16,7 @@ expected one argument
 ignored explicit argument %r
 invalid %(type)s value: %(value)r
 invalid choice: %(value)r (choose from %(choices)s)
-invalid choice: %(value)r, maybe you meant %(closest)r? (choose from %(choices)s)
+invalid choice: %(value)r, maybe you meant %(closest)r? (choose from%(choices)s)
 not allowed with argument %s
 one of the arguments %s is required
 option '%(option)s' is deprecated

From 7fc34cae55c6e44f9f9571a90c5dd4e584f50354 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 1 Mar 2025 10:19:38 +0000
Subject: [PATCH 07/24] =?UTF-8?q?B=C3=A9n=C3=A9dikt's=20suggestions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Tools/i18n/pygettext.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 0fc612a2f330b7..b2118f82c20ac5 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -228,19 +228,21 @@ def normalize(s, encoding, options):
                 while words:
                     word = words[-1]
                     escaped_word = escape(word, encoding)
+                    escaped_word_len = len(escaped_word)
                     add_space = 1 if buf else 0
-                    if size + len(escaped_word) + add_space <= options.width:
+                    new_size = size + escaped_word_len + add_space
+                    if new_size <= options.width:
                         buf.append(words.pop())
-                        size += len(escaped_word) + add_space
+                        size = new_size
                     else:
                         if not buf:
-                            buf.append(words.pop())
+                            buf = [words.pop()]
                         break
                 lines.append(' '.join(buf))
         else:
             lines.append(line)
     if len(lines) <= 1:
-        return '"' + escape(s, encoding) + '"'
+        return f'"{escape(s, encoding)}"'
     if lines and not lines[-1]:
         del lines[-1]
         lines[-1] += '\n'

From 8d319b407159f0d6d3c891b08c8fa9e28d5c2595 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sat, 1 Mar 2025 11:03:43 +0000
Subject: [PATCH 08/24] Preserve spaces and remove unnecessary checks

---
 Lib/test/test_tools/i18n_data/messages.pot   |  2 +-
 Lib/test/translationdata/argparse/msgids.txt |  2 +-
 Tools/i18n/pygettext.py                      | 31 +++++++++-----------
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
index 886d5714735637..dbc8fd40dc87ac 100644
--- a/Lib/test/test_tools/i18n_data/messages.pot
+++ b/Lib/test/test_tools/i18n_data/messages.pot
@@ -35,7 +35,7 @@ msgstr ""
 
 #: messages.py:32
 msgid ""
-"this is a very very very very very very very very very very very very very"
+"this is a very very very very very very very very very very very very very "
 "long string!"
 msgstr ""
 
diff --git a/Lib/test/translationdata/argparse/msgids.txt b/Lib/test/translationdata/argparse/msgids.txt
index 2fafeae8353e3a..ae89ac74726ecf 100644
--- a/Lib/test/translationdata/argparse/msgids.txt
+++ b/Lib/test/translationdata/argparse/msgids.txt
@@ -16,7 +16,7 @@ expected one argument
 ignored explicit argument %r
 invalid %(type)s value: %(value)r
 invalid choice: %(value)r (choose from %(choices)s)
-invalid choice: %(value)r, maybe you meant %(closest)r? (choose from%(choices)s)
+invalid choice: %(value)r, maybe you meant %(closest)r? (choose from %(choices)s)
 not allowed with argument %s
 one of the arguments %s is required
 option '%(option)s' is deprecated
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index b2118f82c20ac5..54dfe42286df3c 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -148,6 +148,7 @@
 import sys
 import time
 import tokenize
+import re
 from dataclasses import dataclass, field
 from io import BytesIO
 from operator import itemgetter
@@ -220,25 +221,21 @@ def normalize(s, encoding, options):
     lines = []
     for line in s.splitlines(True):
         if len(escape(line, encoding)) > options.width:
-            words = line.split()
+            words = re.split(r'(\s+)', line)
             words.reverse()
+            buf = []
+            size = 2
             while words:
-                buf = []
-                size = 2
-                while words:
-                    word = words[-1]
-                    escaped_word = escape(word, encoding)
-                    escaped_word_len = len(escaped_word)
-                    add_space = 1 if buf else 0
-                    new_size = size + escaped_word_len + add_space
-                    if new_size <= options.width:
-                        buf.append(words.pop())
-                        size = new_size
-                    else:
-                        if not buf:
-                            buf = [words.pop()]
-                        break
-                lines.append(' '.join(buf))
+                word = words.pop()
+                escaped_word_len = len(escape(word, encoding))
+                if size + escaped_word_len <= options.width:
+                    buf.append(word)
+                    size += escaped_word_len
+                else:
+                    lines.append(''.join(buf))
+                    buf = [word]
+                    size = 2 + escaped_word_len
+            lines.append(''.join(buf))
         else:
             lines.append(line)
     if len(lines) <= 1:

From 91976886d4d07f0e8780a402338feb84e9880f7e Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Sat, 1 Mar 2025 11:04:48 +0000
Subject: [PATCH 09/24] Improve comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
---
 Tools/i18n/pygettext.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 54dfe42286df3c..501440d1eedf3b 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -216,8 +216,8 @@ def escape_nonascii(s, encoding):
 
 def normalize(s, encoding, options):
     # This converts the various Python string types into a format that is
-    # appropriate for .po files, namely much closer to C style. While wrapping
-    # to options.width.
+    # appropriate for .po files, namely much closer to C style,
+    # while wrapping to options.width.
     lines = []
     for line in s.splitlines(True):
         if len(escape(line, encoding)) > options.width:

From 7c8637e0158381edeed29cb16dbea2af21bedd9a Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sat, 1 Mar 2025 11:16:59 +0000
Subject: [PATCH 10/24] Add test and sort imports

---
 Lib/test/test_tools/i18n_data/messages.pot | 36 +++++++++++++---------
 Lib/test/test_tools/i18n_data/messages.py  |  3 +-
 Tools/i18n/pygettext.py                    |  2 +-
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
index dbc8fd40dc87ac..cc6735d3454b90 100644
--- a/Lib/test/test_tools/i18n_data/messages.pot
+++ b/Lib/test/test_tools/i18n_data/messages.pot
@@ -39,65 +39,71 @@ msgid ""
 "long string!"
 msgstr ""
 
-#: messages.py:49 messages.py:92 messages.py:93 messages.py:96 messages.py:97
-#: messages.py:102 messages.py:103 messages.py:104
+#: messages.py:33
+msgid ""
+"this  is  a  very  very    very very  very    very  very   very       very  "
+"very    very  very    very   long string with wierd spaces!"
+msgstr ""
+
+#: messages.py:50 messages.py:93 messages.py:94 messages.py:97 messages.py:98
+#: messages.py:103 messages.py:104 messages.py:105
 msgid "foo"
 msgid_plural "foos"
 msgstr[0] ""
 msgstr[1] ""
 
-#: messages.py:50
+#: messages.py:51
 msgid "something"
 msgstr ""
 
-#: messages.py:53
+#: messages.py:54
 msgid "Hello, {}!"
 msgstr ""
 
-#: messages.py:57
+#: messages.py:58
 msgid "1"
 msgstr ""
 
-#: messages.py:57
+#: messages.py:58
 msgid "2"
 msgstr ""
 
-#: messages.py:58 messages.py:59
+#: messages.py:59 messages.py:60
 msgid "A"
 msgstr ""
 
-#: messages.py:58 messages.py:59
+#: messages.py:59 messages.py:60
 msgid "B"
 msgstr ""
 
-#: messages.py:60
+#: messages.py:61
 msgid "set"
 msgstr ""
 
-#: messages.py:65 messages.py:66
+#: messages.py:66 messages.py:67
 msgid "nested string"
 msgstr ""
 
-#: messages.py:71
+#: messages.py:72
 msgid "baz"
 msgstr ""
 
-#: messages.py:74 messages.py:78
+#: messages.py:75 messages.py:79
 msgid "default value"
 msgstr ""
 
-#: messages.py:94 messages.py:95 messages.py:98 messages.py:99
+#: messages.py:95 messages.py:96 messages.py:99 messages.py:100
 msgctxt "context"
 msgid "foo"
 msgid_plural "foos"
 msgstr[0] ""
 msgstr[1] ""
 
-#: messages.py:105
+#: messages.py:106
 msgid "domain foo"
 msgstr ""
 
-#: messages.py:121 messages.py:122
+#: messages.py:122 messages.py:123
 msgid "world"
 msgid_plural "worlds"
 msgstr[0] ""
diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py
index 5578334df8d19b..454dbe3d2de019 100644
--- a/Lib/test/test_tools/i18n_data/messages.py
+++ b/Lib/test/test_tools/i18n_data/messages.py
@@ -28,8 +28,9 @@
     multiline!
 """)
 
-# very long string that should be wrapped
+# very long strings that should be wrapped
 _("this is a very very very very very very very very very very very very very long string!")
+_("this  is  a  very  very    very very  very    very  very   very       very  very    very  very    very   long string with wierd spaces!")
 
 # Invalid arguments
 _()
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 501440d1eedf3b..f748158e5e0068 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -145,10 +145,10 @@
 import importlib.machinery
 import importlib.util
 import os
+import re
 import sys
 import time
 import tokenize
-import re
 from dataclasses import dataclass, field
 from io import BytesIO
 from operator import itemgetter

From 66d8eacca43f32669c434f6aafc040e4dab414ce Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sat, 1 Mar 2025 11:31:43 +0000
Subject: [PATCH 11/24] Benedikt's suggestion

---
 Tools/i18n/pygettext.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index f748158e5e0068..9e7a5267605796 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -219,9 +219,10 @@ def normalize(s, encoding, options):
     # appropriate for .po files, namely much closer to C style,
     # while wrapping to options.width.
     lines = []
+    space_splitter = re.compile(r'(\s+)').split
     for line in s.splitlines(True):
         if len(escape(line, encoding)) > options.width:
-            words = re.split(r'(\s+)', line)
+            words = space_splitter(line)
             words.reverse()
             buf = []
             size = 2

From 430c0519bf2577ff39892fc54d161c80e1bfd008 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 09:30:35 +0000
Subject: [PATCH 12/24] Add tests and simplify normalize

---
 Lib/test/test_tools/test_i18n.py | 39 +++++++++++++++++++++++++++++++-
 Tools/i18n/pygettext.py          | 10 +++-----
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index d73fcff4c9cb11..cb86b4844dfd04 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -4,6 +4,7 @@
 import re
 import sys
 import unittest
+from test.test_decimal import skip_expected
 from textwrap import dedent
 from pathlib import Path
 
@@ -18,7 +19,7 @@
 
 
 with imports_under_tool("i18n"):
-    from pygettext import parse_spec
+    from pygettext import parse_spec, normalize, make_escapes
 
 
 def normalize_POT_file(pot):
@@ -516,6 +517,42 @@ def test_parse_keyword_spec(self):
                     parse_spec(spec)
                 self.assertEqual(str(cm.exception), message)
 
+    def test_normalize_multiline(self):
+        # required to set up normalize
+        class NormOptions:
+            width = 78
+        make_escapes(True)
+
+        s = 'multi-line\n translation'
+        s_expected = '""\n"multi-line\\n"\n" translation"'
+
+        data = normalize(s, 'UTF-8', NormOptions)
+        self.assertEqual(s_expected, data)
+
+    def test_normalize_wrap(self):
+        # required to set up normalize
+        class NormOptions:
+            width = 30
+        make_escapes(True)
+
+        s = 'this string should be wrapped to 30 chars'
+        s_expected = '""\n"this string should be wrapped "\n"to 30 chars"'
+
+        data = normalize(s, 'UTF-8', NormOptions)
+        self.assertEqual(s_expected, data)
+
+    def test_normalize_nostr(self):
+        # required to set up normalize
+        class NormOptions:
+            width = 78
+        make_escapes(True)
+
+        s = ''
+        s_expected = '""'
+
+        data = normalize(s, 'UTF-8', NormOptions)
+        self.assertEqual(s_expected, data)
+
 
 def extract_from_snapshots():
     snapshots = {
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 9e7a5267605796..eb535e150a00f4 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -225,7 +225,7 @@ def normalize(s, encoding, options):
             words = space_splitter(line)
             words.reverse()
             buf = []
-            size = 2
+            size = 0
             while words:
                 word = words.pop()
                 escaped_word_len = len(escape(word, encoding))
@@ -235,17 +235,13 @@ def normalize(s, encoding, options):
                 else:
                     lines.append(''.join(buf))
                     buf = [word]
-                    size = 2 + escaped_word_len
+                    size = escaped_word_len
             lines.append(''.join(buf))
         else:
             lines.append(line)
     if len(lines) <= 1:
         return f'"{escape(s, encoding)}"'
-    if lines and not lines[-1]:
-        del lines[-1]
-        lines[-1] += '\n'
-    return '""\n' + '\n'.join(
-        [f'"{escape(line, encoding)}"' for line in lines])
+    return '""\n' + '\n'.join([f'"{escape(line, encoding)}"' for line in lines])
 
 
 def containsAny(str, set):

From abb90c2393b7228f2886b4c90b43d30913c620db Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Sun, 2 Mar 2025 09:32:45 +0000
Subject: [PATCH 13/24] tomasr8 suggestion

Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
---
 .../Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst
index 0aec1e94fdcbe7..a4156699f8500f 100644
--- a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst
+++ b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst
@@ -1 +1 @@
-Wrap msgids to specified ``width`` and not just comments in :program:`pygettext`.
+Wrap msgids to specified ``width`` in :program:`pygettext`.

From 7f947dbd06c481607e3b0b7f63981e0840bacf0d Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 09:33:30 +0000
Subject: [PATCH 14/24] Fix typo in test str

---
 Lib/test/test_tools/i18n_data/messages.pot | 2 +-
 Lib/test/test_tools/i18n_data/messages.py  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
index cc6735d3454b90..e2a230fd30e9d3 100644
--- a/Lib/test/test_tools/i18n_data/messages.pot
+++ b/Lib/test/test_tools/i18n_data/messages.pot
@@ -42,7 +42,7 @@ msgstr ""
 #: messages.py:33
 msgid ""
 "this  is  a  very  very    very very  very    very  very   very       very  "
-"very    very  very    very   long string with wierd spaces!"
+"very    very  very    very   long string with weird spaces!"
 msgstr ""
 
 #: messages.py:50 messages.py:93 messages.py:94 messages.py:97 messages.py:98
diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py
index 454dbe3d2de019..e4a1c5e60f1e5a 100644
--- a/Lib/test/test_tools/i18n_data/messages.py
+++ b/Lib/test/test_tools/i18n_data/messages.py
@@ -28,9 +28,9 @@
     multiline!
 """)
 
-# very long strings that should be wrapped
+# very long strings that should be wrapped by normalize
 _("this is a very very very very very very very very very very very very very long string!")
-_("this  is  a  very  very    very very  very    very  very   very       very  very    very  very    very   long string with wierd spaces!")
+_("this  is  a  very  very    very very  very    very  very   very       very  very    very  very    very   long string with weird spaces!")
 
 # Invalid arguments
 _()

From ea5fa91b61d3a06a7823b7bef4f03610870d206b Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 09:50:14 +0000
Subject: [PATCH 15/24] Benedikt's suggestions

---
 Lib/test/test_tools/test_i18n.py | 17 +++++++----------
 Tools/i18n/pygettext.py          |  2 +-
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index cb86b4844dfd04..b6da0f6e858a45 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -4,7 +4,7 @@
 import re
 import sys
 import unittest
-from test.test_decimal import skip_expected
+from types import SimpleNamespace
 from textwrap import dedent
 from pathlib import Path
 
@@ -519,38 +519,35 @@ def test_parse_keyword_spec(self):
 
     def test_normalize_multiline(self):
         # required to set up normalize
-        class NormOptions:
-            width = 78
+        options = SimpleNamespace(width=78)
         make_escapes(True)
 
         s = 'multi-line\n translation'
         s_expected = '""\n"multi-line\\n"\n" translation"'
 
-        data = normalize(s, 'UTF-8', NormOptions)
+        data = normalize(s, 'UTF-8', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_wrap(self):
         # required to set up normalize
-        class NormOptions:
-            width = 30
+        options = SimpleNamespace(width=30)
         make_escapes(True)
 
         s = 'this string should be wrapped to 30 chars'
         s_expected = '""\n"this string should be wrapped "\n"to 30 chars"'
 
-        data = normalize(s, 'UTF-8', NormOptions)
+        data = normalize(s, 'UTF-8', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_nostr(self):
         # required to set up normalize
-        class NormOptions:
-            width = 78
+        options = SimpleNamespace(width=30)
         make_escapes(True)
 
         s = ''
         s_expected = '""'
 
-        data = normalize(s, 'UTF-8', NormOptions)
+        data = normalize(s, 'UTF-8', options)
         self.assertEqual(s_expected, data)
 
 
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index eb535e150a00f4..9e20f1d7c5d3f9 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -213,13 +213,13 @@ def escape_ascii(s, encoding):
 def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
+space_splitter = re.compile(r'(\s+)').split
 
 def normalize(s, encoding, options):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style,
     # while wrapping to options.width.
     lines = []
-    space_splitter = re.compile(r'(\s+)').split
     for line in s.splitlines(True):
         if len(escape(line, encoding)) > options.width:
             words = space_splitter(line)

From 4b02678a46c9de87c05dfe93016669fb7f05ba78 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 09:59:24 +0000
Subject: [PATCH 16/24] More of Benedikt's suggestions

---
 Lib/test/test_tools/test_i18n.py |  4 ++--
 Tools/i18n/pygettext.py          | 14 +++++++++-----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index b6da0f6e858a45..be60dde2c1752b 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -4,8 +4,8 @@
 import re
 import sys
 import unittest
-from types import SimpleNamespace
 from textwrap import dedent
+from types import SimpleNamespace
 from pathlib import Path
 
 from test.support.script_helper import assert_python_ok
@@ -19,7 +19,7 @@
 
 
 with imports_under_tool("i18n"):
-    from pygettext import parse_spec, normalize, make_escapes
+    from pygettext import parse_spec, make_escapes, normalize
 
 
 def normalize_POT_file(pot):
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 9e20f1d7c5d3f9..20bfa68abea5d4 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -213,7 +213,10 @@ def escape_ascii(s, encoding):
 def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
-space_splitter = re.compile(r'(\s+)').split
+# Split a string according to whitespaces and keep
+# the whitespaces in the resulting array thanks to
+# the capturing group.
+_space_splitter = re.compile(r'(\s+)').split
 
 def normalize(s, encoding, options):
     # This converts the various Python string types into a format that is
@@ -222,16 +225,17 @@ def normalize(s, encoding, options):
     lines = []
     for line in s.splitlines(True):
         if len(escape(line, encoding)) > options.width:
-            words = space_splitter(line)
+            words = _space_splitter(line)
             words.reverse()
             buf = []
             size = 0
             while words:
                 word = words.pop()
                 escaped_word_len = len(escape(word, encoding))
-                if size + escaped_word_len <= options.width:
+                new_size = size + escaped_word_len
+                if new_size <= options.width:
                     buf.append(word)
-                    size += escaped_word_len
+                    size = new_size
                 else:
                     lines.append(''.join(buf))
                     buf = [word]
@@ -241,7 +245,7 @@ def normalize(s, encoding, options):
             lines.append(line)
     if len(lines) <= 1:
         return f'"{escape(s, encoding)}"'
-    return '""\n' + '\n'.join([f'"{escape(line, encoding)}"' for line in lines])
+    return '""\n' + '\n'.join(f'"{escape(line, encoding)}"' for line in lines)
 
 
 def containsAny(str, set):

From 8d03cbf141068c4ac9812a967a4c9f5942e22d75 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 10:23:20 +0000
Subject: [PATCH 17/24] Don't wrap for single words

---
 Lib/test/test_tools/test_i18n.py | 11 +++++++++++
 Tools/i18n/pygettext.py          |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index be60dde2c1752b..91808d9e03dd18 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -550,6 +550,17 @@ def test_normalize_nostr(self):
         data = normalize(s, 'UTF-8', options)
         self.assertEqual(s_expected, data)
 
+    def test_normalize_short_width(self):
+        # required to set up normalize
+        options = SimpleNamespace(width=3)
+        make_escapes(True)
+
+        s = 'foos'
+        s_expected = '"foos"'
+
+        data = normalize(s, 'UTF-8', options)
+        self.assertEqual(s_expected, data)
+
 
 def extract_from_snapshots():
     snapshots = {
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 20bfa68abea5d4..b14a302ed1d266 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -224,7 +224,7 @@ def normalize(s, encoding, options):
     # while wrapping to options.width.
     lines = []
     for line in s.splitlines(True):
-        if len(escape(line, encoding)) > options.width:
+        if len(escape(line, encoding)) > options.width and ' ' in line: # don't wrap single words
             words = _space_splitter(line)
             words.reverse()
             buf = []

From fbe5b9317c45432094438a4f8778315cac4f520a Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 15:01:55 +0000
Subject: [PATCH 18/24] Address Serhiy's suggestions

---
 Lib/test/test_tools/test_i18n.py | 10 ++++-----
 Tools/i18n/pygettext.py          | 35 +++++++++++++++++---------------
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 91808d9e03dd18..da711d7af06ddd 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -525,7 +525,7 @@ def test_normalize_multiline(self):
         s = 'multi-line\n translation'
         s_expected = '""\n"multi-line\\n"\n" translation"'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_wrap(self):
@@ -534,9 +534,9 @@ def test_normalize_wrap(self):
         make_escapes(True)
 
         s = 'this string should be wrapped to 30 chars'
-        s_expected = '""\n"this string should be wrapped "\n"to 30 chars"'
+        s_expected = '""\n"this string should be "\n"wrapped to 30 chars"'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_nostr(self):
@@ -547,7 +547,7 @@ def test_normalize_nostr(self):
         s = ''
         s_expected = '""'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_short_width(self):
@@ -558,7 +558,7 @@ def test_normalize_short_width(self):
         s = 'foos'
         s_expected = '"foos"'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
 
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index b14a302ed1d266..057fa08c82e651 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -213,39 +213,42 @@ def escape_ascii(s, encoding):
 def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
-# Split a string according to whitespaces and keep
-# the whitespaces in the resulting array thanks to
-# the capturing group.
-_space_splitter = re.compile(r'(\s+)').split
 
-def normalize(s, encoding, options):
+_space_splitter = re.compile(r'(\s+)')
+
+def normalize(s, encoding, prefix, options):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style,
     # while wrapping to options.width.
     lines = []
     for line in s.splitlines(True):
-        if len(escape(line, encoding)) > options.width and ' ' in line: # don't wrap single words
-            words = _space_splitter(line)
+        escaped_line = escape(line, encoding)
+        if len(escaped_line) + len(prefix) + 2 > options.width and _space_splitter.search(line):  # don't wrap single words
+            words = _space_splitter.split(line)
             words.reverse()
             buf = []
             size = 0
             while words:
                 word = words.pop()
-                escaped_word_len = len(escape(word, encoding))
+                escaped_word = escape(word, encoding)
+                escaped_word_len = len(escaped_word)
                 new_size = size + escaped_word_len
-                if new_size <= options.width:
-                    buf.append(word)
+                if new_size + 2 <= options.width:
+                    buf.append(escaped_word)
+                    size = new_size
+                elif not buf:
+                    buf.append(escaped_word)
                     size = new_size
                 else:
                     lines.append(''.join(buf))
-                    buf = [word]
+                    buf = [escaped_word]
                     size = escaped_word_len
             lines.append(''.join(buf))
         else:
-            lines.append(line)
+            lines.append(escaped_line)
     if len(lines) <= 1:
         return f'"{escape(s, encoding)}"'
-    return '""\n' + '\n'.join(f'"{escape(line, encoding)}"' for line in lines)
+    return '""\n' + '\n'.join(f'"{line}"' for line in lines)
 
 
 def containsAny(str, set):
@@ -636,10 +639,10 @@ def write_pot_file(messages, options, fp):
             # to skip translating some unimportant docstrings.
             print('#, docstring', file=fp)
         if msg.msgctxt is not None:
-            print('msgctxt', normalize(msg.msgctxt, encoding, options), file=fp)
-        print('msgid', normalize(msg.msgid, encoding, options), file=fp)
+            print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options), file=fp)
+        print('msgid', normalize(msg.msgid, encoding, 'msgid', options), file=fp)
         if msg.msgid_plural is not None:
-            print('msgid_plural', normalize(msg.msgid_plural, encoding, options), file=fp)
+            print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options), file=fp)
             print('msgstr[0] ""', file=fp)
             print('msgstr[1] ""\n', file=fp)
         else:

From 8d5f84fb9083e1680b5fb5d8b97b759a54debbb7 Mon Sep 17 00:00:00 2001
From: stan <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 15:09:28 +0000
Subject: [PATCH 19/24] Use more complex pattern

---
 Tools/i18n/pygettext.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 057fa08c82e651..09d8c06340796b 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -214,7 +214,7 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
-_space_splitter = re.compile(r'(\s+)')
+_space_splitter = re.compile(r'\s+|\S+\s*')
 
 def normalize(s, encoding, prefix, options):
     # This converts the various Python string types into a format that is
@@ -224,7 +224,8 @@ def normalize(s, encoding, prefix, options):
     for line in s.splitlines(True):
         escaped_line = escape(line, encoding)
         if len(escaped_line) + len(prefix) + 2 > options.width and _space_splitter.search(line):  # don't wrap single words
-            words = _space_splitter.split(line)
+            words = _space_splitter.findall(line)
+            words = [w for w in words if w]
             words.reverse()
             buf = []
             size = 0

From ae53774e267dae1a83ae6f80b24ad63fcb322cfd Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Sun, 2 Mar 2025 17:21:33 +0000
Subject: [PATCH 20/24] Serhiy's suggestions

---
 Tools/i18n/pygettext.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 09d8c06340796b..2c9f8b4c8b13d2 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -223,9 +223,8 @@ def normalize(s, encoding, prefix, options):
     lines = []
     for line in s.splitlines(True):
         escaped_line = escape(line, encoding)
-        if len(escaped_line) + len(prefix) + 2 > options.width and _space_splitter.search(line):  # don't wrap single words
+        if len(escaped_line) + len(prefix) + 3 > options.width:
             words = _space_splitter.findall(line)
-            words = [w for w in words if w]
             words.reverse()
             buf = []
             size = 0
@@ -234,10 +233,7 @@ def normalize(s, encoding, prefix, options):
                 escaped_word = escape(word, encoding)
                 escaped_word_len = len(escaped_word)
                 new_size = size + escaped_word_len
-                if new_size + 2 <= options.width:
-                    buf.append(escaped_word)
-                    size = new_size
-                elif not buf:
+                if new_size + 2 <= options.width or not buf:
                     buf.append(escaped_word)
                     size = new_size
                 else:

From 794fc8b1ad3ab9aace33f6637a59fd1ade93c77e Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 3 Mar 2025 18:48:37 +0000
Subject: [PATCH 21/24] Serhiy's suggestions

---
 Lib/test/test_tools/test_i18n.py | 57 ++++++++++++++++----------------
 Tools/i18n/pygettext.py          | 19 ++++++-----
 2 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index da711d7af06ddd..e550e4805db450 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -517,49 +517,50 @@ def test_parse_keyword_spec(self):
                     parse_spec(spec)
                 self.assertEqual(str(cm.exception), message)
 
-    def test_normalize_multiline(self):
-        # required to set up normalize
-        options = SimpleNamespace(width=78)
-        make_escapes(True)
+    # required to set up normalize
+    make_escapes(True)
 
+    def test_normalize_multiline(self):
         s = 'multi-line\n translation'
         s_expected = '""\n"multi-line\\n"\n" translation"'
 
-        data = normalize(s, 'UTF-8', 'msgid', options)
+        data = normalize(s, 'UTF-8', 'msgid', 78)
         self.assertEqual(s_expected, data)
 
     def test_normalize_wrap(self):
-        # required to set up normalize
-        options = SimpleNamespace(width=30)
-        make_escapes(True)
+        s = 'fee fi fo fum fee fi '                # len = 29
+        s_expected = '"fee fi fo fum fee fi "'
+        data = normalize(s, 'UTF-8', 'msgid', 30)
+        self.assertEqual(s_expected, data)
 
-        s = 'this string should be wrapped to 30 chars'
-        s_expected = '""\n"this string should be "\n"wrapped to 30 chars"'
+        s = 'fee fi fo fum fee fi f'               # len = 30
+        s_expected = '"fee fi fo fum fee fi f"'
+        data = normalize(s, 'UTF-8', 'msgid', 30)
+        self.assertEqual(s_expected, data)
 
-        data = normalize(s, 'UTF-8', 'msgid', options)
+        s = 'fee fi fo fum fee fi fo'              # len = 31
+        s_expected = '""\n"fee fi fo fum fee fi fo"'
+        data = normalize(s, 'UTF-8', 'msgid', 30)
         self.assertEqual(s_expected, data)
 
     def test_normalize_nostr(self):
-        # required to set up normalize
-        options = SimpleNamespace(width=30)
-        make_escapes(True)
-
-        s = ''
-        s_expected = '""'
-
-        data = normalize(s, 'UTF-8', 'msgid', options)
-        self.assertEqual(s_expected, data)
+        data = normalize('', 'UTF-8', 'msgid', 30)
+        self.assertEqual('""', data)
 
-    def test_normalize_short_width(self):
+    def test_normalize_single_word(self):
         # required to set up normalize
-        options = SimpleNamespace(width=3)
         make_escapes(True)
-
-        s = 'foos'
-        s_expected = '"foos"'
-
-        data = normalize(s, 'UTF-8', 'msgid', options)
-        self.assertEqual(s_expected, data)
+        for s in ("fee", "fi", "fo", "fums"):
+            data = normalize(s, 'UTF-8', 'msgid', 3)
+            self.assertNotIn('""', data) # did not wrap
+
+    def test_normalize_split_on_whitespace(self):
+        for space in (' ', ' ', ' ', '\t', '\r'):
+            s = f'longlonglong{space}word'
+            space = {'\t': '\\t', '\r': '\\r'}.get(space, space)
+            s_expected = f'""\n"longlonglong{space}"\n"word"'
+            data = normalize(s, 'UTF-8', 'msgid', 10)
+            self.assertEqual(s_expected, data)
 
 
 def extract_from_snapshots():
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 2c9f8b4c8b13d2..fcd7c6f9df78a6 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -155,6 +155,7 @@
 
 __version__ = '1.5'
 
+from test.test_doctest.test_doctest import wrapped
 
 # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
 # there.
@@ -216,24 +217,26 @@ def escape_nonascii(s, encoding):
 
 _space_splitter = re.compile(r'\s+|\S+\s*')
 
-def normalize(s, encoding, prefix, options):
+def normalize(s, encoding, prefix, width):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style,
     # while wrapping to options.width.
     lines = []
+    wrap = False
     for line in s.splitlines(True):
         escaped_line = escape(line, encoding)
-        if len(escaped_line) + len(prefix) + 3 > options.width:
+        if len(escaped_line) + len(prefix) + 3 > width:
+            wrap = True
             words = _space_splitter.findall(line)
             words.reverse()
             buf = []
-            size = 0
+            size = 2
             while words:
                 word = words.pop()
                 escaped_word = escape(word, encoding)
                 escaped_word_len = len(escaped_word)
                 new_size = size + escaped_word_len
-                if new_size + 2 <= options.width or not buf:
+                if new_size <= width or not buf:
                     buf.append(escaped_word)
                     size = new_size
                 else:
@@ -243,7 +246,7 @@ def normalize(s, encoding, prefix, options):
             lines.append(''.join(buf))
         else:
             lines.append(escaped_line)
-    if len(lines) <= 1:
+    if len(lines) <= 1 and (not wrap or len(_space_splitter.findall(lines[0])) == 1):
         return f'"{escape(s, encoding)}"'
     return '""\n' + '\n'.join(f'"{line}"' for line in lines)
 
@@ -636,10 +639,10 @@ def write_pot_file(messages, options, fp):
             # to skip translating some unimportant docstrings.
             print('#, docstring', file=fp)
         if msg.msgctxt is not None:
-            print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options), file=fp)
-        print('msgid', normalize(msg.msgid, encoding, 'msgid', options), file=fp)
+            print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options.width), file=fp)
+        print('msgid', normalize(msg.msgid, encoding, 'msgid', options.width), file=fp)
         if msg.msgid_plural is not None:
-            print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options), file=fp)
+            print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options.width), file=fp)
             print('msgstr[0] ""', file=fp)
             print('msgstr[1] ""\n', file=fp)
         else:

From 47bfa291f5b6351c432dac563cbf87861065f546 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 3 Mar 2025 18:49:23 +0000
Subject: [PATCH 22/24] Clean up

---
 Lib/test/test_tools/test_i18n.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index e550e4805db450..9a63e81fed2ac3 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -548,8 +548,6 @@ def test_normalize_nostr(self):
         self.assertEqual('""', data)
 
     def test_normalize_single_word(self):
-        # required to set up normalize
-        make_escapes(True)
         for s in ("fee", "fi", "fo", "fums"):
             data = normalize(s, 'UTF-8', 'msgid', 3)
             self.assertNotIn('""', data) # did not wrap

From b6f128f41412bd9800a2f0e6d8538533c8f0949a Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 3 Mar 2025 21:15:37 +0000
Subject: [PATCH 23/24] Apply suggestions from Tomas

---
 Lib/test/test_tools/test_i18n.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 9a63e81fed2ac3..e24d95be30399d 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -517,8 +517,9 @@ def test_parse_keyword_spec(self):
                     parse_spec(spec)
                 self.assertEqual(str(cm.exception), message)
 
-    # required to set up normalize
-    make_escapes(True)
+    def setUp(self):
+        # required to set up normalize
+        make_escapes(True)
 
     def test_normalize_multiline(self):
         s = 'multi-line\n translation'
@@ -528,20 +529,16 @@ def test_normalize_multiline(self):
         self.assertEqual(s_expected, data)
 
     def test_normalize_wrap(self):
-        s = 'fee fi fo fum fee fi '                # len = 29
-        s_expected = '"fee fi fo fum fee fi "'
-        data = normalize(s, 'UTF-8', 'msgid', 30)
-        self.assertEqual(s_expected, data)
-
-        s = 'fee fi fo fum fee fi f'               # len = 30
-        s_expected = '"fee fi fo fum fee fi f"'
-        data = normalize(s, 'UTF-8', 'msgid', 30)
-        self.assertEqual(s_expected, data)
-
-        s = 'fee fi fo fum fee fi fo'              # len = 31
-        s_expected = '""\n"fee fi fo fum fee fi fo"'
-        data = normalize(s, 'UTF-8', 'msgid', 30)
-        self.assertEqual(s_expected, data)
+        cases = (
+            ('multi-line\n translation', '""\n"multi-line\\n"\n" translation"'),
+            ('fee fi fo fum fee fi ', '"fee fi fo fum fee fi "'),         # len = 29
+            ('fee fi fo fum fee fi f',  '"fee fi fo fum fee fi f"'),      # len = 30
+            ('fee fi fo fum fee fi fo', '""\n"fee fi fo fum fee fi fo"' ),# len = 31
+        )
+        for raw, expected in cases:
+            with self.subTest(raw):
+                data = normalize(raw, 'UTF-8', 'msgid', 30)
+                self.assertEqual(expected, data)
 
     def test_normalize_nostr(self):
         data = normalize('', 'UTF-8', 'msgid', 30)

From a4823a795ccb6d1e7d304f0aa653ea7630556264 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Wed, 5 Mar 2025 18:46:58 +0000
Subject: [PATCH 24/24] Apply suggestions from Serhiy

---
 Lib/test/test_tools/test_i18n.py | 9 +++++++--
 Tools/i18n/pygettext.py          | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index e24d95be30399d..c9002c124fa433 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -540,13 +540,13 @@ def test_normalize_wrap(self):
                 data = normalize(raw, 'UTF-8', 'msgid', 30)
                 self.assertEqual(expected, data)
 
-    def test_normalize_nostr(self):
+    def test_normalize_empty_str(self):
         data = normalize('', 'UTF-8', 'msgid', 30)
         self.assertEqual('""', data)
 
     def test_normalize_single_word(self):
         for s in ("fee", "fi", "fo", "fums"):
-            data = normalize(s, 'UTF-8', 'msgid', 3)
+            data = normalize(s, 'UTF-8', 'msgid', 8)
             self.assertNotIn('""', data) # did not wrap
 
     def test_normalize_split_on_whitespace(self):
@@ -557,6 +557,11 @@ def test_normalize_split_on_whitespace(self):
             data = normalize(s, 'UTF-8', 'msgid', 10)
             self.assertEqual(s_expected, data)
 
+        s = f'longlonglong\r\nword'
+        s_expected = f'""\n"longlonglong\\r\\n"\n"word"'
+        data = normalize(s, 'UTF-8', 'msgid', 30)
+        self.assertEqual(s_expected, data)
+
 
 def extract_from_snapshots():
     snapshots = {
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index fcd7c6f9df78a6..9e2fef22cea328 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -230,13 +230,13 @@ def normalize(s, encoding, prefix, width):
             words = _space_splitter.findall(line)
             words.reverse()
             buf = []
-            size = 2
+            size = 0
             while words:
                 word = words.pop()
                 escaped_word = escape(word, encoding)
                 escaped_word_len = len(escaped_word)
                 new_size = size + escaped_word_len
-                if new_size <= width or not buf:
+                if new_size + 2 <= width or not buf:
                     buf.append(escaped_word)
                     size = new_size
                 else: