python · StanFromIreland · Feb 28, 2025 · Feb 28, 2025 · Feb 28, 2025 · Mar 1, 2025
diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
@@ -33,65 +33,71 @@ msgid ""
 "    multiline!\n"
 msgstr ""
 
-#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
-#: messages.py:99 messages.py:100 messages.py:101
+#: messages.py:32
+msgid ""
+"this is a very very very very very very very very very very very very very "
+"long string!"
+msgstr ""
+
+#: messages.py:49 messages.py:92 messages.py:93 messages.py:96 messages.py:97
+#: messages.py:102 messages.py:103 messages.py:104
 msgid "foo"
 msgid_plural "foos"
 msgstr[0] ""
 msgstr[1] ""
 
-#: messages.py:47
+#: messages.py:50
 msgid "something"
 msgstr ""
 
-#: messages.py:50
+#: messages.py:53
 msgid "Hello, {}!"
 msgstr ""
 
-#: messages.py:54
+#: messages.py:57
 msgid "1"
 msgstr ""
 
-#: messages.py:54
+#: messages.py:57
 msgid "2"
 msgstr ""
 
-#: messages.py:55 messages.py:56
+#: messages.py:58 messages.py:59
 msgid "A"
 msgstr ""
 
-#: messages.py:55 messages.py:56
+#: messages.py:58 messages.py:59
 msgid "B"
 msgstr ""
 
-#: messages.py:57
+#: messages.py:60
 msgid "set"
 msgstr ""
 
-#: messages.py:62 messages.py:63
+#: messages.py:65 messages.py:66
 msgid "nested string"
 msgstr ""
 
-#: messages.py:68
+#: messages.py:71
 msgid "baz"
 msgstr ""
 
-#: messages.py:71 messages.py:75
+#: messages.py:74 messages.py:78
 msgid "default value"
 msgstr ""
 
-#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
+#: messages.py:94 messages.py:95 messages.py:98 messages.py:99
 msgctxt "context"
 msgid "foo"
 msgid_plural "foos"
 msgstr[0] ""
 msgstr[1] ""
 
-#: messages.py:102
+#: messages.py:105
 msgid "domain foo"
 msgstr ""
 
-#: messages.py:118 messages.py:119
+#: messages.py:121 messages.py:122
 msgid "world"
 msgid_plural "worlds"
 msgstr[0] ""

diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py
@@ -28,6 +28,9 @@
     multiline!
 """)
 
+# very long string that should be wrapped
+_("this is a very very very very very very very very very very very very very long string!")
+
 # Invalid arguments
 _()
 _(None)

diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst
@@ -0,0 +1 @@
+:program:`pygettext` now wraps ``msgctxt``, ``msgid`` and ``msgid_plural`` strings to the specified ``width``, not just the ``#:`` location comments.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
@@ -148,6 +148,7 @@
 import sys
 import time
 import tokenize
+import re
 from dataclasses import dataclass, field
 from io import BytesIO
 from operator import itemgetter
@@ -213,21 +214,37 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
-def normalize(s, encoding):
+def normalize(s, encoding, options):
     # This converts the various Python string types into a format that is
-    # appropriate for .po files, namely much closer to C style.
-    lines = s.split('\n')
-    if len(lines) == 1:
-        s = '"' + escape(s, encoding) + '"'
-    else:
-        if not lines[-1]:
-            del lines[-1]
-            lines[-1] = lines[-1] + '\n'
-        for i in range(len(lines)):
-            lines[i] = escape(lines[i], encoding)
-        lineterm = '\\n"\n"'
-        s = '""\n"' + lineterm.join(lines) + '"'
-    return s
+    # appropriate for .po files, namely much closer to C style,
+    # while wrapping each output line (quotes included) to options.width.
+    lines = []
+    for line in s.splitlines(True):
+        # The two enclosing quotes count towards the width.
+        if len(escape(line, encoding)) + 2 > options.width:
+            words = [w for w in re.split(r'(\s+)', line) if w]
+            words.reverse()
+            buf = []
+            size = 2
+            while words:
+                word = words.pop()
+                escaped_word_len = len(escape(word, encoding))
+                # An over-long word still goes on the current empty line;
+                # flushing an empty buffer would emit a spurious "" line.
+                if size + escaped_word_len <= options.width or not buf:
+                    buf.append(word)
+                    size += escaped_word_len
+                else:
+                    lines.append(''.join(buf))
+                    buf = [word]
+                    size = 2 + escaped_word_len
+            lines.append(''.join(buf))
+        else:
+            lines.append(line)
+    if len(lines) <= 1:
+        return f'"{escape(s, encoding)}"'
+    return '""\n' + '\n'.join(
+        [f'"{escape(line, encoding)}"' for line in lines])
 
 
 def containsAny(str, set):
@@ -618,10 +635,10 @@ def write_pot_file(messages, options, fp):
             # to skip translating some unimportant docstrings.
             print('#, docstring', file=fp)
         if msg.msgctxt is not None:
-            print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
-        print('msgid', normalize(msg.msgid, encoding), file=fp)
+            print('msgctxt', normalize(msg.msgctxt, encoding, options), file=fp)
+        print('msgid', normalize(msg.msgid, encoding, options), file=fp)
         if msg.msgid_plural is not None:
-            print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
+            print('msgid_plural', normalize(msg.msgid_plural, encoding, options), file=fp)
             print('msgstr[0] ""', file=fp)
             print('msgstr[1] ""\n', file=fp)
         else:
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		:program:`pygettext` now wraps ``msgctxt``, ``msgid`` and ``msgid_plural`` strings to the specified ``width``, not just the ``#:`` location comments.
StanFromIreland marked this conversation as resolved. Show resolved Hide resolved