From b3ccc450122840e038d1b650d9c4909da6ee54b7 Mon Sep 17 00:00:00 2001 From: stan Date: Fri, 28 Feb 2025 19:28:10 +0000 Subject: [PATCH 01/24] Add logic to wrap and test --- Lib/test/test_tools/test_i18n.py | 5 +++++ ...-28-19-30-00-00.gh-issue-130703.ajhd21.rst | 1 + Tools/i18n/pygettext.py | 22 +++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index d73fcff4c9cb11..7de3afaafb9203 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -161,6 +161,11 @@ def test_POT_Creation_Date(self): # This will raise if the date format does not exactly match. datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z') + def test_wrap_to_width(self): + msgid = self.extract_docstrings_from_str( + '''_("thisisaveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverlongstring")''') + self.assertIn('\nlongstring', msgid[1]) + def test_funcdocstring(self): for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'): with self.subTest(doc): diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst new file mode 100644 index 00000000000000..0aec1e94fdcbe7 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst @@ -0,0 +1 @@ +Wrap msgids to specified ``width`` and not just comments in :program:`pygettext`. 
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 0f5f32c7d6c18f..2970a570279c17 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -619,9 +619,27 @@ def write_pot_file(messages, options, fp): print('#, docstring', file=fp) if msg.msgctxt is not None: print('msgctxt', normalize(msg.msgctxt, encoding), file=fp) - print('msgid', normalize(msg.msgid, encoding), file=fp) + + # If msgid is longer than width wrap + msgid = normalize(msg.msgid, encoding)[1:-1] # normalize returns "msg" + if len(msgid) > options.width: + print('msgid ""', file=fp) + while msgid: + print(f'"{msgid[:options.width]}"', file=fp) + msgid = msgid[options.width:] + else: + print(f'msgid "{msgid}"', file=fp) + + # If msgid_plural is longer than width wrap if msg.msgid_plural is not None: - print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp) + msgid_plural = normalize(msg.msgid_plural, encoding)[1:-1] # normalize returns "msg" + if len(msgid_plural) > options.width: + print('msgid_plural ""', file=fp) + while msgid_plural: + print(f'"{msgid_plural[:options.width]}"', file=fp) + msgid_plural = msgid_plural[options.width:] + else: + print(f'msgid_plural "{msgid_plural}"', file=fp) print('msgstr[0] ""', file=fp) print('msgstr[1] ""\n', file=fp) else: From 33149ed698b6893920228a5098c35ff3ae8767e7 Mon Sep 17 00:00:00 2001 From: stan Date: Fri, 28 Feb 2025 19:31:47 +0000 Subject: [PATCH 02/24] Fix NEWS name -- We don't want milliseconds --- ....ajhd21.rst => 2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Tools-Demos/{2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst => 2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst} (100%) diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst similarity index 100% rename from 
Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00-00.gh-issue-130703.ajhd21.rst rename to Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst From 0e35e36eb3ce4151522ec24c5c8f9317e0e7b79a Mon Sep 17 00:00:00 2001 From: stan Date: Fri, 28 Feb 2025 19:37:56 +0000 Subject: [PATCH 03/24] Change extract func in test --- Lib/test/test_tools/test_i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 7de3afaafb9203..1226aece55c8e7 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -162,7 +162,7 @@ def test_POT_Creation_Date(self): datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z') def test_wrap_to_width(self): - msgid = self.extract_docstrings_from_str( + msgid = self.extract_from_str( '''_("thisisaveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverlongstring")''') self.assertIn('\nlongstring', msgid[1]) From 92f227f305728b4e409c2d5e882524832289f9ec Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 09:51:35 +0000 Subject: [PATCH 04/24] Use a modified version of pybabel's code in normalize --- Lib/test/test_tools/i18n_data/messages.pot | 38 +++++++----- Lib/test/test_tools/i18n_data/messages.py | 3 + Lib/test/test_tools/test_i18n.py | 5 -- Tools/i18n/pygettext.py | 70 +++++++++++----------- 4 files changed, 60 insertions(+), 56 deletions(-) diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index e8167acfc0742b..03f8dcb942a0ad 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"POT-Creation-Date: 2025-03-01 09:36+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -33,65 +33,71 @@ msgid "" " multiline!\n" 
msgstr "" -#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94 -#: messages.py:99 messages.py:100 messages.py:101 +#: messages.py:32 +msgid "" +"this is a very very very very very very very very very very very very very" +"long string!" +msgstr "" + +#: messages.py:49 messages.py:92 messages.py:93 messages.py:96 messages.py:97 +#: messages.py:102 messages.py:103 messages.py:104 msgid "foo" msgid_plural "foos" msgstr[0] "" msgstr[1] "" -#: messages.py:47 +#: messages.py:50 msgid "something" msgstr "" -#: messages.py:50 +#: messages.py:53 msgid "Hello, {}!" msgstr "" -#: messages.py:54 +#: messages.py:57 msgid "1" msgstr "" -#: messages.py:54 +#: messages.py:57 msgid "2" msgstr "" -#: messages.py:55 messages.py:56 +#: messages.py:58 messages.py:59 msgid "A" msgstr "" -#: messages.py:55 messages.py:56 +#: messages.py:58 messages.py:59 msgid "B" msgstr "" -#: messages.py:57 +#: messages.py:60 msgid "set" msgstr "" -#: messages.py:62 messages.py:63 +#: messages.py:65 messages.py:66 msgid "nested string" msgstr "" -#: messages.py:68 +#: messages.py:71 msgid "baz" msgstr "" -#: messages.py:71 messages.py:75 +#: messages.py:74 messages.py:78 msgid "default value" msgstr "" -#: messages.py:91 messages.py:92 messages.py:95 messages.py:96 +#: messages.py:94 messages.py:95 messages.py:98 messages.py:99 msgctxt "context" msgid "foo" msgid_plural "foos" msgstr[0] "" msgstr[1] "" -#: messages.py:102 +#: messages.py:105 msgid "domain foo" msgstr "" -#: messages.py:118 messages.py:119 +#: messages.py:121 messages.py:122 msgid "world" msgid_plural "worlds" msgstr[0] "" diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py index 9457bcb8611020..5578334df8d19b 100644 --- a/Lib/test/test_tools/i18n_data/messages.py +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -28,6 +28,9 @@ multiline! 
""") +# very long string that should be wrapped +_("this is a very very very very very very very very very very very very very long string!") + # Invalid arguments _() _(None) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 1226aece55c8e7..d73fcff4c9cb11 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -161,11 +161,6 @@ def test_POT_Creation_Date(self): # This will raise if the date format does not exactly match. datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z') - def test_wrap_to_width(self): - msgid = self.extract_from_str( - '''_("thisisaveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverlongstring")''') - self.assertIn('\nlongstring', msgid[1]) - def test_funcdocstring(self): for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'): with self.subTest(doc): diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 2970a570279c17..0fc612a2f330b7 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -213,21 +213,39 @@ def escape_nonascii(s, encoding): return ''.join(escapes[b] for b in s.encode(encoding)) -def normalize(s, encoding): +def normalize(s, encoding, options): # This converts the various Python string types into a format that is - # appropriate for .po files, namely much closer to C style. - lines = s.split('\n') - if len(lines) == 1: - s = '"' + escape(s, encoding) + '"' - else: - if not lines[-1]: - del lines[-1] - lines[-1] = lines[-1] + '\n' - for i in range(len(lines)): - lines[i] = escape(lines[i], encoding) - lineterm = '\\n"\n"' - s = '""\n"' + lineterm.join(lines) + '"' - return s + # appropriate for .po files, namely much closer to C style. While wrapping + # to options.width. 
+ lines = [] + for line in s.splitlines(True): + if len(escape(line, encoding)) > options.width: + words = line.split() + words.reverse() + while words: + buf = [] + size = 2 + while words: + word = words[-1] + escaped_word = escape(word, encoding) + add_space = 1 if buf else 0 + if size + len(escaped_word) + add_space <= options.width: + buf.append(words.pop()) + size += len(escaped_word) + add_space + else: + if not buf: + buf.append(words.pop()) + break + lines.append(' '.join(buf)) + else: + lines.append(line) + if len(lines) <= 1: + return '"' + escape(s, encoding) + '"' + if lines and not lines[-1]: + del lines[-1] + lines[-1] += '\n' + return '""\n' + '\n'.join( + [f'"{escape(line, encoding)}"' for line in lines]) def containsAny(str, set): @@ -618,28 +636,10 @@ def write_pot_file(messages, options, fp): # to skip translating some unimportant docstrings. print('#, docstring', file=fp) if msg.msgctxt is not None: - print('msgctxt', normalize(msg.msgctxt, encoding), file=fp) - - # If msgid is longer than width wrap - msgid = normalize(msg.msgid, encoding)[1:-1] # normalize returns "msg" - if len(msgid) > options.width: - print('msgid ""', file=fp) - while msgid: - print(f'"{msgid[:options.width]}"', file=fp) - msgid = msgid[options.width:] - else: - print(f'msgid "{msgid}"', file=fp) - - # If msgid_plural is longer than width wrap + print('msgctxt', normalize(msg.msgctxt, encoding, options), file=fp) + print('msgid', normalize(msg.msgid, encoding, options), file=fp) if msg.msgid_plural is not None: - msgid_plural = normalize(msg.msgid_plural, encoding)[1:-1] # normalize returns "msg" - if len(msgid_plural) > options.width: - print('msgid_plural ""', file=fp) - while msgid_plural: - print(f'"{msgid_plural[:options.width]}"', file=fp) - msgid_plural = msgid_plural[options.width:] - else: - print(f'msgid_plural "{msgid_plural}"', file=fp) + print('msgid_plural', normalize(msg.msgid_plural, encoding, options), file=fp) print('msgstr[0] ""', file=fp) 
print('msgstr[1] ""\n', file=fp) else: From f0ee9c47f11948548fa70e1271924ccae5b9e9fa Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 09:53:07 +0000 Subject: [PATCH 05/24] Minor tweak --- Lib/test/test_tools/i18n_data/messages.pot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index 03f8dcb942a0ad..886d5714735637 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2025-03-01 09:36+0000\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" From 843e3fa364acacda4b0500748a314f5ed7f89f22 Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 10:17:29 +0000 Subject: [PATCH 06/24] Update argparse snapshot --- Lib/test/translationdata/argparse/msgids.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/translationdata/argparse/msgids.txt b/Lib/test/translationdata/argparse/msgids.txt index ae89ac74726ecf..2fafeae8353e3a 100644 --- a/Lib/test/translationdata/argparse/msgids.txt +++ b/Lib/test/translationdata/argparse/msgids.txt @@ -16,7 +16,7 @@ expected one argument ignored explicit argument %r invalid %(type)s value: %(value)r invalid choice: %(value)r (choose from %(choices)s) -invalid choice: %(value)r, maybe you meant %(closest)r? (choose from %(choices)s) +invalid choice: %(value)r, maybe you meant %(closest)r? 
(choose from%(choices)s) not allowed with argument %s one of the arguments %s is required option '%(option)s' is deprecated From 7fc34cae55c6e44f9f9571a90c5dd4e584f50354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 1 Mar 2025 10:19:38 +0000 Subject: [PATCH 07/24] =?UTF-8?q?B=C3=A9n=C3=A9dikt's=20suggestions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Tools/i18n/pygettext.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 0fc612a2f330b7..b2118f82c20ac5 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -228,19 +228,21 @@ def normalize(s, encoding, options): while words: word = words[-1] escaped_word = escape(word, encoding) + escaped_word_len = len(escaped_word) add_space = 1 if buf else 0 - if size + len(escaped_word) + add_space <= options.width: + new_size = size + escaped_word_len + add_space + if new_size <= options.width: buf.append(words.pop()) - size += len(escaped_word) + add_space + size = new_size else: if not buf: - buf.append(words.pop()) + buf = [words.pop()] break lines.append(' '.join(buf)) else: lines.append(line) if len(lines) <= 1: - return '"' + escape(s, encoding) + '"' + return f'"{escape(s, encoding)}"' if lines and not lines[-1]: del lines[-1] lines[-1] += '\n' From 8d319b407159f0d6d3c891b08c8fa9e28d5c2595 Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 11:03:43 +0000 Subject: [PATCH 08/24] Preserve spaces and remove unnecessary checks --- Lib/test/test_tools/i18n_data/messages.pot | 2 +- Lib/test/translationdata/argparse/msgids.txt | 2 +- Tools/i18n/pygettext.py | 31 +++++++++----------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index 886d5714735637..dbc8fd40dc87ac 100644 --- 
a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -35,7 +35,7 @@ msgstr "" #: messages.py:32 msgid "" -"this is a very very very very very very very very very very very very very" +"this is a very very very very very very very very very very very very very " "long string!" msgstr "" diff --git a/Lib/test/translationdata/argparse/msgids.txt b/Lib/test/translationdata/argparse/msgids.txt index 2fafeae8353e3a..ae89ac74726ecf 100644 --- a/Lib/test/translationdata/argparse/msgids.txt +++ b/Lib/test/translationdata/argparse/msgids.txt @@ -16,7 +16,7 @@ expected one argument ignored explicit argument %r invalid %(type)s value: %(value)r invalid choice: %(value)r (choose from %(choices)s) -invalid choice: %(value)r, maybe you meant %(closest)r? (choose from%(choices)s) +invalid choice: %(value)r, maybe you meant %(closest)r? (choose from %(choices)s) not allowed with argument %s one of the arguments %s is required option '%(option)s' is deprecated diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index b2118f82c20ac5..54dfe42286df3c 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -148,6 +148,7 @@ import sys import time import tokenize +import re from dataclasses import dataclass, field from io import BytesIO from operator import itemgetter @@ -220,25 +221,21 @@ def normalize(s, encoding, options): lines = [] for line in s.splitlines(True): if len(escape(line, encoding)) > options.width: - words = line.split() + words = re.split(r'(\s+)', line) words.reverse() + buf = [] + size = 2 while words: - buf = [] - size = 2 - while words: - word = words[-1] - escaped_word = escape(word, encoding) - escaped_word_len = len(escaped_word) - add_space = 1 if buf else 0 - new_size = size + escaped_word_len + add_space - if new_size <= options.width: - buf.append(words.pop()) - size = new_size - else: - if not buf: - buf = [words.pop()] - break - lines.append(' '.join(buf)) + word = words.pop() + 
escaped_word_len = len(escape(word, encoding)) + if size + escaped_word_len <= options.width: + buf.append(word) + size += escaped_word_len + else: + lines.append(''.join(buf)) + buf = [word] + size = 2 + escaped_word_len + lines.append(''.join(buf)) else: lines.append(line) if len(lines) <= 1: From 91976886d4d07f0e8780a402338feb84e9880f7e Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sat, 1 Mar 2025 11:04:48 +0000 Subject: [PATCH 09/24] Improve comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Tools/i18n/pygettext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 54dfe42286df3c..501440d1eedf3b 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -216,8 +216,8 @@ def escape_nonascii(s, encoding): def normalize(s, encoding, options): # This converts the various Python string types into a format that is - # appropriate for .po files, namely much closer to C style. While wrapping - # to options.width. + # appropriate for .po files, namely much closer to C style, + # while wrapping to options.width. 
lines = [] for line in s.splitlines(True): if len(escape(line, encoding)) > options.width: From 7c8637e0158381edeed29cb16dbea2af21bedd9a Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 11:16:59 +0000 Subject: [PATCH 10/24] Add test and sort imports --- Lib/test/test_tools/i18n_data/messages.pot | 36 +++++++++++++--------- Lib/test/test_tools/i18n_data/messages.py | 3 +- Tools/i18n/pygettext.py | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index dbc8fd40dc87ac..cc6735d3454b90 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -39,65 +39,71 @@ msgid "" "long string!" msgstr "" -#: messages.py:49 messages.py:92 messages.py:93 messages.py:96 messages.py:97 -#: messages.py:102 messages.py:103 messages.py:104 +#: messages.py:33 +msgid "" +"this is a very very very very very very very very very " +"very very very very long string with wierd spaces!" +msgstr "" + +#: messages.py:50 messages.py:93 messages.py:94 messages.py:97 messages.py:98 +#: messages.py:103 messages.py:104 messages.py:105 msgid "foo" msgid_plural "foos" msgstr[0] "" msgstr[1] "" -#: messages.py:50 +#: messages.py:51 msgid "something" msgstr "" -#: messages.py:53 +#: messages.py:54 msgid "Hello, {}!" 
msgstr "" -#: messages.py:57 +#: messages.py:58 msgid "1" msgstr "" -#: messages.py:57 +#: messages.py:58 msgid "2" msgstr "" -#: messages.py:58 messages.py:59 +#: messages.py:59 messages.py:60 msgid "A" msgstr "" -#: messages.py:58 messages.py:59 +#: messages.py:59 messages.py:60 msgid "B" msgstr "" -#: messages.py:60 +#: messages.py:61 msgid "set" msgstr "" -#: messages.py:65 messages.py:66 +#: messages.py:66 messages.py:67 msgid "nested string" msgstr "" -#: messages.py:71 +#: messages.py:72 msgid "baz" msgstr "" -#: messages.py:74 messages.py:78 +#: messages.py:75 messages.py:79 msgid "default value" msgstr "" -#: messages.py:94 messages.py:95 messages.py:98 messages.py:99 +#: messages.py:95 messages.py:96 messages.py:99 messages.py:100 msgctxt "context" msgid "foo" msgid_plural "foos" msgstr[0] "" msgstr[1] "" -#: messages.py:105 +#: messages.py:106 msgid "domain foo" msgstr "" -#: messages.py:121 messages.py:122 +#: messages.py:122 messages.py:123 msgid "world" msgid_plural "worlds" msgstr[0] "" diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py index 5578334df8d19b..454dbe3d2de019 100644 --- a/Lib/test/test_tools/i18n_data/messages.py +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -28,8 +28,9 @@ multiline! 
""") -# very long string that should be wrapped +# very long strings that should be wrapped _("this is a very very very very very very very very very very very very very long string!") +_("this is a very very very very very very very very very very very very very long string with wierd spaces!") # Invalid arguments _() diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 501440d1eedf3b..f748158e5e0068 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -145,10 +145,10 @@ import importlib.machinery import importlib.util import os +import re import sys import time import tokenize -import re from dataclasses import dataclass, field from io import BytesIO from operator import itemgetter From 66d8eacca43f32669c434f6aafc040e4dab414ce Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 11:31:43 +0000 Subject: [PATCH 11/24] Benedikt's suggestion --- Tools/i18n/pygettext.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index f748158e5e0068..9e7a5267605796 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -219,9 +219,10 @@ def normalize(s, encoding, options): # appropriate for .po files, namely much closer to C style, # while wrapping to options.width. 
lines = [] + space_splitter = re.compile(r'(\s+)').split for line in s.splitlines(True): if len(escape(line, encoding)) > options.width: - words = re.split(r'(\s+)', line) + words = space_splitter(line) words.reverse() buf = [] size = 2 From 430c0519bf2577ff39892fc54d161c80e1bfd008 Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 09:30:35 +0000 Subject: [PATCH 12/24] Add tests and simplify normalize --- Lib/test/test_tools/test_i18n.py | 39 +++++++++++++++++++++++++++++++- Tools/i18n/pygettext.py | 10 +++----- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index d73fcff4c9cb11..cb86b4844dfd04 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -4,6 +4,7 @@ import re import sys import unittest +from test.test_decimal import skip_expected from textwrap import dedent from pathlib import Path @@ -18,7 +19,7 @@ with imports_under_tool("i18n"): - from pygettext import parse_spec + from pygettext import parse_spec, normalize, make_escapes def normalize_POT_file(pot): @@ -516,6 +517,42 @@ def test_parse_keyword_spec(self): parse_spec(spec) self.assertEqual(str(cm.exception), message) + def test_normalize_multiline(self): + # required to set up normalize + class NormOptions: + width = 78 + make_escapes(True) + + s = 'multi-line\n translation' + s_expected = '""\n"multi-line\\n"\n" translation"' + + data = normalize(s, 'UTF-8', NormOptions) + self.assertEqual(s_expected, data) + + def test_normalize_wrap(self): + # required to set up normalize + class NormOptions: + width = 30 + make_escapes(True) + + s = 'this string should be wrapped to 30 chars' + s_expected = '""\n"this string should be wrapped "\n"to 30 chars"' + + data = normalize(s, 'UTF-8', NormOptions) + self.assertEqual(s_expected, data) + + def test_normalize_nostr(self): + # required to set up normalize + class NormOptions: + width = 78 + make_escapes(True) + + s = '' + s_expected = 
'""' + + data = normalize(s, 'UTF-8', NormOptions) + self.assertEqual(s_expected, data) + def extract_from_snapshots(): snapshots = { diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 9e7a5267605796..eb535e150a00f4 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -225,7 +225,7 @@ def normalize(s, encoding, options): words = space_splitter(line) words.reverse() buf = [] - size = 2 + size = 0 while words: word = words.pop() escaped_word_len = len(escape(word, encoding)) @@ -235,17 +235,13 @@ def normalize(s, encoding, options): else: lines.append(''.join(buf)) buf = [word] - size = 2 + escaped_word_len + size = escaped_word_len lines.append(''.join(buf)) else: lines.append(line) if len(lines) <= 1: return f'"{escape(s, encoding)}"' - if lines and not lines[-1]: - del lines[-1] - lines[-1] += '\n' - return '""\n' + '\n'.join( - [f'"{escape(line, encoding)}"' for line in lines]) + return '""\n' + '\n'.join([f'"{escape(line, encoding)}"' for line in lines]) def containsAny(str, set): From abb90c2393b7228f2886b4c90b43d30913c620db Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 2 Mar 2025 09:32:45 +0000 Subject: [PATCH 13/24] tomasr8 suggestion Co-authored-by: Tomas R. --- .../Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst index 0aec1e94fdcbe7..a4156699f8500f 100644 --- a/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst +++ b/Misc/NEWS.d/next/Tools-Demos/2025-02-28-19-30-00.gh-issue-130703.ajhd21.rst @@ -1 +1 @@ -Wrap msgids to specified ``width`` and not just comments in :program:`pygettext`. +Wrap msgids to specified ``width`` in :program:`pygettext`. 
From 7f947dbd06c481607e3b0b7f63981e0840bacf0d Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 09:33:30 +0000 Subject: [PATCH 14/24] Fix typo in test str --- Lib/test/test_tools/i18n_data/messages.pot | 2 +- Lib/test/test_tools/i18n_data/messages.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index cc6735d3454b90..e2a230fd30e9d3 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -42,7 +42,7 @@ msgstr "" #: messages.py:33 msgid "" "this is a very very very very very very very very very " -"very very very very long string with wierd spaces!" +"very very very very long string with weird spaces!" msgstr "" #: messages.py:50 messages.py:93 messages.py:94 messages.py:97 messages.py:98 diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py index 454dbe3d2de019..e4a1c5e60f1e5a 100644 --- a/Lib/test/test_tools/i18n_data/messages.py +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -28,9 +28,9 @@ multiline! 
""") -# very long strings that should be wrapped +# very long strings that should be wrapped by normalize _("this is a very very very very very very very very very very very very very long string!") -_("this is a very very very very very very very very very very very very very long string with wierd spaces!") +_("this is a very very very very very very very very very very very very very long string with weird spaces!") # Invalid arguments _() From ea5fa91b61d3a06a7823b7bef4f03610870d206b Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 09:50:14 +0000 Subject: [PATCH 15/24] Benedikt's suggestions --- Lib/test/test_tools/test_i18n.py | 17 +++++++---------- Tools/i18n/pygettext.py | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index cb86b4844dfd04..b6da0f6e858a45 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -4,7 +4,7 @@ import re import sys import unittest -from test.test_decimal import skip_expected +from types import SimpleNamespace from textwrap import dedent from pathlib import Path @@ -519,38 +519,35 @@ def test_parse_keyword_spec(self): def test_normalize_multiline(self): # required to set up normalize - class NormOptions: - width = 78 + options = SimpleNamespace(width=78) make_escapes(True) s = 'multi-line\n translation' s_expected = '""\n"multi-line\\n"\n" translation"' - data = normalize(s, 'UTF-8', NormOptions) + data = normalize(s, 'UTF-8', options) self.assertEqual(s_expected, data) def test_normalize_wrap(self): # required to set up normalize - class NormOptions: - width = 30 + options = SimpleNamespace(width=30) make_escapes(True) s = 'this string should be wrapped to 30 chars' s_expected = '""\n"this string should be wrapped "\n"to 30 chars"' - data = normalize(s, 'UTF-8', NormOptions) + data = normalize(s, 'UTF-8', options) self.assertEqual(s_expected, data) def test_normalize_nostr(self): # required to set up 
normalize - class NormOptions: - width = 78 + options = SimpleNamespace(width=30) make_escapes(True) s = '' s_expected = '""' - data = normalize(s, 'UTF-8', NormOptions) + data = normalize(s, 'UTF-8', options) self.assertEqual(s_expected, data) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index eb535e150a00f4..9e20f1d7c5d3f9 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -213,13 +213,13 @@ def escape_ascii(s, encoding): def escape_nonascii(s, encoding): return ''.join(escapes[b] for b in s.encode(encoding)) +space_splitter = re.compile(r'(\s+)').split def normalize(s, encoding, options): # This converts the various Python string types into a format that is # appropriate for .po files, namely much closer to C style, # while wrapping to options.width. lines = [] - space_splitter = re.compile(r'(\s+)').split for line in s.splitlines(True): if len(escape(line, encoding)) > options.width: words = space_splitter(line) From 4b02678a46c9de87c05dfe93016669fb7f05ba78 Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 09:59:24 +0000 Subject: [PATCH 16/24] More of Benedikt's suggestions --- Lib/test/test_tools/test_i18n.py | 4 ++-- Tools/i18n/pygettext.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index b6da0f6e858a45..be60dde2c1752b 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -4,8 +4,8 @@ import re import sys import unittest -from types import SimpleNamespace from textwrap import dedent +from types import SimpleNamespace from pathlib import Path from test.support.script_helper import assert_python_ok @@ -19,7 +19,7 @@ with imports_under_tool("i18n"): - from pygettext import parse_spec, normalize, make_escapes + from pygettext import parse_spec, make_escapes, normalize def normalize_POT_file(pot): diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 
9e20f1d7c5d3f9..20bfa68abea5d4 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -213,7 +213,10 @@ def escape_ascii(s, encoding): def escape_nonascii(s, encoding): return ''.join(escapes[b] for b in s.encode(encoding)) -space_splitter = re.compile(r'(\s+)').split +# Split a string according to whitespaces and keep +# the whitespaces in the resulting array thanks to +# the capturing group. +_space_splitter = re.compile(r'(\s+)').split def normalize(s, encoding, options): # This converts the various Python string types into a format that is @@ -222,16 +225,17 @@ def normalize(s, encoding, options): lines = [] for line in s.splitlines(True): if len(escape(line, encoding)) > options.width: - words = space_splitter(line) + words = _space_splitter(line) words.reverse() buf = [] size = 0 while words: word = words.pop() escaped_word_len = len(escape(word, encoding)) - if size + escaped_word_len <= options.width: + new_size = size + escaped_word_len + if new_size <= options.width: buf.append(word) - size += escaped_word_len + size = new_size else: lines.append(''.join(buf)) buf = [word] @@ -241,7 +245,7 @@ def normalize(s, encoding, options): lines.append(line) if len(lines) <= 1: return f'"{escape(s, encoding)}"' - return '""\n' + '\n'.join([f'"{escape(line, encoding)}"' for line in lines]) + return '""\n' + '\n'.join(f'"{escape(line, encoding)}"' for line in lines) def containsAny(str, set): From 8d03cbf141068c4ac9812a967a4c9f5942e22d75 Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 10:23:20 +0000 Subject: [PATCH 17/24] Don't wrap for single words --- Lib/test/test_tools/test_i18n.py | 11 +++++++++++ Tools/i18n/pygettext.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index be60dde2c1752b..91808d9e03dd18 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -550,6 +550,17 @@ def test_normalize_nostr(self): data = 
normalize(s, 'UTF-8', options) self.assertEqual(s_expected, data) + def test_normalize_short_width(self): + # required to set up normalize + options = SimpleNamespace(width=3) + make_escapes(True) + + s = 'foos' + s_expected = '"foos"' + + data = normalize(s, 'UTF-8', options) + self.assertEqual(s_expected, data) + def extract_from_snapshots(): snapshots = { diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 20bfa68abea5d4..b14a302ed1d266 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -224,7 +224,7 @@ def normalize(s, encoding, options): # while wrapping to options.width. lines = [] for line in s.splitlines(True): - if len(escape(line, encoding)) > options.width: + if len(escape(line, encoding)) > options.width and ' ' in line: # don't wrap single words words = _space_splitter(line) words.reverse() buf = [] From fbe5b9317c45432094438a4f8778315cac4f520a Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 15:01:55 +0000 Subject: [PATCH 18/24] Address Serhiy's suggestions --- Lib/test/test_tools/test_i18n.py | 10 ++++----- Tools/i18n/pygettext.py | 35 +++++++++++++++++--------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 91808d9e03dd18..da711d7af06ddd 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -525,7 +525,7 @@ def test_normalize_multiline(self): s = 'multi-line\n translation' s_expected = '""\n"multi-line\\n"\n" translation"' - data = normalize(s, 'UTF-8', options) + data = normalize(s, 'UTF-8', 'msgid', options) self.assertEqual(s_expected, data) def test_normalize_wrap(self): @@ -534,9 +534,9 @@ def test_normalize_wrap(self): make_escapes(True) s = 'this string should be wrapped to 30 chars' - s_expected = '""\n"this string should be wrapped "\n"to 30 chars"' + s_expected = '""\n"this string should be "\n"wrapped to 30 chars"' - data = normalize(s, 'UTF-8', options) + data = 
normalize(s, 'UTF-8', 'msgid', options) self.assertEqual(s_expected, data) def test_normalize_nostr(self): @@ -547,7 +547,7 @@ def test_normalize_nostr(self): s = '' s_expected = '""' - data = normalize(s, 'UTF-8', options) + data = normalize(s, 'UTF-8', 'msgid', options) self.assertEqual(s_expected, data) def test_normalize_short_width(self): @@ -558,7 +558,7 @@ def test_normalize_short_width(self): s = 'foos' s_expected = '"foos"' - data = normalize(s, 'UTF-8', options) + data = normalize(s, 'UTF-8', 'msgid', options) self.assertEqual(s_expected, data) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index b14a302ed1d266..057fa08c82e651 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -213,39 +213,42 @@ def escape_ascii(s, encoding): def escape_nonascii(s, encoding): return ''.join(escapes[b] for b in s.encode(encoding)) -# Split a string according to whitespaces and keep -# the whitespaces in the resulting array thanks to -# the capturing group. -_space_splitter = re.compile(r'(\s+)').split -def normalize(s, encoding, options): +_space_splitter = re.compile(r'(\s+)') + +def normalize(s, encoding, prefix, options): # This converts the various Python string types into a format that is # appropriate for .po files, namely much closer to C style, # while wrapping to options.width. 
lines = [] for line in s.splitlines(True): - if len(escape(line, encoding)) > options.width and ' ' in line: # don't wrap single words - words = _space_splitter(line) + escaped_line = escape(line, encoding) + if len(escaped_line) + len(prefix) + 2 > options.width and _space_splitter.search(line): # don't wrap single words + words = _space_splitter.split(line) words.reverse() buf = [] size = 0 while words: word = words.pop() - escaped_word_len = len(escape(word, encoding)) + escaped_word = escape(word, encoding) + escaped_word_len = len(escaped_word) new_size = size + escaped_word_len - if new_size <= options.width: - buf.append(word) + if new_size + 2 <= options.width: + buf.append(escaped_word) + size = new_size + elif not buf: + buf.append(escaped_word) size = new_size else: lines.append(''.join(buf)) - buf = [word] + buf = [escaped_word] size = escaped_word_len lines.append(''.join(buf)) else: - lines.append(line) + lines.append(escaped_line) if len(lines) <= 1: return f'"{escape(s, encoding)}"' - return '""\n' + '\n'.join(f'"{escape(line, encoding)}"' for line in lines) + return '""\n' + '\n'.join(f'"{line}"' for line in lines) def containsAny(str, set): @@ -636,10 +639,10 @@ def write_pot_file(messages, options, fp): # to skip translating some unimportant docstrings. 
print('#, docstring', file=fp) if msg.msgctxt is not None: - print('msgctxt', normalize(msg.msgctxt, encoding, options), file=fp) - print('msgid', normalize(msg.msgid, encoding, options), file=fp) + print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options), file=fp) + print('msgid', normalize(msg.msgid, encoding, 'msgid', options), file=fp) if msg.msgid_plural is not None: - print('msgid_plural', normalize(msg.msgid_plural, encoding, options), file=fp) + print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options), file=fp) print('msgstr[0] ""', file=fp) print('msgstr[1] ""\n', file=fp) else: From 8d5f84fb9083e1680b5fb5d8b97b759a54debbb7 Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 15:09:28 +0000 Subject: [PATCH 19/24] Use more complex pattern --- Tools/i18n/pygettext.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 057fa08c82e651..09d8c06340796b 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -214,7 +214,7 @@ def escape_nonascii(s, encoding): return ''.join(escapes[b] for b in s.encode(encoding)) -_space_splitter = re.compile(r'(\s+)') +_space_splitter = re.compile(r'\s+|\S+\s*') def normalize(s, encoding, prefix, options): # This converts the various Python string types into a format that is @@ -224,7 +224,8 @@ def normalize(s, encoding, prefix, options): for line in s.splitlines(True): escaped_line = escape(line, encoding) if len(escaped_line) + len(prefix) + 2 > options.width and _space_splitter.search(line): # don't wrap single words - words = _space_splitter.split(line) + words = _space_splitter.findall(line) + words = [w for w in words if w] words.reverse() buf = [] size = 0 From ae53774e267dae1a83ae6f80b24ad63fcb322cfd Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 2 Mar 2025 17:21:33 +0000 Subject: [PATCH 20/24] Serhiy's suggestions --- Tools/i18n/pygettext.py | 8 ++------ 1 file changed, 2 
insertions(+), 6 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 09d8c06340796b..2c9f8b4c8b13d2 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -223,9 +223,8 @@ def normalize(s, encoding, prefix, options): lines = [] for line in s.splitlines(True): escaped_line = escape(line, encoding) - if len(escaped_line) + len(prefix) + 2 > options.width and _space_splitter.search(line): # don't wrap single words + if len(escaped_line) + len(prefix) + 3 > options.width: words = _space_splitter.findall(line) - words = [w for w in words if w] words.reverse() buf = [] size = 0 @@ -234,10 +233,7 @@ def normalize(s, encoding, prefix, options): escaped_word = escape(word, encoding) escaped_word_len = len(escaped_word) new_size = size + escaped_word_len - if new_size + 2 <= options.width: - buf.append(escaped_word) - size = new_size - elif not buf: + if new_size + 2 <= options.width or not buf: buf.append(escaped_word) size = new_size else: From 794fc8b1ad3ab9aace33f6637a59fd1ade93c77e Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 3 Mar 2025 18:48:37 +0000 Subject: [PATCH 21/24] Serhiy's suggestions --- Lib/test/test_tools/test_i18n.py | 57 ++++++++++++++++---------------- Tools/i18n/pygettext.py | 19 ++++++----- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index da711d7af06ddd..e550e4805db450 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -517,49 +517,50 @@ def test_parse_keyword_spec(self): parse_spec(spec) self.assertEqual(str(cm.exception), message) - def test_normalize_multiline(self): - # required to set up normalize - options = SimpleNamespace(width=78) - make_escapes(True) + # required to set up normalize + make_escapes(True) + def test_normalize_multiline(self): s = 'multi-line\n translation' s_expected = '""\n"multi-line\\n"\n" translation"' - data = normalize(s, 'UTF-8', 'msgid', 
options) + data = normalize(s, 'UTF-8', 'msgid', 78) self.assertEqual(s_expected, data) def test_normalize_wrap(self): - # required to set up normalize - options = SimpleNamespace(width=30) - make_escapes(True) + s = 'fee fi fo fum fee fi ' # len = 29 + s_expected = '"fee fi fo fum fee fi "' + data = normalize(s, 'UTF-8', 'msgid', 30) + self.assertEqual(s_expected, data) - s = 'this string should be wrapped to 30 chars' - s_expected = '""\n"this string should be "\n"wrapped to 30 chars"' + s = 'fee fi fo fum fee fi f' # len = 30 + s_expected = '"fee fi fo fum fee fi f"' + data = normalize(s, 'UTF-8', 'msgid', 30) + self.assertEqual(s_expected, data) - data = normalize(s, 'UTF-8', 'msgid', options) + s = 'fee fi fo fum fee fi fo' # len = 31 + s_expected = '""\n"fee fi fo fum fee fi fo"' + data = normalize(s, 'UTF-8', 'msgid', 30) self.assertEqual(s_expected, data) def test_normalize_nostr(self): - # required to set up normalize - options = SimpleNamespace(width=30) - make_escapes(True) - - s = '' - s_expected = '""' - - data = normalize(s, 'UTF-8', 'msgid', options) - self.assertEqual(s_expected, data) + data = normalize('', 'UTF-8', 'msgid', 30) + self.assertEqual('""', data) - def test_normalize_short_width(self): + def test_normalize_single_word(self): # required to set up normalize - options = SimpleNamespace(width=3) make_escapes(True) - - s = 'foos' - s_expected = '"foos"' - - data = normalize(s, 'UTF-8', 'msgid', options) - self.assertEqual(s_expected, data) + for s in ("fee", "fi", "fo", "fums"): + data = normalize(s, 'UTF-8', 'msgid', 3) + self.assertNotIn('""', data) # did not wrap + + def test_normalize_split_on_whitespace(self): + for space in (' ', ' ', ' ', '\t', '\r'): + s = f'longlonglong{space}word' + space = {'\t': '\\t', '\r': '\\r'}.get(space, space) + s_expected = f'""\n"longlonglong{space}"\n"word"' + data = normalize(s, 'UTF-8', 'msgid', 10) + self.assertEqual(s_expected, data) def extract_from_snapshots(): diff --git a/Tools/i18n/pygettext.py 
b/Tools/i18n/pygettext.py index 2c9f8b4c8b13d2..fcd7c6f9df78a6 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -155,6 +155,7 @@ __version__ = '1.5' +from test.test_doctest.test_doctest import wrapped # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's # there. @@ -216,24 +217,26 @@ def escape_nonascii(s, encoding): _space_splitter = re.compile(r'\s+|\S+\s*') -def normalize(s, encoding, prefix, options): +def normalize(s, encoding, prefix, width): # This converts the various Python string types into a format that is # appropriate for .po files, namely much closer to C style, # while wrapping to options.width. lines = [] + wrap = False for line in s.splitlines(True): escaped_line = escape(line, encoding) - if len(escaped_line) + len(prefix) + 3 > options.width: + if len(escaped_line) + len(prefix) + 3 > width: + wrap = True words = _space_splitter.findall(line) words.reverse() buf = [] - size = 0 + size = 2 while words: word = words.pop() escaped_word = escape(word, encoding) escaped_word_len = len(escaped_word) new_size = size + escaped_word_len - if new_size + 2 <= options.width or not buf: + if new_size <= width or not buf: buf.append(escaped_word) size = new_size else: @@ -243,7 +246,7 @@ def normalize(s, encoding, prefix, options): lines.append(''.join(buf)) else: lines.append(escaped_line) - if len(lines) <= 1: + if len(lines) <= 1 and (not wrap or len(_space_splitter.findall(lines[0])) == 1): return f'"{escape(s, encoding)}"' return '""\n' + '\n'.join(f'"{line}"' for line in lines) @@ -636,10 +639,10 @@ def write_pot_file(messages, options, fp): # to skip translating some unimportant docstrings. 
print('#, docstring', file=fp) if msg.msgctxt is not None: - print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options), file=fp) - print('msgid', normalize(msg.msgid, encoding, 'msgid', options), file=fp) + print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options.width), file=fp) + print('msgid', normalize(msg.msgid, encoding, 'msgid', options.width), file=fp) if msg.msgid_plural is not None: - print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options), file=fp) + print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options.width), file=fp) print('msgstr[0] ""', file=fp) print('msgstr[1] ""\n', file=fp) else: From 47bfa291f5b6351c432dac563cbf87861065f546 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 3 Mar 2025 18:49:23 +0000 Subject: [PATCH 22/24] Clean up --- Lib/test/test_tools/test_i18n.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index e550e4805db450..9a63e81fed2ac3 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -548,8 +548,6 @@ def test_normalize_nostr(self): self.assertEqual('""', data) def test_normalize_single_word(self): - # required to set up normalize - make_escapes(True) for s in ("fee", "fi", "fo", "fums"): data = normalize(s, 'UTF-8', 'msgid', 3) self.assertNotIn('""', data) # did not wrap From b6f128f41412bd9800a2f0e6d8538533c8f0949a Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 3 Mar 2025 21:15:37 +0000 Subject: [PATCH 23/24] Apply suggestions from Tomas --- Lib/test/test_tools/test_i18n.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 9a63e81fed2ac3..e24d95be30399d 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -517,8 +517,9 @@ def test_parse_keyword_spec(self): parse_spec(spec) 
self.assertEqual(str(cm.exception), message) - # required to set up normalize - make_escapes(True) + def setUp(self): + # required to set up normalize + make_escapes(True) def test_normalize_multiline(self): s = 'multi-line\n translation' @@ -528,20 +529,16 @@ def test_normalize_multiline(self): self.assertEqual(s_expected, data) def test_normalize_wrap(self): - s = 'fee fi fo fum fee fi ' # len = 29 - s_expected = '"fee fi fo fum fee fi "' - data = normalize(s, 'UTF-8', 'msgid', 30) - self.assertEqual(s_expected, data) - - s = 'fee fi fo fum fee fi f' # len = 30 - s_expected = '"fee fi fo fum fee fi f"' - data = normalize(s, 'UTF-8', 'msgid', 30) - self.assertEqual(s_expected, data) - - s = 'fee fi fo fum fee fi fo' # len = 31 - s_expected = '""\n"fee fi fo fum fee fi fo"' - data = normalize(s, 'UTF-8', 'msgid', 30) - self.assertEqual(s_expected, data) + cases = ( + ('multi-line\n translation', '""\n"multi-line\\n"\n" translation"'), + ('fee fi fo fum fee fi ', '"fee fi fo fum fee fi "'), # len = 29 + ('fee fi fo fum fee fi f', '"fee fi fo fum fee fi f"'), # len = 30 + ('fee fi fo fum fee fi fo', '""\n"fee fi fo fum fee fi fo"' ),# len = 31 + ) + for raw, expected in cases: + with self.subTest(raw): + data = normalize(raw, 'UTF-8', 'msgid', 30) + self.assertEqual(expected, data) def test_normalize_nostr(self): data = normalize('', 'UTF-8', 'msgid', 30) From a4823a795ccb6d1e7d304f0aa653ea7630556264 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 5 Mar 2025 18:46:58 +0000 Subject: [PATCH 24/24] Apply suggestions from Serhiy --- Lib/test/test_tools/test_i18n.py | 9 +++++++-- Tools/i18n/pygettext.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index e24d95be30399d..c9002c124fa433 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -540,13 +540,13 @@ def test_normalize_wrap(self): data = normalize(raw, 'UTF-8', 'msgid', 30) 
self.assertEqual(expected, data) - def test_normalize_nostr(self): + def test_normalize_empty_str(self): data = normalize('', 'UTF-8', 'msgid', 30) self.assertEqual('""', data) def test_normalize_single_word(self): for s in ("fee", "fi", "fo", "fums"): - data = normalize(s, 'UTF-8', 'msgid', 3) + data = normalize(s, 'UTF-8', 'msgid', 8) self.assertNotIn('""', data) # did not wrap def test_normalize_split_on_whitespace(self): @@ -557,6 +557,11 @@ def test_normalize_split_on_whitespace(self): data = normalize(s, 'UTF-8', 'msgid', 10) self.assertEqual(s_expected, data) + s = f'longlonglong\r\nword' + s_expected = f'""\n"longlonglong\\r\\n"\n"word"' + data = normalize(s, 'UTF-8', 'msgid', 30) + self.assertEqual(s_expected, data) + def extract_from_snapshots(): snapshots = { diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index fcd7c6f9df78a6..9e2fef22cea328 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -230,13 +230,13 @@ def normalize(s, encoding, prefix, width): words = _space_splitter.findall(line) words.reverse() buf = [] - size = 2 + size = 0 while words: word = words.pop() escaped_word = escape(word, encoding) escaped_word_len = len(escaped_word) new_size = size + escaped_word_len - if new_size <= width or not buf: + if new_size + 2 <= width or not buf: buf.append(escaped_word) size = new_size else: