Skip to content

gh-130703: Implement wrapping to width for msgids #130705

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b3ccc45
Add logic to wrap and test
StanFromIreland Feb 28, 2025
33149ed
Fix NEWS name -- We don't want milliseconds
StanFromIreland Feb 28, 2025
0e35e36
Change extract func in test
StanFromIreland Feb 28, 2025
92f227f
Use a modified version of pybabel's code in normalize
StanFromIreland Mar 1, 2025
f0ee9c4
Minor tweak
StanFromIreland Mar 1, 2025
843e3fa
Update argparse snapshot
StanFromIreland Mar 1, 2025
7fc34ca
Bénédikt's suggestions
picnixz Mar 1, 2025
8d319b4
Preserve spaces and remove unnecessary checks
StanFromIreland Mar 1, 2025
9197688
Improve comment
StanFromIreland Mar 1, 2025
7c8637e
Add test and sort imports
StanFromIreland Mar 1, 2025
66d8eac
Benedikt's suggestion
StanFromIreland Mar 1, 2025
430c051
Add tests and simplify normalize
StanFromIreland Mar 2, 2025
abb90c2
tomasr8 suggestion
StanFromIreland Mar 2, 2025
7f947db
Fix typo in test str
StanFromIreland Mar 2, 2025
ea5fa91
Benedikt's suggestions
StanFromIreland Mar 2, 2025
4b02678
More of Benedikt's suggestions
StanFromIreland Mar 2, 2025
8d03cbf
Don't wrap for single words
StanFromIreland Mar 2, 2025
fbe5b93
Address Serhiy's suggestions
StanFromIreland Mar 2, 2025
8d5f84f
Use more complex pattern
StanFromIreland Mar 2, 2025
ae53774
Serhiy's suggestions
StanFromIreland Mar 2, 2025
794fc8b
Serhiy's suggestions
StanFromIreland Mar 3, 2025
47bfa29
Clean up
StanFromIreland Mar 3, 2025
b6f128f
Apply suggestions from Tomas
StanFromIreland Mar 3, 2025
a4823a7
Apply suggestions from Serhiy
StanFromIreland Mar 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions Lib/test/test_tools/i18n_data/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -33,65 +33,71 @@ msgid ""
" multiline!\n"
msgstr ""

#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
#: messages.py:99 messages.py:100 messages.py:101
#: messages.py:32
msgid ""
"this is a very very very very very very very very very very very very very "
"long string!"
msgstr ""

#: messages.py:49 messages.py:92 messages.py:93 messages.py:96 messages.py:97
#: messages.py:102 messages.py:103 messages.py:104
msgid "foo"
msgid_plural "foos"
msgstr[0] ""
msgstr[1] ""

#: messages.py:47
#: messages.py:50
msgid "something"
msgstr ""

#: messages.py:50
#: messages.py:53
msgid "Hello, {}!"
msgstr ""

#: messages.py:54
#: messages.py:57
msgid "1"
msgstr ""

#: messages.py:54
#: messages.py:57
msgid "2"
msgstr ""

#: messages.py:55 messages.py:56
#: messages.py:58 messages.py:59
msgid "A"
msgstr ""

#: messages.py:55 messages.py:56
#: messages.py:58 messages.py:59
msgid "B"
msgstr ""

#: messages.py:57
#: messages.py:60
msgid "set"
msgstr ""

#: messages.py:62 messages.py:63
#: messages.py:65 messages.py:66
msgid "nested string"
msgstr ""

#: messages.py:68
#: messages.py:71
msgid "baz"
msgstr ""

#: messages.py:71 messages.py:75
#: messages.py:74 messages.py:78
msgid "default value"
msgstr ""

#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
#: messages.py:94 messages.py:95 messages.py:98 messages.py:99
msgctxt "context"
msgid "foo"
msgid_plural "foos"
msgstr[0] ""
msgstr[1] ""

#: messages.py:102
#: messages.py:105
msgid "domain foo"
msgstr ""

#: messages.py:118 messages.py:119
#: messages.py:121 messages.py:122
msgid "world"
msgid_plural "worlds"
msgstr[0] ""
Expand Down
3 changes: 3 additions & 0 deletions Lib/test/test_tools/i18n_data/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
multiline!
""")

# very long string that should be wrapped
_("this is a very very very very very very very very very very very very very long string!")

# Invalid arguments
_()
_(None)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Wrap msgids, not just comments, to the specified ``width`` in :program:`pygettext`.
51 changes: 34 additions & 17 deletions Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@
import sys
import time
import tokenize
import re
from dataclasses import dataclass, field
from io import BytesIO
from operator import itemgetter
Expand Down Expand Up @@ -213,21 +214,37 @@ def escape_nonascii(s, encoding):
return ''.join(escapes[b] for b in s.encode(encoding))


def normalize(s, encoding):
def normalize(s, encoding, options):
# This converts the various Python string types into a format that is
# appropriate for .po files, namely much closer to C style.
lines = s.split('\n')
if len(lines) == 1:
s = '"' + escape(s, encoding) + '"'
else:
if not lines[-1]:
del lines[-1]
lines[-1] = lines[-1] + '\n'
for i in range(len(lines)):
lines[i] = escape(lines[i], encoding)
lineterm = '\\n"\n"'
s = '""\n"' + lineterm.join(lines) + '"'
return s
# appropriate for .po files, namely much closer to C style,
# while wrapping to options.width.
lines = []
for line in s.splitlines(True):
if len(escape(line, encoding)) > options.width:
words = re.split(r'(\s+)', line)
words.reverse()
buf = []
size = 2
while words:
word = words.pop()
escaped_word_len = len(escape(word, encoding))
if size + escaped_word_len <= options.width:
buf.append(word)
size += escaped_word_len
else:
lines.append(''.join(buf))
buf = [word]
size = 2 + escaped_word_len
lines.append(''.join(buf))
else:
lines.append(line)
if len(lines) <= 1:
return f'"{escape(s, encoding)}"'
if lines and not lines[-1]:
del lines[-1]
lines[-1] += '\n'
return '""\n' + '\n'.join(
[f'"{escape(line, encoding)}"' for line in lines])


def containsAny(str, set):
Expand Down Expand Up @@ -618,10 +635,10 @@ def write_pot_file(messages, options, fp):
# to skip translating some unimportant docstrings.
print('#, docstring', file=fp)
if msg.msgctxt is not None:
print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
print('msgid', normalize(msg.msgid, encoding), file=fp)
print('msgctxt', normalize(msg.msgctxt, encoding, options), file=fp)
print('msgid', normalize(msg.msgid, encoding, options), file=fp)
if msg.msgid_plural is not None:
print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
print('msgid_plural', normalize(msg.msgid_plural, encoding, options), file=fp)
print('msgstr[0] ""', file=fp)
print('msgstr[1] ""\n', file=fp)
else:
Expand Down
Loading