Skip to content

Commit c73d460

Browse files
authored
gh-130197: Improve test coverage of msgfmt.py (GH-133048)
1 parent fa52f28 commit c73d460

File tree

1 file changed

+114
-13
lines changed

1 file changed

+114
-13
lines changed

Lib/test/test_tools/test_msgfmt.py

Lines changed: 114 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -9,18 +9,21 @@
99

1010
from test.support.os_helper import temp_cwd
1111
from test.support.script_helper import assert_python_failure, assert_python_ok
12-
from test.test_tools import skip_if_missing, toolsdir
12+
from test.test_tools import imports_under_tool, skip_if_missing, toolsdir
1313

1414

1515
skip_if_missing('i18n')
1616

1717
data_dir = (Path(__file__).parent / 'msgfmt_data').resolve()
1818
script_dir = Path(toolsdir) / 'i18n'
19-
msgfmt = script_dir / 'msgfmt.py'
19+
msgfmt_py = script_dir / 'msgfmt.py'
20+
21+
with imports_under_tool("i18n"):
22+
import msgfmt
2023

2124

2225
def compile_messages(po_file, mo_file):
23-
assert_python_ok(msgfmt, '-o', mo_file, po_file)
26+
assert_python_ok(msgfmt_py, '-o', mo_file, po_file)
2427

2528

2629
class CompilationTest(unittest.TestCase):
@@ -92,7 +95,7 @@ def test_po_with_bom(self):
9295
with temp_cwd():
9396
Path('bom.po').write_bytes(b'\xef\xbb\xbfmsgid "Python"\nmsgstr "Pioton"\n')
9497

95-
res = assert_python_failure(msgfmt, 'bom.po')
98+
res = assert_python_failure(msgfmt_py, 'bom.po')
9699
err = res.err.decode('utf-8')
97100
self.assertIn('The file bom.po starts with a UTF-8 BOM', err)
98101

@@ -103,7 +106,7 @@ def test_invalid_msgid_plural(self):
103106
msgstr[0] "singular"
104107
''')
105108

106-
res = assert_python_failure(msgfmt, 'invalid.po')
109+
res = assert_python_failure(msgfmt_py, 'invalid.po')
107110
err = res.err.decode('utf-8')
108111
self.assertIn('msgid_plural not preceded by msgid', err)
109112

@@ -114,7 +117,7 @@ def test_plural_without_msgid_plural(self):
114117
msgstr[0] "bar"
115118
''')
116119

117-
res = assert_python_failure(msgfmt, 'invalid.po')
120+
res = assert_python_failure(msgfmt_py, 'invalid.po')
118121
err = res.err.decode('utf-8')
119122
self.assertIn('plural without msgid_plural', err)
120123

@@ -126,7 +129,7 @@ def test_indexed_msgstr_without_msgid_plural(self):
126129
msgstr "bar"
127130
''')
128131

129-
res = assert_python_failure(msgfmt, 'invalid.po')
132+
res = assert_python_failure(msgfmt_py, 'invalid.po')
130133
err = res.err.decode('utf-8')
131134
self.assertIn('indexed msgstr required for plural', err)
132135

@@ -136,38 +139,136 @@ def test_generic_syntax_error(self):
136139
"foo"
137140
''')
138141

139-
res = assert_python_failure(msgfmt, 'invalid.po')
142+
res = assert_python_failure(msgfmt_py, 'invalid.po')
140143
err = res.err.decode('utf-8')
141144
self.assertIn('Syntax error', err)
142145

146+
147+
class POParserTest(unittest.TestCase):
148+
@classmethod
149+
def tearDownClass(cls):
150+
# msgfmt uses a global variable to store messages,
151+
# clear it after the tests.
152+
msgfmt.MESSAGES.clear()
153+
154+
def test_strings(self):
155+
# Test that the PO parser correctly handles and unescapes
156+
# strings in the PO file.
157+
# The PO file format allows for a variety of escape sequences,
158+
# octal and hex escapes.
159+
valid_strings = (
160+
# empty strings
161+
('""', ''),
162+
('"" "" ""', ''),
163+
# allowed escape sequences
164+
(r'"\\"', '\\'),
165+
(r'"\""', '"'),
166+
(r'"\t"', '\t'),
167+
(r'"\n"', '\n'),
168+
(r'"\r"', '\r'),
169+
(r'"\f"', '\f'),
170+
(r'"\a"', '\a'),
171+
(r'"\b"', '\b'),
172+
(r'"\v"', '\v'),
173+
# non-empty strings
174+
('"foo"', 'foo'),
175+
('"foo" "bar"', 'foobar'),
176+
('"foo""bar"', 'foobar'),
177+
('"" "foo" ""', 'foo'),
178+
# newlines and tabs
179+
(r'"foo\nbar"', 'foo\nbar'),
180+
(r'"foo\n" "bar"', 'foo\nbar'),
181+
(r'"foo\tbar"', 'foo\tbar'),
182+
(r'"foo\t" "bar"', 'foo\tbar'),
183+
# escaped quotes
184+
(r'"foo\"bar"', 'foo"bar'),
185+
(r'"foo\"" "bar"', 'foo"bar'),
186+
(r'"foo\\" "bar"', 'foo\\bar'),
187+
# octal escapes
188+
(r'"\120\171\164\150\157\156"', 'Python'),
189+
(r'"\120\171\164" "\150\157\156"', 'Python'),
190+
(r'"\"\120\171\164" "\150\157\156\""', '"Python"'),
191+
# hex escapes
192+
(r'"\x50\x79\x74\x68\x6f\x6e"', 'Python'),
193+
(r'"\x50\x79\x74" "\x68\x6f\x6e"', 'Python'),
194+
(r'"\"\x50\x79\x74" "\x68\x6f\x6e\""', '"Python"'),
195+
)
196+
197+
with temp_cwd():
198+
for po_string, expected in valid_strings:
199+
with self.subTest(po_string=po_string):
200+
# Construct a PO file with a single entry,
201+
# compile it, read it into a catalog and
202+
# check the result.
203+
po = f'msgid {po_string}\nmsgstr "translation"'
204+
Path('messages.po').write_text(po)
205+
# Reset the global MESSAGES dictionary
206+
msgfmt.MESSAGES.clear()
207+
msgfmt.make('messages.po', 'messages.mo')
208+
209+
with open('messages.mo', 'rb') as f:
210+
actual = GNUTranslations(f)
211+
212+
self.assertDictEqual(actual._catalog, {expected: 'translation'})
213+
214+
invalid_strings = (
215+
# "''", # invalid but currently accepted
216+
'"',
217+
'"""',
218+
'"" "',
219+
'foo',
220+
'"" "foo',
221+
'"foo" foo',
222+
'42',
223+
'"" 42 ""',
224+
# disallowed escape sequences
225+
# r'"\'"', # invalid but currently accepted
226+
# r'"\e"', # invalid but currently accepted
227+
# r'"\8"', # invalid but currently accepted
228+
# r'"\9"', # invalid but currently accepted
229+
r'"\x"',
230+
r'"\u1234"',
231+
r'"\N{ROMAN NUMERAL NINE}"'
232+
)
233+
with temp_cwd():
234+
for invalid_string in invalid_strings:
235+
with self.subTest(string=invalid_string):
236+
po = f'msgid {invalid_string}\nmsgstr "translation"'
237+
Path('messages.po').write_text(po)
238+
# Reset the global MESSAGES dictionary
239+
msgfmt.MESSAGES.clear()
240+
with self.assertRaises(Exception):
241+
msgfmt.make('messages.po', 'messages.mo')
242+
243+
143244
class CLITest(unittest.TestCase):
144245

145246
def test_help(self):
146247
for option in ('--help', '-h'):
147-
res = assert_python_ok(msgfmt, option)
248+
res = assert_python_ok(msgfmt_py, option)
148249
err = res.err.decode('utf-8')
149250
self.assertIn('Generate binary message catalog from textual translation description.', err)
150251

151252
def test_version(self):
152253
for option in ('--version', '-V'):
153-
res = assert_python_ok(msgfmt, option)
254+
res = assert_python_ok(msgfmt_py, option)
154255
out = res.out.decode('utf-8').strip()
155256
self.assertEqual('msgfmt.py 1.2', out)
156257

157258
def test_invalid_option(self):
158-
res = assert_python_failure(msgfmt, '--invalid-option')
259+
res = assert_python_failure(msgfmt_py, '--invalid-option')
159260
err = res.err.decode('utf-8')
160261
self.assertIn('Generate binary message catalog from textual translation description.', err)
161262
self.assertIn('option --invalid-option not recognized', err)
162263

163264
def test_no_input_file(self):
164-
res = assert_python_ok(msgfmt)
265+
res = assert_python_ok(msgfmt_py)
165266
err = res.err.decode('utf-8').replace('\r\n', '\n')
166267
self.assertIn('No input file given\n'
167268
"Try `msgfmt --help' for more information.", err)
168269

169270
def test_nonexistent_file(self):
170-
assert_python_failure(msgfmt, 'nonexistent.po')
271+
assert_python_failure(msgfmt_py, 'nonexistent.po')
171272

172273

173274
def update_catalog_snapshots():

0 commit comments

Comments
 (0)