Skip to content

gh-121188: Escape invalid XML characters in regrtest #121195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions Lib/test/libregrtest/testresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import traceback
import unittest
from test import support
from test.libregrtest.utils import escape_xml

class RegressionTestResult(unittest.TextTestResult):
USE_XML = False
Expand Down Expand Up @@ -65,23 +66,24 @@ def _add_result(self, test, capture=False, **args):
if capture:
if self._stdout_buffer is not None:
stdout = self._stdout_buffer.getvalue().rstrip()
ET.SubElement(e, 'system-out').text = stdout
ET.SubElement(e, 'system-out').text = escape_xml(stdout)
if self._stderr_buffer is not None:
stderr = self._stderr_buffer.getvalue().rstrip()
ET.SubElement(e, 'system-err').text = stderr
ET.SubElement(e, 'system-err').text = escape_xml(stderr)

for k, v in args.items():
if not k or not v:
continue

e2 = ET.SubElement(e, k)
if hasattr(v, 'items'):
for k2, v2 in v.items():
if k2:
e2.set(k2, str(v2))
e2.set(k2, escape_xml(str(v2)))
else:
e2.text = str(v2)
e2.text = escape_xml(str(v2))
else:
e2.text = str(v)
e2.text = escape_xml(str(v))

@classmethod
def __makeErrorDict(cls, err_type, err_value, err_tb):
Expand Down
17 changes: 17 additions & 0 deletions Lib/test/libregrtest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os.path
import platform
import random
import re
import shlex
import signal
import subprocess
Expand Down Expand Up @@ -712,3 +713,19 @@ def get_signal_name(exitcode):
pass

return None


# Characters that may not appear in an XML 1.0 document.
# TAB (\x09), LF (\x0A) and CR (\x0D) are legal XML characters, so they are
# deliberately left out of the class: escaping them would flatten multi-line
# captured output into a single line littered with "&#10;".
ILLEGAL_XML_CHARS_RE = re.compile(
    '['
    '\x00-\x08\x0B\x0C\x0E-\x1F'  # ASCII control chars except TAB, LF, CR
    '\uD800-\uDFFF'  # surrogate characters
    '\uFFFE'
    '\uFFFF'
    ']')


def _escape_xml_replace(regs):
    """Return the replacement text for one illegal character.

    *regs* is the match object handed over by ``re.sub``; the pattern
    matches exactly one character, so ``regs[0]`` is a 1-character string.
    The result is the text ``&#N;`` where N is the decimal code point.
    """
    code_point = ord(regs[0])
    return f"&#{code_point};"


def escape_xml(text):
    """Replace every character of *text* that is illegal in XML.

    Each illegal character (control characters other than TAB/LF/CR,
    surrogates, U+FFFE and U+FFFF) is replaced with the text ``&#N;``,
    N being its decimal code point.  All other characters, including
    non-ASCII ones, are returned unchanged.
    """
    return ILLEGAL_XML_CHARS_RE.sub(_escape_xml_replace, text)
57 changes: 57 additions & 0 deletions Lib/test/test_regrtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import tempfile
import textwrap
import unittest
from xml.etree import ElementTree

from test import support
from test.support import import_helper
from test.support import os_helper
Expand Down Expand Up @@ -2254,6 +2256,44 @@ def test_pass(self):
self.check_executed_tests(output, testname, stats=1, parallel=True)
self.assertNotIn('SPAM SPAM SPAM', output)

def test_xml(self):
    # A failing test prints ESC (chr(27)), which is illegal in XML.
    # The JUnit file written by --junit-xml must still be parseable.
    code = textwrap.dedent(r"""
        import unittest
        from test import support

        class VerboseTests(unittest.TestCase):
            def test_failed(self):
                print("abc \x1b def")
                self.fail()
    """)
    testname = self.create_test(code=code)

    # Run sequentially
    filename = os_helper.TESTFN
    self.addCleanup(os_helper.unlink, filename)

    output = self.run_tests(testname, "--junit-xml", filename,
                            exitcode=EXITCODE_BAD_TEST)
    self.check_executed_tests(output, testname,
                              failed=testname,
                              stats=TestStats(1, 1, 0))

    # Test generated XML
    with open(filename, encoding="utf8") as fp:
        content = fp.read()

    # fromstring() raises ParseError if an illegal character leaked
    # into the file.
    testsuite = ElementTree.fromstring(content)
    self.assertEqual(int(testsuite.get('tests')), 1)
    self.assertEqual(int(testsuite.get('errors')), 0)
    self.assertEqual(int(testsuite.get('failures')), 1)

    testcase = testsuite[0][0]
    self.assertEqual(testcase.get('status'), 'run')
    self.assertEqual(testcase.get('result'), 'completed')
    self.assertGreater(float(testcase.get('time')), 0)
    for out in testcase.iter('system-out'):
        # escape_xml() replaces ESC with the text "&#27;"; that text,
        # not a raw control character, is what the element must hold.
        self.assertEqual(out.text, "abc &#27; def")


class TestUtils(unittest.TestCase):
def test_format_duration(self):
Expand Down Expand Up @@ -2437,6 +2477,23 @@ def id(self):
self.assertTrue(match_test(test_chdir))
self.assertFalse(match_test(test_copy))

def test_escape_xml(self):
    # escape_xml() replaces each illegal character with the text "&#N;",
    # N being the decimal code point of the character.
    escape_xml = utils.escape_xml

    # escape invalid XML characters
    self.assertEqual(escape_xml('abc \x1b def'),
                     'abc &#27; def')
    self.assertEqual(escape_xml('nul:\x00, bell:\x07'),
                     'nul:&#0;, bell:&#7;')
    self.assertEqual(escape_xml('surrogate:\uDC80'),
                     'surrogate:&#56448;')
    self.assertEqual(escape_xml('illegal \uFFFE and \uFFFF'),
                     'illegal &#65534; and &#65535;')

    # no escape for valid XML characters
    self.assertEqual(escape_xml('valid t\xe9xt \u20ac'),
                     'valid t\xe9xt \u20ac')


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
When creating the JUnit XML file, regrtest now escapes characters which are
invalid in XML, such as the chr(27) control character used in ANSI escape
sequences. Patch by Victor Stinner.
Loading