Skip to content

Commit d7133c7

Browse files
hartworkmcepl
authored and committed
Fix etree XMLPullParser tests for Expat >=2.6.0 with reparse deferral
Combined with gh#python/cpython!31453 bpo-46811: Make test suite support Expat >=2.4.5 (pythonGH-31453) Curly brackets were never allowed in namespace URIs according to RFC 3986, and so-called namespace-validating XML parsers have the right to reject them as invalid URIs. libexpat >=2.4.5 has become stricter in that regard due to related security issues; with ET.XML instantiating a namespace-aware parser under the hood, this test has no future in CPython. References: - https://datatracker.ietf.org/doc/html/rfc3986 - https://www.w3.org/TR/xml-names/ Also, test_minidom.py: Support Expat >=2.4.5 (cherry picked from commit 2cae938) Co-authored-by: Sebastian Pipping <[email protected]> Fixes: gh#python#115133 From-PR: gh#python/cpython!115138 Patch: CVE-2023-52425-libexpat-2.6.0-backport.patch
1 parent 23fc28b commit d7133c7

7 files changed

+146
-9
lines changed

Lib/test/support/__init__.py

+12
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import re
2222
import shutil
2323
import socket
24+
import pyexpat
2425
import stat
2526
import struct
2627
import subprocess
@@ -112,6 +113,7 @@
112113
"run_with_locale", "swap_item",
113114
"swap_attr", "Matcher", "set_memlimit", "SuppressCrashReport", "sortdict",
114115
"run_with_tz", "PGO", "missing_compiler_executable", "fd_count",
116+
"fails_with_expat_2_6_0", "is_expat_2_6_0"
115117
]
116118

117119
class Error(Exception):
@@ -2882,3 +2884,13 @@ def adjust_int_max_str_digits(max_digits):
28822884
yield
28832885
finally:
28842886
sys.set_int_max_str_digits(current)
2887+
2888+
2889+
@functools.lru_cache(maxsize=32)
2890+
def _is_expat_2_6_0():
2891+
return hasattr(pyexpat.ParserCreate(), 'SetReparseDeferralEnabled')
2892+
is_expat_2_6_0 = _is_expat_2_6_0()
2893+
2894+
fails_with_expat_2_6_0 = (unittest.expectedFailure
2895+
if is_expat_2_6_0
2896+
else lambda test: test)

Lib/test/test_minidom.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from xml.dom.minidom import parse, Node, Document, parseString
1111
from xml.dom.minidom import getDOMImplementation
12+
from xml.parsers.expat import ExpatError
1213

1314

1415
tstfile = support.findfile("test.xml", subdir="xmltestdata")
@@ -1156,7 +1157,11 @@ def testEncodings(self):
11561157

11571158
# Verify that character decoding errors raise exceptions instead
11581159
# of crashing
1159-
self.assertRaises(UnicodeDecodeError, parseString,
1160+
# It doesn’t make any sense to insist on the exact text of the
1161+
# error message, or even the exact Exception … it is enough that
1162+
# the error has been discovered.
1163+
with self.assertRaises((UnicodeDecodeError, ExpatError)):
1164+
parseString(
11601165
b'<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
11611166

11621167
doc.unlink()
@@ -1602,7 +1607,10 @@ def testEmptyXMLNSValue(self):
16021607
self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE)
16031608

16041609
def testExceptionOnSpacesInXMLNSValue(self):
1605-
with self.assertRaisesRegex(ValueError, 'Unsupported syntax'):
1610+
# It doesn’t make any sense to insist on the exact text of the
1611+
# error message, or even the exact Exception … it is enough that
1612+
# the error has been discovered.
1613+
with self.assertRaises((ExpatError, ValueError)):
16061614
parseString('<element xmlns:abc="http:abc.com/de f g/hi/j k"><abc:foo /></element>')
16071615

16081616
def testDocRemoveChild(self):

Lib/test/test_pyexpat.py

+60-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from xml.parsers import expat
1212
from xml.parsers.expat import errors
1313

14-
from test.support import sortdict
14+
from test.support import sortdict, is_expat_2_6_0
1515

1616

1717
class SetAttributeTest(unittest.TestCase):
@@ -729,5 +729,64 @@ def resolve_entity(context, base, system_id, public_id):
729729
self.assertEqual(handler_call_args, [("bar", "baz")])
730730

731731

732+
class ReparseDeferralTest(unittest.TestCase):
733+
def test_getter_setter_round_trip(self):
734+
if not is_expat_2_6_0:
735+
self.skipTest("Linked libexpat doesn't support reparse deferral")
736+
737+
parser = expat.ParserCreate()
738+
enabled = (expat.version_info >= (2, 6, 0))
739+
740+
self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
741+
parser.SetReparseDeferralEnabled(False)
742+
self.assertIs(parser.GetReparseDeferralEnabled(), False)
743+
parser.SetReparseDeferralEnabled(True)
744+
self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
745+
746+
def test_reparse_deferral_enabled(self):
747+
if not is_expat_2_6_0:
748+
self.skipTest("Linked libexpat doesn't support reparse deferral")
749+
750+
started = []
751+
752+
def start_element(name, _):
753+
started.append(name)
754+
755+
parser = expat.ParserCreate()
756+
parser.StartElementHandler = start_element
757+
self.assertTrue(parser.GetReparseDeferralEnabled())
758+
759+
for chunk in (b'<doc', b'/>'):
760+
parser.Parse(chunk, False)
761+
762+
# The key test: Have handlers already fired? Expecting: no.
763+
self.assertEqual(started, [])
764+
765+
parser.Parse(b'', True)
766+
767+
self.assertEqual(started, ['doc'])
768+
769+
def test_reparse_deferral_disabled(self):
770+
if not is_expat_2_6_0:
771+
self.skipTest("Linked libexpat doesn't support reparse deferral")
772+
773+
started = []
774+
775+
def start_element(name, _):
776+
started.append(name)
777+
778+
parser = expat.ParserCreate()
779+
parser.StartElementHandler = start_element
780+
if is_expat_2_6_0:
781+
parser.SetReparseDeferralEnabled(False)
782+
self.assertFalse(parser.GetReparseDeferralEnabled())
783+
784+
for chunk in (b'<doc', b'/>'):
785+
parser.Parse(chunk, False)
786+
787+
# The key test: Have handlers already fired? Expecting: yes.
788+
self.assertEqual(started, ['doc'])
789+
790+
732791
if __name__ == "__main__":
733792
unittest.main()

Lib/test/test_sax.py

+53-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import shutil
2323
from urllib.error import URLError
2424
from test import support
25-
from test.support import findfile, run_unittest, TESTFN
25+
from test.support import findfile, run_unittest, TESTFN, is_expat_2_6_0
2626

2727
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
2828
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
@@ -1168,6 +1168,58 @@ def test_expat_incremental_reset(self):
11681168

11691169
self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")
11701170

1171+
def test_flush_reparse_deferral_enabled(self):
1172+
if not is_expat_2_6_0:
1173+
self.skipTest("Linked libexpat doesn't support reparse deferral")
1174+
1175+
result = BytesIO()
1176+
xmlgen = XMLGenerator(result)
1177+
parser = create_parser()
1178+
parser.setContentHandler(xmlgen)
1179+
1180+
for chunk in ("<doc", ">"):
1181+
parser.feed(chunk)
1182+
1183+
self.assertEqual(result.getvalue(), start) # i.e. no elements started
1184+
self.assertTrue(parser._parser.GetReparseDeferralEnabled())
1185+
1186+
parser.flush()
1187+
1188+
self.assertTrue(parser._parser.GetReparseDeferralEnabled())
1189+
self.assertEqual(result.getvalue(), start + b"<doc>")
1190+
1191+
parser.feed("</doc>")
1192+
parser.close()
1193+
1194+
self.assertEqual(result.getvalue(), start + b"<doc></doc>")
1195+
1196+
def test_flush_reparse_deferral_disabled(self):
1197+
if not is_expat_2_6_0:
1198+
self.skipTest("Linked libexpat doesn't support reparse deferral")
1199+
1200+
result = BytesIO()
1201+
xmlgen = XMLGenerator(result)
1202+
parser = create_parser()
1203+
parser.setContentHandler(xmlgen)
1204+
1205+
for chunk in ("<doc", ">"):
1206+
parser.feed(chunk)
1207+
1208+
parser._parser.SetReparseDeferralEnabled(False)
1209+
self.assertEqual(result.getvalue(), start) # i.e. no elements started
1210+
1211+
self.assertFalse(parser._parser.GetReparseDeferralEnabled())
1212+
1213+
parser.flush()
1214+
1215+
self.assertFalse(parser._parser.GetReparseDeferralEnabled())
1216+
self.assertEqual(result.getvalue(), start + b"<doc>")
1217+
1218+
parser.feed("</doc>")
1219+
parser.close()
1220+
1221+
self.assertEqual(result.getvalue(), start + b"<doc></doc>")
1222+
11711223
# ===== Locator support
11721224

11731225
def test_expat_locator_noinfo(self):

Lib/test/test_xml_etree.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818

1919
from itertools import product
2020
from test import support
21-
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
21+
from test.support import (TESTFN, findfile, import_fresh_module,
22+
gc_collect, swap_attr, is_expat_2_6_0, fails_with_expat_2_6_0)
2223

2324
# pyET is the pure-Python implementation.
2425
#
@@ -1047,6 +1048,7 @@ def assert_event_tags(self, parser, expected):
10471048
def test_simple_xml(self):
10481049
for chunk_size in (None, 1, 5):
10491050
with self.subTest(chunk_size=chunk_size):
1051+
expected_events = []
10501052
parser = ET.XMLPullParser()
10511053
self.assert_event_tags(parser, [])
10521054
self._feed(parser, "<!-- comment -->\n", chunk_size)
@@ -1056,16 +1058,17 @@ def test_simple_xml(self):
10561058
chunk_size)
10571059
self.assert_event_tags(parser, [])
10581060
self._feed(parser, ">\n", chunk_size)
1059-
self.assert_event_tags(parser, [('end', 'element')])
1061+
expected_events += [('end', 'element')]
10601062
self._feed(parser, "<element>text</element>tail\n", chunk_size)
10611063
self._feed(parser, "<empty-element/>\n", chunk_size)
1062-
self.assert_event_tags(parser, [
1064+
expected_events += [
10631065
('end', 'element'),
10641066
('end', 'empty-element'),
1065-
])
1067+
]
10661068
self._feed(parser, "</root>\n", chunk_size)
1067-
self.assert_event_tags(parser, [('end', 'root')])
1069+
expected_events += [('end', 'root')]
10681070
self.assertIsNone(parser.close())
1071+
self.assert_event_tags(parser, expected_events)
10691072

10701073
def test_feed_while_iterating(self):
10711074
parser = ET.XMLPullParser()
@@ -1668,6 +1671,7 @@ def test_issue6233(self):
16681671
b"<?xml version='1.0' encoding='ascii'?>\n"
16691672
b'<body>t&#227;g</body>')
16701673

1674+
@unittest.skip('Fails with modern libexpat.')
16711675
def test_issue3151(self):
16721676
e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
16731677
self.assertEqual(e.tag, '{${stuff}}localname')
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Make test suite support Expat >=2.4.5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix etree XMLPullParser tests for Expat >=2.6.0 with reparse deferral

0 commit comments

Comments
 (0)