From bcf9cc1ab9d9a99c4302efab097851cff52eb898 Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Fri, 27 Dec 2024 02:20:40 -0800 Subject: [PATCH 1/7] fix apparent bug in xml.dom.xmlbuilder.DOMBuilder.parse() --- Lib/xml/dom/xmlbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index 8a200263497b89..e57d0e82546684 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -189,7 +189,7 @@ def parse(self, input): options.filter = self.filter options.errorHandler = self.errorHandler fp = input.byteStream - if fp is None and options.systemId: + if fp is None and input.systemId: import urllib.request fp = urllib.request.urlopen(input.systemId) return self._parse_bytestream(fp, options) From 4e2c24cb0b2d982b51dbd484f323fabab995d692 Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Fri, 27 Dec 2024 16:29:02 -0800 Subject: [PATCH 2/7] add NEWS entry --- .../Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst new file mode 100644 index 00000000000000..56e2fe6f85f4bf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst @@ -0,0 +1,3 @@ +Allow :meth:`!xml.dom.xmlbuilder.DOMParser.parse` to correctly handle +:class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a +:attr:`!systemId` attribute set. From 6ce5ebd23e615c02b36bfff354e184b2c437e9eb Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Fri, 27 Dec 2024 18:34:38 -0800 Subject: [PATCH 3/7] update DOMEntityResolver._guess_media_encoding for python 3 --- Lib/xml/dom/xmlbuilder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index e57d0e82546684..f52b3eb44a9bb4 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -248,9 +248,9 @@ def _create_opener(self): def _guess_media_encoding(self, source): info = source.byteStream.info() if "Content-Type" in info: - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() + for param in info.get_params([]): + if param[0] == 'charset': + return param[1].lower() class DOMInputSource(object): From 9a0d009f73cbb50848a93c72a39b76d50296decc Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Fri, 27 Dec 2024 18:36:33 -0800 Subject: [PATCH 4/7] update NEWS --- .../Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst index 56e2fe6f85f4bf..35621c547e91f1 100644 --- a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst +++ b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst @@ -1,3 +1,4 @@ Allow :meth:`!xml.dom.xmlbuilder.DOMParser.parse` to correctly handle :class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a -:attr:`!systemId` attribute set. +:attr:`!systemId` attribute set. Also, fix the broken +:meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity` method. From ad5275c8617430d5462c70016d223d092a99cb94 Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Sun, 29 Dec 2024 13:53:26 -0800 Subject: [PATCH 5/7] split the NEWS entry into two, and simplify _guess_media_encoding --- Lib/xml/dom/xmlbuilder.py | 9 +++++---- .../2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst | 3 +-- .../2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index f52b3eb44a9bb4..8d74c774b24db6 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -247,10 +247,11 @@ def _create_opener(self): def _guess_media_encoding(self, source): info = source.byteStream.info() - if "Content-Type" in info: - for param in info.get_params([]): - if param[0] == 'charset': - return param[1].lower() + # import email.message + # assert isinstance(info, email.message.Message) + for ctp_name, ctp_value in info.get_params(()): + if ctp_name == 'charset': + return ctp_value.lower() class DOMInputSource(object): diff --git a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst index 35621c547e91f1..56e2fe6f85f4bf 100644 --- a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst +++ b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst @@ -1,4 +1,3 @@ Allow :meth:`!xml.dom.xmlbuilder.DOMParser.parse` to correctly handle :class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a -:attr:`!systemId` attribute set. Also, fix the broken -:meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity` method. +:attr:`!systemId` attribute set. diff --git a/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst new file mode 100644 index 00000000000000..98c07297b06f8a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst @@ -0,0 +1,2 @@ +Fix :meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity`, which was +broken by the Python 3.0 transition. From b0cfb6a02a426e5ba9d34545e407dc3d68272218 Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Mon, 6 Jan 2025 15:14:46 -0800 Subject: [PATCH 6/7] use Message.get_param Co-authored-by: Serhiy Storchaka --- Lib/xml/dom/xmlbuilder.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index 8d74c774b24db6..a8852625a2f9a2 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -249,9 +249,10 @@ def _guess_media_encoding(self, source): info = source.byteStream.info() # import email.message # assert isinstance(info, email.message.Message) - for ctp_name, ctp_value in info.get_params(()): - if ctp_name == 'charset': - return ctp_value.lower() + charset = info.get_param('charset') + if charset is not None: + return charset.lower() + return None class DOMInputSource(object): From 737f856f0733c3cde89a46254b87aebdcb509943 Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Mon, 6 Jan 2025 16:47:45 -0800 Subject: [PATCH 7/7] add a few tests for xml.dom.xmlbuilder --- Lib/test/test_xml_dom_xmlbuilder.py | 88 +++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 Lib/test/test_xml_dom_xmlbuilder.py diff --git a/Lib/test/test_xml_dom_xmlbuilder.py b/Lib/test/test_xml_dom_xmlbuilder.py new file mode 100644 index 00000000000000..5f5f2eb328df9f --- /dev/null +++ b/Lib/test/test_xml_dom_xmlbuilder.py @@ -0,0 +1,88 @@ +import io +import unittest +from http import client +from test.test_httplib import FakeSocket +from unittest import mock +from xml.dom import getDOMImplementation, minidom, xmlbuilder + +SMALL_SAMPLE = b""" + + +Introduction to XSL +
+

A. Namespace

+""" + + +class XMLBuilderTest(unittest.TestCase): + def test_entity_resolver(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + resolver = xmlbuilder.DOMEntityResolver() + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + source = resolver.resolveEntity(None, "http://example.com/2000/svg") + + self.assertIsInstance(source, xmlbuilder.DOMInputSource) + self.assertIsNone(source.publicId) + self.assertEqual(source.systemId, "http://example.com/2000/svg") + self.assertEqual(source.baseURI, "http://example.com/2000/") + self.assertEqual(source.encoding, "utf-8") + self.assertIs(source.byteStream, response) + + self.assertIsNone(source.characterStream) + self.assertIsNone(source.stringData) + + def test_builder(self): + imp = getDOMImplementation() + self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS) + + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + self.assertIsInstance(builder, xmlbuilder.DOMBuilder) + + def test_parse_uri(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + + imp = getDOMImplementation() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + document = builder.parseURI("http://example.com/2000/svg") + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1) + + def test_parse_with_systemId(self): + response = io.BytesIO(SMALL_SAMPLE) + + with mock.patch("urllib.request.urlopen") as mock_open: + mock_open.return_value = response + + imp = getDOMImplementation() + source = imp.createDOMInputSource() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + source.systemId = "http://example.com/2000/svg" + document = builder.parse(source) + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1)