From c37145af72eef99b787f9c2bbf3d0160a57e443e Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Tue, 23 Jul 2019 03:05:01 +0400 Subject: [PATCH 01/14] Adding standalone declaration to minidom XML document writer (Issue37534). --- Lib/xml/dom/minidom.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 43569ddcbeacd9..22bd4e425697e6 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -46,7 +46,8 @@ def __bool__(self): def toxml(self, encoding=None): return self.toprettyxml("", "", encoding) - def toprettyxml(self, indent="\t", newl="\n", encoding=None): + def toprettyxml(self, indent="\t", newl="\n", encoding=None, + standalone=None): if encoding is None: writer = io.StringIO() else: @@ -56,7 +57,7 @@ def toprettyxml(self, indent="\t", newl="\n", encoding=None): newline='\n') if self.nodeType == Node.DOCUMENT_NODE: # Can pass encoding only to document, to put it into XML header - self.writexml(writer, "", indent, newl, encoding) + self.writexml(writer, "", indent, newl, encoding, standalone) else: self.writexml(writer, "", indent, newl) if encoding is None: @@ -1786,12 +1787,21 @@ def importNode(self, node, deep): raise xml.dom.NotSupportedErr("cannot import document type nodes") return _clone_node(node, deep, self) - def writexml(self, writer, indent="", addindent="", newl="", encoding=None): - if encoding is None: - writer.write(''+newl) + def writexml(self, writer, indent="", addindent="", newl="", encoding=None, + standalone=None): + # In case standalone declaration is set + if standalone is not None: + standalone = "standalone='{}'".format('yes' if standalone else 'no') else: - writer.write('%s' % ( - encoding, newl)) + standalone = '' + + writer.write( + '{newline}'.format( + encoding="encoding='{}'".format(encoding) if encoding else '', + standalone=standalone, + newline=newl) + ) + for node in self.childNodes: node.writexml(writer, indent, 
addindent, newl) From 40350464c861dcb565b10a3141bf8028c9ff73ab Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 20 Aug 2019 00:02:38 +0000 Subject: [PATCH 02/14] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst diff --git a/Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst b/Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst new file mode 100644 index 00000000000000..0c9dd29251af0a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst @@ -0,0 +1,2 @@ +When using minidom module to generate XML documents the ability to add Standalone Document Declaration is added. +All the changes are made to generate a document in compliance with Extensible Markup Language (XML) 1.0 (Fifth Edition) W3C Recommendation (available here: https://www.w3.org/TR/xml/#sec-prolog-dtd). \ No newline at end of file From e7b1c40c8048d517d9ee7057090d92f352d308b2 Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Tue, 20 Aug 2019 04:46:09 +0400 Subject: [PATCH 03/14] Adding standalone variable to toxml() method and removing the mistaken useless space. 
--- Lib/xml/dom/minidom.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 22bd4e425697e6..6cbbb3fedd3a4a 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -43,8 +43,8 @@ class Node(xml.dom.Node): def __bool__(self): return True - def toxml(self, encoding=None): - return self.toprettyxml("", "", encoding) + def toxml(self, encoding=None, standalone=None): + return self.toprettyxml("", "", encoding, standalone) def toprettyxml(self, indent="\t", newl="\n", encoding=None, standalone=None): @@ -1796,7 +1796,7 @@ def writexml(self, writer, indent="", addindent="", newl="", encoding=None, standalone = '' writer.write( - '{newline}'.format( + '{newline}'.format( encoding="encoding='{}'".format(encoding) if encoding else '', standalone=standalone, newline=newl) From e8b1b187bbab25593db40e603739c7e2e7d81212 Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Tue, 20 Aug 2019 04:46:56 +0400 Subject: [PATCH 04/14] Adding missing unit tests and adding my name to the list of contributors. 
--- Lib/test/test_minidom.py | 13 +++++++++++++ Misc/ACKS | 1 + 2 files changed, 14 insertions(+) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index f3ef958b535373..852eeec6f61514 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1152,6 +1152,19 @@ def testEncodings(self): doc.unlink() + def testStandalone(self): + doc = parseString('') + self.assertEqual(doc.toxml(), + '\u20ac') + self.assertEqual(doc.toxml(standalone=None), + '\u20ac') + self.assertEqual(doc.toxml(standalone=True), + '\u20ac') + self.assertEqual(doc.toxml(standalone=False), + '\u20ac') + + doc.unlink() + class UserDataHandler: called = 0 def handle(self, operation, key, data, src, dst): diff --git a/Misc/ACKS b/Misc/ACKS index 9cddcb3a871f70..45c647dc8e348f 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1848,3 +1848,4 @@ Peter Åstrand Zheao Li Carsten Klein Diego Rojas +Henrik Harutyunyan From 3c8db77ef1ad0c7f490a274ae9eb6cbd0eeae2c9 Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Tue, 20 Aug 2019 16:26:14 +0400 Subject: [PATCH 05/14] Minor fix to make the encoding tag backwards compatible. --- Lib/xml/dom/minidom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 8aef4fea9b6304..07ac78d947b023 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1798,7 +1798,7 @@ def writexml(self, writer, indent="", addindent="", newl="", encoding=None, standalone = '' writer.write( '{newline}'.format( - encoding="encoding='{}'".format(encoding) if encoding else '', + encoding="encoding=\"{}\"".format(encoding) if encoding else '', standalone=standalone, newline=newl) ) From 70a69a9edfccba74b43a9289a5ce962cb39cfbbc Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Thu, 19 Sep 2019 02:43:29 +0400 Subject: [PATCH 06/14] Using f-strings for formatting the prolog.
--- Lib/xml/dom/minidom.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 07ac78d947b023..db2ad976048a2e 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1796,12 +1796,9 @@ def writexml(self, writer, indent="", addindent="", newl="", encoding=None, else: standalone = '' - writer.write( - '{newline}'.format( - encoding="encoding=\"{}\"".format(encoding) if encoding else '', - standalone=standalone, - newline=newl) - ) + encoding = "encoding=\"{}\"".format(encoding) if encoding else '' + + writer.write(f'{newl}') for node in self.childNodes: node.writexml(writer, indent, addindent, newl) From b603224aae45a58a6989ee40b666a1b3876ee63e Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Thu, 19 Sep 2019 02:46:21 +0400 Subject: [PATCH 07/14] Missing space added before standalone declaration. --- Lib/test/test_minidom.py | 4 ++-- Lib/xml/dom/minidom.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index b7df61c4f6a81e..d0a3c57d62a148 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1159,9 +1159,9 @@ def testStandalone(self): self.assertEqual(doc.toxml(standalone=None), '\u20ac') self.assertEqual(doc.toxml(standalone=True), - '\u20ac') + '\u20ac') self.assertEqual(doc.toxml(standalone=False), - '\u20ac') + '\u20ac') doc.unlink() diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index db2ad976048a2e..439f1bf846e5be 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1792,7 +1792,7 @@ def writexml(self, writer, indent="", addindent="", newl="", encoding=None, standalone=None): # In case standalone declaration is set if standalone is not None: - standalone = "standalone='{}'".format('yes' if standalone else 'no') + standalone = " standalone='{}'".format('yes' if standalone else 'no') else: standalone = '' From 59a34958a9c99814002d2e6f0332cc670f7be3a4
Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Thu, 19 Sep 2019 02:50:57 +0400 Subject: [PATCH 08/14] Using double quotes for standalone declaration. --- Lib/test/test_minidom.py | 4 ++-- Lib/xml/dom/minidom.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index d0a3c57d62a148..e16b9c421b5583 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1159,9 +1159,9 @@ def testStandalone(self): self.assertEqual(doc.toxml(standalone=None), '\u20ac') self.assertEqual(doc.toxml(standalone=True), - '\u20ac') + '\u20ac') self.assertEqual(doc.toxml(standalone=False), - '\u20ac') + '\u20ac') doc.unlink() diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 439f1bf846e5be..ab0cccffe00d85 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1792,7 +1792,7 @@ def writexml(self, writer, indent="", addindent="", newl="", encoding=None, standalone=None): # In case standalone declaration is set if standalone is not None: - standalone = " standalone='{}'".format('yes' if standalone else 'no') + standalone = ' standalone="{}"'.format('yes' if standalone else 'no') else: standalone = '' From 3a0594b9a33b0af793e5d7646f3120be4fd86e24 Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Thu, 19 Sep 2019 03:05:46 +0400 Subject: [PATCH 09/14] Test case with encoding declaration added. 
--- Lib/test/test_minidom.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index e16b9c421b5583..0ef7b6ad39148a 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1159,9 +1159,12 @@ def testStandalone(self): self.assertEqual(doc.toxml(standalone=None), '\u20ac') self.assertEqual(doc.toxml(standalone=True), - '\u20ac') + '\u20ac') self.assertEqual(doc.toxml(standalone=False), - '\u20ac') + '\u20ac') + self.assertEqual(doc.toxml('utf-8', True), + b'' + b'\xe2\x82\xac') doc.unlink() From 162486ad1b866dd3ac9eb88db549bd8aca49dbb0 Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Thu, 19 Sep 2019 03:22:41 +0400 Subject: [PATCH 10/14] Last name alphabetical ordering fixed. --- Misc/ACKS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/ACKS b/Misc/ACKS index 726c87b7fa1e16..fbbb17cd7324de 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -643,6 +643,7 @@ David Harrigan Brian Harring Jonathan Hartley Travis B. Hartwell +Henrik Harutyunyan Shane Harvey Larry Hastings Tim Hatch @@ -1883,4 +1884,3 @@ Robert Leenders Tim Hopper Dan Lidral-Porter Ngalim Siregar -Henrik Harutyunyan From 5b8cee5519adbe2a437d47b99e73b0a6c082eb87 Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Fri, 25 Oct 2019 02:50:02 +0400 Subject: [PATCH 11/14] Fixing the double-space issue in document declarations. 
--- Lib/test/test_minidom.py | 4 ++-- Lib/xml/dom/minidom.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 0ef7b6ad39148a..1663b1f1143ddc 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1159,9 +1159,9 @@ def testStandalone(self): self.assertEqual(doc.toxml(standalone=None), '\u20ac') self.assertEqual(doc.toxml(standalone=True), - '\u20ac') + '\u20ac') self.assertEqual(doc.toxml(standalone=False), - '\u20ac') + '\u20ac') self.assertEqual(doc.toxml('utf-8', True), b'' b'\xe2\x82\xac') diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index ab0cccffe00d85..8aadbfbff88f17 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1790,15 +1790,17 @@ def importNode(self, node, deep): def writexml(self, writer, indent="", addindent="", newl="", encoding=None, standalone=None): + declarations = [] + + if encoding: + declarations.append('encoding="{}"'.format(encoding)) # In case standalone declaration is set if standalone is not None: - standalone = ' standalone="{}"'.format('yes' if standalone else 'no') - else: - standalone = '' - - encoding = "encoding=\"{}\"".format(encoding) if encoding else '' + declarations.append( + 'standalone="{}"'.format('yes' if standalone else 'no') + ) - writer.write(f'{newl}') + writer.write(f'{newl}') for node in self.childNodes: node.writexml(writer, indent, addindent, newl) From e108c3d02c1514b5c9848845c4705a9261caf434 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Tue, 28 Jan 2020 08:17:19 +0100 Subject: [PATCH 12/14] Use f-strings instead of mixing them with format() --- Lib/xml/dom/minidom.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 8aadbfbff88f17..1083b481387100 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1793,12 +1793,9 @@ def writexml(self, writer, indent="", addindent="", 
newl="", encoding=None, declarations = [] if encoding: - declarations.append('encoding="{}"'.format(encoding)) - # In case standalone declaration is set + declarations.append(f'encoding="{encoding}"') if standalone is not None: - declarations.append( - 'standalone="{}"'.format('yes' if standalone else 'no') - ) + declarations.append(f'standalone="{"yes" if standalone else "no"}"') writer.write(f'{newl}') From 3b38bbe57e4d454446465f72ac9e38fdc4213d0c Mon Sep 17 00:00:00 2001 From: hharutyunyan Date: Sun, 23 Feb 2020 20:39:47 +0400 Subject: [PATCH 13/14] Documents modified to cover the new features. --- Doc/library/xml.dom.minidom.rst | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index 8711242d95d741..897a117c51b1c7 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -132,7 +132,8 @@ module documentation. This section lists the differences between the API and ... # Work with dom. -.. method:: Node.writexml(writer, indent="", addindent="", newl="") +.. method:: Node.writexml(writer, indent="", addindent="", newl="", + encoding=None, standalone=None) Write XML to the writer object. The writer receives texts but not bytes as input, it should have a :meth:`write` method which matches that of the file object @@ -144,11 +145,18 @@ module documentation. This section lists the differences between the API and For the :class:`Document` node, an additional keyword argument *encoding* can be used to specify the encoding field of the XML header. + Similarly, explicitly stating the *standalone* argument causes the + standalone document declarations to be added to the prologue of the XML + document. + If the value is set to `True`, `standalone="yes"` is added, + otherwise the it is set to `"no"`. + Not stating the argument will omit the declaration from the document. + ..
versionchanged:: 3.8 The :meth:`writexml` method now preserves the attribute order specified by the user. -.. method:: Node.toxml(encoding=None) +.. method:: Node.toxml(encoding=None, standalone=None) Return a string or byte string containing the XML represented by the DOM node. @@ -160,11 +168,14 @@ module documentation. This section lists the differences between the API and encoding. Encoding this string in an encoding other than UTF-8 is likely incorrect, since UTF-8 is the default encoding of XML. + The *standalone* argument behaves exactly as in :meth:`writexml`. + .. versionchanged:: 3.8 The :meth:`toxml` method now preserves the attribute order specified by the user. -.. method:: Node.toprettyxml(indent="\\t", newl="\\n", encoding=None) +.. method:: Node.toprettyxml(indent="\\t", newl="\\n", encoding=None, + standalone=None) Return a pretty-printed version of the document. *indent* specifies the indentation string and defaults to a tabulator; *newl* specifies the string @@ -173,6 +184,8 @@ module documentation. This section lists the differences between the API and The *encoding* argument behaves like the corresponding argument of :meth:`toxml`. + The *standalone* argument behaves exactly as in :meth:`writexml`. + .. versionchanged:: 3.8 The :meth:`toprettyxml` method now preserves the attribute order specified by the user. From 51a5f0192c4a79f8e4f8a46babe796361c1f2bc6 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 28 Feb 2020 08:19:54 +0100 Subject: [PATCH 14/14] Fix typo in Doc/library/xml.dom.minidom.rst --- Doc/library/xml.dom.minidom.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index 897a117c51b1c7..2c78cd939243a8 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -149,7 +149,7 @@ module documentation.
This section lists the differences between the API and standalone document declarations to be added to the prologue of the XML document. If the value is set to `True`, `standalone="yes"` is added, - otherwise the it is set to `"no"`. + otherwise it is set to `"no"`. Not stating the argument will omit the declaration from the document. .. versionchanged:: 3.8