Skip to content

Commit 1af1168

Browse files
authored
Merge pull request #560 from dubinsky/prolog-epilog
2 parents 441678d + 156f2ea commit 1af1168

File tree

4 files changed

+75
-24
lines changed

4 files changed

+75
-24
lines changed

jvm/src/test/scala/scala/xml/XMLTest.scala

+22-9
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ class XMLTestJVM {
586586
def issue508commentParsing: Unit = {
587587
// confirm that comments are processed correctly now
588588
roundtrip("<a><!-- comment --> suffix</a>")
589-
roundtrip("<a>prefix <!-- comment --> suffix</a>")
589+
roundtrip("<a>prefix <!-- comment --> <!-- comment2 --> suffix</a>")
590590
roundtrip("<a>prefix <b><!-- comment --></b> suffix</a>")
591591
roundtrip("<a>prefix <b><!-- multi-\nline\n comment --></b> suffix</a>")
592592
roundtrip("""<a>prefix <b><!-- multi-
@@ -596,13 +596,7 @@ class XMLTestJVM {
596596
// confirm that processing instructions were always processed correctly
597597
roundtrip("<a><?target content ?> suffix</a>")
598598
roundtrip("<a>prefix <?target content ?> suffix</a>")
599-
roundtrip("<a>prefix <b><?target content?></b> suffix</a>")
600-
601-
// TODO since XMLLoader retrieves FactoryAdapter.rootNode,
602-
// capturing comments before and after the root element is not currently possible
603-
// (by the way, the same applies to processing instructions).
604-
//check("<!-- prologue --><a>text</a>")
605-
//check("<a>text</a><!-- epilogue -->")
599+
roundtrip("<a>prefix <b><?target content?> </b> suffix</a>")
606600
}
607601

608602
@UnitTest
@@ -613,7 +607,26 @@ class XMLTestJVM {
613607
roundtrip("""<a>prefix <b><![CDATA[
614608
| multi-
615609
| line cdata
616-
| section]]></b> suffix</a>""".stripMargin)
610+
| section]]> </b> suffix</a>""".stripMargin)
611+
}
612+
613+
def roundtripNodes(xml: String): Unit = assertEquals(xml, XML.loadStringNodes(xml).map(_.toString).mkString(""))
614+
615+
@UnitTest
616+
def xmlLoaderLoadNodes: Unit = {
617+
roundtripNodes("<!-- prolog --><a>text</a>")
618+
roundtripNodes("<!-- prolog --><?target content ?><!-- comment2 --><a>text</a>")
619+
roundtripNodes("""<!-- prolog
620+
| --><?target content ?><!--
621+
| comment2 --><a>text</a>""".stripMargin)
622+
623+
roundtripNodes("<a>text</a><!-- epilogue -->")
624+
roundtripNodes("<a>text</a><!-- epilogue --><?target content ?><!-- comment2 -->")
625+
626+
// Note: at least with the JDK's Xerces, whitespace in the prolog and epilogue gets lost in parsing:
627+
// the parser does not fire any white-space related events, so:
628+
// does not work: roundtripNodes("<!-- c --> <a/>")
629+
// does not work: roundtripNodes("<a/> <!-- epilogue -->")
617630
}
618631

619632
@UnitTest

shared/src/main/scala/scala/xml/factory/XMLLoader.scala

+28-7
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,29 @@ trait XMLLoader[T <: Node] {
5151
* The methods available in scala.xml.XML use the XML parser in the JDK.
5252
*/
5353
def loadXML(source: InputSource, parser: SAXParser): T = {
54-
val newAdapter = adapter
54+
val result: FactoryAdapter = parse(source, parser)
55+
result.rootElem.asInstanceOf[T]
56+
}
57+
58+
def loadXMLNodes(source: InputSource, parser: SAXParser): Seq[Node] = {
59+
val result: FactoryAdapter = parse(source, parser)
60+
result.prolog ++ (result.rootElem :: result.epilogue)
61+
}
62+
63+
private def parse(source: InputSource, parser: SAXParser): FactoryAdapter = {
64+
val result: FactoryAdapter = adapter
5565

5666
try {
57-
parser.setProperty("http://xml.org/sax/properties/lexical-handler", newAdapter)
67+
parser.setProperty("http://xml.org/sax/properties/lexical-handler", result)
5868
} catch {
5969
case _: SAXNotRecognizedException =>
6070
}
6171

62-
newAdapter.scopeStack = TopScope :: newAdapter.scopeStack
63-
parser.parse(source, newAdapter)
64-
newAdapter.scopeStack = newAdapter.scopeStack.tail
72+
result.scopeStack = TopScope :: result.scopeStack
73+
parser.parse(source, result)
74+
result.scopeStack = result.scopeStack.tail
6575

66-
newAdapter.rootElem.asInstanceOf[T]
76+
result
6777
}
6878

6979
/** Loads XML from the given file, file descriptor, or filename. */
@@ -80,4 +90,15 @@ trait XMLLoader[T <: Node] {
8090

8191
/** Loads XML from the given String. */
8292
def loadString(string: String): T = loadXML(fromString(string), parser)
83-
}
93+
94+
/** Load XML nodes, including comments and processing instructions that precede and follow the root element. */
95+
def loadFileNodes(file: File): Seq[Node] = loadXMLNodes(fromFile(file), parser)
96+
def loadFileNodes(fd: FileDescriptor): Seq[Node] = loadXMLNodes(fromFile(fd), parser)
97+
def loadFileNodes(name: String): Seq[Node] = loadXMLNodes(fromFile(name), parser)
98+
def loadNodes(is: InputStream): Seq[Node] = loadXMLNodes(fromInputStream(is), parser)
99+
def loadNodes(reader: Reader): Seq[Node] = loadXMLNodes(fromReader(reader), parser)
100+
def loadNodes(sysID: String): Seq[Node] = loadXMLNodes(fromSysId(sysID), parser)
101+
def loadNodes(source: InputSource): Seq[Node] = loadXMLNodes(source, parser)
102+
def loadNodes(url: URL): Seq[Node] = loadXMLNodes(fromInputStream(url.openStream()), parser)
103+
def loadStringNodes(string: String): Seq[Node] = loadXMLNodes(fromString(string), parser)
104+
}

shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala

+23-8
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@ trait ConsoleErrorHandler extends DefaultHandler2 {
4040
* underlying SAX parser.
4141
*/
4242
abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] {
43+
var prolog: List[Node] = List.empty
4344
var rootElem: Node = _
45+
var epilogue: List[Node] = List.empty
4446

45-
val buffer = new StringBuilder()
47+
val buffer: StringBuilder = new StringBuilder()
4648
private var inCDATA: Boolean = false
4749

4850
/** List of attributes
@@ -51,28 +53,28 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
5153
*
5254
* @since 2.0.0
5355
*/
54-
var attribStack = List.empty[MetaData]
56+
var attribStack: List[MetaData] = List.empty
5557
/** List of elements
5658
*
5759
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
5860
*
5961
* @since 2.0.0
6062
*/
61-
var hStack = List.empty[Node] // [ element ] contains siblings
63+
var hStack: List[Node] = List.empty // [ element ] contains siblings
6264
/** List of element names
6365
*
6466
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
6567
*
6668
* @since 2.0.0
6769
*/
68-
var tagStack = List.empty[String]
70+
var tagStack: List[String] = List.empty
6971
/** List of namespaces
7072
*
7173
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
7274
*
7375
* @since 2.0.0
7476
*/
75-
var scopeStack = List.empty[NamespaceBinding]
77+
var scopeStack: List[NamespaceBinding] = List.empty
7678

7779
var curTag: String = _
7880
var capture: Boolean = false
@@ -123,7 +125,7 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
123125
// ContentHandler methods
124126
//
125127

126-
val normalizeWhitespace = false
128+
val normalizeWhitespace: Boolean = false
127129

128130
/**
129131
* Capture characters, possibly normalizing whitespace.
@@ -177,13 +179,20 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
177179
attributes: Attributes): Unit =
178180
{
179181
captureText()
182+
183+
// capture the prolog at the start of the root element
184+
if (tagStack.isEmpty) {
185+
prolog = hStack.reverse
186+
hStack = List.empty
187+
}
188+
180189
tagStack = curTag :: tagStack
181190
curTag = qname
182191

183192
val localName = splitName(qname)._2
184193
capture = nodeContainsText(localName)
185194

186-
hStack = null :: hStack
195+
hStack = null :: hStack
187196
var m: MetaData = Null
188197
var scpe: NamespaceBinding =
189198
if (scopeStack.isEmpty) TopScope
@@ -193,7 +202,7 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
193202
val qname = attributes getQName i
194203
val value = attributes getValue i
195204
val (pre, key) = splitName(qname)
196-
def nullIfEmpty(s: String) = if (s == "") null else s
205+
def nullIfEmpty(s: String): String = if (s == "") null else s
197206

198207
if (pre == "xmlns" || (pre == null && qname == "xmlns")) {
199208
val arg = if (pre == null) null else key
@@ -250,6 +259,12 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
250259
capture = curTag != null && nodeContainsText(curTag) // root level
251260
}
252261

262+
override def endDocument(): Unit = {
263+
// capture the epilogue at the end of the document
264+
epilogue = hStack.init.reverse
265+
hStack = hStack.last :: Nil
266+
}
267+
253268
/**
254269
* Processing instruction.
255270
*/

shared/src/main/scala/scala/xml/parsing/MarkupParser.scala

+2
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
9898
var extIndex = -1
9999

100100
/** holds temporary values of pos */
101+
// Note: this is clearly an override, but if marked as such it causes a "...cannot override a mutable variable"
102+
// error with Scala 3; does it work with Scala 3 if not explicitly marked as an override remains to be seen...
101103
var tmppos: Int = _
102104

103105
/** holds the next character */

0 commit comments

Comments
 (0)