Skip to content

Commit 8b6527b

Browse files
dspeziarsc
authored andcommitted
encoding/xml: improve marshaller sanity checks of directives
When building a directive, the current sanity check prevents a '>' to be used, which makes a DOCTYPE directive with an internal subset be rejected. It is accepted by the parser though, so what can be parsed cannot be encoded. Improved the corresponding sanity check to mirror the behavior of the parser (in the way it handles angle brackets, quotes, and comments). Fixes #10158 Change-Id: Ieffea9f870f2694548e12897f8f47babc0ea4414 Reviewed-on: https://go-review.googlesource.com/11630 Reviewed-by: Russ Cox <[email protected]>
1 parent aed74b9 commit 8b6527b

File tree

2 files changed

+82
-3
lines changed

2 files changed

+82
-3
lines changed

src/encoding/xml/marshal.go

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ func (enc *Encoder) EncodeElement(v interface{}, start StartElement) error {
173173
}
174174

175175
var (
176+
begComment = []byte("<!--")
176177
endComment = []byte("-->")
177178
endProcInst = []byte("?>")
178179
endDirective = []byte(">")
@@ -238,8 +239,8 @@ func (enc *Encoder) EncodeToken(t Token) error {
238239
}
239240
p.WriteString("?>")
240241
case Directive:
241-
if bytes.Contains(t, endDirective) {
242-
return fmt.Errorf("xml: EncodeToken of Directive containing > marker")
242+
if !isValidDirective(t) {
243+
return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers")
243244
}
244245
p.WriteString("<!")
245246
p.Write(t)
@@ -248,6 +249,46 @@ func (enc *Encoder) EncodeToken(t Token) error {
248249
return p.cachedWriteError()
249250
}
250251

252+
// isValidDirective reports whether dir is a valid directive text,
253+
// meaning angle brackets are matched, ignoring comments and strings.
254+
func isValidDirective(dir Directive) bool {
255+
var (
256+
depth int
257+
inquote uint8
258+
incomment bool
259+
)
260+
for i, c := range dir {
261+
switch {
262+
case incomment:
263+
if c == '>' {
264+
if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) {
265+
incomment = false
266+
}
267+
}
268+
// Just ignore anything in comment
269+
case inquote != 0:
270+
if c == inquote {
271+
inquote = 0
272+
}
273+
// Just ignore anything within quotes
274+
case c == '\'' || c == '"':
275+
inquote = c
276+
case c == '<':
277+
if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) {
278+
incomment = true
279+
} else {
280+
depth++
281+
}
282+
case c == '>':
283+
if depth == 0 {
284+
return false
285+
}
286+
depth--
287+
}
288+
}
289+
return depth == 0 && inquote == 0 && !incomment
290+
}
291+
251292
// Flush flushes any buffered XML to the underlying writer.
252293
// See the EncodeToken documentation for details about when it is necessary.
253294
func (enc *Encoder) Flush() error {

src/encoding/xml/marshal_test.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1527,12 +1527,18 @@ var encodeTokenTests = []struct {
15271527
Directive("foo"),
15281528
},
15291529
want: `<!foo>`,
1530+
}, {
1531+
desc: "more complex directive",
1532+
toks: []Token{
1533+
Directive("DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]"),
1534+
},
1535+
want: `<!DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]>`,
15301536
}, {
15311537
desc: "directive instruction with bad name",
15321538
toks: []Token{
15331539
Directive("foo>"),
15341540
},
1535-
err: "xml: EncodeToken of Directive containing > marker",
1541+
err: "xml: EncodeToken of Directive containing wrong < or > markers",
15361542
}, {
15371543
desc: "end tag without start tag",
15381544
toks: []Token{
@@ -1868,3 +1874,35 @@ func TestRace9796(t *testing.T) {
18681874
}
18691875
wg.Wait()
18701876
}
1877+
1878+
func TestIsValidDirective(t *testing.T) {
1879+
testOK := []string{
1880+
"<>",
1881+
"< < > >",
1882+
"<!DOCTYPE '<' '>' '>' <!--nothing-->>",
1883+
"<!DOCTYPE doc [ <!ELEMENT doc ANY> <!ELEMENT doc ANY> ]>",
1884+
"<!DOCTYPE doc [ <!ELEMENT doc \"ANY> '<' <!E\" LEMENT '>' doc ANY> ]>",
1885+
"<!DOCTYPE doc <!-- just>>>> a < comment --> [ <!ITEM anything> ] >",
1886+
}
1887+
testKO := []string{
1888+
"<",
1889+
">",
1890+
"<!--",
1891+
"-->",
1892+
"< > > < < >",
1893+
"<!dummy <!-- > -->",
1894+
"<!DOCTYPE doc '>",
1895+
"<!DOCTYPE doc '>'",
1896+
"<!DOCTYPE doc <!--comment>",
1897+
}
1898+
for _, s := range testOK {
1899+
if !isValidDirective(Directive(s)) {
1900+
t.Errorf("Directive %q is expected to be valid", s)
1901+
}
1902+
}
1903+
for _, s := range testKO {
1904+
if isValidDirective(Directive(s)) {
1905+
t.Errorf("Directive %q is expected to be invalid", s)
1906+
}
1907+
}
1908+
}

0 commit comments

Comments
 (0)