Skip to content

Commit e1351cd

Browse files
committed
encoding/xml: treat a namespaced name as two names, not one
An XML QName is syntactically two Names separated by a colon, rather than a single Name that contains a colon. Parsing the prefix and the local part as separate Names fixes multiple bugs in XML QName reading. Fixes #68294. Fixes #68392. Fixes #68393.
1 parent 239666c commit e1351cd

File tree

2 files changed

+106
-12
lines changed

2 files changed

+106
-12
lines changed

src/encoding/xml/xml.go

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,15 +1169,28 @@ func (d *Decoder) nsname() (name Name, ok bool) {
11691169
if !ok {
11701170
return
11711171
}
1172-
if strings.Count(s, ":") > 1 {
1173-
return name, false
1174-
} else if space, local, ok := strings.Cut(s, ":"); !ok || space == "" || local == "" {
1172+
// XML does not allow a document to end with a name, so there must
1173+
// be another byte.
1174+
b, ok := d.mustgetc()
1175+
if !ok {
1176+
return
1177+
}
1178+
if b != ':' {
1179+
d.ungetc(b)
11751180
name.Local = s
1176-
} else {
1177-
name.Space = space
1178-
name.Local = local
1181+
return
11791182
}
1180-
return name, true
1183+
n, ok := d.name()
1184+
if ok {
1185+
// A second colon directly after the local part is reported here, which gives a better error message than would otherwise be possible.
1186+
if d.nextByte == ':' {
1187+
d.err = d.syntaxError("colon after prefixed XML name " + string(s) + ":" + string(n))
1188+
return name, false
1189+
}
1190+
name.Space = s
1191+
name.Local = n
1192+
}
1193+
return
11811194
}
11821195

11831196
// Get name: /first(first|second)*/
@@ -1229,7 +1242,7 @@ func isNameByte(c byte) bool {
12291242
return 'A' <= c && c <= 'Z' ||
12301243
'a' <= c && c <= 'z' ||
12311244
'0' <= c && c <= '9' ||
1232-
c == '_' || c == ':' || c == '.' || c == '-'
1245+
c == '_' || c == '.' || c == '-'
12331246
}
12341247

12351248
func isName(s []byte) bool {
@@ -1287,7 +1300,6 @@ func isNameString(s string) bool {
12871300

12881301
var first = &unicode.RangeTable{
12891302
R16: []unicode.Range16{
1290-
{0x003A, 0x003A, 1},
12911303
{0x0041, 0x005A, 1},
12921304
{0x005F, 0x005F, 1},
12931305
{0x0061, 0x007A, 1},

src/encoding/xml/xml_test.go

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,89 @@ func (t *toks) Token() (Token, error) {
3131
return tok, nil
3232
}
3333

34+
func TestDecodeBadName(t *testing.T) {
35+
tests := []struct {
36+
name string
37+
invalid string
38+
message string
39+
}{
40+
{
41+
name: "Number after colon",
42+
invalid: `<a:1/>`,
43+
message: "invalid XML name: 1",
44+
},
45+
{
46+
name: "Two colons at end",
47+
invalid: `<a::/>`,
48+
message: "expected element name after <",
49+
},
50+
{
51+
name: "Two colons together in middle",
52+
invalid: "<a::a/>",
53+
message: "expected element name after <",
54+
},
55+
{
56+
name: "Colon at end",
57+
invalid: "<a:/>",
58+
message: "expected element name after <",
59+
},
60+
{
61+
name: "Colon at start",
62+
invalid: "<:a/>",
63+
message: "expected element name after <",
64+
},
65+
{
66+
name: "Number after colon in attribute",
67+
invalid: `<a a:1=""/>`,
68+
message: "invalid XML name: 1",
69+
},
70+
{
71+
name: "Two colons separate",
72+
invalid: `<a a:b:c="1"/>`,
73+
message: "colon after prefixed XML name a:b",
74+
},
75+
{
76+
name: "Two colons at end",
77+
invalid: `<a a::="1"/>`,
78+
message: "expected attribute name in element",
79+
},
80+
{
81+
name: "Two colons together in middle",
82+
invalid: `<a a::a="1"/>`,
83+
message: "expected attribute name in element",
84+
},
85+
{
86+
name: "Colon at end",
87+
invalid: `<a a:="1"/>`,
88+
message: "expected attribute name in element",
89+
},
90+
{
91+
name: "Colon at start",
92+
invalid: `<a :a="1"/>`,
93+
message: "expected attribute name in element",
94+
},
95+
}
96+
for i, j := range tests {
97+
t.Run(j.name, func(t *testing.T) {
98+
d := NewDecoder(strings.NewReader(j.invalid))
99+
tok, err := d.RawToken()
100+
if tok != nil {
101+
t.Fatalf("%d: d.Decode: expected nil token, got %#v", i, tok)
102+
}
103+
if err == nil {
104+
t.Fatalf("%d: d.Decode: expected non-nil error, got nil", i)
105+
}
106+
syntaxError, ok := err.(*SyntaxError)
107+
if !ok {
108+
t.Fatalf("%d: d.Decode: expected syntax error", i)
109+
}
110+
if syntaxError.Msg != j.message {
111+
t.Errorf("%d: bad message: expected %q, got %q", i, j.message, syntaxError.Msg)
112+
}
113+
})
114+
}
115+
}
116+
34117
func TestDecodeEOF(t *testing.T) {
35118
start := StartElement{Name: Name{Local: "test"}}
36119
tests := []struct {
@@ -1130,12 +1213,12 @@ func TestIssue20396(t *testing.T) {
11301213
wantErr error
11311214
}{
11321215
{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
1133-
UnmarshalError("XML syntax error on line 1: expected element name after <")},
1216+
UnmarshalError("XML syntax error on line 1: colon after prefixed XML name a:te")},
11341217
{`<a:te=st xmlns:a="abcd"/>`, attrError},
11351218
{`<a:te&st xmlns:a="abcd"/>`, attrError},
11361219
{`<a:test xmlns:a="abcd"/>`, nil},
11371220
{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
1138-
UnmarshalError("XML syntax error on line 1: expected element name after <")},
1221+
UnmarshalError("XML syntax error on line 1: colon after prefixed XML name a:te")},
11391222
{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
11401223
{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
11411224
{`<a:test xmlns:a="abcd">1</a:test>`, nil},
@@ -1324,7 +1407,6 @@ func testRoundTrip(t *testing.T, input string) {
13241407

13251408
func TestRoundTrip(t *testing.T) {
13261409
tests := map[string]string{
1327-
"trailing colon": `<foo abc:="x"></foo>`,
13281410
"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
13291411
}
13301412
for name, input := range tests {

0 commit comments

Comments
 (0)