Skip to content

Commit 414161e

Browse files
callthingsoff and gopherbot
authored and committed
encoding/xml: rewrite func procInst
This CL tries to make the function procInst more exact, and also adds test cases, including tricky ones.

Change-Id: If421299fc84d136e56a25dba7a4919c4424702c8
GitHub-Last-Rev: b9a3192
GitHub-Pull-Request: #64336
Reviewed-on: https://go-review.googlesource.com/c/go/+/544475
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
Auto-Submit: Ian Lance Taylor <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent a568349 commit 414161e

File tree

2 files changed

+25
-7
lines changed

2 files changed

+25
-7
lines changed

src/encoding/xml/xml.go

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2045,16 +2045,27 @@ func procInst(param, s string) string {
20452045
// TODO: this parsing is somewhat lame and not exact.
20462046
// It works for all actual cases, though.
20472047
param = param + "="
2048-
_, v, _ := strings.Cut(s, param)
2049-
if v == "" {
2050-
return ""
2048+
lenp := len(param)
2049+
i := 0
2050+
var sep byte
2051+
for i < len(s) {
2052+
sub := s[i:]
2053+
k := strings.Index(sub, param)
2054+
if k < 0 || lenp+k >= len(sub) {
2055+
return ""
2056+
}
2057+
i += lenp + k + 1
2058+
if c := sub[lenp+k]; c == '\'' || c == '"' {
2059+
sep = c
2060+
break
2061+
}
20512062
}
2052-
if v[0] != '\'' && v[0] != '"' {
2063+
if sep == 0 {
20532064
return ""
20542065
}
2055-
unquote, _, ok := strings.Cut(v[1:], v[:1])
2056-
if !ok {
2066+
j := strings.IndexByte(s[i:], sep)
2067+
if j < 0 {
20572068
return ""
20582069
}
2059-
return unquote
2070+
return s[i : i+j]
20602071
}

src/encoding/xml/xml_test.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -830,6 +830,13 @@ var procInstTests = []struct {
830830
{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
831831
{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
832832
{`encoding="FOO" `, [2]string{"", "FOO"}},
833+
{`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
834+
{`version= encoding=`, [2]string{"", ""}},
835+
{`encoding="version=1.0"`, [2]string{"", "version=1.0"}},
836+
{``, [2]string{"", ""}},
837+
// TODO: what's the right approach to handle these nested cases?
838+
{`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}},
839+
{`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}},
833840
}
834841

835842
func TestProcInstEncoding(t *testing.T) {

0 commit comments

Comments
 (0)