"
+ d := NewDecoder(strings.NewReader(data))
+ d.Strict = false
+ token, err := d.Token()
+ if _, ok := err.(*SyntaxError); ok {
+ t.Errorf("Unexpected error: %v", err)
+ }
+ if token.(StartElement).Name.Local != "tag" {
+ t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
+ }
+ attr := token.(StartElement).Attr[0]
+ if attr.Value != "azAZ09:-_" {
+ t.Errorf("Unexpected attribute value: %v", attr.Value)
+ }
+ if attr.Name.Local != "attr" {
+ t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
+ }
+}
+
+func TestValuelessAttrs(t *testing.T) {
+ tests := [][3]string{
+ {"", "p", "nowrap"},
+ {"
", "p", "nowrap"},
+ {"", "input", "checked"},
+ {"", "input", "checked"},
+ }
+ for _, test := range tests {
+ d := NewDecoder(strings.NewReader(test[0]))
+ d.Strict = false
+ token, err := d.Token()
+ if _, ok := err.(*SyntaxError); ok {
+ t.Errorf("Unexpected error: %v", err)
+ }
+ if token.(StartElement).Name.Local != test[1] {
+ t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
+ }
+ attr := token.(StartElement).Attr[0]
+ if attr.Value != test[2] {
+ t.Errorf("Unexpected attribute value: %v", attr.Value)
+ }
+ if attr.Name.Local != test[2] {
+ t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
+ }
+ }
+}
+
+func TestCopyTokenCharData(t *testing.T) {
+ data := []byte("same data")
+ var tok1 Token = CharData(data)
+ tok2 := CopyToken(tok1)
+ if !reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(CharData) != CharData")
+ }
+ data[1] = 'o'
+ if reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(CharData) uses same buffer.")
+ }
+}
+
+func TestCopyTokenStartElement(t *testing.T) {
+ elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
+ var tok1 Token = elt
+ tok2 := CopyToken(tok1)
+ if tok1.(StartElement).Attr[0].Value != "en" {
+ t.Error("CopyToken overwrote Attr[0]")
+ }
+ if !reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(StartElement) != StartElement")
+ }
+ tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
+ if reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(CharData) uses same buffer.")
+ }
+}
+
+func TestCopyTokenComment(t *testing.T) {
+ data := []byte("")
+ var tok1 Token = Comment(data)
+ tok2 := CopyToken(tok1)
+ if !reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(Comment) != Comment")
+ }
+ data[1] = 'o'
+ if reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(Comment) uses same buffer.")
+ }
+}
+
+func TestSyntaxErrorLineNum(t *testing.T) {
+ testInput := "
Foo
\n\n
Bar>\n"
+ d := NewDecoder(strings.NewReader(testInput))
+ var err error
+ for _, err = d.Token(); err == nil; _, err = d.Token() {
+ }
+ synerr, ok := err.(*SyntaxError)
+ if !ok {
+ t.Error("Expected SyntaxError.")
+ }
+ if synerr.Line != 3 {
+ t.Error("SyntaxError didn't have correct line number.")
+ }
+}
+
+func TestTrailingRawToken(t *testing.T) {
+ input := ` `
+ d := NewDecoder(strings.NewReader(input))
+ var err error
+ for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
+ }
+ if err != io.EOF {
+ t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
+ }
+}
+
+func TestTrailingToken(t *testing.T) {
+ input := ` `
+ d := NewDecoder(strings.NewReader(input))
+ var err error
+ for _, err = d.Token(); err == nil; _, err = d.Token() {
+ }
+ if err != io.EOF {
+ t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
+ }
+}
+
+func TestEntityInsideCDATA(t *testing.T) {
+ input := ``
+ d := NewDecoder(strings.NewReader(input))
+ var err error
+ for _, err = d.Token(); err == nil; _, err = d.Token() {
+ }
+ if err != io.EOF {
+ t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
+ }
+}
+
+var characterTests = []struct {
+ in string
+ err string
+}{
+ {"\x12", "illegal character code U+0012"},
+ {"\x0b", "illegal character code U+000B"},
+ {"\xef\xbf\xbe", "illegal character code U+FFFE"},
+ {"\r\n\x07", "illegal character code U+0007"},
+ {"what's up", "expected attribute name in element"},
+ {"&abc\x01;", "invalid character entity &abc (no semicolon)"},
+ {"&\x01;", "invalid character entity & (no semicolon)"},
+ {"&\xef\xbf\xbe;", "invalid character entity &\uFFFE;"},
+ {"&hello;", "invalid character entity &hello;"},
+}
+
+func TestDisallowedCharacters(t *testing.T) {
+
+ for i, tt := range characterTests {
+ d := NewDecoder(strings.NewReader(tt.in))
+ var err error
+
+ for err == nil {
+ _, err = d.Token()
+ }
+ synerr, ok := err.(*SyntaxError)
+ if !ok {
+ t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
+ }
+ if synerr.Msg != tt.err {
+ t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
+ }
+ }
+}
+
+func TestIsInCharacterRange(t *testing.T) {
+ invalid := []rune{
+ utf8.MaxRune + 1,
+ 0xD800, // surrogate min
+ 0xDFFF, // surrogate max
+ -1,
+ }
+ for _, r := range invalid {
+ if isInCharacterRange(r) {
+ t.Errorf("rune %U considered valid", r)
+ }
+ }
+}
+
+var procInstTests = []struct {
+ input string
+ expect [2]string
+}{
+ {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
+ {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
+ {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
+ {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
+ {`encoding="FOO" `, [2]string{"", "FOO"}},
+}
+
+func TestProcInstEncoding(t *testing.T) {
+ for _, test := range procInstTests {
+ if got := procInst("version", test.input); got != test.expect[0] {
+ t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
+ }
+ if got := procInst("encoding", test.input); got != test.expect[1] {
+ t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
+ }
+ }
+}
+
+// Ensure that directives with comments include the complete
+// text of any nested directives.
+
+var directivesWithCommentsInput = `
+]>
+]>
+ --> --> []>
+`
+
+var directivesWithCommentsTokens = []Token{
+ CharData("\n"),
+ Directive(`DOCTYPE [ ]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [ ]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [ ]`),
+ CharData("\n"),
+}
+
+func TestDirectivesWithComments(t *testing.T) {
+ d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
+
+ for i, want := range directivesWithCommentsTokens {
+ have, err := d.Token()
+ if err != nil {
+ t.Fatalf("token %d: unexpected error: %s", i, err)
+ }
+ if !reflect.DeepEqual(have, want) {
+ t.Errorf("token %d = %#v want %#v", i, have, want)
+ }
+ }
+}
+
+// Writer whose Write method always returns an error.
+type errWriter struct{}
+
+func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
+
+func TestEscapeTextIOErrors(t *testing.T) {
+ expectErr := "unwritable"
+ err := EscapeText(errWriter{}, []byte{'A'})
+
+ if err == nil || err.Error() != expectErr {
+ t.Errorf("have %v, want %v", err, expectErr)
+ }
+}
+
+func TestEscapeTextInvalidChar(t *testing.T) {
+ input := []byte("A \x00 terminated string.")
+ expected := "A \uFFFD terminated string."
+
+ buff := new(strings.Builder)
+ if err := EscapeText(buff, input); err != nil {
+ t.Fatalf("have %v, want nil", err)
+ }
+ text := buff.String()
+
+ if text != expected {
+ t.Errorf("have %v, want %v", text, expected)
+ }
+}
+
+func TestIssue5880(t *testing.T) {
+ type T []byte
+ data, err := Marshal(T{192, 168, 0, 1})
+ if err != nil {
+ t.Errorf("Marshal error: %v", err)
+ }
+ if !utf8.Valid(data) {
+ t.Errorf("Marshal generated invalid UTF-8: %x", data)
+ }
+}
+
+func TestIssue8535(t *testing.T) {
+
+ type ExampleConflict struct {
+ XMLName Name `xml:"example"`
+ Link string `xml:"link"`
+ AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
+ }
+ testCase := `
+ Example
+ http://example.com/default
+ http://example.com/home
+ http://example.com/ns
+ `
+
+ var dest ExampleConflict
+ d := NewDecoder(strings.NewReader(testCase))
+ if err := d.Decode(&dest); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestEncodeXMLNS(t *testing.T) {
+ testCases := []struct {
+ f func() ([]byte, error)
+ want string
+ ok bool
+ }{
+ {encodeXMLNS1, `hello world`, true},
+ {encodeXMLNS2, `hello world`, true},
+ {encodeXMLNS3, `hello world`, true},
+ {encodeXMLNS4, `hello world`, false},
+ }
+
+ for i, tc := range testCases {
+ if b, err := tc.f(); err == nil {
+ if got, want := string(b), tc.want; got != want {
+ t.Errorf("%d: got %s, want %s \n", i, got, want)
+ }
+ } else {
+ t.Errorf("%d: marshal failed with %s", i, err)
+ }
+ }
+}
+
+func encodeXMLNS1() ([]byte, error) {
+
+ type T struct {
+ XMLName Name `xml:"Test"`
+ Ns string `xml:"xmlns,attr"`
+ Body string
+ }
+
+ s := &T{Ns: "http://example.com/ns", Body: "hello world"}
+ return Marshal(s)
+}
+
+func encodeXMLNS2() ([]byte, error) {
+
+ type Test struct {
+ Body string `xml:"http://example.com/ns body"`
+ }
+
+ s := &Test{Body: "hello world"}
+ return Marshal(s)
+}
+
+func encodeXMLNS3() ([]byte, error) {
+
+ type Test struct {
+ XMLName Name `xml:"http://example.com/ns Test"`
+ Body string
+ }
+
+ //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing
+ // as documentation states
+ s := &Test{Body: "hello world"}
+ return Marshal(s)
+}
+
+func encodeXMLNS4() ([]byte, error) {
+
+ type Test struct {
+ Ns string `xml:"xmlns,attr"`
+ Body string
+ }
+
+ s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
+ return Marshal(s)
+}
+
+func TestIssue11405(t *testing.T) {
+ testCases := []string{
+ "",
+ "",
+ "",
+ }
+ for _, tc := range testCases {
+ d := NewDecoder(strings.NewReader(tc))
+ var err error
+ for {
+ _, err = d.Token()
+ if err != nil {
+ break
+ }
+ }
+ if _, ok := err.(*SyntaxError); !ok {
+ t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
+ }
+ }
+}
+
+func TestIssue12417(t *testing.T) {
+ testCases := []struct {
+ s string
+ ok bool
+ }{
+ {``, true},
+ {``, true},
+ {``, true},
+ {``, false},
+ }
+ for _, tc := range testCases {
+ d := NewDecoder(strings.NewReader(tc.s))
+ var err error
+ for {
+ _, err = d.Token()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ }
+ break
+ }
+ }
+ if err != nil && tc.ok {
+ t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
+ continue
+ }
+ if err == nil && !tc.ok {
+ t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
+ }
+ }
+}
+
+func TestIssue7113(t *testing.T) {
+ type C struct {
+ XMLName Name `xml:""` // Sets empty namespace
+ }
+
+ type A struct {
+ XMLName Name `xml:""`
+ C C `xml:""`
+ }
+
+ var a A
+ structSpace := "b"
+ xmlTest := ``
+ t.Log(xmlTest)
+ err := Unmarshal([]byte(xmlTest), &a)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if a.XMLName.Space != structSpace {
+ t.Errorf("overidding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace)
+ }
+ if len(a.C.XMLName.Space) != 0 {
+ t.Fatalf("overidding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space)
+ }
+
+ var b []byte
+ b, err = Marshal(&a)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(a.C.XMLName.Space) != 0 {
+ t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
+ }
+ if string(b) != xmlTest {
+ t.Fatalf("overidding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest)
+ }
+ var c A
+ err = Unmarshal(b, &c)
+ if err != nil {
+ t.Fatalf("second Unmarshal failed: %s", err)
+ }
+ if c.XMLName.Space != "b" {
+ t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
+ }
+ if len(c.C.XMLName.Space) != 0 {
+ t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
+ }
+}
+
+func TestIssue20396(t *testing.T) {
+
+ var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
+
+ testCases := []struct {
+ s string
+ wantErr error
+ }{
+ {``, // Issue 20396
+ UnmarshalError("XML syntax error on line 1: expected element name after <")},
+ {``, attrError},
+ {``, attrError},
+ {``, nil},
+ {`1`,
+ UnmarshalError("XML syntax error on line 1: expected element name after <")},
+ {`1`, attrError},
+ {`1`, attrError},
+ {`1`, nil},
+ }
+
+ var dest string
+ for _, tc := range testCases {
+ if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
+ if got == nil {
+ t.Errorf("%s: Unexpected success, want %v", tc.s, want)
+ } else if want == nil {
+ t.Errorf("%s: Unexpected error, got %v", tc.s, got)
+ } else if got.Error() != want.Error() {
+ t.Errorf("%s: got %v, want %v", tc.s, got, want)
+ }
+ }
+ }
+}
+
+func TestIssue20685(t *testing.T) {
+ testCases := []struct {
+ s string
+ ok bool
+ }{
+ {`one`, false},
+ {`one`, true},
+ {`one`, false},
+ {`one`, false},
+ {`one`, false},
+ {`one`, false},
+ {`one`, false},
+ }
+ for _, tc := range testCases {
+ d := NewDecoder(strings.NewReader(tc.s))
+ var err error
+ for {
+ _, err = d.Token()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ }
+ break
+ }
+ }
+ if err != nil && tc.ok {
+ t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
+ continue
+ }
+ if err == nil && !tc.ok {
+ t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
+ }
+ }
+}
+
+func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
+ return func(src TokenReader) TokenReader {
+ return mapper{
+ t: src,
+ f: mapping,
+ }
+ }
+}
+
+type mapper struct {
+ t TokenReader
+ f func(Token) Token
+}
+
+func (m mapper) Token() (Token, error) {
+ tok, err := m.t.Token()
+ if err != nil {
+ return nil, err
+ }
+ return m.f(tok), nil
+}
+
+func TestNewTokenDecoderIdempotent(t *testing.T) {
+ d := NewDecoder(strings.NewReader(`
`))
+ d2 := NewTokenDecoder(d)
+ if d != d2 {
+ t.Error("NewTokenDecoder did not detect underlying Decoder")
+ }
+}
+
+func TestWrapDecoder(t *testing.T) {
+ d := NewDecoder(strings.NewReader(`[Re-enter Clown with a letter, and FABIAN]
`))
+ m := tokenMap(func(t Token) Token {
+ switch tok := t.(type) {
+ case StartElement:
+ if tok.Name.Local == "quote" {
+ tok.Name.Local = "blocking"
+ return tok
+ }
+ case EndElement:
+ if tok.Name.Local == "quote" {
+ tok.Name.Local = "blocking"
+ return tok
+ }
+ }
+ return t
+ })
+
+ d = NewTokenDecoder(m(d))
+
+ o := struct {
+ XMLName Name `xml:"blocking"`
+ Chardata string `xml:",chardata"`
+ }{}
+
+ if err := d.Decode(&o); err != nil {
+ t.Fatal("Got unexpected error while decoding:", err)
+ }
+
+ if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
+ t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
+ }
+}
+
+type tokReader struct{}
+
+func (tokReader) Token() (Token, error) {
+ return StartElement{}, nil
+}
+
+type Failure struct{}
+
+func (Failure) UnmarshalXML(*Decoder, StartElement) error {
+ return nil
+}
+
+func TestTokenUnmarshaler(t *testing.T) {
+ defer func() {
+ if r := recover(); r != nil {
+ t.Error("Unexpected panic using custom token unmarshaler")
+ }
+ }()
+
+ d := NewTokenDecoder(tokReader{})
+ d.Decode(&Failure{})
+}
+
+func testRoundTrip(t *testing.T, input string) {
+ d := NewDecoder(strings.NewReader(input))
+ var tokens []Token
+ var buf bytes.Buffer
+ e := NewEncoder(&buf)
+ for {
+ tok, err := d.Token()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ t.Fatalf("invalid input: %v", err)
+ }
+ if err := e.EncodeToken(tok); err != nil {
+ t.Fatalf("failed to re-encode input: %v", err)
+ }
+ tokens = append(tokens, CopyToken(tok))
+ }
+ if err := e.Flush(); err != nil {
+ t.Fatal(err)
+ }
+
+ d = NewDecoder(&buf)
+ for {
+ tok, err := d.Token()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ t.Fatalf("failed to decode output: %v", err)
+ }
+ if len(tokens) == 0 {
+ t.Fatalf("unexpected token: %#v", tok)
+ }
+ a, b := tokens[0], tok
+ if !reflect.DeepEqual(a, b) {
+ t.Fatalf("token mismatch: %#v vs %#v", a, b)
+ }
+ tokens = tokens[1:]
+ }
+ if len(tokens) > 0 {
+ t.Fatalf("lost tokens: %#v", tokens)
+ }
+}
+
+func TestRoundTrip(t *testing.T) {
+ tests := map[string]string{
+ "trailing colon": ``,
+ "comments in directives": `--x --> > --x ]>`,
+ }
+ for name, input := range tests {
+ t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
+ }
+}
+
+func TestParseErrors(t *testing.T) {
+ withDefaultHeader := func(s string) string {
+ return `` + s
+ }
+ tests := []struct {
+ src string
+ err string
+ }{
+ {withDefaultHeader(``), `unexpected end element