1
2
3
4
5 package xml
6
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "reflect"
12 "strings"
13 "testing"
14 "unicode/utf8"
15 )
16
17 type toks struct {
18 earlyEOF bool
19 t []Token
20 }
21
22 func (t *toks) Token() (Token, error) {
23 if len(t.t) == 0 {
24 return nil, io.EOF
25 }
26 var tok Token
27 tok, t.t = t.t[0], t.t[1:]
28 if t.earlyEOF && len(t.t) == 0 {
29 return tok, io.EOF
30 }
31 return tok, nil
32 }
33
34 func TestDecodeEOF(t *testing.T) {
35 start := StartElement{Name: Name{Local: "test"}}
36 tests := []struct {
37 name string
38 tokens []Token
39 ok bool
40 }{
41 {
42 name: "OK",
43 tokens: []Token{
44 start,
45 start.End(),
46 },
47 ok: true,
48 },
49 {
50 name: "Malformed",
51 tokens: []Token{
52 start,
53 StartElement{Name: Name{Local: "bad"}},
54 start.End(),
55 },
56 ok: false,
57 },
58 }
59 for _, tc := range tests {
60 for _, eof := range []bool{true, false} {
61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
62 t.Run(name, func(t *testing.T) {
63 d := NewTokenDecoder(&toks{
64 earlyEOF: eof,
65 t: tc.tokens,
66 })
67 err := d.Decode(&struct {
68 XMLName Name `xml:"test"`
69 }{})
70 if tc.ok && err != nil {
71 t.Fatalf("d.Decode: expected nil error, got %v", err)
72 }
73 if _, ok := err.(*SyntaxError); !tc.ok && !ok {
74 t.Errorf("d.Decode: expected syntax error, got %v", err)
75 }
76 })
77 }
78 }
79 }
80
81 type toksNil struct {
82 returnEOF bool
83 t []Token
84 }
85
86 func (t *toksNil) Token() (Token, error) {
87 if len(t.t) == 0 {
88 if !t.returnEOF {
89
90
91 t.returnEOF = true
92 return nil, nil
93 }
94 return nil, io.EOF
95 }
96 var tok Token
97 tok, t.t = t.t[0], t.t[1:]
98 return tok, nil
99 }
100
101 func TestDecodeNilToken(t *testing.T) {
102 for _, strict := range []bool{true, false} {
103 name := fmt.Sprintf("Strict=%v", strict)
104 t.Run(name, func(t *testing.T) {
105 start := StartElement{Name: Name{Local: "test"}}
106 bad := StartElement{Name: Name{Local: "bad"}}
107 d := NewTokenDecoder(&toksNil{
108
109 t: []Token{start, bad, start.End()},
110 })
111 d.Strict = strict
112 err := d.Decode(&struct {
113 XMLName Name `xml:"test"`
114 }{})
115 if _, ok := err.(*SyntaxError); !ok {
116 t.Errorf("d.Decode: expected syntax error, got %v", err)
117 }
118 })
119 }
120 }
121
// testInput is the XML document tokenized by TestRawToken and TestToken.
// rawTokens and cookedTokens list the tokens expected from it; the entity
// references it contains are resolved through testEntity.
122 const testInput = `
123 <?xml version="1.0" encoding="UTF-8"?>
124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
127 "\r\n\t" + ` >
128 <hello lang="en">World <>'" 白鵬翔</hello>
129 <query>&何; &is-it;</query>
130 <goodbye />
131 <outer foo:attr="value" xmlns:tag="ns4">
132 <inner/>
133 </outer>
134 <tag:name>
135 <![CDATA[Some text here.]]>
136 </tag:name>
137 </body><!-- missing final newline -->`
138
// testEntity maps the custom entity names referenced by testInput to their
// replacement text.
var testEntity = map[string]string{
	"何":     "What",
	"is-it": "is it?",
}
140
// rawTokens lists, in order, the tokens TestRawToken expects RawToken to
// produce for testInput. Element and attribute names keep their literal
// prefixes (for example "tag" and "foo") in the Space field.
141 var rawTokens = []Token{
142 CharData("\n"),
143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
144 CharData("\n"),
145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
147 CharData("\n"),
148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
149 CharData("\n "),
150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
151 CharData("World <>'\" 白鵬翔"),
152 EndElement{Name{"", "hello"}},
153 CharData("\n "),
154 StartElement{Name{"", "query"}, []Attr{}},
155 CharData("What is it?"),
156 EndElement{Name{"", "query"}},
157 CharData("\n "),
158 StartElement{Name{"", "goodbye"}, []Attr{}},
159 EndElement{Name{"", "goodbye"}},
160 CharData("\n "),
161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
162 CharData("\n "),
163 StartElement{Name{"", "inner"}, []Attr{}},
164 EndElement{Name{"", "inner"}},
165 CharData("\n "),
166 EndElement{Name{"", "outer"}},
167 CharData("\n "),
168 StartElement{Name{"tag", "name"}, []Attr{}},
169 CharData("\n "),
170 CharData("Some text here."),
171 CharData("\n "),
172 EndElement{Name{"tag", "name"}},
173 CharData("\n"),
174 EndElement{Name{"", "body"}},
175 Comment(" missing final newline "),
176 }
177
// cookedTokens lists, in order, the tokens TestToken expects Token to
// produce for testInput. Unlike rawTokens, the Space field of element names
// (and of the prefixed foo:attr attribute) holds the namespace value
// ("ns1", "ns2", "ns3") rather than the literal prefix.
178 var cookedTokens = []Token{
179 CharData("\n"),
180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
181 CharData("\n"),
182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
184 CharData("\n"),
185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
186 CharData("\n "),
187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
188 CharData("World <>'\" 白鵬翔"),
189 EndElement{Name{"ns2", "hello"}},
190 CharData("\n "),
191 StartElement{Name{"ns2", "query"}, []Attr{}},
192 CharData("What is it?"),
193 EndElement{Name{"ns2", "query"}},
194 CharData("\n "),
195 StartElement{Name{"ns2", "goodbye"}, []Attr{}},
196 EndElement{Name{"ns2", "goodbye"}},
197 CharData("\n "),
198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
199 CharData("\n "),
200 StartElement{Name{"ns2", "inner"}, []Attr{}},
201 EndElement{Name{"ns2", "inner"}},
202 CharData("\n "),
203 EndElement{Name{"ns2", "outer"}},
204 CharData("\n "),
205 StartElement{Name{"ns3", "name"}, []Attr{}},
206 CharData("\n "),
207 CharData("Some text here."),
208 CharData("\n "),
209 EndElement{Name{"ns3", "name"}},
210 CharData("\n"),
211 EndElement{Name{"ns2", "body"}},
212 Comment(" missing final newline "),
213 }
214
// testInputAltEncoding is a small document that declares the made-up
// encoding "x-testing-uppercase". It is the input for the
// TestRawTokenAltEncoding tests.
215 const testInputAltEncoding = `
216 <?xml version="1.0" encoding="x-testing-uppercase"?>
217 <TAG>VALUE</TAG>`
218
219 var rawTokensAltEncoding = []Token{
220 CharData("\n"),
221 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
222 CharData("\n"),
223 StartElement{Name{"", "tag"}, []Attr{}},
224 CharData("value"),
225 EndElement{Name{"", "tag"}},
226 }
227
// xmlInput lists malformed documents. TestSyntax tokenizes each one and
// expects the first error to be a *SyntaxError.
228 var xmlInput = []string{
229
// Inputs that stop in the middle of a construct.
230 "<",
231 "<t",
232 "<t ",
233 "<t/",
234 "<!",
235 "<!-",
236 "<!--",
237 "<!--c-",
238 "<!--c--",
239 "<!d",
240 "<t></",
241 "<t></t",
242 "<?",
243 "<?p",
244 "<t a",
245 "<t a=",
246 "<t a='",
247 "<t a=''",
248 "<t/><![",
249 "<t/><![C",
250 "<t/><![CDATA[d",
251 "<t/><![CDATA[d]",
252 "<t/><![CDATA[d]]",
253
254
// Unexpected characters where a tag or target name should be.
255 "<>",
256 "<t/a",
257 "<0 />",
258 "<?0 >",
259
// Other malformed names, entity references, and attribute syntax.
260 "</0>",
261 "<t 0=''>",
262 "<t a='&'>",
263 "<t a='<'>",
264 "<t> c;</t>",
265 "<t a>",
266 "<t a=>",
267 "<t a=v>",
268
// Broken or mismatched end tags, and a stray CDATA terminator in text.
269 "<t></e>",
270 "<t></>",
271 "<t></t!",
272 "<t>cdata]]></t>",
273 }
274
275 func TestRawToken(t *testing.T) {
276 d := NewDecoder(strings.NewReader(testInput))
277 d.Entity = testEntity
278 testRawToken(t, d, testInput, rawTokens)
279 }
280
// nonStrictInput holds <tag> elements whose text contains malformed or
// unknown entity-like sequences. TestNonStrictRawToken reads it with
// Strict disabled and compares the result against nonStrictTokens.
281 const nonStrictInput = `
282 <tag>non&entity</tag>
283 <tag>&unknown;entity</tag>
284 <tag>{</tag>
285 <tag>&#zzz;</tag>
286 <tag>&なまえ3;</tag>
287 <tag><-gt;</tag>
288 <tag>&;</tag>
289 <tag>&0a;</tag>
290 `
291
292 var nonStrictTokens = []Token{
293 CharData("\n"),
294 StartElement{Name{"", "tag"}, []Attr{}},
295 CharData("non&entity"),
296 EndElement{Name{"", "tag"}},
297 CharData("\n"),
298 StartElement{Name{"", "tag"}, []Attr{}},
299 CharData("&unknown;entity"),
300 EndElement{Name{"", "tag"}},
301 CharData("\n"),
302 StartElement{Name{"", "tag"}, []Attr{}},
303 CharData("{"),
304 EndElement{Name{"", "tag"}},
305 CharData("\n"),
306 StartElement{Name{"", "tag"}, []Attr{}},
307 CharData("&#zzz;"),
308 EndElement{Name{"", "tag"}},
309 CharData("\n"),
310 StartElement{Name{"", "tag"}, []Attr{}},
311 CharData("&なまえ3;"),
312 EndElement{Name{"", "tag"}},
313 CharData("\n"),
314 StartElement{Name{"", "tag"}, []Attr{}},
315 CharData("<-gt;"),
316 EndElement{Name{"", "tag"}},
317 CharData("\n"),
318 StartElement{Name{"", "tag"}, []Attr{}},
319 CharData("&;"),
320 EndElement{Name{"", "tag"}},
321 CharData("\n"),
322 StartElement{Name{"", "tag"}, []Attr{}},
323 CharData("&0a;"),
324 EndElement{Name{"", "tag"}},
325 CharData("\n"),
326 }
327
328 func TestNonStrictRawToken(t *testing.T) {
329 d := NewDecoder(strings.NewReader(nonStrictInput))
330 d.Strict = false
331 testRawToken(t, d, nonStrictInput, nonStrictTokens)
332 }
333
// downCaser wraps an io.ByteReader and lower-cases ASCII letters as they are
// read. TestRawTokenAltEncoding installs it as the charset reader.
type downCaser struct {
	t *testing.T
	r io.ByteReader
}

// ReadByte reads one byte from the wrapped reader, mapping 'A'-'Z' to
// 'a'-'z' and passing every other byte, and any error, through unchanged.
func (d *downCaser) ReadByte() (c byte, err error) {
	b, readErr := d.r.ReadByte()
	if 'A' <= b && b <= 'Z' {
		b = b - 'A' + 'a'
	}
	return b, readErr
}

// Read fails the test immediately: this reader is meant to be consumed
// through ReadByte only.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
351
352 func TestRawTokenAltEncoding(t *testing.T) {
353 d := NewDecoder(strings.NewReader(testInputAltEncoding))
354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
355 if charset != "x-testing-uppercase" {
356 t.Fatalf("unexpected charset %q", charset)
357 }
358 return &downCaser{t, input.(io.ByteReader)}, nil
359 }
360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
361 }
362
363 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
364 d := NewDecoder(strings.NewReader(testInputAltEncoding))
365 token, err := d.RawToken()
366 if token == nil {
367 t.Fatalf("expected a token on first RawToken call")
368 }
369 if err != nil {
370 t.Fatal(err)
371 }
372 token, err = d.RawToken()
373 if token != nil {
374 t.Errorf("expected a nil token; got %#v", token)
375 }
376 if err == nil {
377 t.Fatalf("expected an error on second RawToken call")
378 }
379 const encoding = "x-testing-uppercase"
380 if !strings.Contains(err.Error(), encoding) {
381 t.Errorf("expected error to contain %q; got error: %v",
382 encoding, err)
383 }
384 }
385
386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
387 lastEnd := int64(0)
388 for i, want := range rawTokens {
389 start := d.InputOffset()
390 have, err := d.RawToken()
391 end := d.InputOffset()
392 if err != nil {
393 t.Fatalf("token %d: unexpected error: %s", i, err)
394 }
395 if !reflect.DeepEqual(have, want) {
396 var shave, swant string
397 if _, ok := have.(CharData); ok {
398 shave = fmt.Sprintf("CharData(%q)", have)
399 } else {
400 shave = fmt.Sprintf("%#v", have)
401 }
402 if _, ok := want.(CharData); ok {
403 swant = fmt.Sprintf("CharData(%q)", want)
404 } else {
405 swant = fmt.Sprintf("%#v", want)
406 }
407 t.Errorf("token %d = %s, want %s", i, shave, swant)
408 }
409
410
411 switch {
412 case start < lastEnd:
413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
414 case start >= end:
415
416 if start == end && end == lastEnd {
417 break
418 }
419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
420 case end > int64(len(raw)):
421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
422 default:
423 text := raw[start:end]
424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
426 }
427 }
428 lastEnd = end
429 }
430 }
431
432
433
434
435
// nestedDirectivesInput holds DOCTYPE directives that each contain a nested
// <!ENTITY ...> declaration, several with '<' or '>' inside single- or
// double-quoted values. It is the input for TestNestedDirectives.
436 var nestedDirectivesInput = `
437 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
438 <!DOCTYPE [<!ENTITY xlt ">">]>
439 <!DOCTYPE [<!ENTITY xlt "<">]>
440 <!DOCTYPE [<!ENTITY xlt '>'>]>
441 <!DOCTYPE [<!ENTITY xlt '<'>]>
442 <!DOCTYPE [<!ENTITY xlt '">'>]>
443 <!DOCTYPE [<!ENTITY xlt "'<">]>
444 `
445
// nestedDirectivesTokens lists the tokens TestNestedDirectives expects from
// nestedDirectivesInput: each Directive carries the full text between the
// outer "<!" and its matching ">", including the nested declaration.
446 var nestedDirectivesTokens = []Token{
447 CharData("\n"),
448 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
449 CharData("\n"),
450 Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
451 CharData("\n"),
452 Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
453 CharData("\n"),
454 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
455 CharData("\n"),
456 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
457 CharData("\n"),
458 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
459 CharData("\n"),
460 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
461 CharData("\n"),
462 }
463
464 func TestNestedDirectives(t *testing.T) {
465 d := NewDecoder(strings.NewReader(nestedDirectivesInput))
466
467 for i, want := range nestedDirectivesTokens {
468 have, err := d.Token()
469 if err != nil {
470 t.Fatalf("token %d: unexpected error: %s", i, err)
471 }
472 if !reflect.DeepEqual(have, want) {
473 t.Errorf("token %d = %#v want %#v", i, have, want)
474 }
475 }
476 }
477
478 func TestToken(t *testing.T) {
479 d := NewDecoder(strings.NewReader(testInput))
480 d.Entity = testEntity
481
482 for i, want := range cookedTokens {
483 have, err := d.Token()
484 if err != nil {
485 t.Fatalf("token %d: unexpected error: %s", i, err)
486 }
487 if !reflect.DeepEqual(have, want) {
488 t.Errorf("token %d = %#v want %#v", i, have, want)
489 }
490 }
491 }
492
493 func TestSyntax(t *testing.T) {
494 for i := range xmlInput {
495 d := NewDecoder(strings.NewReader(xmlInput[i]))
496 var err error
497 for _, err = d.Token(); err == nil; _, err = d.Token() {
498 }
499 if _, ok := err.(*SyntaxError); !ok {
500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
501 }
502 }
503 }
504
// TestInputLinePos tokenizes a multi-line document and, after each Token
// call, compares the decoder's InputPos result with the expected pair.
505 func TestInputLinePos(t *testing.T) {
506 testInput := `<root>
507 <?pi
508 ?> <elt
509 att
510 =
511 "val">
512 <![CDATA[
513 ]]><!--
514
515 --></elt>
516 </root>`
// linePos holds, per token, the {line, column} pair InputPos must report
// once that token has been read.
517 linePos := [][]int{
518 {1, 7},
519 {2, 1},
520 {3, 4},
521 {3, 6},
522 {6, 7},
523 {7, 1},
524 {8, 4},
525 {10, 4},
526 {10, 10},
527 {11, 1},
528 {11, 8},
529 }
530 dec := NewDecoder(strings.NewReader(testInput))
531 for _, want := range linePos {
// A tokenizing error is reported but does not stop the remaining checks.
532 if _, err := dec.Token(); err != nil {
533 t.Errorf("Unexpected error: %v", err)
534 continue
535 }
536
537 gotLine, gotCol := dec.InputPos()
538 if gotLine != want[0] || gotCol != want[1] {
539 t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
540 }
541 }
542 }
543
// allScalars has one field per scalar kind decoded by TestAllScalars. Field
// names match the element names in testScalarsInput.
type allScalars struct {
	True1  bool
	True2  bool
	False1 bool
	False2 bool
	Int    int
	Int8   int8
	Int16  int16
	Int32  int32
	Int64  int64
	// Uint is unsigned so that decoding into a plain uint is exercised too;
	// it was previously declared as int, duplicating the Int case.
	Uint      uint
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
565
566 var all = allScalars{
567 True1: true,
568 True2: true,
569 False1: false,
570 False2: false,
571 Int: 1,
572 Int8: -2,
573 Int16: 3,
574 Int32: -4,
575 Int64: 5,
576 Uint: 6,
577 Uint8: 7,
578 Uint16: 8,
579 Uint32: 9,
580 Uint64: 10,
581 Uintptr: 11,
582 Float32: 13.0,
583 Float64: 14.0,
584 String: "15",
585 PtrString: &sixteen,
586 }
587
588 var sixteen = "16"
589
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value. It also contains a <Float> element for which allScalars
// declares no field.
590 const testScalarsInput = `<allscalars>
591 <True1>true</True1>
592 <True2>1</True2>
593 <False1>false</False1>
594 <False2>0</False2>
595 <Int>1</Int>
596 <Int8>-2</Int8>
597 <Int16>3</Int16>
598 <Int32>-4</Int32>
599 <Int64>5</Int64>
600 <Uint>6</Uint>
601 <Uint8>7</Uint8>
602 <Uint16>8</Uint16>
603 <Uint32>9</Uint32>
604 <Uint64>10</Uint64>
605 <Uintptr>11</Uintptr>
606 <Float>12.0</Float>
607 <Float32>13.0</Float32>
608 <Float64>14.0</Float64>
609 <String>15</String>
610 <PtrString>16</PtrString>
611 </allscalars>`
612
613 func TestAllScalars(t *testing.T) {
614 var a allScalars
615 err := Unmarshal([]byte(testScalarsInput), &a)
616
617 if err != nil {
618 t.Fatal(err)
619 }
620 if !reflect.DeepEqual(a, all) {
621 t.Errorf("have %+v want %+v", a, all)
622 }
623 }
624
625 type item struct {
626 FieldA string
627 }
628
629 func TestIssue569(t *testing.T) {
630 data := `<item><FieldA>abcd</FieldA></item>`
631 var i item
632 err := Unmarshal([]byte(data), &i)
633
634 if err != nil || i.FieldA != "abcd" {
635 t.Fatal("Expecting abcd")
636 }
637 }
638
639 func TestUnquotedAttrs(t *testing.T) {
640 data := "<tag attr=azAZ09:-_\t>"
641 d := NewDecoder(strings.NewReader(data))
642 d.Strict = false
643 token, err := d.Token()
644 if _, ok := err.(*SyntaxError); ok {
645 t.Errorf("Unexpected error: %v", err)
646 }
647 if token.(StartElement).Name.Local != "tag" {
648 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
649 }
650 attr := token.(StartElement).Attr[0]
651 if attr.Value != "azAZ09:-_" {
652 t.Errorf("Unexpected attribute value: %v", attr.Value)
653 }
654 if attr.Name.Local != "attr" {
655 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
656 }
657 }
658
659 func TestValuelessAttrs(t *testing.T) {
660 tests := [][3]string{
661 {"<p nowrap>", "p", "nowrap"},
662 {"<p nowrap >", "p", "nowrap"},
663 {"<input checked/>", "input", "checked"},
664 {"<input checked />", "input", "checked"},
665 }
666 for _, test := range tests {
667 d := NewDecoder(strings.NewReader(test[0]))
668 d.Strict = false
669 token, err := d.Token()
670 if _, ok := err.(*SyntaxError); ok {
671 t.Errorf("Unexpected error: %v", err)
672 }
673 if token.(StartElement).Name.Local != test[1] {
674 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
675 }
676 attr := token.(StartElement).Attr[0]
677 if attr.Value != test[2] {
678 t.Errorf("Unexpected attribute value: %v", attr.Value)
679 }
680 if attr.Name.Local != test[2] {
681 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
682 }
683 }
684 }
685
686 func TestCopyTokenCharData(t *testing.T) {
687 data := []byte("same data")
688 var tok1 Token = CharData(data)
689 tok2 := CopyToken(tok1)
690 if !reflect.DeepEqual(tok1, tok2) {
691 t.Error("CopyToken(CharData) != CharData")
692 }
693 data[1] = 'o'
694 if reflect.DeepEqual(tok1, tok2) {
695 t.Error("CopyToken(CharData) uses same buffer.")
696 }
697 }
698
699 func TestCopyTokenStartElement(t *testing.T) {
700 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
701 var tok1 Token = elt
702 tok2 := CopyToken(tok1)
703 if tok1.(StartElement).Attr[0].Value != "en" {
704 t.Error("CopyToken overwrote Attr[0]")
705 }
706 if !reflect.DeepEqual(tok1, tok2) {
707 t.Error("CopyToken(StartElement) != StartElement")
708 }
709 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
710 if reflect.DeepEqual(tok1, tok2) {
711 t.Error("CopyToken(CharData) uses same buffer.")
712 }
713 }
714
715 func TestCopyTokenComment(t *testing.T) {
716 data := []byte("<!-- some comment -->")
717 var tok1 Token = Comment(data)
718 tok2 := CopyToken(tok1)
719 if !reflect.DeepEqual(tok1, tok2) {
720 t.Error("CopyToken(Comment) != Comment")
721 }
722 data[1] = 'o'
723 if reflect.DeepEqual(tok1, tok2) {
724 t.Error("CopyToken(Comment) uses same buffer.")
725 }
726 }
727
728 func TestSyntaxErrorLineNum(t *testing.T) {
729 testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
730 d := NewDecoder(strings.NewReader(testInput))
731 var err error
732 for _, err = d.Token(); err == nil; _, err = d.Token() {
733 }
734 synerr, ok := err.(*SyntaxError)
735 if !ok {
736 t.Error("Expected SyntaxError.")
737 }
738 if synerr.Line != 3 {
739 t.Error("SyntaxError didn't have correct line number.")
740 }
741 }
742
743 func TestTrailingRawToken(t *testing.T) {
744 input := `<FOO></FOO> `
745 d := NewDecoder(strings.NewReader(input))
746 var err error
747 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
748 }
749 if err != io.EOF {
750 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
751 }
752 }
753
754 func TestTrailingToken(t *testing.T) {
755 input := `<FOO></FOO> `
756 d := NewDecoder(strings.NewReader(input))
757 var err error
758 for _, err = d.Token(); err == nil; _, err = d.Token() {
759 }
760 if err != io.EOF {
761 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
762 }
763 }
764
765 func TestEntityInsideCDATA(t *testing.T) {
766 input := `<test><![CDATA[ &val=foo ]]></test>`
767 d := NewDecoder(strings.NewReader(input))
768 var err error
769 for _, err = d.Token(); err == nil; _, err = d.Token() {
770 }
771 if err != io.EOF {
772 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
773 }
774 }
775
// characterTests pairs inputs containing disallowed characters or invalid
// entity references with the SyntaxError message
// TestDisallowedCharacters expects for each.
var characterTests = []struct {
	in  string
	err string
}{
	{
		in:  "\x12<doc/>",
		err: "illegal character code U+0012",
	},
	{
		in:  "<?xml version=\"1.0\"?>\x0b<doc/>",
		err: "illegal character code U+000B",
	},
	{
		in:  "\xef\xbf\xbe<doc/>",
		err: "illegal character code U+FFFE",
	},
	{
		in:  "<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>",
		err: "illegal character code U+0007",
	},
	{
		in:  "<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>",
		err: "expected attribute name in element",
	},
	{
		in:  "<doc>&abc\x01;</doc>",
		err: "invalid character entity &abc (no semicolon)",
	},
	{
		in:  "<doc>&\x01;</doc>",
		err: "invalid character entity & (no semicolon)",
	},
	{
		in:  "<doc>&\xef\xbf\xbe;</doc>",
		err: "invalid character entity &\uFFFE;",
	},
	{
		in:  "<doc>&hello;</doc>",
		err: "invalid character entity &hello;",
	},
}
790
791 func TestDisallowedCharacters(t *testing.T) {
792
793 for i, tt := range characterTests {
794 d := NewDecoder(strings.NewReader(tt.in))
795 var err error
796
797 for err == nil {
798 _, err = d.Token()
799 }
800 synerr, ok := err.(*SyntaxError)
801 if !ok {
802 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
803 }
804 if synerr.Msg != tt.err {
805 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
806 }
807 }
808 }
809
810 func TestIsInCharacterRange(t *testing.T) {
811 invalid := []rune{
812 utf8.MaxRune + 1,
813 0xD800,
814 0xDFFF,
815 -1,
816 }
817 for _, r := range invalid {
818 if isInCharacterRange(r) {
819 t.Errorf("rune %U considered valid", r)
820 }
821 }
822 }
823
// procInstTests pairs processing-instruction content with the values
// TestProcInstEncoding expects procInst to extract: expect[0] for "version"
// and expect[1] for "encoding".
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{input: `version="1.0" encoding="utf-8"`, expect: [2]string{"1.0", "utf-8"}},
	{input: `version="1.0" encoding='utf-8'`, expect: [2]string{"1.0", "utf-8"}},
	{input: `version="1.0" encoding='utf-8' `, expect: [2]string{"1.0", "utf-8"}},
	{input: `version="1.0" encoding=utf-8`, expect: [2]string{"1.0", ""}},
	{input: `encoding="FOO" `, expect: [2]string{"", "FOO"}},
}
834
835 func TestProcInstEncoding(t *testing.T) {
836 for _, test := range procInstTests {
837 if got := procInst("version", test.input); got != test.expect[0] {
838 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
839 }
840 if got := procInst("encoding", test.input); got != test.expect[1] {
841 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
842 }
843 }
844 }
845
846
847
848
// directivesWithCommentsInput holds DOCTYPE directives that contain
// comments (and comment-like sequences such as "<!->"). It is the input for
// TestDirectivesWithComments.
849 var directivesWithCommentsInput = `
850 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
851 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
852 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
853 `
854
// directivesWithCommentsTokens lists the tokens TestDirectivesWithComments
// expects from directivesWithCommentsInput. The comment text from the input
// does not appear in the expected Directive values.
// NOTE(review): the spacing inside these literals is significant to the
// comparison — confirm it against the decoder's actual output before editing.
855 var directivesWithCommentsTokens = []Token{
856 CharData("\n"),
857 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
858 CharData("\n"),
859 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
860 CharData("\n"),
861 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`),
862 CharData("\n"),
863 }
864
865 func TestDirectivesWithComments(t *testing.T) {
866 d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
867
868 for i, want := range directivesWithCommentsTokens {
869 have, err := d.Token()
870 if err != nil {
871 t.Fatalf("token %d: unexpected error: %s", i, err)
872 }
873 if !reflect.DeepEqual(have, want) {
874 t.Errorf("token %d = %#v want %#v", i, have, want)
875 }
876 }
877 }
878
879
// errWriter is an io.Writer whose Write always fails.
type errWriter struct{}

// Write consumes nothing and reports the error "unwritable".
func (errWriter) Write(p []byte) (int, error) {
	failure := fmt.Errorf("unwritable")
	return 0, failure
}
883
884 func TestEscapeTextIOErrors(t *testing.T) {
885 expectErr := "unwritable"
886 err := EscapeText(errWriter{}, []byte{'A'})
887
888 if err == nil || err.Error() != expectErr {
889 t.Errorf("have %v, want %v", err, expectErr)
890 }
891 }
892
893 func TestEscapeTextInvalidChar(t *testing.T) {
894 input := []byte("A \x00 terminated string.")
895 expected := "A \uFFFD terminated string."
896
897 buff := new(bytes.Buffer)
898 if err := EscapeText(buff, input); err != nil {
899 t.Fatalf("have %v, want nil", err)
900 }
901 text := buff.String()
902
903 if text != expected {
904 t.Errorf("have %v, want %v", text, expected)
905 }
906 }
907
908 func TestIssue5880(t *testing.T) {
909 type T []byte
910 data, err := Marshal(T{192, 168, 0, 1})
911 if err != nil {
912 t.Errorf("Marshal error: %v", err)
913 }
914 if !utf8.Valid(data) {
915 t.Errorf("Marshal generated invalid UTF-8: %x", data)
916 }
917 }
918
919 func TestIssue11405(t *testing.T) {
920 testCases := []string{
921 "<root>",
922 "<root><foo>",
923 "<root><foo></foo>",
924 }
925 for _, tc := range testCases {
926 d := NewDecoder(strings.NewReader(tc))
927 var err error
928 for {
929 _, err = d.Token()
930 if err != nil {
931 break
932 }
933 }
934 if _, ok := err.(*SyntaxError); !ok {
935 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
936 }
937 }
938 }
939
940 func TestIssue12417(t *testing.T) {
941 testCases := []struct {
942 s string
943 ok bool
944 }{
945 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
946 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
947 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
948 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
949 }
950 for _, tc := range testCases {
951 d := NewDecoder(strings.NewReader(tc.s))
952 var err error
953 for {
954 _, err = d.Token()
955 if err != nil {
956 if err == io.EOF {
957 err = nil
958 }
959 break
960 }
961 }
962 if err != nil && tc.ok {
963 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
964 continue
965 }
966 if err == nil && !tc.ok {
967 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
968 }
969 }
970 }
971
972 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
973 return func(src TokenReader) TokenReader {
974 return mapper{
975 t: src,
976 f: mapping,
977 }
978 }
979 }
980
981 type mapper struct {
982 t TokenReader
983 f func(Token) Token
984 }
985
986 func (m mapper) Token() (Token, error) {
987 tok, err := m.t.Token()
988 if err != nil {
989 return nil, err
990 }
991 return m.f(tok), nil
992 }
993
994 func TestNewTokenDecoderIdempotent(t *testing.T) {
995 d := NewDecoder(strings.NewReader(`<br>`))
996 d2 := NewTokenDecoder(d)
997 if d != d2 {
998 t.Error("NewTokenDecoder did not detect underlying Decoder")
999 }
1000 }
1001
1002 func TestWrapDecoder(t *testing.T) {
1003 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
1004 m := tokenMap(func(t Token) Token {
1005 switch tok := t.(type) {
1006 case StartElement:
1007 if tok.Name.Local == "quote" {
1008 tok.Name.Local = "blocking"
1009 return tok
1010 }
1011 case EndElement:
1012 if tok.Name.Local == "quote" {
1013 tok.Name.Local = "blocking"
1014 return tok
1015 }
1016 }
1017 return t
1018 })
1019
1020 d = NewTokenDecoder(m(d))
1021
1022 o := struct {
1023 XMLName Name `xml:"blocking"`
1024 Chardata string `xml:",chardata"`
1025 }{}
1026
1027 if err := d.Decode(&o); err != nil {
1028 t.Fatal("Got unexpected error while decoding:", err)
1029 }
1030
1031 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
1032 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
1033 }
1034 }
1035
1036 type tokReader struct{}
1037
1038 func (tokReader) Token() (Token, error) {
1039 return StartElement{}, nil
1040 }
1041
1042 type Failure struct{}
1043
1044 func (Failure) UnmarshalXML(*Decoder, StartElement) error {
1045 return nil
1046 }
1047
1048 func TestTokenUnmarshaler(t *testing.T) {
1049 defer func() {
1050 if r := recover(); r != nil {
1051 t.Error("Unexpected panic using custom token unmarshaler")
1052 }
1053 }()
1054
1055 d := NewTokenDecoder(tokReader{})
1056 d.Decode(&Failure{})
1057 }
1058
1059 func testRoundTrip(t *testing.T, input string) {
1060 d := NewDecoder(strings.NewReader(input))
1061 var tokens []Token
1062 var buf bytes.Buffer
1063 e := NewEncoder(&buf)
1064 for {
1065 tok, err := d.Token()
1066 if err == io.EOF {
1067 break
1068 }
1069 if err != nil {
1070 t.Fatalf("invalid input: %v", err)
1071 }
1072 if err := e.EncodeToken(tok); err != nil {
1073 t.Fatalf("failed to re-encode input: %v", err)
1074 }
1075 tokens = append(tokens, CopyToken(tok))
1076 }
1077 if err := e.Flush(); err != nil {
1078 t.Fatal(err)
1079 }
1080
1081 d = NewDecoder(&buf)
1082 for {
1083 tok, err := d.Token()
1084 if err == io.EOF {
1085 break
1086 }
1087 if err != nil {
1088 t.Fatalf("failed to decode output: %v", err)
1089 }
1090 if len(tokens) == 0 {
1091 t.Fatalf("unexpected token: %#v", tok)
1092 }
1093 a, b := tokens[0], tok
1094 if !reflect.DeepEqual(a, b) {
1095 t.Fatalf("token mismatch: %#v vs %#v", a, b)
1096 }
1097 tokens = tokens[1:]
1098 }
1099 if len(tokens) > 0 {
1100 t.Fatalf("lost tokens: %#v", tokens)
1101 }
1102 }
1103
1104 func TestRoundTrip(t *testing.T) {
1105 tests := map[string]string{
1106 "leading colon": `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`,
1107 "trailing colon": `<foo abc:="x"></foo>`,
1108 "double colon": `<x:y:foo></x:y:foo>`,
1109 "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
1110 }
1111 for name, input := range tests {
1112 t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
1113 }
1114 }
1115
1116 func TestParseErrors(t *testing.T) {
1117 withDefaultHeader := func(s string) string {
1118 return `<?xml version="1.0" encoding="UTF-8"?>` + s
1119 }
1120 tests := []struct {
1121 src string
1122 err string
1123 }{
1124 {withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
1125 {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
1126 {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
1127 {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
1128 {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
1129 {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
1130 {withDefaultHeader("\xf1"), `invalid UTF-8`},
1131
1132
1133 {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
1134
1135
1136 {withDefaultHeader(`<?ok?>`), ``},
1137 {withDefaultHeader(`<?ok version="ok"?>`), ``},
1138 }
1139
1140 for _, test := range tests {
1141 d := NewDecoder(strings.NewReader(test.src))
1142 var err error
1143 for {
1144 _, err = d.Token()
1145 if err != nil {
1146 break
1147 }
1148 }
1149 if test.err == "" {
1150 if err != io.EOF {
1151 t.Errorf("parse %s: have %q error, expected none", test.src, err)
1152 }
1153 continue
1154 }
1155 if err == nil || err == io.EOF {
1156 t.Errorf("parse %s: have no error, expected a non-nil error", test.src)
1157 continue
1158 }
1159 if !strings.Contains(err.Error(), test.err) {
1160 t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err)
1161 continue
1162 }
1163 }
1164 }
1165
// testInputHTMLAutoClose mixes <br> elements written as bare start tags,
// self-closing tags, and explicit start/end pairs, in several letter cases.
// It is the input for BenchmarkHTMLAutoClose and TestHTMLAutoClose.
1166 const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
1167 <br>
1168 <br/><br/>
1169 <br><br>
1170 <br></br>
1171 <BR>
1172 <BR/><BR/>
1173 <Br></Br>
1174 <BR><span id="test">abc</span><br/><br/>`
1175
1176 func BenchmarkHTMLAutoClose(b *testing.B) {
1177 b.RunParallel(func(p *testing.PB) {
1178 for p.Next() {
1179 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1180 d.Strict = false
1181 d.AutoClose = HTMLAutoClose
1182 d.Entity = HTMLEntity
1183 for {
1184 _, err := d.Token()
1185 if err != nil {
1186 if err == io.EOF {
1187 break
1188 }
1189 b.Fatalf("unexpected error: %v", err)
1190 }
1191 }
1192 }
1193 })
1194 }
1195
1196 func TestHTMLAutoClose(t *testing.T) {
1197 wantTokens := []Token{
1198 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
1199 CharData("\n"),
1200 StartElement{Name{"", "br"}, []Attr{}},
1201 EndElement{Name{"", "br"}},
1202 CharData("\n"),
1203 StartElement{Name{"", "br"}, []Attr{}},
1204 EndElement{Name{"", "br"}},
1205 StartElement{Name{"", "br"}, []Attr{}},
1206 EndElement{Name{"", "br"}},
1207 CharData("\n"),
1208 StartElement{Name{"", "br"}, []Attr{}},
1209 EndElement{Name{"", "br"}},
1210 StartElement{Name{"", "br"}, []Attr{}},
1211 EndElement{Name{"", "br"}},
1212 CharData("\n"),
1213 StartElement{Name{"", "br"}, []Attr{}},
1214 EndElement{Name{"", "br"}},
1215 CharData("\n"),
1216 StartElement{Name{"", "BR"}, []Attr{}},
1217 EndElement{Name{"", "BR"}},
1218 CharData("\n"),
1219 StartElement{Name{"", "BR"}, []Attr{}},
1220 EndElement{Name{"", "BR"}},
1221 StartElement{Name{"", "BR"}, []Attr{}},
1222 EndElement{Name{"", "BR"}},
1223 CharData("\n"),
1224 StartElement{Name{"", "Br"}, []Attr{}},
1225 EndElement{Name{"", "Br"}},
1226 CharData("\n"),
1227 StartElement{Name{"", "BR"}, []Attr{}},
1228 EndElement{Name{"", "BR"}},
1229 StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
1230 CharData("abc"),
1231 EndElement{Name{"", "span"}},
1232 StartElement{Name{"", "br"}, []Attr{}},
1233 EndElement{Name{"", "br"}},
1234 StartElement{Name{"", "br"}, []Attr{}},
1235 EndElement{Name{"", "br"}},
1236 }
1237
1238 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1239 d.Strict = false
1240 d.AutoClose = HTMLAutoClose
1241 d.Entity = HTMLEntity
1242 var haveTokens []Token
1243 for {
1244 tok, err := d.Token()
1245 if err != nil {
1246 if err == io.EOF {
1247 break
1248 }
1249 t.Fatalf("unexpected error: %v", err)
1250 }
1251 haveTokens = append(haveTokens, CopyToken(tok))
1252 }
1253 if len(haveTokens) != len(wantTokens) {
1254 t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
1255 }
1256 for i, want := range wantTokens {
1257 if i >= len(haveTokens) {
1258 t.Errorf("token[%d] expected %#v, have no token", i, want)
1259 } else {
1260 have := haveTokens[i]
1261 if !reflect.DeepEqual(have, want) {
1262 t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
1263 }
1264 }
1265 }
1266 }
1267
View as plain text