1
2
3
4
5 package template
6
7 import (
8 "bytes"
9 "strings"
10 )
11
12
13
14
15
// transitionFunc is the array of context transition functions, indexed by
// state. Each function takes the current context and a chunk of input and
// returns the context that follows together with an offset into that input
// (the functions below return len(s) when they handle the whole chunk).
// Several states share one function: for example all JS string and regexp
// states use tJSDelimited, and both block comment states use tBlockCmt.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:           tText,
	stateTag:            tTag,
	stateAttrName:       tAttrName,
	stateAfterName:      tAfterName,
	stateBeforeValue:    tBeforeValue,
	stateHTMLCmt:        tHTMLCmt,
	stateRCDATA:         tSpecialTagEnd,
	stateAttr:           tAttr,
	stateURL:            tURL,
	stateSrcset:         tURL,
	stateJS:             tJS,
	stateJSDqStr:        tJSDelimited,
	stateJSSqStr:        tJSDelimited,
	stateJSBqStr:        tJSDelimited,
	stateJSRegexp:       tJSDelimited,
	stateJSBlockCmt:     tBlockCmt,
	stateJSLineCmt:      tLineCmt,
	stateJSHTMLOpenCmt:  tLineCmt,
	stateJSHTMLCloseCmt: tLineCmt,
	stateCSS:            tCSS,
	stateCSSDqStr:       tCSSStr,
	stateCSSSqStr:       tCSSStr,
	stateCSSDqURL:       tCSSStr,
	stateCSSSqURL:       tCSSStr,
	stateCSSURL:         tCSSStr,
	stateCSSBlockCmt:    tBlockCmt,
	stateCSSLineCmt:     tLineCmt,
	stateError:          tError,
}
46
// commentStart is the byte sequence that opens an HTML comment. tText and
// tJS compare input against it.
var commentStart = []byte("<!--")

// commentEnd is the byte sequence that closes an HTML comment. tHTMLCmt
// searches for it and tJS compares input against it.
var commentEnd = []byte("-->")
49
50
51 func tText(c context, s []byte) (context, int) {
52 k := 0
53 for {
54 i := k + bytes.IndexByte(s[k:], '<')
55 if i < k || i+1 == len(s) {
56 return c, len(s)
57 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
58 return context{state: stateHTMLCmt}, i + 4
59 }
60 i++
61 end := false
62 if s[i] == '/' {
63 if i+1 == len(s) {
64 return c, len(s)
65 }
66 end, i = true, i+1
67 }
68 j, e := eatTagName(s, i)
69 if j != i {
70 if end {
71 e = elementNone
72 }
73
74 return context{state: stateTag, element: e}, j
75 }
76 k = j
77 }
78 }
79
// elementContentType maps an element to the state that tTag enters once the
// tag's closing '>' has been seen.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
87
88
89 func tTag(c context, s []byte) (context, int) {
90
91 i := eatWhiteSpace(s, 0)
92 if i == len(s) {
93 return c, len(s)
94 }
95 if s[i] == '>' {
96 return context{
97 state: elementContentType[c.element],
98 element: c.element,
99 }, i + 1
100 }
101 j, err := eatAttrName(s, i)
102 if err != nil {
103 return context{state: stateError, err: err}, len(s)
104 }
105 state, attr := stateTag, attrNone
106 if i == j {
107 return context{
108 state: stateError,
109 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
110 }, len(s)
111 }
112
113 attrName := strings.ToLower(string(s[i:j]))
114 if c.element == elementScript && attrName == "type" {
115 attr = attrScriptType
116 } else {
117 switch attrType(attrName) {
118 case contentTypeURL:
119 attr = attrURL
120 case contentTypeCSS:
121 attr = attrStyle
122 case contentTypeJS:
123 attr = attrScript
124 case contentTypeSrcset:
125 attr = attrSrcset
126 }
127 }
128
129 if j == len(s) {
130 state = stateAttrName
131 } else {
132 state = stateAfterName
133 }
134 return context{state: state, element: c.element, attr: attr}, j
135 }
136
137
138 func tAttrName(c context, s []byte) (context, int) {
139 i, err := eatAttrName(s, 0)
140 if err != nil {
141 return context{state: stateError, err: err}, len(s)
142 } else if i != len(s) {
143 c.state = stateAfterName
144 }
145 return c, i
146 }
147
148
149 func tAfterName(c context, s []byte) (context, int) {
150
151 i := eatWhiteSpace(s, 0)
152 if i == len(s) {
153 return c, len(s)
154 } else if s[i] != '=' {
155
156 c.state = stateTag
157 return c, i
158 }
159 c.state = stateBeforeValue
160
161 return c, i + 1
162 }
163
// attrStartStates maps an attribute kind to the state that tBeforeValue
// enters at the start of that attribute's value.
var attrStartStates = [...]state{
	attrNone:       stateAttr,
	attrScript:     stateJS,
	attrScriptType: stateAttr,
	attrStyle:      stateCSS,
	attrURL:        stateURL,
	attrSrcset:     stateSrcset,
}
172
173
174 func tBeforeValue(c context, s []byte) (context, int) {
175 i := eatWhiteSpace(s, 0)
176 if i == len(s) {
177 return c, len(s)
178 }
179
180 delim := delimSpaceOrTagEnd
181 switch s[i] {
182 case '\'':
183 delim, i = delimSingleQuote, i+1
184 case '"':
185 delim, i = delimDoubleQuote, i+1
186 }
187 c.state, c.delim = attrStartStates[c.attr], delim
188 return c, i
189 }
190
191
192 func tHTMLCmt(c context, s []byte) (context, int) {
193 if i := bytes.Index(s, commentEnd); i != -1 {
194 return context{}, i + 3
195 }
196 return c, len(s)
197 }
198
199
200
// specialTagEndMarkers maps an element to the lower-case name that
// tSpecialTagEnd passes to indexTagEnd when looking for that element's end
// tag.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
207
var (
	// specialTagEndPrefix is the two bytes that open an end tag; indexTagEnd
	// searches for it.
	specialTagEndPrefix = []byte("</")
	// tagEndSeparators holds the bytes that indexTagEnd accepts directly
	// after a tag name for the match to count as an end tag.
	tagEndSeparators = []byte("> \t\n\f/")
)
212
213
214
215 func tSpecialTagEnd(c context, s []byte) (context, int) {
216 if c.element != elementNone {
217
218
219 if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
220 return c, len(s)
221 }
222 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
223 return context{}, i
224 }
225 }
226 return c, len(s)
227 }
228
229
230 func indexTagEnd(s []byte, tag []byte) int {
231 res := 0
232 plen := len(specialTagEndPrefix)
233 for len(s) > 0 {
234
235 i := bytes.Index(s, specialTagEndPrefix)
236 if i == -1 {
237 return i
238 }
239 s = s[i+plen:]
240
241 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
242 s = s[len(tag):]
243
244 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
245 return res + i
246 }
247 res += len(tag)
248 }
249 res += i + plen
250 }
251 return -1
252 }
253
254
// tAttr is the context transition function for stateAttr. It leaves the
// context unchanged and reports the whole of s as handled.
func tAttr(c context, s []byte) (context, int) {
	return c, len(s)
}
258
259
260 func tURL(c context, s []byte) (context, int) {
261 if bytes.ContainsAny(s, "#?") {
262 c.urlPart = urlPartQueryOrFrag
263 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
264
265
266 c.urlPart = urlPartPreQuery
267 }
268 return c, len(s)
269 }
270
271
// tJS is the context transition function for the JS state.
//
// It looks for the first byte in s that can start a string, template
// literal, comment or regexp, or that needs special handling ('<', '-',
// '#'). The text before that byte is fed to nextJSCtx to update c.jsCtx, and
// the byte itself (plus any bytes recognised together with it) decides the
// next state. The returned offset is just past the last byte recognised.
func tJS(c context, s []byte) (context, int) {
	i := bytes.IndexAny(s, "\"`'/<-#")
	if i == -1 {
		// Nothing of interest in this chunk: only update the JS context.
		c.jsCtx = nextJSCtx(s, c.jsCtx)
		return c, len(s)
	}
	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
	switch s[i] {
	case '"':
		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
	case '\'':
		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
	case '`':
		c.state, c.jsCtx = stateJSBqStr, jsCtxRegexp
	case '/':
		switch {
		// "//" and "/*" start comments; i is advanced so that both bytes
		// are consumed by the final return.
		case i+1 < len(s) && s[i+1] == '/':
			c.state, i = stateJSLineCmt, i+1
		case i+1 < len(s) && s[i+1] == '*':
			c.state, i = stateJSBlockCmt, i+1
		// Otherwise the recorded JS context decides whether this '/' opens
		// a regexp or is a division operator.
		case c.jsCtx == jsCtxRegexp:
			c.state = stateJSRegexp
		case c.jsCtx == jsCtxDivOp:
			c.jsCtx = jsCtxRegexp
		default:
			// Neither context applies: report the ambiguity as an error.
			return context{
				state: stateError,
				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
			}, len(s)
		}
	case '<':
		// "<!--" inside JS is handled as a line comment (stateJSHTMLOpenCmt
		// is mapped to tLineCmt). A lone '<' is consumed with no state change.
		if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
			c.state, i = stateJSHTMLOpenCmt, i+3
		}
	case '-':
		// "-->" inside JS is likewise handled as a line comment.
		if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
			c.state, i = stateJSHTMLCloseCmt, i+2
		}
	case '#':
		// "#!" starts a line comment; a lone '#' is consumed with no state
		// change.
		if i+1 < len(s) && s[i+1] == '!' {
			c.state, i = stateJSLineCmt, i+1
		}
	default:
		// IndexAny can only return a byte from the set searched above.
		panic("unreachable")
	}
	return c, i + 1
}
328
329
330
331 func tJSDelimited(c context, s []byte) (context, int) {
332 specials := `\"`
333 switch c.state {
334 case stateJSSqStr:
335 specials = `\'`
336 case stateJSBqStr:
337 specials = "`\\"
338 case stateJSRegexp:
339 specials = `\/[]`
340 }
341
342 k, inCharset := 0, false
343 for {
344 i := k + bytes.IndexAny(s[k:], specials)
345 if i < k {
346 break
347 }
348 switch s[i] {
349 case '\\':
350 i++
351 if i == len(s) {
352 return context{
353 state: stateError,
354 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
355 }, len(s)
356 }
357 case '[':
358 inCharset = true
359 case ']':
360 inCharset = false
361 case '/':
362
363
364
365 if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
366 i++
367 } else if !inCharset {
368 c.state, c.jsCtx = stateJS, jsCtxDivOp
369 return c, i + 1
370 }
371 default:
372
373 if !inCharset {
374 c.state, c.jsCtx = stateJS, jsCtxDivOp
375 return c, i + 1
376 }
377 }
378 k = i + 1
379 }
380
381 if inCharset {
382
383
384 return context{
385 state: stateError,
386 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
387 }, len(s)
388 }
389
390 return c, len(s)
391 }
392
// blockCommentEnd is the byte sequence that closes a block comment;
// tBlockCmt searches for it.
var blockCommentEnd = []byte("*/")
394
395
396 func tBlockCmt(c context, s []byte) (context, int) {
397 i := bytes.Index(s, blockCommentEnd)
398 if i == -1 {
399 return c, len(s)
400 }
401 switch c.state {
402 case stateJSBlockCmt:
403 c.state = stateJS
404 case stateCSSBlockCmt:
405 c.state = stateCSS
406 default:
407 panic(c.state.String())
408 }
409 return c, i + 2
410 }
411
412
413 func tLineCmt(c context, s []byte) (context, int) {
414 var lineTerminators string
415 var endState state
416 switch c.state {
417 case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
418 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
419 case stateCSSLineCmt:
420 lineTerminators, endState = "\n\f\r", stateCSS
421
422
423
424
425
426
427
428 default:
429 panic(c.state.String())
430 }
431
432 i := bytes.IndexAny(s, lineTerminators)
433 if i == -1 {
434 return c, len(s)
435 }
436 c.state = endState
437
438
439
440
441
442 return c, i
443 }
444
445
446 func tCSS(c context, s []byte) (context, int) {
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474 k := 0
475 for {
476 i := k + bytes.IndexAny(s[k:], `("'/`)
477 if i < k {
478 return c, len(s)
479 }
480 switch s[i] {
481 case '(':
482
483 p := bytes.TrimRight(s[:i], "\t\n\f\r ")
484 if endsWithCSSKeyword(p, "url") {
485 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
486 switch {
487 case j != len(s) && s[j] == '"':
488 c.state, j = stateCSSDqURL, j+1
489 case j != len(s) && s[j] == '\'':
490 c.state, j = stateCSSSqURL, j+1
491 default:
492 c.state = stateCSSURL
493 }
494 return c, j
495 }
496 case '/':
497 if i+1 < len(s) {
498 switch s[i+1] {
499 case '/':
500 c.state = stateCSSLineCmt
501 return c, i + 2
502 case '*':
503 c.state = stateCSSBlockCmt
504 return c, i + 2
505 }
506 }
507 case '"':
508 c.state = stateCSSDqStr
509 return c, i + 1
510 case '\'':
511 c.state = stateCSSSqStr
512 return c, i + 1
513 }
514 k = i + 1
515 }
516 }
517
518
519 func tCSSStr(c context, s []byte) (context, int) {
520 var endAndEsc string
521 switch c.state {
522 case stateCSSDqStr, stateCSSDqURL:
523 endAndEsc = `\"`
524 case stateCSSSqStr, stateCSSSqURL:
525 endAndEsc = `\'`
526 case stateCSSURL:
527
528
529 endAndEsc = "\\\t\n\f\r )"
530 default:
531 panic(c.state.String())
532 }
533
534 k := 0
535 for {
536 i := k + bytes.IndexAny(s[k:], endAndEsc)
537 if i < k {
538 c, nread := tURL(c, decodeCSS(s[k:]))
539 return c, k + nread
540 }
541 if s[i] == '\\' {
542 i++
543 if i == len(s) {
544 return context{
545 state: stateError,
546 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
547 }, len(s)
548 }
549 } else {
550 c.state = stateCSS
551 return c, i + 1
552 }
553 c, _ = tURL(c, decodeCSS(s[:i+1]))
554 k = i + 1
555 }
556 }
557
558
// tError is the context transition function for stateError. It leaves the
// context unchanged and reports the whole of s as handled.
func tError(c context, s []byte) (context, int) {
	return c, len(s)
}
562
563
564
565
566
567 func eatAttrName(s []byte, i int) (int, *Error) {
568 for j := i; j < len(s); j++ {
569 switch s[j] {
570 case ' ', '\t', '\n', '\f', '\r', '=', '>':
571 return j, nil
572 case '\'', '"', '<':
573
574
575
576 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
577 default:
578
579 }
580 }
581 return len(s), nil
582 }
583
// elementNameMap maps lower-case tag names to their element value.
// eatTagName looks names up here; a name that is not present yields the zero
// element value.
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
590
591
// asciiAlpha reports whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and leaves 'a'..'z' as
	// they are; no other byte lands in that range.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
595
596
597 func asciiAlphaNum(c byte) bool {
598 return asciiAlpha(c) || '0' <= c && c <= '9'
599 }
600
601
602 func eatTagName(s []byte, i int) (int, element) {
603 if i == len(s) || !asciiAlpha(s[i]) {
604 return i, elementNone
605 }
606 j := i + 1
607 for j < len(s) {
608 x := s[j]
609 if asciiAlphaNum(x) {
610 j++
611 continue
612 }
613
614 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
615 j += 2
616 continue
617 }
618 break
619 }
620 return j, elementNameMap[strings.ToLower(string(s[i:j]))]
621 }
622
623
// eatWhiteSpace returns the largest j such that s[i:j] is white space
// (space, tab, line feed, form feed or carriage return). It returns len(s)
// when everything from i onwards is white space.
func eatWhiteSpace(s []byte, i int) int {
	for pos := i; pos < len(s); pos++ {
		if b := s[pos]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
			return pos
		}
	}
	return len(s)
}
635
View as plain text