js.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/json"
    10  	"fmt"
    11  	"reflect"
    12  	"regexp"
    13  	"strings"
    14  	"unicode/utf8"
    15  )
    16  
    17  // jsWhitespace contains all of the JS whitespace characters, as defined
    18  // by the \s character class.
    19  // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
    20  const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff"
    21  
    22  // nextJSCtx returns the context that determines whether a slash after the
    23  // given run of tokens starts a regular expression instead of a division
    24  // operator: / or /=.
    25  //
    26  // This assumes that the token run does not include any string tokens, comment
    27  // tokens, regular expression literal tokens, or division operators.
    28  //
    29  // This fails on some valid but nonsensical JavaScript programs like
    30  // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
    31  // fail on any known useful programs. It is based on the draft
    32  // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
    33  // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
    34  func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
    35  	// Trim all JS whitespace characters
    36  	s = bytes.TrimRight(s, jsWhitespace)
    37  	if len(s) == 0 {
    38  		return preceding
    39  	}
    40  
    41  	// All cases below are in the single-byte UTF-8 group.
    42  	switch c, n := s[len(s)-1], len(s); c {
    43  	case '+', '-':
    44  		// ++ and -- are not regexp preceders, but + and - are whether
    45  		// they are used as infix or prefix operators.
    46  		start := n - 1
    47  		// Count the number of adjacent dashes or pluses.
    48  		for start > 0 && s[start-1] == c {
    49  			start--
    50  		}
    51  		if (n-start)&1 == 1 {
    52  			// Reached for trailing minus signs since "---" is the
    53  			// same as "-- -".
    54  			return jsCtxRegexp
    55  		}
    56  		return jsCtxDivOp
    57  	case '.':
    58  		// Handle "42."
    59  		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
    60  			return jsCtxDivOp
    61  		}
    62  		return jsCtxRegexp
    63  	// Suffixes for all punctuators from section 7.7 of the language spec
    64  	// that only end binary operators not handled above.
    65  	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
    66  		return jsCtxRegexp
    67  	// Suffixes for all punctuators from section 7.7 of the language spec
    68  	// that are prefix operators not handled above.
    69  	case '!', '~':
    70  		return jsCtxRegexp
    71  	// Matches all the punctuators from section 7.7 of the language spec
    72  	// that are open brackets not handled above.
    73  	case '(', '[':
    74  		return jsCtxRegexp
    75  	// Matches all the punctuators from section 7.7 of the language spec
    76  	// that precede expression starts.
    77  	case ':', ';', '{':
    78  		return jsCtxRegexp
    79  	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
    80  	// are handled in the default except for '}' which can precede a
    81  	// division op as in
    82  	//    ({ valueOf: function () { return 42 } } / 2
    83  	// which is valid, but, in practice, developers don't divide object
    84  	// literals, so our heuristic works well for code like
    85  	//    function () { ... }  /foo/.test(x) && sideEffect();
    86  	// The ')' punctuator can precede a regular expression as in
    87  	//     if (b) /foo/.test(x) && ...
    88  	// but this is much less likely than
    89  	//     (a + b) / c
    90  	case '}':
    91  		return jsCtxRegexp
    92  	default:
    93  		// Look for an IdentifierName and see if it is a keyword that
    94  		// can precede a regular expression.
    95  		j := n
    96  		for j > 0 && isJSIdentPart(rune(s[j-1])) {
    97  			j--
    98  		}
    99  		if regexpPrecederKeywords[string(s[j:])] {
   100  			return jsCtxRegexp
   101  		}
   102  	}
   103  	// Otherwise is a punctuator not listed above, or
   104  	// a string which precedes a div op, or an identifier
   105  	// which precedes a div op.
   106  	return jsCtxDivOp
   107  }
   108  
   109  // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
   110  // regular expression in JS source.
   111  var regexpPrecederKeywords = map[string]bool{
   112  	"break":      true,
   113  	"case":       true,
   114  	"continue":   true,
   115  	"delete":     true,
   116  	"do":         true,
   117  	"else":       true,
   118  	"finally":    true,
   119  	"in":         true,
   120  	"instanceof": true,
   121  	"return":     true,
   122  	"throw":      true,
   123  	"try":        true,
   124  	"typeof":     true,
   125  	"void":       true,
   126  }
   127  
   128  var jsonMarshalType = reflect.TypeFor[json.Marshaler]()
   129  
   130  // indirectToJSONMarshaler returns the value, after dereferencing as many times
   131  // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
   132  func indirectToJSONMarshaler(a any) any {
   133  	// text/template now supports passing untyped nil as a func call
   134  	// argument, so we must support it. Otherwise we'd panic below, as one
   135  	// cannot call the Type or Interface methods on an invalid
   136  	// reflect.Value. See golang.org/issue/18716.
   137  	if a == nil {
   138  		return nil
   139  	}
   140  
   141  	v := reflect.ValueOf(a)
   142  	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
   143  		v = v.Elem()
   144  	}
   145  	return v.Interface()
   146  }
   147  
   148  var scriptTagRe = regexp.MustCompile("(?i)<(/?)script")
   149  
   150  // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
   151  // neither side-effects nor free variables outside (NaN, Infinity).
   152  func jsValEscaper(args ...any) string {
   153  	var a any
   154  	if len(args) == 1 {
   155  		a = indirectToJSONMarshaler(args[0])
   156  		switch t := a.(type) {
   157  		case JS:
   158  			return string(t)
   159  		case JSStr:
   160  			// TODO: normalize quotes.
   161  			return `"` + string(t) + `"`
   162  		case json.Marshaler:
   163  			// Do not treat as a Stringer.
   164  		case fmt.Stringer:
   165  			a = t.String()
   166  		}
   167  	} else {
   168  		for i, arg := range args {
   169  			args[i] = indirectToJSONMarshaler(arg)
   170  		}
   171  		a = fmt.Sprint(args...)
   172  	}
   173  	// TODO: detect cycles before calling Marshal which loops infinitely on
   174  	// cyclic data. This may be an unacceptable DoS risk.
   175  	b, err := json.Marshal(a)
   176  	if err != nil {
   177  		// While the standard JSON marshaler does not include user controlled
   178  		// information in the error message, if a type has a MarshalJSON method,
   179  		// the content of the error message is not guaranteed. Since we insert
   180  		// the error into the template, as part of a comment, we attempt to
   181  		// prevent the error from either terminating the comment, or the script
   182  		// block itself.
   183  		//
   184  		// In particular we:
   185  		//   * replace "*/" comment end tokens with "* /", which does not
   186  		//     terminate the comment
   187  		//   * replace "<script" and "</script" with "\x3Cscript" and "\x3C/script"
   188  		//     (case insensitively), and "<!--" with "\x3C!--", which prevents
   189  		//     confusing script block termination semantics
   190  		//
   191  		// We also put a space before the comment so that if it is flush against
   192  		// a division operator it is not turned into a line comment:
   193  		//     x/{{y}}
   194  		// turning into
   195  		//     x//* error marshaling y:
   196  		//          second line of error message */null
   197  		errStr := err.Error()
   198  		errStr = string(scriptTagRe.ReplaceAll([]byte(errStr), []byte(`\x3C${1}script`)))
   199  		errStr = strings.ReplaceAll(errStr, "*/", "* /")
   200  		errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`)
   201  		return fmt.Sprintf(" /* %s */null ", errStr)
   202  	}
   203  
   204  	// TODO: maybe post-process output to prevent it from containing
   205  	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
   206  	// in case custom marshalers produce output containing those.
   207  	// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
   208  	// supports ld+json content-type.
   209  	if len(b) == 0 {
   210  		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
   211  		// not cause the output `x=y/*z`.
   212  		return " null "
   213  	}
   214  	first, _ := utf8.DecodeRune(b)
   215  	last, _ := utf8.DecodeLastRune(b)
   216  	var buf strings.Builder
   217  	// Prevent IdentifierNames and NumericLiterals from running into
   218  	// keywords: in, instanceof, typeof, void
   219  	pad := isJSIdentPart(first) || isJSIdentPart(last)
   220  	if pad {
   221  		buf.WriteByte(' ')
   222  	}
   223  	written := 0
   224  	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
   225  	// so it falls within the subset of JSON which is valid JS.
   226  	for i := 0; i < len(b); {
   227  		rune, n := utf8.DecodeRune(b[i:])
   228  		repl := ""
   229  		if rune == 0x2028 {
   230  			repl = `\u2028`
   231  		} else if rune == 0x2029 {
   232  			repl = `\u2029`
   233  		}
   234  		if repl != "" {
   235  			buf.Write(b[written:i])
   236  			buf.WriteString(repl)
   237  			written = i + n
   238  		}
   239  		i += n
   240  	}
   241  	if buf.Len() != 0 {
   242  		buf.Write(b[written:])
   243  		if pad {
   244  			buf.WriteByte(' ')
   245  		}
   246  		return buf.String()
   247  	}
   248  	return string(b)
   249  }
   250  
   251  // jsStrEscaper produces a string that can be included between quotes in
   252  // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
   253  // or in an HTML5 event handler attribute such as onclick.
   254  func jsStrEscaper(args ...any) string {
   255  	s, t := stringify(args...)
   256  	if t == contentTypeJSStr {
   257  		return replace(s, jsStrNormReplacementTable)
   258  	}
   259  	return replace(s, jsStrReplacementTable)
   260  }
   261  
   262  func jsTmplLitEscaper(args ...any) string {
   263  	s, _ := stringify(args...)
   264  	return replace(s, jsBqStrReplacementTable)
   265  }
   266  
   267  // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
   268  // specials so the result is treated literally when included in a regular
   269  // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
   270  // the literal text of {{.X}} followed by the string "bar".
   271  func jsRegexpEscaper(args ...any) string {
   272  	s, _ := stringify(args...)
   273  	s = replace(s, jsRegexpReplacementTable)
   274  	if s == "" {
   275  		// /{{.X}}/ should not produce a line comment when .X == "".
   276  		return "(?:)"
   277  	}
   278  	return s
   279  }
   280  
   281  // replace replaces each rune r of s with replacementTable[r], provided that
   282  // r < len(replacementTable). If replacementTable[r] is the empty string then
   283  // no replacement is made.
   284  // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
   285  // `\u2029`.
   286  func replace(s string, replacementTable []string) string {
   287  	var b strings.Builder
   288  	r, w, written := rune(0), 0, 0
   289  	for i := 0; i < len(s); i += w {
   290  		// See comment in htmlEscaper.
   291  		r, w = utf8.DecodeRuneInString(s[i:])
   292  		var repl string
   293  		switch {
   294  		case int(r) < len(lowUnicodeReplacementTable):
   295  			repl = lowUnicodeReplacementTable[r]
   296  		case int(r) < len(replacementTable) && replacementTable[r] != "":
   297  			repl = replacementTable[r]
   298  		case r == '\u2028':
   299  			repl = `\u2028`
   300  		case r == '\u2029':
   301  			repl = `\u2029`
   302  		default:
   303  			continue
   304  		}
   305  		if written == 0 {
   306  			b.Grow(len(s))
   307  		}
   308  		b.WriteString(s[written:i])
   309  		b.WriteString(repl)
   310  		written = i + w
   311  	}
   312  	if written == 0 {
   313  		return s
   314  	}
   315  	b.WriteString(s[written:])
   316  	return b.String()
   317  }
   318  
   319  var lowUnicodeReplacementTable = []string{
   320  	0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
   321  	'\a': `\u0007`,
   322  	'\b': `\u0008`,
   323  	'\t': `\t`,
   324  	'\n': `\n`,
   325  	'\v': `\u000b`, // "\v" == "v" on IE 6.
   326  	'\f': `\f`,
   327  	'\r': `\r`,
   328  	0xe:  `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
   329  	0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
   330  	0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
   331  }
   332  
   333  var jsStrReplacementTable = []string{
   334  	0:    `\u0000`,
   335  	'\t': `\t`,
   336  	'\n': `\n`,
   337  	'\v': `\u000b`, // "\v" == "v" on IE 6.
   338  	'\f': `\f`,
   339  	'\r': `\r`,
   340  	// Encode HTML specials as hex so the output can be embedded
   341  	// in HTML attributes without further encoding.
   342  	'"':  `\u0022`,
   343  	'`':  `\u0060`,
   344  	'&':  `\u0026`,
   345  	'\'': `\u0027`,
   346  	'+':  `\u002b`,
   347  	'/':  `\/`,
   348  	'<':  `\u003c`,
   349  	'>':  `\u003e`,
   350  	'\\': `\\`,
   351  }
   352  
   353  // jsBqStrReplacementTable is like jsStrReplacementTable except it also contains
   354  // the special characters for JS template literals: $, {, and }.
   355  var jsBqStrReplacementTable = []string{
   356  	0:    `\u0000`,
   357  	'\t': `\t`,
   358  	'\n': `\n`,
   359  	'\v': `\u000b`, // "\v" == "v" on IE 6.
   360  	'\f': `\f`,
   361  	'\r': `\r`,
   362  	// Encode HTML specials as hex so the output can be embedded
   363  	// in HTML attributes without further encoding.
   364  	'"':  `\u0022`,
   365  	'`':  `\u0060`,
   366  	'&':  `\u0026`,
   367  	'\'': `\u0027`,
   368  	'+':  `\u002b`,
   369  	'/':  `\/`,
   370  	'<':  `\u003c`,
   371  	'>':  `\u003e`,
   372  	'\\': `\\`,
   373  	'$':  `\u0024`,
   374  	'{':  `\u007b`,
   375  	'}':  `\u007d`,
   376  }
   377  
   378  // jsStrNormReplacementTable is like jsStrReplacementTable but does not
   379  // overencode existing escapes since this table has no entry for `\`.
   380  var jsStrNormReplacementTable = []string{
   381  	0:    `\u0000`,
   382  	'\t': `\t`,
   383  	'\n': `\n`,
   384  	'\v': `\u000b`, // "\v" == "v" on IE 6.
   385  	'\f': `\f`,
   386  	'\r': `\r`,
   387  	// Encode HTML specials as hex so the output can be embedded
   388  	// in HTML attributes without further encoding.
   389  	'"':  `\u0022`,
   390  	'&':  `\u0026`,
   391  	'\'': `\u0027`,
   392  	'`':  `\u0060`,
   393  	'+':  `\u002b`,
   394  	'/':  `\/`,
   395  	'<':  `\u003c`,
   396  	'>':  `\u003e`,
   397  }
   398  var jsRegexpReplacementTable = []string{
   399  	0:    `\u0000`,
   400  	'\t': `\t`,
   401  	'\n': `\n`,
   402  	'\v': `\u000b`, // "\v" == "v" on IE 6.
   403  	'\f': `\f`,
   404  	'\r': `\r`,
   405  	// Encode HTML specials as hex so the output can be embedded
   406  	// in HTML attributes without further encoding.
   407  	'"':  `\u0022`,
   408  	'$':  `\$`,
   409  	'&':  `\u0026`,
   410  	'\'': `\u0027`,
   411  	'(':  `\(`,
   412  	')':  `\)`,
   413  	'*':  `\*`,
   414  	'+':  `\u002b`,
   415  	'-':  `\-`,
   416  	'.':  `\.`,
   417  	'/':  `\/`,
   418  	'<':  `\u003c`,
   419  	'>':  `\u003e`,
   420  	'?':  `\?`,
   421  	'[':  `\[`,
   422  	'\\': `\\`,
   423  	']':  `\]`,
   424  	'^':  `\^`,
   425  	'{':  `\{`,
   426  	'|':  `\|`,
   427  	'}':  `\}`,
   428  }
   429  
   430  // isJSIdentPart reports whether the given rune is a JS identifier part.
   431  // It does not handle all the non-Latin letters, joiners, and combining marks,
   432  // but it does handle every codepoint that can occur in a numeric literal or
   433  // a keyword.
   434  func isJSIdentPart(r rune) bool {
   435  	switch {
   436  	case r == '$':
   437  		return true
   438  	case '0' <= r && r <= '9':
   439  		return true
   440  	case 'A' <= r && r <= 'Z':
   441  		return true
   442  	case r == '_':
   443  		return true
   444  	case 'a' <= r && r <= 'z':
   445  		return true
   446  	}
   447  	return false
   448  }
   449  
   450  // isJSType reports whether the given MIME type should be considered JavaScript.
   451  //
   452  // It is used to determine whether a script tag with a type attribute is a javascript container.
   453  func isJSType(mimeType string) bool {
   454  	// per
   455  	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
   456  	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
   457  	//   https://tools.ietf.org/html/rfc4329#section-3
   458  	//   https://www.ietf.org/rfc/rfc4627.txt
   459  	// discard parameters
   460  	mimeType, _, _ = strings.Cut(mimeType, ";")
   461  	mimeType = strings.ToLower(mimeType)
   462  	mimeType = strings.TrimSpace(mimeType)
   463  	switch mimeType {
   464  	case
   465  		"application/ecmascript",
   466  		"application/javascript",
   467  		"application/json",
   468  		"application/ld+json",
   469  		"application/x-ecmascript",
   470  		"application/x-javascript",
   471  		"module",
   472  		"text/ecmascript",
   473  		"text/javascript",
   474  		"text/javascript1.0",
   475  		"text/javascript1.1",
   476  		"text/javascript1.2",
   477  		"text/javascript1.3",
   478  		"text/javascript1.4",
   479  		"text/javascript1.5",
   480  		"text/jscript",
   481  		"text/livescript",
   482  		"text/x-ecmascript",
   483  		"text/x-javascript":
   484  		return true
   485  	default:
   486  		return false
   487  	}
   488  }
   489
View as plain text