// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "bytes" "encoding/json" "fmt" "reflect" "strings" "unicode/utf8" ) // jsWhitespace contains all of the JS whitespace characters, as defined // by the \s character class. // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes. const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff" // nextJSCtx returns the context that determines whether a slash after the // given run of tokens starts a regular expression instead of a division // operator: / or /=. // // This assumes that the token run does not include any string tokens, comment // tokens, regular expression literal tokens, or division operators. // // This fails on some valid but nonsensical JavaScript programs like // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to // fail on any known useful programs. It is based on the draft // JavaScript 2.0 lexical grammar and requires one token of lookbehind: // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html func nextJSCtx(s []byte, preceding jsCtx) jsCtx { // Trim all JS whitespace characters s = bytes.TrimRight(s, jsWhitespace) if len(s) == 0 { return preceding } // All cases below are in the single-byte UTF-8 group. switch c, n := s[len(s)-1], len(s); c { case '+', '-': // ++ and -- are not regexp preceders, but + and - are whether // they are used as infix or prefix operators. start := n - 1 // Count the number of adjacent dashes or pluses. for start > 0 && s[start-1] == c { start-- } if (n-start)&1 == 1 { // Reached for trailing minus signs since "---" is the // same as "-- -". return jsCtxRegexp } return jsCtxDivOp case '.': // Handle "42." 
if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { return jsCtxDivOp } return jsCtxRegexp // Suffixes for all punctuators from section 7.7 of the language spec // that only end binary operators not handled above. case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': return jsCtxRegexp // Suffixes for all punctuators from section 7.7 of the language spec // that are prefix operators not handled above. case '!', '~': return jsCtxRegexp // Matches all the punctuators from section 7.7 of the language spec // that are open brackets not handled above. case '(', '[': return jsCtxRegexp // Matches all the punctuators from section 7.7 of the language spec // that precede expression starts. case ':', ';', '{': return jsCtxRegexp // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and // are handled in the default except for '}' which can precede a // division op as in // ({ valueOf: function () { return 42 } } / 2 // which is valid, but, in practice, developers don't divide object // literals, so our heuristic works well for code like // function () { ... } /foo/.test(x) && sideEffect(); // The ')' punctuator can precede a regular expression as in // if (b) /foo/.test(x) && ... // but this is much less likely than // (a + b) / c case '}': return jsCtxRegexp default: // Look for an IdentifierName and see if it is a keyword that // can precede a regular expression. j := n for j > 0 && isJSIdentPart(rune(s[j-1])) { j-- } if regexpPrecederKeywords[string(s[j:])] { return jsCtxRegexp } } // Otherwise is a punctuator not listed above, or // a string which precedes a div op, or an identifier // which precedes a div op. return jsCtxDivOp } // regexpPrecederKeywords is a set of reserved JS keywords that can precede a // regular expression in JS source. 
var regexpPrecederKeywords = map[string]bool{ "break": true, "case": true, "continue": true, "delete": true, "do": true, "else": true, "finally": true, "in": true, "instanceof": true, "return": true, "throw": true, "try": true, "typeof": true, "void": true, } var jsonMarshalType = reflect.TypeFor[json.Marshaler]() // indirectToJSONMarshaler returns the value, after dereferencing as many times // as necessary to reach the base type (or nil) or an implementation of json.Marshal. func indirectToJSONMarshaler(a any) any { // text/template now supports passing untyped nil as a func call // argument, so we must support it. Otherwise we'd panic below, as one // cannot call the Type or Interface methods on an invalid // reflect.Value. See golang.org/issue/18716. if a == nil { return nil } v := reflect.ValueOf(a) for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() { v = v.Elem() } return v.Interface() } // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has // neither side-effects nor free variables outside (NaN, Infinity). func jsValEscaper(args ...any) string { var a any if len(args) == 1 { a = indirectToJSONMarshaler(args[0]) switch t := a.(type) { case JS: return string(t) case JSStr: // TODO: normalize quotes. return `"` + string(t) + `"` case json.Marshaler: // Do not treat as a Stringer. case fmt.Stringer: a = t.String() } } else { for i, arg := range args { args[i] = indirectToJSONMarshaler(arg) } a = fmt.Sprint(args...) } // TODO: detect cycles before calling Marshal which loops infinitely on // cyclic data. This may be an unacceptable DoS risk. b, err := json.Marshal(a) if err != nil { // While the standard JSON marshaller does not include user controlled // information in the error message, if a type has a MarshalJSON method, // the content of the error message is not guaranteed. 
Since we insert // the error into the template, as part of a comment, we attempt to // prevent the error from either terminating the comment, or the script // block itself. // // In particular we: // * replace "*/" comment end tokens with "* /", which does not // terminate the comment // * replace "", "", or " element, // or in an HTML5 event handler attribute such as onclick. func jsStrEscaper(args ...any) string { s, t := stringify(args...) if t == contentTypeJSStr { return replace(s, jsStrNormReplacementTable) } return replace(s, jsStrReplacementTable) } func jsTmplLitEscaper(args ...any) string { s, _ := stringify(args...) return replace(s, jsBqStrReplacementTable) } // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression // specials so the result is treated literally when included in a regular // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by // the literal text of {{.X}} followed by the string "bar". func jsRegexpEscaper(args ...any) string { s, _ := stringify(args...) s = replace(s, jsRegexpReplacementTable) if s == "" { // /{{.X}}/ should not produce a line comment when .X == "". return "(?:)" } return s } // replace replaces each rune r of s with replacementTable[r], provided that // r < len(replacementTable). If replacementTable[r] is the empty string then // no replacement is made. // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and // `\u2029`. func replace(s string, replacementTable []string) string { var b strings.Builder r, w, written := rune(0), 0, 0 for i := 0; i < len(s); i += w { // See comment in htmlEscaper. 
r, w = utf8.DecodeRuneInString(s[i:]) var repl string switch { case int(r) < len(lowUnicodeReplacementTable): repl = lowUnicodeReplacementTable[r] case int(r) < len(replacementTable) && replacementTable[r] != "": repl = replacementTable[r] case r == '\u2028': repl = `\u2028` case r == '\u2029': repl = `\u2029` default: continue } if written == 0 { b.Grow(len(s)) } b.WriteString(s[written:i]) b.WriteString(repl) written = i + w } if written == 0 { return s } b.WriteString(s[written:]) return b.String() } var lowUnicodeReplacementTable = []string{ 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`, '\a': `\u0007`, '\b': `\u0008`, '\t': `\t`, '\n': `\n`, '\v': `\u000b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`, 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`, 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`, } var jsStrReplacementTable = []string{ 0: `\u0000`, '\t': `\t`, '\n': `\n`, '\v': `\u000b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. '"': `\u0022`, '`': `\u0060`, '&': `\u0026`, '\'': `\u0027`, '+': `\u002b`, '/': `\/`, '<': `\u003c`, '>': `\u003e`, '\\': `\\`, } // jsBqStrReplacementTable is like jsStrReplacementTable except it also contains // the special characters for JS template literals: $, {, and }. var jsBqStrReplacementTable = []string{ 0: `\u0000`, '\t': `\t`, '\n': `\n`, '\v': `\u000b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. 
'"': `\u0022`, '`': `\u0060`, '&': `\u0026`, '\'': `\u0027`, '+': `\u002b`, '/': `\/`, '<': `\u003c`, '>': `\u003e`, '\\': `\\`, '$': `\u0024`, '{': `\u007b`, '}': `\u007d`, } // jsStrNormReplacementTable is like jsStrReplacementTable but does not // overencode existing escapes since this table has no entry for `\`. var jsStrNormReplacementTable = []string{ 0: `\u0000`, '\t': `\t`, '\n': `\n`, '\v': `\u000b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. '"': `\u0022`, '&': `\u0026`, '\'': `\u0027`, '`': `\u0060`, '+': `\u002b`, '/': `\/`, '<': `\u003c`, '>': `\u003e`, } var jsRegexpReplacementTable = []string{ 0: `\u0000`, '\t': `\t`, '\n': `\n`, '\v': `\u000b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. '"': `\u0022`, '$': `\$`, '&': `\u0026`, '\'': `\u0027`, '(': `\(`, ')': `\)`, '*': `\*`, '+': `\u002b`, '-': `\-`, '.': `\.`, '/': `\/`, '<': `\u003c`, '>': `\u003e`, '?': `\?`, '[': `\[`, '\\': `\\`, ']': `\]`, '^': `\^`, '{': `\{`, '|': `\|`, '}': `\}`, } // isJSIdentPart reports whether the given rune is a JS identifier part. // It does not handle all the non-Latin letters, joiners, and combining marks, // but it does handle every codepoint that can occur in a numeric literal or // a keyword. func isJSIdentPart(r rune) bool { switch { case r == '$': return true case '0' <= r && r <= '9': return true case 'A' <= r && r <= 'Z': return true case r == '_': return true case 'a' <= r && r <= 'z': return true } return false } // isJSType reports whether the given MIME type should be considered JavaScript. // // It is used to determine whether a script tag with a type attribute is a javascript container. 
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	// discard parameters
	mimeType, _, _ = strings.Cut(mimeType, ";")
	// Compare case-insensitively and ignore surrounding whitespace.
	mimeType = strings.ToLower(mimeType)
	mimeType = strings.TrimSpace(mimeType)
	switch mimeType {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	default:
		return false
	}
}