html.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strings"
    11  	"unicode/utf8"
    12  )
    13  
    14  // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
    15  func htmlNospaceEscaper(args ...any) string {
    16  	s, t := stringify(args...)
    17  	if s == "" {
    18  		return filterFailsafe
    19  	}
    20  	if t == contentTypeHTML {
    21  		return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
    22  	}
    23  	return htmlReplacer(s, htmlNospaceReplacementTable, false)
    24  }
    25  
    26  // attrEscaper escapes for inclusion in quoted attribute values.
    27  func attrEscaper(args ...any) string {
    28  	s, t := stringify(args...)
    29  	if t == contentTypeHTML {
    30  		return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
    31  	}
    32  	return htmlReplacer(s, htmlReplacementTable, true)
    33  }
    34  
    35  // rcdataEscaper escapes for inclusion in an RCDATA element body.
    36  func rcdataEscaper(args ...any) string {
    37  	s, t := stringify(args...)
    38  	if t == contentTypeHTML {
    39  		return htmlReplacer(s, htmlNormReplacementTable, true)
    40  	}
    41  	return htmlReplacer(s, htmlReplacementTable, true)
    42  }
    43  
    44  // htmlEscaper escapes for inclusion in HTML text.
    45  func htmlEscaper(args ...any) string {
    46  	s, t := stringify(args...)
    47  	if t == contentTypeHTML {
    48  		return s
    49  	}
    50  	return htmlReplacer(s, htmlReplacementTable, true)
    51  }
    52  
    53  // htmlReplacementTable contains the runes that need to be escaped
    54  // inside a quoted attribute value or in a text node.
    55  var htmlReplacementTable = []string{
    56  	// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
    57  	// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
    58  	// CHARACTER character to the current attribute's value.
    59  	// "
    60  	// and similarly
    61  	// https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
    62  	0:    "\uFFFD",
    63  	'"':  "&#34;",
    64  	'&':  "&amp;",
    65  	'\'': "&#39;",
    66  	'+':  "&#43;",
    67  	'<':  "&lt;",
    68  	'>':  "&gt;",
    69  }
    70  
    71  // htmlNormReplacementTable is like htmlReplacementTable but without '&' to
    72  // avoid over-encoding existing entities.
    73  var htmlNormReplacementTable = []string{
    74  	0:    "\uFFFD",
    75  	'"':  "&#34;",
    76  	'\'': "&#39;",
    77  	'+':  "&#43;",
    78  	'<':  "&lt;",
    79  	'>':  "&gt;",
    80  }
    81  
    82  // htmlNospaceReplacementTable contains the runes that need to be escaped
    83  // inside an unquoted attribute value.
    84  // The set of runes escaped is the union of the HTML specials and
    85  // those determined by running the JS below in browsers:
    86  // <div id=d></div>
    87  // <script>(function () {
    88  // var a = [], d = document.getElementById("d"), i, c, s;
    89  // for (i = 0; i < 0x10000; ++i) {
    90  //
    91  //	c = String.fromCharCode(i);
    92  //	d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
    93  //	s = d.getElementsByTagName("SPAN")[0];
    94  //	if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
    95  //
    96  // }
    97  // document.write(a.join(", "));
    98  // })()</script>
    99  var htmlNospaceReplacementTable = []string{
   100  	0:    "&#xfffd;",
   101  	'\t': "&#9;",
   102  	'\n': "&#10;",
   103  	'\v': "&#11;",
   104  	'\f': "&#12;",
   105  	'\r': "&#13;",
   106  	' ':  "&#32;",
   107  	'"':  "&#34;",
   108  	'&':  "&amp;",
   109  	'\'': "&#39;",
   110  	'+':  "&#43;",
   111  	'<':  "&lt;",
   112  	'=':  "&#61;",
   113  	'>':  "&gt;",
   114  	// A parse error in the attribute value (unquoted) and
   115  	// before attribute value states.
   116  	// Treated as a quoting character by IE.
   117  	'`': "&#96;",
   118  }
   119  
   120  // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
   121  // without '&' to avoid over-encoding existing entities.
   122  var htmlNospaceNormReplacementTable = []string{
   123  	0:    "&#xfffd;",
   124  	'\t': "&#9;",
   125  	'\n': "&#10;",
   126  	'\v': "&#11;",
   127  	'\f': "&#12;",
   128  	'\r': "&#13;",
   129  	' ':  "&#32;",
   130  	'"':  "&#34;",
   131  	'\'': "&#39;",
   132  	'+':  "&#43;",
   133  	'<':  "&lt;",
   134  	'=':  "&#61;",
   135  	'>':  "&gt;",
   136  	// A parse error in the attribute value (unquoted) and
   137  	// before attribute value states.
   138  	// Treated as a quoting character by IE.
   139  	'`': "&#96;",
   140  }
   141  
   142  // htmlReplacer returns s with runes replaced according to replacementTable
   143  // and when badRunes is true, certain bad runes are allowed through unescaped.
   144  func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
   145  	written, b := 0, new(strings.Builder)
   146  	r, w := rune(0), 0
   147  	for i := 0; i < len(s); i += w {
   148  		// Cannot use 'for range s' because we need to preserve the width
   149  		// of the runes in the input. If we see a decoding error, the input
   150  		// width will not be utf8.Runelen(r) and we will overrun the buffer.
   151  		r, w = utf8.DecodeRuneInString(s[i:])
   152  		if int(r) < len(replacementTable) {
   153  			if repl := replacementTable[r]; len(repl) != 0 {
   154  				if written == 0 {
   155  					b.Grow(len(s))
   156  				}
   157  				b.WriteString(s[written:i])
   158  				b.WriteString(repl)
   159  				written = i + w
   160  			}
   161  		} else if badRunes {
   162  			// No-op.
   163  			// IE does not allow these ranges in unquoted attrs.
   164  		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
   165  			if written == 0 {
   166  				b.Grow(len(s))
   167  			}
   168  			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
   169  			written = i + w
   170  		}
   171  	}
   172  	if written == 0 {
   173  		return s
   174  	}
   175  	b.WriteString(s[written:])
   176  	return b.String()
   177  }
   178  
   179  // stripTags takes a snippet of HTML and returns only the text content.
   180  // For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
   181  func stripTags(html string) string {
   182  	var b strings.Builder
   183  	s, c, i, allText := []byte(html), context{}, 0, true
   184  	// Using the transition funcs helps us avoid mangling
   185  	// `<div title="1>2">` or `I <3 Ponies!`.
   186  	for i != len(s) {
   187  		if c.delim == delimNone {
   188  			st := c.state
   189  			// Use RCDATA instead of parsing into JS or CSS styles.
   190  			if c.element != elementNone && !isInTag(st) {
   191  				st = stateRCDATA
   192  			}
   193  			d, nread := transitionFunc[st](c, s[i:])
   194  			i1 := i + nread
   195  			if c.state == stateText || c.state == stateRCDATA {
   196  				// Emit text up to the start of the tag or comment.
   197  				j := i1
   198  				if d.state != c.state {
   199  					for j1 := j - 1; j1 >= i; j1-- {
   200  						if s[j1] == '<' {
   201  							j = j1
   202  							break
   203  						}
   204  					}
   205  				}
   206  				b.Write(s[i:j])
   207  			} else {
   208  				allText = false
   209  			}
   210  			c, i = d, i1
   211  			continue
   212  		}
   213  		i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
   214  		if i1 < i {
   215  			break
   216  		}
   217  		if c.delim != delimSpaceOrTagEnd {
   218  			// Consume any quote.
   219  			i1++
   220  		}
   221  		c, i = context{state: stateTag, element: c.element}, i1
   222  	}
   223  	if allText {
   224  		return html
   225  	} else if c.state == stateText || c.state == stateRCDATA {
   226  		b.Write(s[i:])
   227  	}
   228  	return b.String()
   229  }
   230  
   231  // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
   232  // a known-safe HTML attribute.
   233  func htmlNameFilter(args ...any) string {
   234  	s, t := stringify(args...)
   235  	if t == contentTypeHTMLAttr {
   236  		return s
   237  	}
   238  	if len(s) == 0 {
   239  		// Avoid violation of structure preservation.
   240  		// <input checked {{.K}}={{.V}}>.
   241  		// Without this, if .K is empty then .V is the value of
   242  		// checked, but otherwise .V is the value of the attribute
   243  		// named .K.
   244  		return filterFailsafe
   245  	}
   246  	s = strings.ToLower(s)
   247  	if t := attrType(s); t != contentTypePlain {
   248  		// TODO: Split attr and element name part filters so we can recognize known attributes.
   249  		return filterFailsafe
   250  	}
   251  	for _, r := range s {
   252  		switch {
   253  		case '0' <= r && r <= '9':
   254  		case 'a' <= r && r <= 'z':
   255  		default:
   256  			return filterFailsafe
   257  		}
   258  	}
   259  	return s
   260  }
   261  
   262  // commentEscaper returns the empty string regardless of input.
   263  // Comment content does not correspond to any parsed structure or
   264  // human-readable content, so the simplest and most secure policy is to drop
   265  // content interpolated into comments.
   266  // This approach is equally valid whether or not static comment content is
   267  // removed from the template.
   268  func commentEscaper(args ...any) string {
   269  	return ""
   270  }
   271
View as plain text