Source file
src/strconv/quote.go
1
2
3
4
5
6
7 package strconv
8
9 import (
10 "unicode/utf8"
11 )
12
// Hex digit alphabets, indexed by a 4-bit value (0-15) to obtain the
// corresponding ASCII hex digit.
const (
	// upperhex is the upper-case hex alphabet.
	upperhex = "0123456789ABCDEF"
	// lowerhex is the lower-case hex alphabet; the escape writers in this
	// file index it when emitting \x, \u and \U sequences.
	lowerhex = "0123456789abcdef"
)
17
18
19 func contains(s string, c byte) bool {
20 return index(s, c) != -1
21 }
22
23 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
24 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
25 }
26
27 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
28 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
29 }
30
31 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
32
33
34 if cap(buf)-len(buf) < len(s) {
35 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
36 copy(nBuf, buf)
37 buf = nBuf
38 }
39 buf = append(buf, quote)
40 for width := 0; len(s) > 0; s = s[width:] {
41 r := rune(s[0])
42 width = 1
43 if r >= utf8.RuneSelf {
44 r, width = utf8.DecodeRuneInString(s)
45 }
46 if width == 1 && r == utf8.RuneError {
47 buf = append(buf, `\x`...)
48 buf = append(buf, lowerhex[s[0]>>4])
49 buf = append(buf, lowerhex[s[0]&0xF])
50 continue
51 }
52 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
53 }
54 buf = append(buf, quote)
55 return buf
56 }
57
58 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
59 buf = append(buf, quote)
60 if !utf8.ValidRune(r) {
61 r = utf8.RuneError
62 }
63 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
64 buf = append(buf, quote)
65 return buf
66 }
67
68 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
69 var runeTmp [utf8.UTFMax]byte
70 if r == rune(quote) || r == '\\' {
71 buf = append(buf, '\\')
72 buf = append(buf, byte(r))
73 return buf
74 }
75 if ASCIIonly {
76 if r < utf8.RuneSelf && IsPrint(r) {
77 buf = append(buf, byte(r))
78 return buf
79 }
80 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
81 n := utf8.EncodeRune(runeTmp[:], r)
82 buf = append(buf, runeTmp[:n]...)
83 return buf
84 }
85 switch r {
86 case '\a':
87 buf = append(buf, `\a`...)
88 case '\b':
89 buf = append(buf, `\b`...)
90 case '\f':
91 buf = append(buf, `\f`...)
92 case '\n':
93 buf = append(buf, `\n`...)
94 case '\r':
95 buf = append(buf, `\r`...)
96 case '\t':
97 buf = append(buf, `\t`...)
98 case '\v':
99 buf = append(buf, `\v`...)
100 default:
101 switch {
102 case r < ' ' || r == 0x7f:
103 buf = append(buf, `\x`...)
104 buf = append(buf, lowerhex[byte(r)>>4])
105 buf = append(buf, lowerhex[byte(r)&0xF])
106 case !utf8.ValidRune(r):
107 r = 0xFFFD
108 fallthrough
109 case r < 0x10000:
110 buf = append(buf, `\u`...)
111 for s := 12; s >= 0; s -= 4 {
112 buf = append(buf, lowerhex[r>>uint(s)&0xF])
113 }
114 default:
115 buf = append(buf, `\U`...)
116 for s := 28; s >= 0; s -= 4 {
117 buf = append(buf, lowerhex[r>>uint(s)&0xF])
118 }
119 }
120 }
121 return buf
122 }
123
124
125
126
127
128 func Quote(s string) string {
129 return quoteWith(s, '"', false, false)
130 }
131
132
133
134 func AppendQuote(dst []byte, s string) []byte {
135 return appendQuotedWith(dst, s, '"', false, false)
136 }
137
138
139
140
141 func QuoteToASCII(s string) string {
142 return quoteWith(s, '"', true, false)
143 }
144
145
146
147 func AppendQuoteToASCII(dst []byte, s string) []byte {
148 return appendQuotedWith(dst, s, '"', true, false)
149 }
150
151
152
153
154
155 func QuoteToGraphic(s string) string {
156 return quoteWith(s, '"', false, true)
157 }
158
159
160
161 func AppendQuoteToGraphic(dst []byte, s string) []byte {
162 return appendQuotedWith(dst, s, '"', false, true)
163 }
164
165
166
167
168
169
170 func QuoteRune(r rune) string {
171 return quoteRuneWith(r, '\'', false, false)
172 }
173
174
175
176 func AppendQuoteRune(dst []byte, r rune) []byte {
177 return appendQuotedRuneWith(dst, r, '\'', false, false)
178 }
179
180
181
182
183
184
185
186 func QuoteRuneToASCII(r rune) string {
187 return quoteRuneWith(r, '\'', true, false)
188 }
189
190
191
192 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
193 return appendQuotedRuneWith(dst, r, '\'', true, false)
194 }
195
196
197
198
199
200
201
202 func QuoteRuneToGraphic(r rune) string {
203 return quoteRuneWith(r, '\'', false, true)
204 }
205
206
207
208 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
209 return appendQuotedRuneWith(dst, r, '\'', false, true)
210 }
211
212
213
214
// CanBackquote reports whether s can be written unchanged as a single-line
// backquoted string. It returns false when s contains a backquote, DEL, a
// control character other than tab, a byte that is not valid UTF-8, or the
// byte order mark U+FEFF.
func CanBackquote(s string) bool {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		i += size
		switch {
		case size > 1:
			// A well-formed multi-byte rune is accepted unless it is the
			// byte order mark.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// Width 1 together with RuneError marks an invalid byte.
			return false
		case r == '`', r == '\u007F':
			return false
		case r < ' ' && r != '\t':
			return false
		}
	}
	return true
}
234
// unhex converts one ASCII hex digit (0-9, a-f, A-F) to its numeric value.
// ok is false, and v is zero, when b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return rune(b - '0'), true
	case b >= 'a' && b <= 'f':
		return rune(b-'a') + 10, true
	case b >= 'A' && b <= 'F':
		return rune(b-'A') + 10, true
	default:
		return 0, false
	}
}
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
263
264 if len(s) == 0 {
265 err = ErrSyntax
266 return
267 }
268 switch c := s[0]; {
269 case c == quote && (quote == '\'' || quote == '"'):
270 err = ErrSyntax
271 return
272 case c >= utf8.RuneSelf:
273 r, size := utf8.DecodeRuneInString(s)
274 return r, true, s[size:], nil
275 case c != '\\':
276 return rune(s[0]), false, s[1:], nil
277 }
278
279
280 if len(s) <= 1 {
281 err = ErrSyntax
282 return
283 }
284 c := s[1]
285 s = s[2:]
286
287 switch c {
288 case 'a':
289 value = '\a'
290 case 'b':
291 value = '\b'
292 case 'f':
293 value = '\f'
294 case 'n':
295 value = '\n'
296 case 'r':
297 value = '\r'
298 case 't':
299 value = '\t'
300 case 'v':
301 value = '\v'
302 case 'x', 'u', 'U':
303 n := 0
304 switch c {
305 case 'x':
306 n = 2
307 case 'u':
308 n = 4
309 case 'U':
310 n = 8
311 }
312 var v rune
313 if len(s) < n {
314 err = ErrSyntax
315 return
316 }
317 for j := 0; j < n; j++ {
318 x, ok := unhex(s[j])
319 if !ok {
320 err = ErrSyntax
321 return
322 }
323 v = v<<4 | x
324 }
325 s = s[n:]
326 if c == 'x' {
327
328 value = v
329 break
330 }
331 if !utf8.ValidRune(v) {
332 err = ErrSyntax
333 return
334 }
335 value = v
336 multibyte = true
337 case '0', '1', '2', '3', '4', '5', '6', '7':
338 v := rune(c) - '0'
339 if len(s) < 2 {
340 err = ErrSyntax
341 return
342 }
343 for j := 0; j < 2; j++ {
344 x := rune(s[j]) - '0'
345 if x < 0 || x > 7 {
346 err = ErrSyntax
347 return
348 }
349 v = (v << 3) | x
350 }
351 s = s[2:]
352 if v > 255 {
353 err = ErrSyntax
354 return
355 }
356 value = v
357 case '\\':
358 value = '\\'
359 case '\'', '"':
360 if c != quote {
361 err = ErrSyntax
362 return
363 }
364 value = rune(c)
365 default:
366 err = ErrSyntax
367 return
368 }
369 tail = s
370 return
371 }
372
373
374
375 func QuotedPrefix(s string) (string, error) {
376 out, _, err := unquote(s, false)
377 return out, err
378 }
379
380
381
382
383
384
385 func Unquote(s string) (string, error) {
386 out, rem, err := unquote(s, true)
387 if len(rem) > 0 {
388 return "", ErrSyntax
389 }
390 return out, err
391 }
392
393
394
395
396
397 func unquote(in string, unescape bool) (out, rem string, err error) {
398
399 if len(in) < 2 {
400 return "", in, ErrSyntax
401 }
402 quote := in[0]
403 end := index(in[1:], quote)
404 if end < 0 {
405 return "", in, ErrSyntax
406 }
407 end += 2
408
409 switch quote {
410 case '`':
411 switch {
412 case !unescape:
413 out = in[:end]
414 case !contains(in[:end], '\r'):
415 out = in[len("`") : end-len("`")]
416 default:
417
418
419 buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
420 for i := len("`"); i < end-len("`"); i++ {
421 if in[i] != '\r' {
422 buf = append(buf, in[i])
423 }
424 }
425 out = string(buf)
426 }
427
428
429
430
431
432 return out, in[end:], nil
433 case '"', '\'':
434
435 if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
436 var valid bool
437 switch quote {
438 case '"':
439 valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
440 case '\'':
441 r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
442 valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
443 }
444 if valid {
445 out = in[:end]
446 if unescape {
447 out = out[1 : end-1]
448 }
449 return out, in[end:], nil
450 }
451 }
452
453
454 var buf []byte
455 in0 := in
456 in = in[1:]
457 if unescape {
458 buf = make([]byte, 0, 3*end/2)
459 }
460 for len(in) > 0 && in[0] != quote {
461
462
463 r, multibyte, rem, err := UnquoteChar(in, quote)
464 if in[0] == '\n' || err != nil {
465 return "", in0, ErrSyntax
466 }
467 in = rem
468
469
470 if unescape {
471 if r < utf8.RuneSelf || !multibyte {
472 buf = append(buf, byte(r))
473 } else {
474 var arr [utf8.UTFMax]byte
475 n := utf8.EncodeRune(arr[:], r)
476 buf = append(buf, arr[:n]...)
477 }
478 }
479
480
481 if quote == '\'' {
482 break
483 }
484 }
485
486
487 if !(len(in) > 0 && in[0] == quote) {
488 return "", in0, ErrSyntax
489 }
490 in = in[1:]
491
492 if unescape {
493 return string(buf), in, nil
494 }
495 return in0[:len(in0)-len(in)], in, nil
496 default:
497 return "", in, ErrSyntax
498 }
499 }
500
501
502
// bsearch16 returns the smallest index i with a[i] >= x, or len(a) when no
// element qualifies. The slice a must be sorted in ascending order.
func bsearch16(a []uint16, x uint16) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if x <= a[mid] {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
515
516
517
// bsearch32 returns the smallest index i with a[i] >= x, or len(a) when no
// element qualifies. The slice a must be sorted in ascending order.
func bsearch32(a []uint32, x uint32) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if x <= a[mid] {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
530
531
532
533
534
535
536
537
538
539
540 func IsPrint(r rune) bool {
541
542 if r <= 0xFF {
543 if 0x20 <= r && r <= 0x7E {
544
545 return true
546 }
547 if 0xA1 <= r && r <= 0xFF {
548
549 return r != 0xAD
550 }
551 return false
552 }
553
554
555
556
557
558
559
560 if 0 <= r && r < 1<<16 {
561 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
562 i := bsearch16(isPrint, rr)
563 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
564 return false
565 }
566 j := bsearch16(isNotPrint, rr)
567 return j >= len(isNotPrint) || isNotPrint[j] != rr
568 }
569
570 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
571 i := bsearch32(isPrint, rr)
572 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
573 return false
574 }
575 if r >= 0x20000 {
576 return true
577 }
578 r -= 0x10000
579 j := bsearch16(isNotPrint, uint16(r))
580 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
581 }
582
583
584
585
586 func IsGraphic(r rune) bool {
587 if IsPrint(r) {
588 return true
589 }
590 return isInGraphicList(r)
591 }
592
593
594
595
596 func isInGraphicList(r rune) bool {
597
598 if r > 0xFFFF {
599 return false
600 }
601 rr := uint16(r)
602 i := bsearch16(isGraphic, rr)
603 return i < len(isGraphic) && rr == isGraphic[i]
604 }
605
View as plain text