1
2
3
4
5 package syntax
6
7 import (
8 "sort"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15
16 type Error struct {
17 Code ErrorCode
18 Expr string
19 }
20
21 func (e *Error) Error() string {
22 return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
23 }
24
25
26 type ErrorCode string
27
28 const (
29
30 ErrInternalError ErrorCode = "regexp/syntax: internal error"
31
32
33 ErrInvalidCharClass ErrorCode = "invalid character class"
34 ErrInvalidCharRange ErrorCode = "invalid character class range"
35 ErrInvalidEscape ErrorCode = "invalid escape sequence"
36 ErrInvalidNamedCapture ErrorCode = "invalid named capture"
37 ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
38 ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"
39 ErrInvalidRepeatSize ErrorCode = "invalid repeat count"
40 ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
41 ErrMissingBracket ErrorCode = "missing closing ]"
42 ErrMissingParen ErrorCode = "missing closing )"
43 ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
44 ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
45 ErrUnexpectedParen ErrorCode = "unexpected )"
46 ErrNestingDepth ErrorCode = "expression nests too deeply"
47 ErrLarge ErrorCode = "expression too large"
48 )
49
50 func (e ErrorCode) String() string {
51 return string(e)
52 }
53
54
55 type Flags uint16
56
57 const (
58 FoldCase Flags = 1 << iota
59 Literal
60 ClassNL
61 DotNL
62 OneLine
63 NonGreedy
64 PerlX
65 UnicodeGroups
66 WasDollar
67 Simple
68
69 MatchNL = ClassNL | DotNL
70
71 Perl = ClassNL | OneLine | PerlX | UnicodeGroups
72 POSIX Flags = 0
73 )
74
75
76 const (
77 opLeftParen = opPseudo + iota
78 opVerticalBar
79 )
80
81
82
83
84
85
86
87
88
89
90
91
92
93 const maxHeight = 1000
94
95
96
97
98
99
100
101 const (
102 maxSize = 128 << 20 / instSize
103 instSize = 5 * 8
104 )
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 const (
122 maxRunes = 128 << 20 / runeSize
123 runeSize = 4
124 )
125
126 type parser struct {
127 flags Flags
128 stack []*Regexp
129 free *Regexp
130 numCap int
131 wholeRegexp string
132 tmpClass []rune
133 numRegexp int
134 numRunes int
135 repeats int64
136 height map[*Regexp]int
137 size map[*Regexp]int64
138 }
139
140 func (p *parser) newRegexp(op Op) *Regexp {
141 re := p.free
142 if re != nil {
143 p.free = re.Sub0[0]
144 *re = Regexp{}
145 } else {
146 re = new(Regexp)
147 p.numRegexp++
148 }
149 re.Op = op
150 return re
151 }
152
153 func (p *parser) reuse(re *Regexp) {
154 if p.height != nil {
155 delete(p.height, re)
156 }
157 re.Sub0[0] = p.free
158 p.free = re
159 }
160
161 func (p *parser) checkLimits(re *Regexp) {
162 if p.numRunes > maxRunes {
163 panic(ErrLarge)
164 }
165 p.checkSize(re)
166 p.checkHeight(re)
167 }
168
169 func (p *parser) checkSize(re *Regexp) {
170 if p.size == nil {
171
172
173
174
175
176 if p.repeats == 0 {
177 p.repeats = 1
178 }
179 if re.Op == OpRepeat {
180 n := re.Max
181 if n == -1 {
182 n = re.Min
183 }
184 if n <= 0 {
185 n = 1
186 }
187 if int64(n) > maxSize/p.repeats {
188 p.repeats = maxSize
189 } else {
190 p.repeats *= int64(n)
191 }
192 }
193 if int64(p.numRegexp) < maxSize/p.repeats {
194 return
195 }
196
197
198
199
200 p.size = make(map[*Regexp]int64)
201 for _, re := range p.stack {
202 p.checkSize(re)
203 }
204 }
205
206 if p.calcSize(re, true) > maxSize {
207 panic(ErrLarge)
208 }
209 }
210
211 func (p *parser) calcSize(re *Regexp, force bool) int64 {
212 if !force {
213 if size, ok := p.size[re]; ok {
214 return size
215 }
216 }
217
218 var size int64
219 switch re.Op {
220 case OpLiteral:
221 size = int64(len(re.Rune))
222 case OpCapture, OpStar:
223
224 size = 2 + p.calcSize(re.Sub[0], false)
225 case OpPlus, OpQuest:
226 size = 1 + p.calcSize(re.Sub[0], false)
227 case OpConcat:
228 for _, sub := range re.Sub {
229 size += p.calcSize(sub, false)
230 }
231 case OpAlternate:
232 for _, sub := range re.Sub {
233 size += p.calcSize(sub, false)
234 }
235 if len(re.Sub) > 1 {
236 size += int64(len(re.Sub)) - 1
237 }
238 case OpRepeat:
239 sub := p.calcSize(re.Sub[0], false)
240 if re.Max == -1 {
241 if re.Min == 0 {
242 size = 2 + sub
243 } else {
244 size = 1 + int64(re.Min)*sub
245 }
246 break
247 }
248
249 size = int64(re.Max)*sub + int64(re.Max-re.Min)
250 }
251
252 if size < 1 {
253 size = 1
254 }
255 p.size[re] = size
256 return size
257 }
258
259 func (p *parser) checkHeight(re *Regexp) {
260 if p.numRegexp < maxHeight {
261 return
262 }
263 if p.height == nil {
264 p.height = make(map[*Regexp]int)
265 for _, re := range p.stack {
266 p.checkHeight(re)
267 }
268 }
269 if p.calcHeight(re, true) > maxHeight {
270 panic(ErrNestingDepth)
271 }
272 }
273
274 func (p *parser) calcHeight(re *Regexp, force bool) int {
275 if !force {
276 if h, ok := p.height[re]; ok {
277 return h
278 }
279 }
280 h := 1
281 for _, sub := range re.Sub {
282 hsub := p.calcHeight(sub, false)
283 if h < 1+hsub {
284 h = 1 + hsub
285 }
286 }
287 p.height[re] = h
288 return h
289 }
290
291
292
293
294 func (p *parser) push(re *Regexp) *Regexp {
295 p.numRunes += len(re.Rune)
296 if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
297
298 if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
299 return nil
300 }
301 re.Op = OpLiteral
302 re.Rune = re.Rune[:1]
303 re.Flags = p.flags &^ FoldCase
304 } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
305 re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
306 unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
307 unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
308 re.Op == OpCharClass && len(re.Rune) == 2 &&
309 re.Rune[0]+1 == re.Rune[1] &&
310 unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
311 unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
312
313 if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
314 return nil
315 }
316
317
318 re.Op = OpLiteral
319 re.Rune = re.Rune[:1]
320 re.Flags = p.flags | FoldCase
321 } else {
322
323 p.maybeConcat(-1, 0)
324 }
325
326 p.stack = append(p.stack, re)
327 p.checkLimits(re)
328 return re
329 }
330
331
332
333
334
335
336
337
338
339
340 func (p *parser) maybeConcat(r rune, flags Flags) bool {
341 n := len(p.stack)
342 if n < 2 {
343 return false
344 }
345
346 re1 := p.stack[n-1]
347 re2 := p.stack[n-2]
348 if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
349 return false
350 }
351
352
353 re2.Rune = append(re2.Rune, re1.Rune...)
354
355
356 if r >= 0 {
357 re1.Rune = re1.Rune0[:1]
358 re1.Rune[0] = r
359 re1.Flags = flags
360 return true
361 }
362
363 p.stack = p.stack[:n-1]
364 p.reuse(re1)
365 return false
366 }
367
368
369 func (p *parser) literal(r rune) {
370 re := p.newRegexp(OpLiteral)
371 re.Flags = p.flags
372 if p.flags&FoldCase != 0 {
373 r = minFoldRune(r)
374 }
375 re.Rune0[0] = r
376 re.Rune = re.Rune0[:1]
377 p.push(re)
378 }
379
380
381 func minFoldRune(r rune) rune {
382 if r < minFold || r > maxFold {
383 return r
384 }
385 min := r
386 r0 := r
387 for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
388 if min > r {
389 min = r
390 }
391 }
392 return min
393 }
394
395
396
397 func (p *parser) op(op Op) *Regexp {
398 re := p.newRegexp(op)
399 re.Flags = p.flags
400 return p.push(re)
401 }
402
403
404
405
406
407 func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
408 flags := p.flags
409 if p.flags&PerlX != 0 {
410 if len(after) > 0 && after[0] == '?' {
411 after = after[1:]
412 flags ^= NonGreedy
413 }
414 if lastRepeat != "" {
415
416
417
418 return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
419 }
420 }
421 n := len(p.stack)
422 if n == 0 {
423 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
424 }
425 sub := p.stack[n-1]
426 if sub.Op >= opPseudo {
427 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
428 }
429
430 re := p.newRegexp(op)
431 re.Min = min
432 re.Max = max
433 re.Flags = flags
434 re.Sub = re.Sub0[:1]
435 re.Sub[0] = sub
436 p.stack[n-1] = re
437 p.checkLimits(re)
438
439 if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
440 return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
441 }
442
443 return after, nil
444 }
445
446
447
448
449
450
451
452
453
454
455 func repeatIsValid(re *Regexp, n int) bool {
456 if re.Op == OpRepeat {
457 m := re.Max
458 if m == 0 {
459 return true
460 }
461 if m < 0 {
462 m = re.Min
463 }
464 if m > n {
465 return false
466 }
467 if m > 0 {
468 n /= m
469 }
470 }
471 for _, sub := range re.Sub {
472 if !repeatIsValid(sub, n) {
473 return false
474 }
475 }
476 return true
477 }
478
479
480 func (p *parser) concat() *Regexp {
481 p.maybeConcat(-1, 0)
482
483
484 i := len(p.stack)
485 for i > 0 && p.stack[i-1].Op < opPseudo {
486 i--
487 }
488 subs := p.stack[i:]
489 p.stack = p.stack[:i]
490
491
492 if len(subs) == 0 {
493 return p.push(p.newRegexp(OpEmptyMatch))
494 }
495
496 return p.push(p.collapse(subs, OpConcat))
497 }
498
499
500 func (p *parser) alternate() *Regexp {
501
502
503 i := len(p.stack)
504 for i > 0 && p.stack[i-1].Op < opPseudo {
505 i--
506 }
507 subs := p.stack[i:]
508 p.stack = p.stack[:i]
509
510
511
512 if len(subs) > 0 {
513 cleanAlt(subs[len(subs)-1])
514 }
515
516
517
518 if len(subs) == 0 {
519 return p.push(p.newRegexp(OpNoMatch))
520 }
521
522 return p.push(p.collapse(subs, OpAlternate))
523 }
524
525
526 func cleanAlt(re *Regexp) {
527 switch re.Op {
528 case OpCharClass:
529 re.Rune = cleanClass(&re.Rune)
530 if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
531 re.Rune = nil
532 re.Op = OpAnyChar
533 return
534 }
535 if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
536 re.Rune = nil
537 re.Op = OpAnyCharNotNL
538 return
539 }
540 if cap(re.Rune)-len(re.Rune) > 100 {
541
542
543 re.Rune = append(re.Rune0[:0], re.Rune...)
544 }
545 }
546 }
547
548
549
550
551
552 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
553 if len(subs) == 1 {
554 return subs[0]
555 }
556 re := p.newRegexp(op)
557 re.Sub = re.Sub0[:0]
558 for _, sub := range subs {
559 if sub.Op == op {
560 re.Sub = append(re.Sub, sub.Sub...)
561 p.reuse(sub)
562 } else {
563 re.Sub = append(re.Sub, sub)
564 }
565 }
566 if op == OpAlternate {
567 re.Sub = p.factor(re.Sub)
568 if len(re.Sub) == 1 {
569 old := re
570 re = re.Sub[0]
571 p.reuse(old)
572 }
573 }
574 return re
575 }
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592 func (p *parser) factor(sub []*Regexp) []*Regexp {
593 if len(sub) < 2 {
594 return sub
595 }
596
597
598 var str []rune
599 var strflags Flags
600 start := 0
601 out := sub[:0]
602 for i := 0; i <= len(sub); i++ {
603
604
605
606
607
608
609 var istr []rune
610 var iflags Flags
611 if i < len(sub) {
612 istr, iflags = p.leadingString(sub[i])
613 if iflags == strflags {
614 same := 0
615 for same < len(str) && same < len(istr) && str[same] == istr[same] {
616 same++
617 }
618 if same > 0 {
619
620
621 str = str[:same]
622 continue
623 }
624 }
625 }
626
627
628
629
630
631
632 if i == start {
633
634 } else if i == start+1 {
635
636 out = append(out, sub[start])
637 } else {
638
639 prefix := p.newRegexp(OpLiteral)
640 prefix.Flags = strflags
641 prefix.Rune = append(prefix.Rune[:0], str...)
642
643 for j := start; j < i; j++ {
644 sub[j] = p.removeLeadingString(sub[j], len(str))
645 p.checkLimits(sub[j])
646 }
647 suffix := p.collapse(sub[start:i], OpAlternate)
648
649 re := p.newRegexp(OpConcat)
650 re.Sub = append(re.Sub[:0], prefix, suffix)
651 out = append(out, re)
652 }
653
654
655 start = i
656 str = istr
657 strflags = iflags
658 }
659 sub = out
660
661
662
663
664
665
666
667
668
669 start = 0
670 out = sub[:0]
671 var first *Regexp
672 for i := 0; i <= len(sub); i++ {
673
674
675
676
677
678 var ifirst *Regexp
679 if i < len(sub) {
680 ifirst = p.leadingRegexp(sub[i])
681 if first != nil && first.Equal(ifirst) &&
682
683 (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
684 continue
685 }
686 }
687
688
689
690
691
692 if i == start {
693
694 } else if i == start+1 {
695
696 out = append(out, sub[start])
697 } else {
698
699 prefix := first
700 for j := start; j < i; j++ {
701 reuse := j != start
702 sub[j] = p.removeLeadingRegexp(sub[j], reuse)
703 p.checkLimits(sub[j])
704 }
705 suffix := p.collapse(sub[start:i], OpAlternate)
706
707 re := p.newRegexp(OpConcat)
708 re.Sub = append(re.Sub[:0], prefix, suffix)
709 out = append(out, re)
710 }
711
712
713 start = i
714 first = ifirst
715 }
716 sub = out
717
718
719 start = 0
720 out = sub[:0]
721 for i := 0; i <= len(sub); i++ {
722
723
724
725
726
727
728 if i < len(sub) && isCharClass(sub[i]) {
729 continue
730 }
731
732
733
734 if i == start {
735
736 } else if i == start+1 {
737 out = append(out, sub[start])
738 } else {
739
740
741 max := start
742 for j := start + 1; j < i; j++ {
743 if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
744 max = j
745 }
746 }
747 sub[start], sub[max] = sub[max], sub[start]
748
749 for j := start + 1; j < i; j++ {
750 mergeCharClass(sub[start], sub[j])
751 p.reuse(sub[j])
752 }
753 cleanAlt(sub[start])
754 out = append(out, sub[start])
755 }
756
757
758 if i < len(sub) {
759 out = append(out, sub[i])
760 }
761 start = i + 1
762 }
763 sub = out
764
765
766 start = 0
767 out = sub[:0]
768 for i := range sub {
769 if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {
770 continue
771 }
772 out = append(out, sub[i])
773 }
774 sub = out
775
776 return sub
777 }
778
779
780
781 func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
782 if re.Op == OpConcat && len(re.Sub) > 0 {
783 re = re.Sub[0]
784 }
785 if re.Op != OpLiteral {
786 return nil, 0
787 }
788 return re.Rune, re.Flags & FoldCase
789 }
790
791
792
793 func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
794 if re.Op == OpConcat && len(re.Sub) > 0 {
795
796
797 sub := re.Sub[0]
798 sub = p.removeLeadingString(sub, n)
799 re.Sub[0] = sub
800 if sub.Op == OpEmptyMatch {
801 p.reuse(sub)
802 switch len(re.Sub) {
803 case 0, 1:
804
805 re.Op = OpEmptyMatch
806 re.Sub = nil
807 case 2:
808 old := re
809 re = re.Sub[1]
810 p.reuse(old)
811 default:
812 copy(re.Sub, re.Sub[1:])
813 re.Sub = re.Sub[:len(re.Sub)-1]
814 }
815 }
816 return re
817 }
818
819 if re.Op == OpLiteral {
820 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
821 if len(re.Rune) == 0 {
822 re.Op = OpEmptyMatch
823 }
824 }
825 return re
826 }
827
828
829
830 func (p *parser) leadingRegexp(re *Regexp) *Regexp {
831 if re.Op == OpEmptyMatch {
832 return nil
833 }
834 if re.Op == OpConcat && len(re.Sub) > 0 {
835 sub := re.Sub[0]
836 if sub.Op == OpEmptyMatch {
837 return nil
838 }
839 return sub
840 }
841 return re
842 }
843
844
845
846
847 func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
848 if re.Op == OpConcat && len(re.Sub) > 0 {
849 if reuse {
850 p.reuse(re.Sub[0])
851 }
852 re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
853 switch len(re.Sub) {
854 case 0:
855 re.Op = OpEmptyMatch
856 re.Sub = nil
857 case 1:
858 old := re
859 re = re.Sub[0]
860 p.reuse(old)
861 }
862 return re
863 }
864 if reuse {
865 p.reuse(re)
866 }
867 return p.newRegexp(OpEmptyMatch)
868 }
869
870 func literalRegexp(s string, flags Flags) *Regexp {
871 re := &Regexp{Op: OpLiteral}
872 re.Flags = flags
873 re.Rune = re.Rune0[:0]
874 for _, c := range s {
875 if len(re.Rune) >= cap(re.Rune) {
876
877 re.Rune = []rune(s)
878 break
879 }
880 re.Rune = append(re.Rune, c)
881 }
882 return re
883 }
884
885
886
887
888
889
890 func Parse(s string, flags Flags) (*Regexp, error) {
891 return parse(s, flags)
892 }
893
894 func parse(s string, flags Flags) (_ *Regexp, err error) {
895 defer func() {
896 switch r := recover(); r {
897 default:
898 panic(r)
899 case nil:
900
901 case ErrLarge:
902 err = &Error{Code: ErrLarge, Expr: s}
903 case ErrNestingDepth:
904 err = &Error{Code: ErrNestingDepth, Expr: s}
905 }
906 }()
907
908 if flags&Literal != 0 {
909
910 if err := checkUTF8(s); err != nil {
911 return nil, err
912 }
913 return literalRegexp(s, flags), nil
914 }
915
916
917 var (
918 p parser
919 c rune
920 op Op
921 lastRepeat string
922 )
923 p.flags = flags
924 p.wholeRegexp = s
925 t := s
926 for t != "" {
927 repeat := ""
928 BigSwitch:
929 switch t[0] {
930 default:
931 if c, t, err = nextRune(t); err != nil {
932 return nil, err
933 }
934 p.literal(c)
935
936 case '(':
937 if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
938
939 if t, err = p.parsePerlFlags(t); err != nil {
940 return nil, err
941 }
942 break
943 }
944 p.numCap++
945 p.op(opLeftParen).Cap = p.numCap
946 t = t[1:]
947 case '|':
948 if err = p.parseVerticalBar(); err != nil {
949 return nil, err
950 }
951 t = t[1:]
952 case ')':
953 if err = p.parseRightParen(); err != nil {
954 return nil, err
955 }
956 t = t[1:]
957 case '^':
958 if p.flags&OneLine != 0 {
959 p.op(OpBeginText)
960 } else {
961 p.op(OpBeginLine)
962 }
963 t = t[1:]
964 case '$':
965 if p.flags&OneLine != 0 {
966 p.op(OpEndText).Flags |= WasDollar
967 } else {
968 p.op(OpEndLine)
969 }
970 t = t[1:]
971 case '.':
972 if p.flags&DotNL != 0 {
973 p.op(OpAnyChar)
974 } else {
975 p.op(OpAnyCharNotNL)
976 }
977 t = t[1:]
978 case '[':
979 if t, err = p.parseClass(t); err != nil {
980 return nil, err
981 }
982 case '*', '+', '?':
983 before := t
984 switch t[0] {
985 case '*':
986 op = OpStar
987 case '+':
988 op = OpPlus
989 case '?':
990 op = OpQuest
991 }
992 after := t[1:]
993 if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
994 return nil, err
995 }
996 repeat = before
997 t = after
998 case '{':
999 op = OpRepeat
1000 before := t
1001 min, max, after, ok := p.parseRepeat(t)
1002 if !ok {
1003
1004 p.literal('{')
1005 t = t[1:]
1006 break
1007 }
1008 if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
1009
1010 return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
1011 }
1012 if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
1013 return nil, err
1014 }
1015 repeat = before
1016 t = after
1017 case '\\':
1018 if p.flags&PerlX != 0 && len(t) >= 2 {
1019 switch t[1] {
1020 case 'A':
1021 p.op(OpBeginText)
1022 t = t[2:]
1023 break BigSwitch
1024 case 'b':
1025 p.op(OpWordBoundary)
1026 t = t[2:]
1027 break BigSwitch
1028 case 'B':
1029 p.op(OpNoWordBoundary)
1030 t = t[2:]
1031 break BigSwitch
1032 case 'C':
1033
1034 return nil, &Error{ErrInvalidEscape, t[:2]}
1035 case 'Q':
1036
1037 var lit string
1038 lit, t, _ = strings.Cut(t[2:], `\E`)
1039 for lit != "" {
1040 c, rest, err := nextRune(lit)
1041 if err != nil {
1042 return nil, err
1043 }
1044 p.literal(c)
1045 lit = rest
1046 }
1047 break BigSwitch
1048 case 'z':
1049 p.op(OpEndText)
1050 t = t[2:]
1051 break BigSwitch
1052 }
1053 }
1054
1055 re := p.newRegexp(OpCharClass)
1056 re.Flags = p.flags
1057
1058
1059 if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
1060 r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
1061 if err != nil {
1062 return nil, err
1063 }
1064 if r != nil {
1065 re.Rune = r
1066 t = rest
1067 p.push(re)
1068 break BigSwitch
1069 }
1070 }
1071
1072
1073 if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
1074 re.Rune = r
1075 t = rest
1076 p.push(re)
1077 break BigSwitch
1078 }
1079 p.reuse(re)
1080
1081
1082 if c, t, err = p.parseEscape(t); err != nil {
1083 return nil, err
1084 }
1085 p.literal(c)
1086 }
1087 lastRepeat = repeat
1088 }
1089
1090 p.concat()
1091 if p.swapVerticalBar() {
1092
1093 p.stack = p.stack[:len(p.stack)-1]
1094 }
1095 p.alternate()
1096
1097 n := len(p.stack)
1098 if n != 1 {
1099 return nil, &Error{ErrMissingParen, s}
1100 }
1101 return p.stack[0], nil
1102 }
1103
1104
1105
1106
1107 func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
1108 if s == "" || s[0] != '{' {
1109 return
1110 }
1111 s = s[1:]
1112 var ok1 bool
1113 if min, s, ok1 = p.parseInt(s); !ok1 {
1114 return
1115 }
1116 if s == "" {
1117 return
1118 }
1119 if s[0] != ',' {
1120 max = min
1121 } else {
1122 s = s[1:]
1123 if s == "" {
1124 return
1125 }
1126 if s[0] == '}' {
1127 max = -1
1128 } else if max, s, ok1 = p.parseInt(s); !ok1 {
1129 return
1130 } else if max < 0 {
1131
1132 min = -1
1133 }
1134 }
1135 if s == "" || s[0] != '}' {
1136 return
1137 }
1138 rest = s[1:]
1139 ok = true
1140 return
1141 }
1142
1143
1144
1145
1146 func (p *parser) parsePerlFlags(s string) (rest string, err error) {
1147 t := s
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164 if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
1165
1166 end := strings.IndexRune(t, '>')
1167 if end < 0 {
1168 if err = checkUTF8(t); err != nil {
1169 return "", err
1170 }
1171 return "", &Error{ErrInvalidNamedCapture, s}
1172 }
1173
1174 capture := t[:end+1]
1175 name := t[4:end]
1176 if err = checkUTF8(name); err != nil {
1177 return "", err
1178 }
1179 if !isValidCaptureName(name) {
1180 return "", &Error{ErrInvalidNamedCapture, capture}
1181 }
1182
1183
1184 p.numCap++
1185 re := p.op(opLeftParen)
1186 re.Cap = p.numCap
1187 re.Name = name
1188 return t[end+1:], nil
1189 }
1190
1191
1192 var c rune
1193 t = t[2:]
1194 flags := p.flags
1195 sign := +1
1196 sawFlag := false
1197 Loop:
1198 for t != "" {
1199 if c, t, err = nextRune(t); err != nil {
1200 return "", err
1201 }
1202 switch c {
1203 default:
1204 break Loop
1205
1206
1207 case 'i':
1208 flags |= FoldCase
1209 sawFlag = true
1210 case 'm':
1211 flags &^= OneLine
1212 sawFlag = true
1213 case 's':
1214 flags |= DotNL
1215 sawFlag = true
1216 case 'U':
1217 flags |= NonGreedy
1218 sawFlag = true
1219
1220
1221 case '-':
1222 if sign < 0 {
1223 break Loop
1224 }
1225 sign = -1
1226
1227
1228 flags = ^flags
1229 sawFlag = false
1230
1231
1232 case ':', ')':
1233 if sign < 0 {
1234 if !sawFlag {
1235 break Loop
1236 }
1237 flags = ^flags
1238 }
1239 if c == ':' {
1240
1241 p.op(opLeftParen)
1242 }
1243 p.flags = flags
1244 return t, nil
1245 }
1246 }
1247
1248 return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}
1249 }
1250
1251
1252
1253
1254
1255
1256 func isValidCaptureName(name string) bool {
1257 if name == "" {
1258 return false
1259 }
1260 for _, c := range name {
1261 if c != '_' && !isalnum(c) {
1262 return false
1263 }
1264 }
1265 return true
1266 }
1267
1268
1269 func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
1270 if s == "" || s[0] < '0' || '9' < s[0] {
1271 return
1272 }
1273
1274 if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
1275 return
1276 }
1277 t := s
1278 for s != "" && '0' <= s[0] && s[0] <= '9' {
1279 s = s[1:]
1280 }
1281 rest = s
1282 ok = true
1283
1284 t = t[:len(t)-len(s)]
1285 for i := 0; i < len(t); i++ {
1286
1287 if n >= 1e8 {
1288 n = -1
1289 break
1290 }
1291 n = n*10 + int(t[i]) - '0'
1292 }
1293 return
1294 }
1295
1296
1297
1298 func isCharClass(re *Regexp) bool {
1299 return re.Op == OpLiteral && len(re.Rune) == 1 ||
1300 re.Op == OpCharClass ||
1301 re.Op == OpAnyCharNotNL ||
1302 re.Op == OpAnyChar
1303 }
1304
1305
1306 func matchRune(re *Regexp, r rune) bool {
1307 switch re.Op {
1308 case OpLiteral:
1309 return len(re.Rune) == 1 && re.Rune[0] == r
1310 case OpCharClass:
1311 for i := 0; i < len(re.Rune); i += 2 {
1312 if re.Rune[i] <= r && r <= re.Rune[i+1] {
1313 return true
1314 }
1315 }
1316 return false
1317 case OpAnyCharNotNL:
1318 return r != '\n'
1319 case OpAnyChar:
1320 return true
1321 }
1322 return false
1323 }
1324
1325
1326 func (p *parser) parseVerticalBar() error {
1327 p.concat()
1328
1329
1330
1331
1332
1333 if !p.swapVerticalBar() {
1334 p.op(opVerticalBar)
1335 }
1336
1337 return nil
1338 }
1339
1340
1341
1342
1343 func mergeCharClass(dst, src *Regexp) {
1344 switch dst.Op {
1345 case OpAnyChar:
1346
1347 case OpAnyCharNotNL:
1348
1349 if matchRune(src, '\n') {
1350 dst.Op = OpAnyChar
1351 }
1352 case OpCharClass:
1353
1354 if src.Op == OpLiteral {
1355 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1356 } else {
1357 dst.Rune = appendClass(dst.Rune, src.Rune)
1358 }
1359 case OpLiteral:
1360
1361 if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
1362 break
1363 }
1364 dst.Op = OpCharClass
1365 dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
1366 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1367 }
1368 }
1369
1370
1371
1372
1373 func (p *parser) swapVerticalBar() bool {
1374
1375
1376 n := len(p.stack)
1377 if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
1378 re1 := p.stack[n-1]
1379 re3 := p.stack[n-3]
1380
1381 if re1.Op > re3.Op {
1382 re1, re3 = re3, re1
1383 p.stack[n-3] = re3
1384 }
1385 mergeCharClass(re3, re1)
1386 p.reuse(re1)
1387 p.stack = p.stack[:n-1]
1388 return true
1389 }
1390
1391 if n >= 2 {
1392 re1 := p.stack[n-1]
1393 re2 := p.stack[n-2]
1394 if re2.Op == opVerticalBar {
1395 if n >= 3 {
1396
1397
1398 cleanAlt(p.stack[n-3])
1399 }
1400 p.stack[n-2] = re1
1401 p.stack[n-1] = re2
1402 return true
1403 }
1404 }
1405 return false
1406 }
1407
1408
1409 func (p *parser) parseRightParen() error {
1410 p.concat()
1411 if p.swapVerticalBar() {
1412
1413 p.stack = p.stack[:len(p.stack)-1]
1414 }
1415 p.alternate()
1416
1417 n := len(p.stack)
1418 if n < 2 {
1419 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1420 }
1421 re1 := p.stack[n-1]
1422 re2 := p.stack[n-2]
1423 p.stack = p.stack[:n-2]
1424 if re2.Op != opLeftParen {
1425 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1426 }
1427
1428 p.flags = re2.Flags
1429 if re2.Cap == 0 {
1430
1431 p.push(re1)
1432 } else {
1433 re2.Op = OpCapture
1434 re2.Sub = re2.Sub0[:1]
1435 re2.Sub[0] = re1
1436 p.push(re2)
1437 }
1438 return nil
1439 }
1440
1441
1442
1443 func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
1444 t := s[1:]
1445 if t == "" {
1446 return 0, "", &Error{ErrTrailingBackslash, ""}
1447 }
1448 c, t, err := nextRune(t)
1449 if err != nil {
1450 return 0, "", err
1451 }
1452
1453 Switch:
1454 switch c {
1455 default:
1456 if c < utf8.RuneSelf && !isalnum(c) {
1457
1458
1459
1460
1461 return c, t, nil
1462 }
1463
1464
1465 case '1', '2', '3', '4', '5', '6', '7':
1466
1467 if t == "" || t[0] < '0' || t[0] > '7' {
1468 break
1469 }
1470 fallthrough
1471 case '0':
1472
1473 r = c - '0'
1474 for i := 1; i < 3; i++ {
1475 if t == "" || t[0] < '0' || t[0] > '7' {
1476 break
1477 }
1478 r = r*8 + rune(t[0]) - '0'
1479 t = t[1:]
1480 }
1481 return r, t, nil
1482
1483
1484 case 'x':
1485 if t == "" {
1486 break
1487 }
1488 if c, t, err = nextRune(t); err != nil {
1489 return 0, "", err
1490 }
1491 if c == '{' {
1492
1493
1494
1495
1496 nhex := 0
1497 r = 0
1498 for {
1499 if t == "" {
1500 break Switch
1501 }
1502 if c, t, err = nextRune(t); err != nil {
1503 return 0, "", err
1504 }
1505 if c == '}' {
1506 break
1507 }
1508 v := unhex(c)
1509 if v < 0 {
1510 break Switch
1511 }
1512 r = r*16 + v
1513 if r > unicode.MaxRune {
1514 break Switch
1515 }
1516 nhex++
1517 }
1518 if nhex == 0 {
1519 break Switch
1520 }
1521 return r, t, nil
1522 }
1523
1524
1525 x := unhex(c)
1526 if c, t, err = nextRune(t); err != nil {
1527 return 0, "", err
1528 }
1529 y := unhex(c)
1530 if x < 0 || y < 0 {
1531 break
1532 }
1533 return x*16 + y, t, nil
1534
1535
1536
1537
1538
1539
1540
1541 case 'a':
1542 return '\a', t, err
1543 case 'f':
1544 return '\f', t, err
1545 case 'n':
1546 return '\n', t, err
1547 case 'r':
1548 return '\r', t, err
1549 case 't':
1550 return '\t', t, err
1551 case 'v':
1552 return '\v', t, err
1553 }
1554 return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}
1555 }
1556
1557
1558
1559 func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
1560 if s == "" {
1561 return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
1562 }
1563
1564
1565
1566 if s[0] == '\\' {
1567 return p.parseEscape(s)
1568 }
1569
1570 return nextRune(s)
1571 }
1572
1573 type charGroup struct {
1574 sign int
1575 class []rune
1576 }
1577
1578
1579
1580
1581 func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
1582 if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
1583 return
1584 }
1585 g := perlGroup[s[0:2]]
1586 if g.sign == 0 {
1587 return
1588 }
1589 return p.appendGroup(r, g), s[2:]
1590 }
1591
1592
1593
1594
1595 func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
1596 if len(s) < 2 || s[0] != '[' || s[1] != ':' {
1597 return
1598 }
1599
1600 i := strings.Index(s[2:], ":]")
1601 if i < 0 {
1602 return
1603 }
1604 i += 2
1605 name, s := s[0:i+2], s[i+2:]
1606 g := posixGroup[name]
1607 if g.sign == 0 {
1608 return nil, "", &Error{ErrInvalidCharRange, name}
1609 }
1610 return p.appendGroup(r, g), s, nil
1611 }
1612
1613 func (p *parser) appendGroup(r []rune, g charGroup) []rune {
1614 if p.flags&FoldCase == 0 {
1615 if g.sign < 0 {
1616 r = appendNegatedClass(r, g.class)
1617 } else {
1618 r = appendClass(r, g.class)
1619 }
1620 } else {
1621 tmp := p.tmpClass[:0]
1622 tmp = appendFoldedClass(tmp, g.class)
1623 p.tmpClass = tmp
1624 tmp = cleanClass(&p.tmpClass)
1625 if g.sign < 0 {
1626 r = appendNegatedClass(r, tmp)
1627 } else {
1628 r = appendClass(r, tmp)
1629 }
1630 }
1631 return r
1632 }
1633
1634 var anyTable = &unicode.RangeTable{
1635 R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
1636 R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
1637 }
1638
1639
1640
1641 func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
1642
1643 if name == "Any" {
1644 return anyTable, anyTable
1645 }
1646 if t := unicode.Categories[name]; t != nil {
1647 return t, unicode.FoldCategory[name]
1648 }
1649 if t := unicode.Scripts[name]; t != nil {
1650 return t, unicode.FoldScript[name]
1651 }
1652 return nil, nil
1653 }
1654
1655
1656
1657
1658 func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
1659 if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
1660 return
1661 }
1662
1663
1664 sign := +1
1665 if s[1] == 'P' {
1666 sign = -1
1667 }
1668 t := s[2:]
1669 c, t, err := nextRune(t)
1670 if err != nil {
1671 return
1672 }
1673 var seq, name string
1674 if c != '{' {
1675
1676 seq = s[:len(s)-len(t)]
1677 name = seq[2:]
1678 } else {
1679
1680 end := strings.IndexRune(s, '}')
1681 if end < 0 {
1682 if err = checkUTF8(s); err != nil {
1683 return
1684 }
1685 return nil, "", &Error{ErrInvalidCharRange, s}
1686 }
1687 seq, t = s[:end+1], s[end+1:]
1688 name = s[3:end]
1689 if err = checkUTF8(name); err != nil {
1690 return
1691 }
1692 }
1693
1694
1695 if name != "" && name[0] == '^' {
1696 sign = -sign
1697 name = name[1:]
1698 }
1699
1700 tab, fold := unicodeTable(name)
1701 if tab == nil {
1702 return nil, "", &Error{ErrInvalidCharRange, seq}
1703 }
1704
1705 if p.flags&FoldCase == 0 || fold == nil {
1706 if sign > 0 {
1707 r = appendTable(r, tab)
1708 } else {
1709 r = appendNegatedTable(r, tab)
1710 }
1711 } else {
1712
1713
1714
1715 tmp := p.tmpClass[:0]
1716 tmp = appendTable(tmp, tab)
1717 tmp = appendTable(tmp, fold)
1718 p.tmpClass = tmp
1719 tmp = cleanClass(&p.tmpClass)
1720 if sign > 0 {
1721 r = appendClass(r, tmp)
1722 } else {
1723 r = appendNegatedClass(r, tmp)
1724 }
1725 }
1726 return r, t, nil
1727 }
1728
1729
1730
1731 func (p *parser) parseClass(s string) (rest string, err error) {
1732 t := s[1:]
1733 re := p.newRegexp(OpCharClass)
1734 re.Flags = p.flags
1735 re.Rune = re.Rune0[:0]
1736
1737 sign := +1
1738 if t != "" && t[0] == '^' {
1739 sign = -1
1740 t = t[1:]
1741
1742
1743
1744 if p.flags&ClassNL == 0 {
1745 re.Rune = append(re.Rune, '\n', '\n')
1746 }
1747 }
1748
1749 class := re.Rune
1750 first := true
1751 for t == "" || t[0] != ']' || first {
1752
1753
1754 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
1755 _, size := utf8.DecodeRuneInString(t[1:])
1756 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
1757 }
1758 first = false
1759
1760
1761 if len(t) > 2 && t[0] == '[' && t[1] == ':' {
1762 nclass, nt, err := p.parseNamedClass(t, class)
1763 if err != nil {
1764 return "", err
1765 }
1766 if nclass != nil {
1767 class, t = nclass, nt
1768 continue
1769 }
1770 }
1771
1772
1773 nclass, nt, err := p.parseUnicodeClass(t, class)
1774 if err != nil {
1775 return "", err
1776 }
1777 if nclass != nil {
1778 class, t = nclass, nt
1779 continue
1780 }
1781
1782
1783 if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
1784 class, t = nclass, nt
1785 continue
1786 }
1787
1788
1789 rng := t
1790 var lo, hi rune
1791 if lo, t, err = p.parseClassChar(t, s); err != nil {
1792 return "", err
1793 }
1794 hi = lo
1795
1796 if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
1797 t = t[1:]
1798 if hi, t, err = p.parseClassChar(t, s); err != nil {
1799 return "", err
1800 }
1801 if hi < lo {
1802 rng = rng[:len(rng)-len(t)]
1803 return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
1804 }
1805 }
1806 if p.flags&FoldCase == 0 {
1807 class = appendRange(class, lo, hi)
1808 } else {
1809 class = appendFoldedRange(class, lo, hi)
1810 }
1811 }
1812 t = t[1:]
1813
1814
1815 re.Rune = class
1816 class = cleanClass(&re.Rune)
1817 if sign < 0 {
1818 class = negateClass(class)
1819 }
1820 re.Rune = class
1821 p.push(re)
1822 return t, nil
1823 }
1824
1825
1826
1827 func cleanClass(rp *[]rune) []rune {
1828
1829
1830 sort.Sort(ranges{rp})
1831
1832 r := *rp
1833 if len(r) < 2 {
1834 return r
1835 }
1836
1837
1838 w := 2
1839 for i := 2; i < len(r); i += 2 {
1840 lo, hi := r[i], r[i+1]
1841 if lo <= r[w-1]+1 {
1842
1843 if hi > r[w-1] {
1844 r[w-1] = hi
1845 }
1846 continue
1847 }
1848
1849 r[w] = lo
1850 r[w+1] = hi
1851 w += 2
1852 }
1853
1854 return r[:w]
1855 }
1856
1857
1858 func appendLiteral(r []rune, x rune, flags Flags) []rune {
1859 if flags&FoldCase != 0 {
1860 return appendFoldedRange(r, x, x)
1861 }
1862 return appendRange(r, x, x)
1863 }
1864
1865
1866 func appendRange(r []rune, lo, hi rune) []rune {
1867
1868
1869
1870
1871 n := len(r)
1872 for i := 2; i <= 4; i += 2 {
1873 if n >= i {
1874 rlo, rhi := r[n-i], r[n-i+1]
1875 if lo <= rhi+1 && rlo <= hi+1 {
1876 if lo < rlo {
1877 r[n-i] = lo
1878 }
1879 if hi > rhi {
1880 r[n-i+1] = hi
1881 }
1882 return r
1883 }
1884 }
1885 }
1886
1887 return append(r, lo, hi)
1888 }
1889
1890 const (
1891
1892
1893 minFold = 0x0041
1894 maxFold = 0x1e943
1895 )
1896
1897
1898
1899 func appendFoldedRange(r []rune, lo, hi rune) []rune {
1900
1901 if lo <= minFold && hi >= maxFold {
1902
1903 return appendRange(r, lo, hi)
1904 }
1905 if hi < minFold || lo > maxFold {
1906
1907 return appendRange(r, lo, hi)
1908 }
1909 if lo < minFold {
1910
1911 r = appendRange(r, lo, minFold-1)
1912 lo = minFold
1913 }
1914 if hi > maxFold {
1915
1916 r = appendRange(r, maxFold+1, hi)
1917 hi = maxFold
1918 }
1919
1920
1921 for c := lo; c <= hi; c++ {
1922 r = appendRange(r, c, c)
1923 f := unicode.SimpleFold(c)
1924 for f != c {
1925 r = appendRange(r, f, f)
1926 f = unicode.SimpleFold(f)
1927 }
1928 }
1929 return r
1930 }
1931
1932
1933
1934 func appendClass(r []rune, x []rune) []rune {
1935 for i := 0; i < len(x); i += 2 {
1936 r = appendRange(r, x[i], x[i+1])
1937 }
1938 return r
1939 }
1940
1941
1942 func appendFoldedClass(r []rune, x []rune) []rune {
1943 for i := 0; i < len(x); i += 2 {
1944 r = appendFoldedRange(r, x[i], x[i+1])
1945 }
1946 return r
1947 }
1948
1949
1950
1951 func appendNegatedClass(r []rune, x []rune) []rune {
1952 nextLo := '\u0000'
1953 for i := 0; i < len(x); i += 2 {
1954 lo, hi := x[i], x[i+1]
1955 if nextLo <= lo-1 {
1956 r = appendRange(r, nextLo, lo-1)
1957 }
1958 nextLo = hi + 1
1959 }
1960 if nextLo <= unicode.MaxRune {
1961 r = appendRange(r, nextLo, unicode.MaxRune)
1962 }
1963 return r
1964 }
1965
1966
1967 func appendTable(r []rune, x *unicode.RangeTable) []rune {
1968 for _, xr := range x.R16 {
1969 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1970 if stride == 1 {
1971 r = appendRange(r, lo, hi)
1972 continue
1973 }
1974 for c := lo; c <= hi; c += stride {
1975 r = appendRange(r, c, c)
1976 }
1977 }
1978 for _, xr := range x.R32 {
1979 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1980 if stride == 1 {
1981 r = appendRange(r, lo, hi)
1982 continue
1983 }
1984 for c := lo; c <= hi; c += stride {
1985 r = appendRange(r, c, c)
1986 }
1987 }
1988 return r
1989 }
1990
1991
1992 func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
1993 nextLo := '\u0000'
1994 for _, xr := range x.R16 {
1995 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1996 if stride == 1 {
1997 if nextLo <= lo-1 {
1998 r = appendRange(r, nextLo, lo-1)
1999 }
2000 nextLo = hi + 1
2001 continue
2002 }
2003 for c := lo; c <= hi; c += stride {
2004 if nextLo <= c-1 {
2005 r = appendRange(r, nextLo, c-1)
2006 }
2007 nextLo = c + 1
2008 }
2009 }
2010 for _, xr := range x.R32 {
2011 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2012 if stride == 1 {
2013 if nextLo <= lo-1 {
2014 r = appendRange(r, nextLo, lo-1)
2015 }
2016 nextLo = hi + 1
2017 continue
2018 }
2019 for c := lo; c <= hi; c += stride {
2020 if nextLo <= c-1 {
2021 r = appendRange(r, nextLo, c-1)
2022 }
2023 nextLo = c + 1
2024 }
2025 }
2026 if nextLo <= unicode.MaxRune {
2027 r = appendRange(r, nextLo, unicode.MaxRune)
2028 }
2029 return r
2030 }
2031
2032
2033
2034 func negateClass(r []rune) []rune {
2035 nextLo := '\u0000'
2036 w := 0
2037 for i := 0; i < len(r); i += 2 {
2038 lo, hi := r[i], r[i+1]
2039 if nextLo <= lo-1 {
2040 r[w] = nextLo
2041 r[w+1] = lo - 1
2042 w += 2
2043 }
2044 nextLo = hi + 1
2045 }
2046 r = r[:w]
2047 if nextLo <= unicode.MaxRune {
2048
2049
2050 r = append(r, nextLo, unicode.MaxRune)
2051 }
2052 return r
2053 }
2054
2055
2056
2057
2058
2059 type ranges struct {
2060 p *[]rune
2061 }
2062
2063 func (ra ranges) Less(i, j int) bool {
2064 p := *ra.p
2065 i *= 2
2066 j *= 2
2067 return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
2068 }
2069
2070 func (ra ranges) Len() int {
2071 return len(*ra.p) / 2
2072 }
2073
2074 func (ra ranges) Swap(i, j int) {
2075 p := *ra.p
2076 i *= 2
2077 j *= 2
2078 p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
2079 }
2080
2081 func checkUTF8(s string) error {
2082 for s != "" {
2083 rune, size := utf8.DecodeRuneInString(s)
2084 if rune == utf8.RuneError && size == 1 {
2085 return &Error{Code: ErrInvalidUTF8, Expr: s}
2086 }
2087 s = s[size:]
2088 }
2089 return nil
2090 }
2091
2092 func nextRune(s string) (c rune, t string, err error) {
2093 c, size := utf8.DecodeRuneInString(s)
2094 if c == utf8.RuneError && size == 1 {
2095 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
2096 }
2097 return c, s[size:], nil
2098 }
2099
2100 func isalnum(c rune) bool {
2101 return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
2102 }
2103
2104 func unhex(c rune) rune {
2105 if '0' <= c && c <= '9' {
2106 return c - '0'
2107 }
2108 if 'a' <= c && c <= 'f' {
2109 return c - 'a' + 10
2110 }
2111 if 'A' <= c && c <= 'F' {
2112 return c - 'A' + 10
2113 }
2114 return -1
2115 }
2116
View as plain text