Source file src/unicode/utf16/utf16_test.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package utf16_test
     6  
     7  import (
     8  	"reflect"
     9  	"testing"
    10  	"unicode"
    11  	. "unicode/utf16"
    12  )
    13  
    14  // Validate the constants redefined from unicode.
    15  func TestConstants(t *testing.T) {
    16  	if MaxRune != unicode.MaxRune {
    17  		t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune)
    18  	}
    19  	if ReplacementChar != unicode.ReplacementChar {
    20  		t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar)
    21  	}
    22  }
    23  
    24  type encodeTest struct {
    25  	in  []rune
    26  	out []uint16
    27  }
    28  
    29  var encodeTests = []encodeTest{
    30  	{[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
    31  	{[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
    32  		[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}},
    33  	{[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
    34  		[]uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}},
    35  }
    36  
    37  func TestEncode(t *testing.T) {
    38  	for _, tt := range encodeTests {
    39  		out := Encode(tt.in)
    40  		if !reflect.DeepEqual(out, tt.out) {
    41  			t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out)
    42  		}
    43  	}
    44  }
    45  
    46  func TestAppendRune(t *testing.T) {
    47  	for _, tt := range encodeTests {
    48  		var out []uint16
    49  		for _, u := range tt.in {
    50  			out = AppendRune(out, u)
    51  		}
    52  		if !reflect.DeepEqual(out, tt.out) {
    53  			t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out)
    54  		}
    55  	}
    56  }
    57  
    58  func TestEncodeRune(t *testing.T) {
    59  	for i, tt := range encodeTests {
    60  		j := 0
    61  		for _, r := range tt.in {
    62  			r1, r2 := EncodeRune(r)
    63  			if r < 0x10000 || r > unicode.MaxRune {
    64  				if j >= len(tt.out) {
    65  					t.Errorf("#%d: ran out of tt.out", i)
    66  					break
    67  				}
    68  				if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
    69  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
    70  				}
    71  				j++
    72  			} else {
    73  				if j+1 >= len(tt.out) {
    74  					t.Errorf("#%d: ran out of tt.out", i)
    75  					break
    76  				}
    77  				if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) {
    78  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
    79  				}
    80  				j += 2
    81  				dec := DecodeRune(r1, r2)
    82  				if dec != r {
    83  					t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
    84  				}
    85  			}
    86  		}
    87  		if j != len(tt.out) {
    88  			t.Errorf("#%d: EncodeRune didn't generate enough output", i)
    89  		}
    90  	}
    91  }
    92  
    93  type decodeTest struct {
    94  	in  []uint16
    95  	out []rune
    96  }
    97  
    98  var decodeTests = []decodeTest{
    99  	{[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}},
   100  	{[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
   101  		[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}},
   102  	{[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}},
   103  	{[]uint16{0xdfff}, []rune{0xfffd}},
   104  }
   105  
   106  func TestDecode(t *testing.T) {
   107  	for _, tt := range decodeTests {
   108  		out := Decode(tt.in)
   109  		if !reflect.DeepEqual(out, tt.out) {
   110  			t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out)
   111  		}
   112  	}
   113  }
   114  
   115  var decodeRuneTests = []struct {
   116  	r1, r2 rune
   117  	want   rune
   118  }{
   119  	{0xd800, 0xdc00, 0x10000},
   120  	{0xd800, 0xdc01, 0x10001},
   121  	{0xd808, 0xdf45, 0x12345},
   122  	{0xdbff, 0xdfff, 0x10ffff},
   123  	{0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted
   124  }
   125  
   126  func TestDecodeRune(t *testing.T) {
   127  	for i, tt := range decodeRuneTests {
   128  		got := DecodeRune(tt.r1, tt.r2)
   129  		if got != tt.want {
   130  			t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want)
   131  		}
   132  	}
   133  }
   134  
   135  var surrogateTests = []struct {
   136  	r    rune
   137  	want bool
   138  }{
   139  	// from https://en.wikipedia.org/wiki/UTF-16
   140  	{'\u007A', false},     // LATIN SMALL LETTER Z
   141  	{'\u6C34', false},     // CJK UNIFIED IDEOGRAPH-6C34 (water)
   142  	{'\uFEFF', false},     // Byte Order Mark
   143  	{'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point)
   144  	{'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF
   145  	{'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point)
   146  
   147  	{rune(0xd7ff), false}, // surr1-1
   148  	{rune(0xd800), true},  // surr1
   149  	{rune(0xdc00), true},  // surr2
   150  	{rune(0xe000), false}, // surr3
   151  	{rune(0xdfff), true},  // surr3-1
   152  }
   153  
   154  func TestIsSurrogate(t *testing.T) {
   155  	for i, tt := range surrogateTests {
   156  		got := IsSurrogate(tt.r)
   157  		if got != tt.want {
   158  			t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want)
   159  		}
   160  	}
   161  }
   162  
   163  func BenchmarkDecodeValidASCII(b *testing.B) {
   164  	// "hello world"
   165  	data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100}
   166  	for i := 0; i < b.N; i++ {
   167  		Decode(data)
   168  	}
   169  }
   170  
   171  func BenchmarkDecodeValidJapaneseChars(b *testing.B) {
   172  	// "日本語日本語日本語"
   173  	data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486}
   174  	for i := 0; i < b.N; i++ {
   175  		Decode(data)
   176  	}
   177  }
   178  
   179  func BenchmarkDecodeRune(b *testing.B) {
   180  	rs := make([]rune, 10)
   181  	// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS
   182  	for i, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
   183  		rs[2*i], rs[2*i+1] = EncodeRune(u)
   184  	}
   185  
   186  	b.ResetTimer()
   187  	for i := 0; i < b.N; i++ {
   188  		for j := 0; j < 5; j++ {
   189  			DecodeRune(rs[2*j], rs[2*j+1])
   190  		}
   191  	}
   192  }
   193  
   194  func BenchmarkEncodeValidASCII(b *testing.B) {
   195  	data := []rune{'h', 'e', 'l', 'l', 'o'}
   196  	for i := 0; i < b.N; i++ {
   197  		Encode(data)
   198  	}
   199  }
   200  
   201  func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
   202  	data := []rune{'日', '本', '語'}
   203  	for i := 0; i < b.N; i++ {
   204  		Encode(data)
   205  	}
   206  }
   207  
   208  func BenchmarkAppendRuneValidASCII(b *testing.B) {
   209  	data := []rune{'h', 'e', 'l', 'l', 'o'}
   210  	a := make([]uint16, 0, len(data)*2)
   211  	for i := 0; i < b.N; i++ {
   212  		for _, u := range data {
   213  			a = AppendRune(a, u)
   214  		}
   215  		a = a[:0]
   216  	}
   217  }
   218  
   219  func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) {
   220  	data := []rune{'日', '本', '語'}
   221  	a := make([]uint16, 0, len(data)*2)
   222  	for i := 0; i < b.N; i++ {
   223  		for _, u := range data {
   224  			a = AppendRune(a, u)
   225  		}
   226  		a = a[:0]
   227  	}
   228  }
   229  
   230  func BenchmarkEncodeRune(b *testing.B) {
   231  	for i := 0; i < b.N; i++ {
   232  		for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
   233  			EncodeRune(u)
   234  		}
   235  	}
   236  }
   237  

View as plain text