Source file test/codegen/memcombine.go

     1  // asmcheck
     2  
     3  // Copyright 2018 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package codegen
     8  
     9  import (
    10  	"encoding/binary"
    11  	"runtime"
    12  )
    13  
    14  // ------------- //
    15  //    Loading    //
    16  // ------------- //
    17  
    18  func load_le64(b []byte) uint64 {
        	// Little-endian 64-bit read through encoding/binary. The directives below are
        	// checked against the return line and look for a 64-bit load per architecture.
    19  	// amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
    20  	// s390x:`MOVDBR\s\(.*\),`
    21  	// arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
    22  	// ppc64le:`MOVD\s`,-`MOV[BHW]Z`
    23  	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
    24  	return binary.LittleEndian.Uint64(b)
    25  }
    26  
    27  func load_le64_idx(b []byte, idx int) uint64 {
        	// Little-endian 64-bit read starting at b[idx:]. The amd64, s390x and arm64
        	// patterns additionally require a base-plus-index memory operand.
    28  	// amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
    29  	// s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
    30  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
    31  	// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
    32  	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
    33  	return binary.LittleEndian.Uint64(b[idx:])
    34  }
    35  
    36  func load_le32(b []byte) uint32 {
        	// Little-endian 32-bit read through encoding/binary. The directives below are
        	// checked against the return line and look for a 32-bit load per architecture.
    37  	// amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
    38  	// 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
    39  	// s390x:`MOVWBR\s\(.*\),`
    40  	// arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
    41  	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
    42  	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
    43  	return binary.LittleEndian.Uint32(b)
    44  }
    45  
    46  func load_le32_idx(b []byte, idx int) uint32 {
    47  	// amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
    48  	// 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
    49  	// s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
    50  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
    51  	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
    52  	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s'
    53  	return binary.LittleEndian.Uint32(b[idx:])
    54  }
    55  
    56  func load_le16(b []byte) uint16 {
        	// Little-endian 16-bit read through encoding/binary. The directives below are
        	// checked against the return line and look for a 16-bit load per architecture.
    57  	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
    58  	// ppc64le:`MOVHZ\s`,-`MOVBZ`
    59  	// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
    60  	// s390x:`MOVHBR\s\(.*\),`
    61  	// ppc64:`MOVHBR\s`,-`MOVBZ`
    62  	return binary.LittleEndian.Uint16(b)
    63  }
    64  
    65  func load_le16_idx(b []byte, idx int) uint16 {
        	// Little-endian 16-bit read starting at b[idx:]. The arm64 and s390x patterns
        	// additionally require a base-plus-index memory operand.
    66  	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
    67  	// ppc64le:`MOVHZ\s`,-`MOVBZ`
    68  	// ppc64:`MOVHBR\s`,-`MOVBZ`
    69  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
    70  	// s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
    71  	return binary.LittleEndian.Uint16(b[idx:])
    72  }
    73  
    74  func load_be64(b []byte) uint64 {
        	// Big-endian 64-bit read through encoding/binary. The directives expect a
        	// byte swap on amd64 v1/v2 (BSWAPQ) and arm64 (REV), and MOVBEQ on amd64 v3.
    75  	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
    76  	// amd64/v3:`MOVBEQ`
    77  	// s390x:`MOVD\s\(.*\),`
    78  	// arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
    79  	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
    80  	// ppc64:`MOVD`,-`MOV[BHW]Z`
    81  	return binary.BigEndian.Uint64(b)
    82  }
    83  
    84  func load_be64_idx(b []byte, idx int) uint64 {
        	// Big-endian 64-bit read starting at b[idx:]. The amd64 v3, s390x and arm64
        	// patterns additionally require a base-plus-index memory operand.
    85  	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
    86  	// amd64/v3: `MOVBEQ\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
    87  	// s390x:`MOVD\s\(.*\)\(.*\*1\),`
    88  	// arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
    89  	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
    90  	// ppc64:`MOVD`,-`MOV[BHW]Z`
    91  	return binary.BigEndian.Uint64(b[idx:])
    92  }
    93  
    94  func load_be32(b []byte) uint32 {
    95  	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
    96  	// amd64/v3: `MOVBEL`
    97  	// s390x:`MOVWZ\s\(.*\),`
    98  	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
    99  	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
   100  	// ppc64:`MOVWZ`,-MOV[BH]Z`
   101  	return binary.BigEndian.Uint32(b)
   102  }
   103  
   104  func load_be32_idx(b []byte, idx int) uint32 {
   105  	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
   106  	// amd64/v3: `MOVBEL\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
   107  	// s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
   108  	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
   109  	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
   110  	// ppc64:`MOVWZ`,-MOV[BH]Z`
   111  	return binary.BigEndian.Uint32(b[idx:])
   112  }
   113  
   114  func load_be16(b []byte) uint16 {
        	// Big-endian 16-bit read through encoding/binary. The directives expect a
        	// 16-bit load, with a byte swap on amd64 (ROLW by 8) and arm64 (REV16W).
   115  	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
   116  	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
   117  	// ppc64le:`MOVHBR`,-`MOVBZ`
   118  	// ppc64:`MOVHZ`,-`MOVBZ`
   119  	// s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
   120  	return binary.BigEndian.Uint16(b)
   121  }
   122  
   123  func load_be16_idx(b []byte, idx int) uint16 {
        	// Big-endian 16-bit read starting at b[idx:]. The arm64 and s390x patterns
        	// additionally require a base-plus-index memory operand.
   124  	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
   125  	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
   126  	// ppc64le:`MOVHBR`,-`MOVBZ`
   127  	// ppc64:`MOVHZ`,-`MOVBZ`
   128  	// s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
   129  	return binary.BigEndian.Uint16(b[idx:])
   130  }
   131  
   132  func load_le_byte2_uint16(s []byte) uint16 {
        	// Builds a little-endian uint16 by hand from s[0] and s[1]. The directives
        	// expect one 16-bit load and reject separate byte loads.
   133  	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
   134  	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
   135  	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
   136  	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
   137  	// ppc64:`MOVHBR`,-`MOVBZ`
   138  	return uint16(s[0]) | uint16(s[1])<<8
   139  }
   140  
   141  func load_le_byte2_uint16_inv(s []byte) uint16 {
        	// Same value as load_le_byte2_uint16, with the OR operands written high byte
        	// first; the same merged 16-bit load is expected.
   142  	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
   143  	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
   144  	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
   145  	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
   146  	// ppc64:`MOVHBR`,-`MOVBZ`
   147  	return uint16(s[1])<<8 | uint16(s[0])
   148  }
   149  
   150  func load_le_byte4_uint32(s []byte) uint32 {
   151  	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
   152  	// 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
   153  	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
   154  	// ppc64le:`MOVWZ\t\(R[0-9]+\)`,-`MOV[BH]Z`
   155  	// ppc64:`MOVWBR`,-MOV[BH]Z`
   156  	return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
   157  }
   158  
   159  func load_le_byte4_uint32_inv(s []byte) uint32 {
        	// Little-endian uint32 from s[0..3] with the OR operands written high byte
        	// first; the directives still expect one merged 32-bit load.
   160  	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
   161  	// ppc64le:`MOVWZ`,-`MOV[BH]Z`
   162  	// ppc64:`MOVWBR`,-`MOV[BH]Z`
   163  	return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0])
   164  }
   165  
   166  func load_le_byte8_uint64(s []byte) uint64 {
   167  	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
   168  	// amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR`
   169  	// ppc64le:`MOVD\t\(R[0-9]+\)`,-`MOV[BHW]Z`
   170  	// ppc64:`MOVDBR`,-`MOVW[WHB]Z`
   171  	return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56
   172  }
   173  
   174  func load_le_byte8_uint64_inv(s []byte) uint64 {
        	// Little-endian uint64 from s[0..7] with the OR operands written high byte
        	// first; the directives still expect one merged 64-bit load.
   175  	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
   176  	// ppc64le:`MOVD`,-`MOV[WHB]Z`
   177  	// ppc64:`MOVDBR`,-`MOV[WHB]Z`
   178  	return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0])
   179  }
   180  
   181  func load_be_byte2_uint16(s []byte) uint16 {
        	// Builds a big-endian uint16 by hand from s[0] and s[1]. The directives expect
        	// one 16-bit load (plus REV16W on arm64, ROLW on amd64) and no byte loads.
   182  	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
   183  	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
   184  	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
   185  	// ppc64:`MOVHZ`,-`MOVBZ`
   186  	return uint16(s[0])<<8 | uint16(s[1])
   187  }
   188  
   189  func load_be_byte2_uint16_inv(s []byte) uint16 {
        	// Same value as load_be_byte2_uint16, with the OR operands written low byte
        	// first; the same merged 16-bit load is expected.
   190  	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
   191  	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
   192  	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
   193  	// ppc64:`MOVHZ`,-`MOVBZ`
   194  	return uint16(s[1]) | uint16(s[0])<<8
   195  }
   196  
   197  func load_be_byte4_uint32(s []byte) uint32 {
        	// Builds a big-endian uint32 by hand from s[0..3]. The directives expect one
        	// 32-bit load (with REVW on arm64) and no narrower loads.
   198  	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
   199  	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
   200  	// ppc64:`MOVWZ`,-`MOV[HB]Z`
   201  	return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3])
   202  }
   203  
   204  func load_be_byte4_uint32_inv(s []byte) uint32 {
        	// Big-endian uint32 from s[0..3] with the OR operands written low byte first.
        	// amd64 v1/v2 expect a 32-bit load plus BSWAPL; v3 expects MOVBEL.
   205  	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
   206  	// amd64/v1,amd64/v2:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR`
   207  	// amd64/v3: `MOVBEL`
   208  	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
   209  	// ppc64:`MOVWZ`,-`MOV[HB]Z`
   210  	return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24
   211  }
   212  
   213  func load_be_byte8_uint64(s []byte) uint64 {
        	// Builds a big-endian uint64 by hand from s[0..7]. The directives expect one
        	// 64-bit load (with REV on arm64) and no narrower loads.
   214  	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
   215  	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
   216  	// ppc64:`MOVD`,-`MOV[WHB]Z`
   217  	return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7])
   218  }
   219  
   220  func load_be_byte8_uint64_inv(s []byte) uint64 {
        	// Big-endian uint64 from s[0..7] with the OR operands written low byte first.
        	// amd64 v1/v2 expect a 64-bit load plus BSWAPQ; v3 expects MOVBEQ.
   221  	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
   222  	// amd64/v1,amd64/v2:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
   223  	// amd64/v3: `MOVBEQ`
   224  	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
   225  	// ppc64:`MOVD`,-`MOV[BHW]Z`
   226  	return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56
   227  }
   228  
   229  func load_le_byte2_uint16_idx(s []byte, idx int) uint16 {
        	// Little-endian uint16 built from s[idx] and s[idx+1]. The arm64, 386 and
        	// amd64 patterns expect one 16-bit load with a base-plus-index operand.
   230  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
   231  	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
   232  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
   233  	// ppc64le:`MOVHZ`,-`MOVBZ`
   234  	// ppc64:`MOVHBR`,-`MOVBZ`
   235  	return uint16(s[idx]) | uint16(s[idx+1])<<8
   236  }
   237  
   238  func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
        	// Same value as load_le_byte2_uint16_idx, with the OR operands written high
        	// byte first; the same merged indexed load is expected.
   239  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
   240  	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
   241  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
   242  	// ppc64le:`MOVHZ`,-`MOVBZ`
   243  	// ppc64:`MOVHBR`,-`MOVBZ`
   244  	return uint16(s[idx+1])<<8 | uint16(s[idx])
   245  }
   246  
   247  func load_le_byte4_uint32_idx(s []byte, idx int) uint32 {
        	// Little-endian uint32 built from s[idx..idx+3]. The directives expect one
        	// 32-bit load with a base-plus-index operand and no narrower loads.
   248  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
   249  	// amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR`
   250  	return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24
   251  }
   252  
   253  func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 {
        	// Same value as load_le_byte4_uint32_idx, with the OR operands written high
        	// byte first; only arm64 is checked here.
   254  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
   255  	return uint32(s[idx+3])<<24 | uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx])
   256  }
   257  
   258  func load_le_byte8_uint64_idx(s []byte, idx int) uint64 {
        	// Little-endian uint64 built from s[idx..idx+7]. The directives expect one
        	// 64-bit load with a base-plus-index operand and no narrower loads.
   259  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
   260  	// amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR`
   261  	return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56
   262  }
   263  
   264  func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 {
        	// Same value as load_le_byte8_uint64_idx, with the OR operands written high
        	// byte first; only arm64 is checked here.
   265  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
   266  	return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx])
   267  }
   268  
   269  func load_be_byte2_uint16_idx(s []byte, idx int) uint16 {
        	// Big-endian uint16 built from s[idx] and s[idx+1]. The directives expect one
        	// indexed 16-bit load (plus REV16W on arm64) and no byte loads.
   270  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
   271  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
   272  	return uint16(s[idx])<<8 | uint16(s[idx+1])
   273  }
   274  
   275  func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
        	// Same value as load_be_byte2_uint16_idx, with the OR operands written low
        	// byte first; the same merged indexed load is expected.
   276  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
   277  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
   278  	return uint16(s[idx+1]) | uint16(s[idx])<<8
   279  }
   280  
   281  func load_be_byte4_uint32_idx(s []byte, idx int) uint32 {
        	// Big-endian uint32 built from s[idx..idx+3]. The arm64 directive expects one
        	// indexed 32-bit load followed by REVW, with no narrower loads.
   282  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
   283  	return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3])
   284  }
   285  
   286  func load_be_byte8_uint64_idx(s []byte, idx int) uint64 {
        	// Big-endian uint64 built from s[idx..idx+7]. The arm64 directive expects one
        	// indexed 64-bit load followed by REV, with no narrower loads.
   287  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
   288  	return uint64(s[idx])<<56 | uint64(s[idx+1])<<48 | uint64(s[idx+2])<<40 | uint64(s[idx+3])<<32 | uint64(s[idx+4])<<24 | uint64(s[idx+5])<<16 | uint64(s[idx+6])<<8 | uint64(s[idx+7])
   289  }
   290  
   291  func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 {
        	// Little-endian uint16 at byte offset idx<<1. The arm64 directive expects the
        	// shift to be folded into the load as a scaled index operand.
   292  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
   293  	return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
   294  }
   295  
   296  func load_le_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
        	// Same value as load_le_byte2_uint16_idx2, with the OR operands written high
        	// byte first; the same scaled-index 16-bit load is expected.
   297  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
   298  	return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1])
   299  }
   300  
   301  func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 {
        	// Little-endian uint32 at byte offset idx<<2. The arm64 directive expects the
        	// shift to be folded into the load as a scaled index operand.
   302  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
   303  	return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
   304  }
   305  
   306  func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 {
        	// Same value as load_le_byte4_uint32_idx4, with the OR operands written high
        	// byte first; the same scaled-index 32-bit load is expected.
   307  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
   308  	return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2])
   309  }
   310  
   311  func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 {
        	// Little-endian uint64 at byte offset idx<<3. The arm64 directive expects the
        	// shift to be folded into the load as a scaled index operand.
   312  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
   313  	return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
   314  }
   315  
   316  func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 {
        	// Same value as load_le_byte8_uint64_idx8, with the OR operands written high
        	// byte first; the same scaled-index 64-bit load is expected.
   317  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
   318  	return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3])
   319  }
   320  
   321  func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 {
        	// Big-endian uint16 at byte offset idx<<1. The arm64 directive expects a
        	// scaled-index 16-bit load followed by REV16W.
   322  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
   323  	return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1])
   324  }
   325  
   326  func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
        	// Same value as load_be_byte2_uint16_idx2, with the OR operands written low
        	// byte first; the same scaled-index load plus REV16W is expected.
   327  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
   328  	return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8
   329  }
   330  
   331  func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 {
        	// Big-endian uint32 at byte offset idx<<2. The arm64 directive expects a
        	// scaled-index 32-bit load followed by REVW.
   332  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
   333  	return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3])
   334  }
   335  
   336  func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
        	// Big-endian uint64 at byte offset idx<<3. The arm64 directive expects a
        	// scaled-index 64-bit load followed by REV.
   337  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
   338  	return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
   339  }
   340  
   341  // Some tougher cases for the memcombine pass.
   342  
   343  func reassoc_load_uint32(b []byte) uint32 {
        	// Little-endian uint32 from b[0..3] with the ORs grouped as two parenthesized
        	// pairs; amd64 is still expected to emit one 32-bit load.
   344  	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
   345  	return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
   346  }
   347  
   348  func extrashift_load_uint32(b []byte) uint32 {
        	// Every byte carries an extra left shift of 2 on top of its little-endian
        	// position; amd64 is expected to emit one 32-bit load plus a single SHLL by 2.
   349  	// amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
   350  	return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
   351  }
   352  
   353  func outoforder_load_uint32(b []byte) uint32 {
        	// Little-endian uint32 from b[0..3] with the byte terms listed in the order
        	// 0, 2, 1, 3; amd64 is still expected to emit one 32-bit load.
   354  	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
   355  	return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
   356  }
   357  
   358  func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
        	// ORs a little-endian 32-bit read of b with x and y; amd64 is expected to use
        	// an ORL with a memory operand and no byte or word moves.
   359  	// amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
   360  	return x | binary.LittleEndian.Uint32(b) | y
   361  	// TODO: Note that
   362  	//   x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
   363  	// doesn't work because it associates in a way that memcombine can't detect it.
   364  }
   365  
   366  // Check load combining across function calls.
   367  
   368  func fcall_byte(a [2]byte) [2]byte {
        	// Unconditionally calls itself, so it is only meaningful as compiled code;
        	// amd64 is expected to move the two-byte array with a 16-bit MOVW.
   369  	return fcall_byte(fcall_byte(a)) // amd64:`MOVW`
   370  }
   371  
   372  func fcall_uint16(a [2]uint16) [2]uint16 {
        	// Unconditionally calls itself, so it is only meaningful as compiled code;
        	// amd64 is expected to move the four-byte array with a 32-bit MOVL.
   373  	return fcall_uint16(fcall_uint16(a)) // amd64:`MOVL`
   374  }
   375  
   376  func fcall_uint32(a [2]uint32) [2]uint32 {
        	// Unconditionally calls itself, so it is only meaningful as compiled code;
        	// amd64 is expected to move the eight-byte array with a 64-bit MOVQ.
   377  	return fcall_uint32(fcall_uint32(a)) // amd64:`MOVQ`
   378  }
   379  
   380  // We want to merge load+op in the first function, but not in the
   381  // second. See Issue 19595.
   382  func load_op_merge(p, q *int) {
        	// Loads *p once and adds it to *q once. The amd64 directive sits on the load
        	// line and expects an ADDQ whose first operand is a memory reference.
   383  	x := *p // amd64:`ADDQ\t\(`
   384  	*q += x // The combined nilcheck and load would normally have this line number, but we want that combined operation to have the line number of the nil check instead (see #33724).
   385  }
   386  func load_op_no_merge(p, q *int) {
        	// Loads *p once, then adds that value to *q ten times. The amd64 directive
        	// expects the ADDQ inside the loop to take a register operand, not memory.
   387  	x := *p
   388  	for i := 0; i < 10; i++ {
   389  		*q += x // amd64:`ADDQ\t[A-Z]`
   390  	}
   391  }
   392  
   393  // Make sure offsets are folded into loads and stores.
   394  func offsets_fold(_, a [20]byte) (b [20]byte) {
        	// Copies the second argument into the named result. The arm64 directive
        	// expects MOVD accesses addressed as a+offset(FP) and b+offset(FP).
   395  	// arm64:`MOVD\tcommand-line-arguments\.a\+[0-9]+\(FP\), R[0-9]+`,`MOVD\tR[0-9]+, command-line-arguments\.b\+[0-9]+\(FP\)`
   396  	b = a
   397  	return
   398  }
   399  
   400  // Make sure we don't put pointers in SSE registers across safe
   401  // points.
   402  
   403  func safe_point(p, q *[2]*int) {
        	// Copies both pointers from p to q with a runtime.GC call in between. The
        	// amd64 directives forbid MOVUPS on the load line and on the store line.
   404  	a, b := p[0], p[1] // amd64:-`MOVUPS`
   405  	runtime.GC()
   406  	q[0], q[1] = a, b // amd64:-`MOVUPS`
   407  }
   408  
   409  // ------------- //
   410  //    Storing    //
   411  // ------------- //
   412  
   413  func store_le64(b []byte, x uint64) {
   414  	// amd64:`MOVQ\s.*\(.*\)$`,-`SHR.`
   415  	// arm64:`MOVD`,-`MOV[WBH]`
   416  	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
   417  	// ppc64:`MOVDBR`,-MOVB\s`
   418  	// s390x:`MOVDBR\s.*\(.*\)$`
   419  	binary.LittleEndian.PutUint64(b, x)
   420  }
   421  
   422  func store_le64_idx(b []byte, x uint64, idx int) {
        	// Little-endian 64-bit write of x starting at b[idx:]. The amd64, arm64 and
        	// s390x patterns require a store with a base-plus-index memory operand.
   423  	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
   424  	// arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
   425  	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
   426  	// ppc64:`MOVDBR`,-`MOVBZ`
   427  	// s390x:`MOVDBR\s.*\(.*\)\(.*\*1\)$`
   428  	binary.LittleEndian.PutUint64(b[idx:], x)
   429  }
   430  
   431  func store_le64_idx2(dst []byte, d, length, offset int) []byte {
        	// Copies eight bytes from dst[d-offset:] to dst[d:d+length] through a
        	// little-endian Uint64/PutUint64 pair and returns dst. The amd64 directive
        	// checks the PutUint64 line for an indexed MOVQ store without shifts.
   432  	a := dst[d : d+length]
   433  	b := dst[d-offset:]
   434  	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
   435  	binary.LittleEndian.PutUint64(a, binary.LittleEndian.Uint64(b))
   436  	return dst
   437  }
   438  
   439  func store_le64_idx_const(b []byte, idx int) {
        	// Little-endian 64-bit write of the constant 123 at b[idx:]. amd64 is
        	// expected to store the immediate directly with an indexed MOVQ.
   440  	// amd64:`MOVQ\s\$123, \(.*\)\(.*\*1\)$`
   441  	binary.LittleEndian.PutUint64(b[idx:], 123)
   442  }
   443  
   444  func store_le64_load(b []byte, x *[8]byte) {
        	// Copies the eight bytes of x into b through a little-endian load/store pair.
        	// The blank read of b[8] performs the bounds check before the checked line.
   445  	_ = b[8]
   446  	// amd64:-`MOV[BWL]`
   447  	// arm64:-`MOV[BWH]`
   448  	// ppc64le:`MOVD\s`,-`MOV[BWH]Z`
   449  	// ppc64:`MOVDBR`
   450  	// s390x:-`MOVB`,-`MOV[WH]BR`
   451  	binary.LittleEndian.PutUint64(b, binary.LittleEndian.Uint64(x[:]))
   452  }
   453  
   454  func store_le32(b []byte, x uint32) {
        	// Little-endian 32-bit write of x into b through encoding/binary. The
        	// directives look for a 32-bit store per architecture.
   455  	// amd64:`MOVL\s`
   456  	// arm64:`MOVW`,-`MOV[BH]`
   457  	// ppc64le:`MOVW\s`
   458  	// ppc64:`MOVWBR`
   459  	// s390x:`MOVWBR\s.*\(.*\)$`
   460  	binary.LittleEndian.PutUint32(b, x)
   461  }
   462  
   463  func store_le32_idx(b []byte, x uint32, idx int) {
        	// Little-endian 32-bit write of x starting at b[idx:]. The arm64 and s390x
        	// patterns require a store with a base-plus-index memory operand.
   464  	// amd64:`MOVL\s`
   465  	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
   466  	// ppc64le:`MOVW\s`
   467  	// ppc64:`MOVWBR`
   468  	// s390x:`MOVWBR\s.*\(.*\)\(.*\*1\)$`
   469  	binary.LittleEndian.PutUint32(b[idx:], x)
   470  }
   471  
   472  func store_le32_idx_const(b []byte, idx int) {
   473  	// amd64:`MOVL\s\$123, \(.*\)\(.*\*1\)$`
   474  	// ppc64x:`MOVW\s`,-MOV[HB]`
   475  	binary.LittleEndian.PutUint32(b[idx:], 123)
   476  }
   477  
   478  func store_le16(b []byte, x uint16) {
        	// Little-endian 16-bit write of x into b through encoding/binary. The
        	// directives look for a 16-bit store per architecture.
   479  	// amd64:`MOVW\s`
   480  	// arm64:`MOVH`,-`MOVB`
   481  	// ppc64le:`MOVH\s`
   482  	// ppc64:`MOVHBR`
   483  	// s390x:`MOVHBR\s.*\(.*\)$`
   484  	binary.LittleEndian.PutUint16(b, x)
   485  }
   486  
   487  func store_le16_idx(b []byte, x uint16, idx int) {
        	// Little-endian 16-bit write of x starting at b[idx:]. The arm64 and s390x
        	// patterns require a store with a base-plus-index memory operand.
   488  	// amd64:`MOVW\s`
   489  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
   490  	// ppc64le:`MOVH\s`
   491  	// ppc64:`MOVHBR\s`
   492  	// s390x:`MOVHBR\s.*\(.*\)\(.*\*1\)$`
   493  	binary.LittleEndian.PutUint16(b[idx:], x)
   494  }
   495  
   496  func store_le16_idx_const(b []byte, idx int) {
        	// Little-endian 16-bit write of the constant 123 at b[idx:]. amd64 is
        	// expected to store the immediate directly with an indexed MOVW.
   497  	// amd64:`MOVW\s\$123, \(.*\)\(.*\*1\)$`
   498  	// ppc64x:`MOVH\s`
   499  	binary.LittleEndian.PutUint16(b[idx:], 123)
   500  }
   501  
   502  func store_be64(b []byte, x uint64) {
        	// Big-endian 64-bit write of x into b through encoding/binary. The directives
        	// expect a byte swap on amd64 v1/v2 and arm64, and MOVBEQ on amd64 v3.
   503  	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
   504  	// amd64/v3: `MOVBEQ`
   505  	// arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
   506  	// ppc64le:`MOVDBR`
   507  	// ppc64:`MOVD\s`
   508  	// s390x:`MOVD\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
   509  	binary.BigEndian.PutUint64(b, x)
   510  }
   511  
   512  func store_be64_idx(b []byte, x uint64, idx int) {
        	// Big-endian 64-bit write of x starting at b[idx:]. The amd64 v3, arm64 and
        	// s390x patterns require a store with a base-plus-index memory operand.
   513  	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
   514  	// amd64/v3:`MOVBEQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
   515  	// arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
   516  	// ppc64le:`MOVDBR`
   517  	// ppc64:`MOVD\s`
   518  	// s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
   519  	binary.BigEndian.PutUint64(b[idx:], x)
   520  }
   521  
   522  func store_be32(b []byte, x uint32) {
        	// Big-endian 32-bit write of x into b through encoding/binary. The directives
        	// expect a byte swap on amd64 v1/v2 and arm64, and MOVBEL on amd64 v3.
   523  	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
   524  	// amd64/v3:`MOVBEL`
   525  	// arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
   526  	// ppc64le:`MOVWBR`
   527  	// ppc64:`MOVW\s`
   528  	// s390x:`MOVW\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
   529  	binary.BigEndian.PutUint32(b, x)
   530  }
   531  
   532  func store_be64_load(b, x *[8]byte) {
        	// Copies eight bytes from x to b through a big-endian Uint64/PutUint64 pair.
        	// The directives forbid any byte-reversal instruction (REV, BSWAPQ).
   533  	// arm64:-`REV`
   534  	// amd64:-`BSWAPQ`
   535  	binary.BigEndian.PutUint64(b[:], binary.BigEndian.Uint64(x[:]))
   536  }
   537  
   538  func store_be32_load(b, x *[8]byte) {
        	// Copies the first four bytes of x to b through a big-endian Uint32/PutUint32
        	// pair. The directives forbid any byte-reversal instruction (REVW, BSWAPL).
   539  	// arm64:-`REVW`
   540  	// amd64:-`BSWAPL`
   541  	binary.BigEndian.PutUint32(b[:], binary.BigEndian.Uint32(x[:]))
   542  }
   543  
   544  func store_be32_idx(b []byte, x uint32, idx int) {
        	// Big-endian 32-bit write of x starting at b[idx:]. The amd64 v3, arm64 and
        	// s390x patterns require a store with a base-plus-index memory operand.
   545  	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
   546  	// amd64/v3:`MOVBEL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
   547  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
   548  	// ppc64le:`MOVWBR`
   549  	// ppc64:`MOVW\s`
   550  	// s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
   551  	binary.BigEndian.PutUint32(b[idx:], x)
   552  }
   553  
   554  func store_be16(b []byte, x uint16) {
        	// Big-endian 16-bit write of x into b through encoding/binary. amd64 v1/v2
        	// expect ROLW by 8, amd64 v3 expects MOVBEW without ROLW, arm64 expects REV16W.
   555  	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
   556  	// amd64/v3:`MOVBEW`,-`ROLW`
   557  	// arm64:`MOVH`,`REV16W`,-`MOVB`
   558  	// ppc64le:`MOVHBR`
   559  	// ppc64:`MOVH\s`
   560  	// s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
   561  	binary.BigEndian.PutUint16(b, x)
   562  }
   563  
   564  func store_be16_idx(b []byte, x uint16, idx int) {
        	// Big-endian 16-bit write of x starting at b[idx:]. The amd64 v3, arm64 and
        	// s390x patterns require a store with a base-plus-index memory operand.
   565  	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
   566  	// amd64/v3:`MOVBEW\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
   567  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
   568  	// ppc64le:`MOVHBR`
   569  	// ppc64:`MOVH\s`
   570  	// s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
   571  	binary.BigEndian.PutUint16(b[idx:], x)
   572  }
   573  
   574  func store_le_byte_2(b []byte, val uint16) {
        	// Writes val byte by byte to b[1] and b[2], low byte first. The blank read of
        	// b[2] does the bounds check up front; one 16-bit store at offset 1 is expected.
   575  	_ = b[2]
   576  	// arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
   577  	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
   578  	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
   579  	// ppc64le:`MOVH\s`,-`MOVB`
   580  	// ppc64:`MOVHBR`,-`MOVB`
   581  	b[1], b[2] = byte(val), byte(val>>8)
   582  }
   583  
   584  func store_le_byte_2_inv(b []byte, val uint16) {
        	// Same bytes as store_le_byte_2, but the assignment lists b[2] before b[1];
        	// the same merged 16-bit store at offset 1 is expected.
   585  	_ = b[2]
   586  	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
   587  	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
   588  	// ppc64le:`MOVH\s`,-`MOVB`
   589  	// ppc64:`MOVHBR`,-`MOVB`
   590  	b[2], b[1] = byte(val>>8), byte(val)
   591  }
   592  
   593  func store_le_byte_4(b []byte, val uint32) {
        	// Writes val byte by byte to b[1..4], low byte first. The blank read of b[4]
        	// does the bounds check up front; one 32-bit store at offset 1 is expected.
   594  	_ = b[4]
   595  	// arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`
   596  	// 386:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
   597  	// amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
   598  	// ppc64le:`MOVW\s`
   599  	// ppc64:`MOVWBR\s`
   600  	b[1], b[2], b[3], b[4] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24)
   601  }
   602  
   603  func store_le_byte_8(b []byte, val uint64) {
        	// Writes val byte by byte to b[1..8], low byte first. The blank read of b[8]
        	// does the bounds check up front; one 64-bit store at offset 1 is expected.
   604  	_ = b[8]
   605  	// arm64:`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`
   606  	// amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
   607  	// ppc64le:`MOVD\s`,-`MOVW`
   608  	// ppc64:`MOVDBR\s`
   609  	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24), byte(val>>32), byte(val>>40), byte(val>>48), byte(val>>56)
   610  }
   611  
   612  func store_be_byte_2(b []byte, val uint16) {
        	// Writes val byte by byte to b[1] and b[2], high byte first. The blank read of
        	// b[2] does the bounds check up front; one 16-bit store is expected.
   613  	_ = b[2]
   614  	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
   615  	// amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
   616  	// amd64/v3: `MOVBEW`
   617  	// ppc64le:`MOVHBR`
   618  	// ppc64:`MOVH\s`
   619  	b[1], b[2] = byte(val>>8), byte(val)
   620  }
   621  
   622  func store_be_byte_4(b []byte, val uint32) {
        	// Writes val byte by byte to b[1..4], high byte first. The blank read of b[4]
        	// does the bounds check up front; one 32-bit store at offset 1 is expected.
   623  	_ = b[4]
   624  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
   625  	// amd64/v1,amd64/v2:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
   626  	// amd64/v3:`MOVBEL\s[A-Z]+,\s1\([A-Z]+\)`
   627  	// ppc64le:`MOVWBR`
   628  	// ppc64:`MOVW\s`
   629  	b[1], b[2], b[3], b[4] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
   630  }
   631  
   632  func store_be_byte_8(b []byte, val uint64) {
        	// Writes val byte by byte to b[1..8], high byte first. The blank read of b[8]
        	// does the bounds check up front; one 64-bit store at offset 1 is expected.
   633  	_ = b[8]
   634  	// arm64:`REV`,`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`,-`REV16W`,-`REVW`
   635  	// amd64/v1,amd64/v2:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
   636  	// amd64/v3:`MOVBEQ\s[A-Z]+,\s1\([A-Z]+\)`, -`MOVBEL`
   637  	// ppc64le:`MOVDBR`
   638  	// ppc64:`MOVD`
   639  	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
   640  }
   641  
   642  func store_le_byte_2_idx(b []byte, idx int, val uint16) {
        	// Writes val little-endian to b[idx] and b[idx+1], listing the high byte first
        	// in the assignment. The blank reads on the first line do the bounds checks.
   643  	_, _ = b[idx+0], b[idx+1]
   644  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
   645  	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
   646  	// ppc64le:`MOVH\s`
   647  	// ppc64:`MOVHBR`
   648  	b[idx+1], b[idx+0] = byte(val>>8), byte(val)
   649  }
   650  
   651  func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) {
        	// Same bytes as store_le_byte_2_idx, but the assignment lists the low byte
        	// first. The blank reads on the first line do the bounds checks.
   652  	_, _ = b[idx+0], b[idx+1]
   653  	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
   654  	// ppc64le:`MOVH\s`
   655  	// ppc64:`MOVHBR`
   656  	b[idx+0], b[idx+1] = byte(val), byte(val>>8)
   657  }
   658  
   659  func store_le_byte_4_idx(b []byte, idx int, val uint32) {
        	// Writes val little-endian to b[idx..idx+3], listing the high byte first in
        	// the assignment. arm64 is expected to emit one indexed 32-bit store.
   660  	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
   661  	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`
   662  	// ppc64le:`MOVW\s`
   663  	// ppc64:`MOVWBR`
   664  	b[idx+3], b[idx+2], b[idx+1], b[idx+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
   665  }
   666  
   667  func store_be_byte_2_idx(b []byte, idx int, val uint16) {
        	// Writes val big-endian to b[idx] and b[idx+1]. arm64 is expected to emit
        	// REV16W plus one indexed 16-bit store and no byte stores.
   668  	_, _ = b[idx+0], b[idx+1]
   669  	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
   670  	// ppc64le:`MOVHBR`
   671  	// ppc64:`MOVH\s`
   672  	b[idx+0], b[idx+1] = byte(val>>8), byte(val)
   673  }
   674  
   675  func store_be_byte_4_idx(b []byte, idx int, val uint32) {
        	// Writes val big-endian to b[idx..idx+3]. arm64 is expected to emit REVW plus
        	// one indexed 32-bit store and no narrower stores.
   676  	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
   677  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
   678  	// ppc64le:`MOVWBR`
   679  	// ppc64:`MOVW\s`
   680  	b[idx+0], b[idx+1], b[idx+2], b[idx+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
   681  }
   682  
   683  func store_be_byte_2_idx2(b []byte, idx int, val uint16) {
        	// Writes val big-endian at byte offset idx<<1. arm64 is expected to fold the
        	// shift into a scaled-index 16-bit store preceded by REV16W.
   684  	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
   685  	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
   686  	// ppc64le:`MOVHBR`
   687  	// ppc64:`MOVH\s`
   688  	b[(idx<<1)+0], b[(idx<<1)+1] = byte(val>>8), byte(val)
   689  }
   690  
   691  func store_le_byte_2_idx2(b []byte, idx int, val uint16) {
        	// Writes val little-endian at byte offset idx<<1, listing the high byte first.
        	// arm64 is expected to fold the shift into a scaled-index 16-bit store.
   692  	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
   693  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
   694  	// ppc64le:`MOVH\s`
   695  	// ppc64:`MOVHBR`
   696  	b[(idx<<1)+1], b[(idx<<1)+0] = byte(val>>8), byte(val)
   697  }
   698  
   699  func store_be_byte_4_idx4(b []byte, idx int, val uint32) {
        	// Writes val big-endian at byte offset idx<<2. arm64 is expected to fold the
        	// shift into a scaled-index 32-bit store preceded by REVW.
   700  	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
   701  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`,-`REV16W`
   702  	// ppc64le:`MOVWBR`
   703  	// ppc64:`MOVW\s`
   704  	b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
   705  }
   706  
   707  func store_le_byte_4_idx4_inv(b []byte, idx int, val uint32) {
        	// Writes val little-endian at byte offset idx<<2, listing the high byte first.
        	// arm64 is expected to fold the shift into a scaled-index 32-bit store.
   708  	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
   709  	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`
   710  	// ppc64le:`MOVW\s`
   711  	// ppc64:`MOVWBR`
   712  	b[(idx<<2)+3], b[(idx<<2)+2], b[(idx<<2)+1], b[(idx<<2)+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
   713  }
   714  
   715  // ------------- //
   716  //    Zeroing    //
   717  // ------------- //
   718  
   719  // Check that zero stores are combined into larger stores
   720  
   721  func zero_byte_2(b1, b2 []byte) {
        	// Zeroes the first two bytes of b1 in ascending order and of b2 in descending
        	// order; each pair is expected to become one 16-bit zero store.
   722  	// bounds checks to guarantee safety of writes below
   723  	_, _ = b1[1], b2[1]
   724  	// arm64:"MOVH\tZR",-"MOVB"
   725  	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
   726  	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
   727  	// ppc64x:`MOVH\s`
   728  	b1[0], b1[1] = 0, 0
   729  	// arm64:"MOVH\tZR",-"MOVB"
   730  	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
   731  	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
   732  	// ppc64x:`MOVH`
   733  	b2[1], b2[0] = 0, 0
   734  }
   735  
   736  func zero_byte_4(b1, b2 []byte) {
        	// Zeroes the first four bytes of b1 in ascending order and of b2 in the order
        	// 2, 3, 1, 0; each group is expected to become one 32-bit zero store.
   737  	_, _ = b1[3], b2[3]
   738  	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
   739  	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
   740  	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
   741  	// ppc64x:`MOVW\s`
   742  	b1[0], b1[1], b1[2], b1[3] = 0, 0, 0, 0
   743  	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
   744  	// ppc64x:`MOVW\s`
   745  	b2[2], b2[3], b2[1], b2[0] = 0, 0, 0, 0
   746  }
   747  
   748  func zero_byte_8(b []byte) {
        	// Zeroes b[0..7] in two four-byte assignments after one bounds check on b[7].
        	// The arm64 directive on the first assignment expects a 64-bit zero store.
   749  	_ = b[7]
   750  	b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
   751  	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
   752  }
   753  
   754  func zero_byte_16(b []byte) {
        	// Zeroes b[0] through b[15] using four four-element assignments.
        	// The arm64 directive sits on the first assignment line and asks for
        	// an STP with no MOVB, MOVH or MOVW.
        	// The blank read of b[15] precedes all stores.
   755  	_ = b[15]
   756  	b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
   757  	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
   758  	b[8], b[9], b[10], b[11] = 0, 0, 0, 0
   759  	b[12], b[13], b[14], b[15] = 0, 0, 0, 0
   760  }
   761  
   762  func zero_byte_30(a *[30]byte) {
        	// Overwrites the pointed-to 30-byte array with its zero value.
        	// The arm64 directive asks for STP and no MOVB, MOVH or MOVW.
   763  	*a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
   764  }
   765  
   766  func zero_byte_39(a *[39]byte) {
        	// Overwrites the pointed-to 39-byte array with its zero value.
        	// The arm64 directive asks for MOVD and no MOVB, MOVH or MOVW.
   767  	*a = [39]byte{} // arm64:"MOVD",-"MOVB",-"MOVH",-"MOVW"
   768  }
   769  
   770  func zero_byte_2_idx(b []byte, idx int) {
        	// Zeroes b[idx] and b[idx+1], i.e. two adjacent bytes at a variable
        	// offset. The blank read indexes both elements before the stores.
   771  	_, _ = b[idx+0], b[idx+1]
        	// Directives: arm64 wants a MOVH of ZR to a base-plus-index-register
        	// address and no MOVB; ppc64x wants MOVH.
   772  	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
   773  	// ppc64x:`MOVH\s`
   774  	b[idx+0], b[idx+1] = 0, 0
   775  }
   776  
   777  func zero_byte_2_idx2(b []byte, idx int) {
        	// Zeroes b[idx<<1] and b[(idx<<1)+1], i.e. two adjacent bytes at an
        	// offset of idx scaled by 2. The blank read indexes both elements
        	// before the stores.
   778  	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
        	// Directives: arm64 wants a MOVH of ZR whose address uses an index
        	// register shifted left by 1, and no MOVB; ppc64x wants MOVH.
   779  	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
   780  	// ppc64x:`MOVH\s`
   781  	b[(idx<<1)+0], b[(idx<<1)+1] = 0, 0
   782  }
   783  
   784  func zero_uint16_2(h1, h2 []uint16) {
        	// Zeroes the first two uint16 elements of h1 (ascending index order)
        	// and of h2 (descending index order).
        	// The blank read of index 1 in each slice precedes all stores.
   785  	_, _ = h1[1], h2[1]
        	// Directives: arm64 wants MOVW of ZR and no MOVB or MOVH; amd64 and
        	// 386 want a MOVL of constant 0; ppc64x wants MOVW.
   786  	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
   787  	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
   788  	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
   789  	// ppc64x:`MOVW\s`
   790  	h1[0], h1[1] = 0, 0
        	// Same expectations with the two targets written in reverse order.
   791  	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
   792  	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
   793  	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
   794  	// ppc64x:`MOVW`
   795  	h2[1], h2[0] = 0, 0
   796  }
   797  
   798  func zero_uint16_4(h1, h2 []uint16) {
        	// Zeroes the first four uint16 elements of h1 (ascending index order)
        	// and of h2 (targets listed as 2, 3, 1, 0).
        	// The blank read of index 3 in each slice precedes all stores.
   799  	_, _ = h1[3], h2[3]
        	// Directives: arm64 wants MOVD of ZR and no MOVB, MOVH or MOVW; amd64
        	// wants a MOVQ of constant 0; ppc64x wants MOVD.
   800  	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
   801  	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
   802  	// ppc64x:`MOVD\s`
   803  	h1[0], h1[1], h1[2], h1[3] = 0, 0, 0, 0
        	// The shuffled-order variant is only checked on arm64 and ppc64x.
   804  	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
   805  	// ppc64x:`MOVD\s`
   806  	h2[2], h2[3], h2[1], h2[0] = 0, 0, 0, 0
   807  }
   808  
   809  func zero_uint16_8(h []uint16) {
        	// Zeroes h[0] through h[7] using two four-element assignments.
        	// The arm64 directive sits on the first assignment line and asks for
        	// an STP with no MOVB or MOVH.
        	// The blank read of h[7] precedes all stores.
   810  	_ = h[7]
   811  	h[0], h[1], h[2], h[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
   812  	h[4], h[5], h[6], h[7] = 0, 0, 0, 0
   813  }
   814  
   815  func zero_uint32_2(w1, w2 []uint32) {
        	// Zeroes the first two uint32 elements of w1 (ascending index order)
        	// and of w2 (descending index order).
        	// The blank read of index 1 in each slice precedes all stores.
   816  	_, _ = w1[1], w2[1]
        	// Directives: arm64 wants MOVD of ZR and no MOVB, MOVH or MOVW; amd64
        	// wants a MOVQ of constant 0; ppc64x wants MOVD.
   817  	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
   818  	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
   819  	// ppc64x:`MOVD\s`
   820  	w1[0], w1[1] = 0, 0
        	// Same expectations with the two targets written in reverse order.
   821  	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
   822  	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
   823  	// ppc64x:`MOVD\s`
   824  	w2[1], w2[0] = 0, 0
   825  }
   826  
   827  func zero_uint32_4(w1, w2 []uint32) {
        	// Zeroes the first four uint32 elements of w1 (ascending index order)
        	// and of w2 (targets listed as 2, 3, 1, 0). Each assignment line
        	// carries an arm64 directive asking for STP and no MOVB or MOVH.
        	// The blank read of index 3 in each slice precedes all stores.
   828  	_, _ = w1[3], w2[3]
   829  	w1[0], w1[1], w1[2], w1[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
   830  	w2[2], w2[3], w2[1], w2[0] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
   831  }
   832  
   833  func zero_uint64_2(d1, d2 []uint64) {
        	// Zeroes the first two uint64 elements of d1 (ascending index order)
        	// and of d2 (descending index order). Each assignment line carries an
        	// arm64 directive asking for STP and no MOVB or MOVH.
        	// The blank read of index 1 in each slice precedes all stores.
   834  	_, _ = d1[1], d2[1]
   835  	d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
   836  	d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
   837  }
   838  
   839  func loadstore(p, q *[4]uint8) {
        	// Copies the four bytes of *q into *p through four local variables:
        	// one statement reads q[0] through q[3], a second writes p[0] through p[3].
        	// Directives on the read: amd64 wants MOVL and no MOVB; arm64 wants
        	// MOVWU and no MOVBU.
   840  	// amd64:"MOVL",-"MOVB"
   841  	// arm64:"MOVWU",-"MOVBU"
   842  	x0, x1, x2, x3 := q[0], q[1], q[2], q[3]
        	// Directives on the write: amd64 wants MOVL and no MOVB; arm64 wants
        	// MOVW and no MOVB.
   843  	// amd64:"MOVL",-"MOVB"
   844  	// arm64:"MOVW",-"MOVB"
   845  	p[0], p[1], p[2], p[3] = x0, x1, x2, x3
   846  }
   847  
   848  type S1 struct {
        	// Two int16 fields declared together; loadstore2 copies both of them
        	// from one S1 to another.
   849  	a, b int16
   850  }
   851  
   852  func loadstore2(p, q *S1) {
        	// Copies both fields of *p into *q through two local variables
        	// (note the direction: p is read, q is written).
        	// Directives on the read: amd64 wants MOVL and no MOVWLZX; arm64
        	// wants MOVWU and no MOVH.
   853  	// amd64:"MOVL",-"MOVWLZX"
   854  	// arm64:"MOVWU",-"MOVH"
   855  	a, b := p.a, p.b
        	// Directives on the write: amd64 wants MOVL and no MOVW; arm64 wants
        	// MOVW and no MOVH.
   856  	// amd64:"MOVL",-"MOVW"
   857  	// arm64:"MOVW",-"MOVH"
   858  	q.a, q.b = a, b
   859  }
   860  
   861  func wideStore(p *[8]uint64) {
        	// Zeroes p[0] and p[1] with two separate statements; does nothing
        	// when p is nil.
   862  	if p == nil {
   863  		return
   864  	}
   865  
        	// Directives on the first store: amd64 wants MOVUPS and no MOVQ;
        	// arm64 wants STP and no MOVD.
   866  	// amd64:"MOVUPS",-"MOVQ"
   867  	// arm64:"STP",-"MOVD"
   868  	p[0] = 0
        	// Directives on the second store forbid all of those instructions,
        	// so no store instruction of its own may be attributed to this line.
   869  	// amd64:-"MOVUPS",-"MOVQ"
   870  	// arm64:-"STP",-"MOVD"
   871  	p[1] = 0
   872  }
   873  
   874  func wideStore2(p *[8]uint64, x, y uint64) {
        	// Stores x into p[0] and y into p[1] with two separate statements;
        	// does nothing when p is nil.
   875  	if p == nil {
   876  		return
   877  	}
   878  
        	// Directive on the first store: s390x wants STMG.
   879  	// s390x:"STMG"
   880  	p[0] = x
        	// Directive on the second store: s390x must show neither STMG nor
        	// MOVD on this line.
   881  	// s390x:-"STMG",-"MOVD"
   882  	p[1] = y
   883  }
   884  
   885  func store32le(p *struct{ a, b uint32 }, x uint64) {
        	// Stores the low 32 bits of x into p.a and the high 32 bits into p.b.
        	// Directives on the first store: amd64 wants MOVQ with no MOVL or
        	// SHRQ; arm64 wants MOVD with no MOVW or LSR; ppc64le wants MOVD with
        	// no MOVW or SRD.
   886  	// amd64:"MOVQ",-"MOVL",-"SHRQ"
   887  	// arm64:"MOVD",-"MOVW",-"LSR"
   888  	// ppc64le:"MOVD",-"MOVW",-"SRD"
   889  	p.a = uint32(x)
        	// Directives on the second store only forbid the narrow store and
        	// the shift on the same three architectures.
   890  	// amd64:-"MOVL",-"SHRQ"
   891  	// arm64:-"MOVW",-"LSR"
   892  	// ppc64le:-"MOVW",-"SRD"
   893  	p.b = uint32(x >> 32)
   894  }
   895  func store32be(p *struct{ a, b uint32 }, x uint64) {
        	// Stores the high 32 bits of x into p.a and the low 32 bits into p.b.
        	// Directives on the first store: ppc64 and s390x want MOVD with no
        	// MOVW or SRD.
   896  	// ppc64:"MOVD",-"MOVW",-"SRD"
   897  	// s390x:"MOVD",-"MOVW",-"SRD"
   898  	p.a = uint32(x >> 32)
        	// Directives on the second store only forbid MOVW and SRD on the
        	// same two architectures.
   899  	// ppc64:-"MOVW",-"SRD"
   900  	// s390x:-"MOVW",-"SRD"
   901  	p.b = uint32(x)
   902  }
   903  func store16le(p *struct{ a, b uint16 }, x uint32) {
        	// Stores the low 16 bits of x into p.a and the high 16 bits into p.b.
        	// Directives on the first store: amd64 wants MOVL with no MOVW or
        	// SHRL; arm64 wants MOVW with no MOVH or UBFX; ppc64le wants MOVW
        	// with no MOVH or SRW.
   904  	// amd64:"MOVL",-"MOVW",-"SHRL"
   905  	// arm64:"MOVW",-"MOVH",-"UBFX"
   906  	// ppc64le:"MOVW",-"MOVH",-"SRW"
   907  	p.a = uint16(x)
        	// Directives on the second store only forbid the narrow store and
        	// the shift or bitfield extract on the same three architectures.
   908  	// amd64:-"MOVW",-"SHRL"
   909  	// arm64:-"MOVH",-"UBFX"
   910  	// ppc64le:-"MOVH",-"SRW"
   911  	p.b = uint16(x >> 16)
   912  }
   913  func store16be(p *struct{ a, b uint16 }, x uint32) {
        	// Stores the high 16 bits of x into p.a and the low 16 bits into p.b.
        	// Directives on the first store: ppc64 and s390x want MOVW with no
        	// MOVH or SRW.
   914  	// ppc64:"MOVW",-"MOVH",-"SRW"
   915  	// s390x:"MOVW",-"MOVH",-"SRW"
   916  	p.a = uint16(x >> 16)
        	// Directives on the second store only forbid MOVH and SRW on the
        	// same two architectures.
   917  	// ppc64:-"MOVH",-"SRW"
   918  	// s390x:-"MOVH",-"SRW"
   919  	p.b = uint16(x)
   920  }
   921  

View as plain text