Text file src/internal/bytealg/compare_riscv64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
        	// Entry point that receives a and b as two (base, len, cap) register
        	// triples. It repacks them into the four registers compare<> reads
        	// (X10..X13) and jumps there without linking, so the RET at the end
        	// of compare<> returns straight to this function's caller with the
        	// result in X10.
     9  	// X10 = a_base
    10  	// X11 = a_len
    11  	// X12 = a_cap (unused)
    12  	// X13 = b_base (want in X12)
    13  	// X14 = b_len (want in X13)
    14  	// X15 = b_cap (unused)
    15  	MOV	X13, X12	// X12 = b_base; must happen before X13 is overwritten below
    16  	MOV	X14, X13	// X13 = b_len
    17  	JMP	compare<>(SB)
    18  
    19  TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
        	// Entry point whose four arguments already sit in the registers that
        	// compare<> reads, so it jumps there directly with no shuffling.
        	// compare<>'s RET returns to this function's caller with the result
        	// in X10.
    20  	// X10 = a_base
    21  	// X11 = a_len
    22  	// X12 = b_base
    23  	// X13 = b_len
    24  	JMP	compare<>(SB)
    25  
    26  // compare<> is the shared routine that ·Compare and runtime·cmpstring jump to.
        // It compares the first min(X11, X13) bytes of a and b as unsigned bytes and,
        // if that common prefix is identical, decides the result from the two lengths.
        //
        // On entry:
    27  // X10 points to start of a
    28  // X11 length of a
    29  // X12 points to start of b
    30  // X13 length of b
    31  // On exit:
    32  // X10 holds the result: -1 if a < b, 0 if a == b, +1 if a > b
        //
        // NOTE(review): an earlier version of this comment described a non-regabi
        // variant that stored the result through X14. Nothing in this routine reads
        // X14; the result is only ever produced in X10.
        //
        // Working registers (all overwritten):
        //   X5        bytes of the common prefix still to compare
        //   X6        size threshold of the current stage (32, 16, 8 or 4)
        //   X7        alignment scratch / align-loop counter
        //   X8, X9    the a-side / b-side values handed to the final comparison
        //   X15-X25, X28-X30  data loaded from a and b
        // X10 and X12 are advanced as bytes are consumed. At cmp/cmp_ret X5 and X6
        // are reused to hold the two SLTU results.
    33  TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
    34  	BEQ	X10, X12, cmp_len	// same start address: prefix is identical, lengths decide
    35  
    36  	MOV	X11, X5
    37  	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
    38  	MOV	X13, X5
    39  use_a_len:
    40  	BEQZ	X5, cmp_len	// at least one side is empty: lengths decide
    41  
    42  	MOV	$32, X6
    43  	BLT	X5, X6, check8_unaligned	// fewer than 32 bytes: skip the word-at-a-time path
    44  
    45  	// Check alignment - if alignment differs we have to do one byte at a time.
    46  	AND	$7, X10, X7	// X7 = offset of a within an 8-byte word
    47  	AND	$7, X12, X8	// X8 = offset of b within an 8-byte word
    48  	BNE	X7, X8, check8_unaligned
    49  	BEQZ	X7, compare32	// both pointers already 8-byte aligned
    50  
    51  	// Check one byte at a time until we reach 8 byte alignment.
    52  	SUB	X7, X0, X7	// X7 = 0 - X7
    53  	ADD	$8, X7, X7	// X7 = 8 - offset = bytes needed to reach alignment (1..7)
    54  	SUB	X7, X5, X5	// account up front for the bytes the align loop consumes
    55  align:
    56  	SUB	$1, X7
    57  	MOVBU	0(X10), X8
    58  	MOVBU	0(X12), X9
    59  	BNE	X8, X9, cmp	// mismatching bytes are already in X8/X9
    60  	ADD	$1, X10
    61  	ADD	$1, X12
    62  	BNEZ	X7, align
    63  
        	// check32 is only reached by falling out of the align loop. At least
        	// 32 bytes remained before aligning and at most 7 were consumed, so
        	// X5 >= 25 here: if a 32-byte round no longer fits, a 16-byte round
        	// still does, which is why the branch goes straight to compare16.
    64  check32:
    65  	// X6 contains $32
    66  	BLT	X5, X6, compare16
        	// Aligned main loop: 32 bytes per iteration, handled as two halves of
        	// two 8-byte words each. Both words of a half are loaded before
        	// either is tested; the first word is tested first so the earliest
        	// difference is the one reported.
    67  compare32:
    68  	MOV	0(X10), X15
    69  	MOV	0(X12), X16
    70  	MOV	8(X10), X17
    71  	MOV	8(X12), X18
    72  	BNE	X15, X16, cmp8a
    73  	BNE	X17, X18, cmp8b
    74  	MOV	16(X10), X15
    75  	MOV	16(X12), X16
    76  	MOV	24(X10), X17
    77  	MOV	24(X12), X18
    78  	BNE	X15, X16, cmp8a
    79  	BNE	X17, X18, cmp8b
    80  	ADD	$32, X10
    81  	ADD	$32, X12
    82  	SUB	$32, X5
    83  	BGE	X5, X6, compare32	// X6 is still 32
    84  	BEQZ	X5, cmp_len
    85  
        	// check16 is only reached by falling out of the 32-byte loop, with
        	// 0 < X5 < 32 and the pointers still 8-byte aligned.
    86  check16:
    87  	MOV	$16, X6
    88  	BLT	X5, X6, check8_unaligned
        	// A single 16-byte round (two 8-byte words); it is not a loop.
    89  compare16:
    90  	MOV	0(X10), X15
    91  	MOV	0(X12), X16
    92  	MOV	8(X10), X17
    93  	MOV	8(X12), X18
    94  	BNE	X15, X16, cmp8a
    95  	BNE	X17, X18, cmp8b
    96  	ADD	$16, X10
    97  	ADD	$16, X12
    98  	SUB	$16, X5
    99  	BEQZ	X5, cmp_len
   100  
        	// Byte-load stages. Despite the name, these are reached three ways:
        	// inputs shorter than 32 bytes, inputs whose pointers disagree in
        	// alignment, and the tail left over from the aligned path above.
        	// Only single-byte loads are used, so alignment does not matter.
   101  check8_unaligned:
   102  	MOV	$8, X6
   103  	BLT	X5, X6, check4_unaligned
        	// 8 bytes per iteration. All sixteen bytes are loaded into distinct
        	// registers first, then the pairs are tested in address order so the
        	// first mismatch wins; each cmp1x target compares exactly the pair
        	// that differed.
        	// NOTE(review): the register sequence skips X26 and X27 - presumably
        	// they are reserved by the Go runtime on riscv64; confirm against the
        	// ABI documentation.
   104  compare8_unaligned:
   105  	MOVBU	0(X10), X8
   106  	MOVBU	1(X10), X15
   107  	MOVBU	2(X10), X17
   108  	MOVBU	3(X10), X19
   109  	MOVBU	4(X10), X21
   110  	MOVBU	5(X10), X23
   111  	MOVBU	6(X10), X25
   112  	MOVBU	7(X10), X29
   113  	MOVBU	0(X12), X9
   114  	MOVBU	1(X12), X16
   115  	MOVBU	2(X12), X18
   116  	MOVBU	3(X12), X20
   117  	MOVBU	4(X12), X22
   118  	MOVBU	5(X12), X24
   119  	MOVBU	6(X12), X28
   120  	MOVBU	7(X12), X30
   121  	BNE	X8, X9, cmp1a
   122  	BNE	X15, X16, cmp1b
   123  	BNE	X17, X18, cmp1c
   124  	BNE	X19, X20, cmp1d
   125  	BNE	X21, X22, cmp1e
   126  	BNE	X23, X24, cmp1f
   127  	BNE	X25, X28, cmp1g
   128  	BNE	X29, X30, cmp1h
   129  	ADD	$8, X10
   130  	ADD	$8, X12
   131  	SUB	$8, X5
   132  	BGE	X5, X6, compare8_unaligned
   133  	BEQZ	X5, cmp_len
   134  
   135  check4_unaligned:
   136  	MOV	$4, X6
   137  	BLT	X5, X6, compare1
        	// Same scheme as the 8-byte stage, four bytes per iteration, reusing
        	// the first four register pairs and the cmp1a..cmp1d targets.
   138  compare4_unaligned:
   139  	MOVBU	0(X10), X8
   140  	MOVBU	1(X10), X15
   141  	MOVBU	2(X10), X17
   142  	MOVBU	3(X10), X19
   143  	MOVBU	0(X12), X9
   144  	MOVBU	1(X12), X16
   145  	MOVBU	2(X12), X18
   146  	MOVBU	3(X12), X20
   147  	BNE	X8, X9, cmp1a
   148  	BNE	X15, X16, cmp1b
   149  	BNE	X17, X18, cmp1c
   150  	BNE	X19, X20, cmp1d
   151  	ADD	$4, X10
   152  	ADD	$4, X12
   153  	SUB	$4, X5
   154  	BGE	X5, X6, compare4_unaligned
   155  
        	// Final byte-at-a-time loop. The zero test sits at the top, so it also
        	// handles the case where the previous stage consumed everything.
   156  compare1:
   157  	BEQZ	X5, cmp_len
   158  	MOVBU	0(X10), X8
   159  	MOVBU	0(X12), X9
   160  	BNE	X8, X9, cmp
   161  	ADD	$1, X10
   162  	ADD	$1, X12
   163  	SUB	$1, X5
   164  	JMP	compare1
   165  
   166  	// Compare 8 bytes of memory in X15/X16 that are known to differ.
   167  cmp8a:
   168  	MOV	X15, X17	// move the pair into X17/X18 and fall through to cmp8b
   169  	MOV	X16, X18
   170  
   171  	// Compare 8 bytes of memory in X17/X18 that are known to differ.
        	// A one-byte mask in X19 walks from the least-significant byte lane
        	// upward; the first lane whose masked values differ is handed to cmp.
        	// Both masked values occupy the same lane, so an unsigned comparison
        	// of them orders the underlying bytes. The loop has no counter: it
        	// terminates because the words are known to differ in some lane.
        	// NOTE(review): scanning from the low lane finds the lowest-addressed
        	// differing byte only when the 8-byte loads are little-endian -
        	// presumably always true for this port; confirm.
   172  cmp8b:
   173  	MOV	$0xff, X19
   174  cmp8_loop:
   175  	AND	X17, X19, X8
   176  	AND	X18, X19, X9
   177  	BNE	X8, X9, cmp
   178  	SLLI	$8, X19	// advance the mask to the next higher byte lane
   179  	JMP	cmp8_loop
   180  
        	// cmp1a..cmp1h: one target per byte position of the byte-load stages.
        	// Each performs the same two SLTU comparisons as cmp, but directly on
        	// the register pair that mismatched, then joins at cmp_ret.
   181  cmp1a:
   182  	SLTU	X9, X8, X5
   183  	SLTU	X8, X9, X6
   184  	JMP	cmp_ret
   185  cmp1b:
   186  	SLTU	X16, X15, X5
   187  	SLTU	X15, X16, X6
   188  	JMP	cmp_ret
   189  cmp1c:
   190  	SLTU	X18, X17, X5
   191  	SLTU	X17, X18, X6
   192  	JMP	cmp_ret
   193  cmp1d:
   194  	SLTU	X20, X19, X5
   195  	SLTU	X19, X20, X6
   196  	JMP	cmp_ret
   197  cmp1e:
   198  	SLTU	X22, X21, X5
   199  	SLTU	X21, X22, X6
   200  	JMP	cmp_ret
   201  cmp1f:
   202  	SLTU	X24, X23, X5
   203  	SLTU	X23, X24, X6
   204  	JMP	cmp_ret
   205  cmp1g:
   206  	SLTU	X28, X25, X5
   207  	SLTU	X25, X28, X6
   208  	JMP	cmp_ret
   209  cmp1h:
   210  	SLTU	X30, X29, X5
   211  	SLTU	X29, X30, X6
   212  	JMP	cmp_ret
   213  
        	// cmp_len: the common prefix matched (or was never examined because
        	// the base pointers are equal or a length is zero), so the lengths
        	// themselves are compared as the deciding values.
   214  cmp_len:
   215  	MOV	X11, X8
   216  	MOV	X13, X9
        	// cmp: X8 is the a-side value, X9 the b-side value.
   217  cmp:
   218  	SLTU	X9, X8, X5	// X5 = 1 if X8 < X9 (unsigned), else 0
   219  	SLTU	X8, X9, X6	// X6 = 1 if X9 < X8 (unsigned), else 0
   220  cmp_ret:
   221  	SUB	X5, X6, X10	// X10 = X6 - X5: -1 when a is smaller, +1 when larger, 0 when equal
   222  	RET
   223  

View as plain text