Text file src/internal/bytealg/compare_riscv64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ¬∑Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
     9  #ifndef GOEXPERIMENT_regabiargs
    10  	MOV	a_base+0(FP), X10
    11  	MOV	a_len+8(FP), X11
    12  	MOV	b_base+24(FP), X12
    13  	MOV	b_len+32(FP), X13
    14  	MOV	$ret+48(FP), X14
    15  #else
    16  	// X10 = a_base
    17  	// X11 = a_len
    18  	// X12 = a_cap (unused)
    19  	// X13 = b_base (want in X12)
    20  	// X14 = b_len (want in X13)
    21  	// X15 = b_cap (unused)
    22  	MOV	X13, X12
    23  	MOV	X14, X13
    24  #endif
    25  	JMP	compare<>(SB)
    26  
    27  TEXT runtime¬∑cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
    28  #ifndef GOEXPERIMENT_regabiargs
    29  	MOV	a_base+0(FP), X10
    30  	MOV	a_len+8(FP), X11
    31  	MOV	b_base+16(FP), X12
    32  	MOV	b_len+24(FP), X13
    33  	MOV	$ret+32(FP), X14
    34  #endif
    35  	// X10 = a_base
    36  	// X11 = a_len
    37  	// X12 = b_base
    38  	// X13 = b_len
    39  	JMP	compare<>(SB)
    40  
    41  // On entry:
    42  // X10 points to start of a
    43  // X11 length of a
    44  // X12 points to start of b
    45  // X13 length of b
    46  // for non-regabi X14 points to the address to store the return value (-1/0/1)
    47  // for regabi the return value in X10
    48  TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
    49  	BEQ	X10, X12, cmp_len
    50  
    51  	MOV	X11, X5
    52  	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
    53  	MOV	X13, X5
    54  use_a_len:
    55  	BEQZ	X5, cmp_len
    56  
    57  	MOV	$32, X6
    58  	BLT	X5, X6, loop4_check
    59  
    60  	// Check alignment - if alignment differs we have to do one byte at a time.
    61  	AND	$3, X10, X7
    62  	AND	$3, X12, X8
    63  	BNE	X7, X8, loop4_check
    64  	BEQZ	X7, loop32_check
    65  
    66  	// Check one byte at a time until we reach 8 byte alignment.
    67  	SUB	X7, X5, X5
    68  align:
    69  	ADD	$-1, X7
    70  	MOVBU	0(X10), X8
    71  	MOVBU	0(X12), X9
    72  	BNE	X8, X9, cmp
    73  	ADD	$1, X10
    74  	ADD	$1, X12
    75  	BNEZ	X7, align
    76  
    77  loop32_check:
    78  	MOV	$32, X7
    79  	BLT	X5, X7, loop16_check
    80  loop32:
    81  	MOV	0(X10), X15
    82  	MOV	0(X12), X16
    83  	MOV	8(X10), X17
    84  	MOV	8(X12), X18
    85  	BEQ	X15, X16, loop32a
    86  	JMP	cmp8a
    87  loop32a:
    88  	BEQ	X17, X18, loop32b
    89  	JMP	cmp8b
    90  loop32b:
    91  	MOV	16(X10), X15
    92  	MOV	16(X12), X16
    93  	MOV	24(X10), X17
    94  	MOV	24(X12), X18
    95  	BEQ	X15, X16, loop32c
    96  	JMP	cmp8a
    97  loop32c:
    98  	BEQ	X17, X18, loop32d
    99  	JMP	cmp8b
   100  loop32d:
   101  	ADD	$32, X10
   102  	ADD	$32, X12
   103  	ADD	$-32, X5
   104  	BGE	X5, X7, loop32
   105  	BEQZ	X5, cmp_len
   106  
   107  loop16_check:
   108  	MOV	$16, X6
   109  	BLT	X5, X6, loop4_check
   110  loop16:
   111  	MOV	0(X10), X15
   112  	MOV	0(X12), X16
   113  	MOV	8(X10), X17
   114  	MOV	8(X12), X18
   115  	BEQ	X15, X16, loop16a
   116  	JMP	cmp8a
   117  loop16a:
   118  	BEQ	X17, X18, loop16b
   119  	JMP	cmp8b
   120  loop16b:
   121  	ADD	$16, X10
   122  	ADD	$16, X12
   123  	ADD	$-16, X5
   124  	BGE	X5, X6, loop16
   125  	BEQZ	X5, cmp_len
   126  
   127  loop4_check:
   128  	MOV	$4, X6
   129  	BLT	X5, X6, loop1
   130  loop4:
   131  	MOVBU	0(X10), X8
   132  	MOVBU	0(X12), X9
   133  	MOVBU	1(X10), X15
   134  	MOVBU	1(X12), X16
   135  	BEQ	X8, X9, loop4a
   136  	SLTU	X9, X8, X5
   137  	SLTU	X8, X9, X6
   138  	JMP	cmp_ret
   139  loop4a:
   140  	BEQ	X15, X16, loop4b
   141  	SLTU	X16, X15, X5
   142  	SLTU	X15, X16, X6
   143  	JMP	cmp_ret
   144  loop4b:
   145  	MOVBU	2(X10), X21
   146  	MOVBU	2(X12), X22
   147  	MOVBU	3(X10), X23
   148  	MOVBU	3(X12), X24
   149  	BEQ	X21, X22, loop4c
   150  	SLTU	X22, X21, X5
   151  	SLTU	X21, X22, X6
   152  	JMP	cmp_ret
   153  loop4c:
   154  	BEQ	X23, X24, loop4d
   155  	SLTU	X24, X23, X5
   156  	SLTU	X23, X24, X6
   157  	JMP	cmp_ret
   158  loop4d:
   159  	ADD	$4, X10
   160  	ADD	$4, X12
   161  	ADD	$-4, X5
   162  	BGE	X5, X6, loop4
   163  
   164  loop1:
   165  	BEQZ	X5, cmp_len
   166  	MOVBU	0(X10), X8
   167  	MOVBU	0(X12), X9
   168  	BNE	X8, X9, cmp
   169  	ADD	$1, X10
   170  	ADD	$1, X12
   171  	ADD	$-1, X5
   172  	JMP	loop1
   173  
   174  	// Compare 8 bytes of memory in X15/X16 that are known to differ.
   175  cmp8a:
   176  	MOV	$0xff, X19
   177  cmp8a_loop:
   178  	AND	X15, X19, X8
   179  	AND	X16, X19, X9
   180  	BNE	X8, X9, cmp
   181  	SLLI	$8, X19
   182  	JMP	cmp8a_loop
   183  
   184  	// Compare 8 bytes of memory in X17/X18 that are known to differ.
   185  cmp8b:
   186  	MOV	$0xff, X19
   187  cmp8b_loop:
   188  	AND	X17, X19, X8
   189  	AND	X18, X19, X9
   190  	BNE	X8, X9, cmp
   191  	SLLI	$8, X19
   192  	JMP	cmp8b_loop
   193  
   194  cmp_len:
   195  	MOV	X11, X8
   196  	MOV	X13, X9
   197  cmp:
   198  	SLTU	X9, X8, X5
   199  	SLTU	X8, X9, X6
   200  cmp_ret:
   201  	SUB	X5, X6, X10
   202  #ifndef GOEXPERIMENT_regabiargs
   203  	MOV	X10, (X14)
   204  #endif
   205  	RET
   206  

View as plain text