// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// func Compare(a, b []byte) int
//
// The body reads its arguments directly from registers and leaves the
// result in X10, so the symbol must be declared <ABIInternal>. Without the
// selector it would be an ABI0 (stack-argument) function whose result slot
// is never written.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

// func cmpstring(a, b string) int
//
// Register arguments already match what compare<> expects, so this is a
// plain tail jump. Declared <ABIInternal> for the same reason as Compare.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// compare<> is the shared three-way comparison body.
//
// On entry:
//	X10 points to start of a
//	X11 length of a
//	X12 points to start of b
//	X13 length of b
// On return:
//	X10 holds -1 if a < b, 0 if a == b, +1 if a > b
//
// Working registers:
//	X5  bytes still to compare (starts as min(len(a), len(b)))
//	X6  size of the chunk currently being compared (32, 16, 8 or 4)
//	X7  alignment scratch / count of bytes needed to reach 8 byte alignment
//	X8, X9 and the X15-X30 pairs hold loaded data from a and b respectively
//
// X11 and X13 are never modified, so cmp_len can still compare the original
// lengths after the common prefix has been found equal.
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same base pointer: the common prefix is trivially equal, so only the
	// lengths can decide the result.
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	// Nothing in common to compare: decide on lengths alone.
	BEQZ	X5, cmp_len

	// Fewer than 32 bytes: skip the word-at-a-time path entirely.
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
	// X7 = 8 - (addr & 7) is the number of leading bytes to consume; they
	// are subtracted from the remaining count up front.
	SUB	X7, X0, X7
	ADD	$8, X7, X7
	SUB	X7, X5, X5
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

check32:
	// X6 contains $32
	// At most 7 bytes were consumed from a count that was at least 32, so
	// at least 25 remain and entering compare16 directly is safe.
	BLT	X5, X6, compare16
compare32:
	// Both pointers are 8 byte aligned here; compare four doublewords per
	// iteration.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	// Two aligned doublewords; executed at most once per call.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	// Byte loads only, so this path makes no alignment assumption. All
	// eight bytes of each operand are loaded before any comparison; the
	// cmp1a..cmp1h targets then pick the first differing pair.
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned

compare1:
	// Tail loop: one byte per iteration until the count reaches zero.
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
	// The mask in X19 starts at the least significant byte, which is the
	// lowest memory address on little-endian riscv64, and moves up one
	// byte per iteration. The loop terminates because the doublewords
	// are known to differ.
cmp8b:
	MOV	$0xff, X19
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

	// cmp1a..cmp1h: one landing pad per byte position of the unaligned
	// compares. Each sets X5 = (a_byte < b_byte) and X6 = (b_byte < a_byte)
	// for its own register pair.
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

	// The common prefix is equal: the shorter operand sorts first.
cmp_len:
	MOV	X11, X8
	MOV	X13, X9
	// X8/X9 hold the deciding values from a/b (bytes or lengths).
cmp:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	// X10 = (b < a) - (a < b), i.e. -1, 0 or +1.
cmp_ret:
	SUB	X5, X6, X10
	RET