// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// ·Index is the entry point for the byte-slice form.
//
// Argument frame (56 bytes): a_base at +0, a_len at +8, b_base at +24,
// b_len at +32, result slot at +48. That layout matches two 24-byte slice
// headers followed by one word of result; presumably the Go declaration is
// func Index(a, b []byte) int -- confirm against the Go side.
//
// It only moves the operands into the registers indexbody expects and
// branches there; indexbody writes the result and returns to our caller.
TEXT ·Index(SB),NOSPLIT,$0-56
	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
	MOVD	a_len+8(FP), R1		// R1 = haystack length
	MOVD	b_base+24(FP), R2	// R2 = needle pointer
	MOVD	b_len+32(FP), R3	// R3 = needle length
	MOVD	$ret+48(FP), R9		// R9 = address of the result slot
	B	indexbody<>(SB)

// ·IndexString is the entry point for the string form.
//
// Argument frame (40 bytes): a_base at +0, a_len at +8, b_base at +16,
// b_len at +24, result slot at +32. That layout matches two 16-byte string
// headers followed by one word of result; presumably the Go declaration is
// func IndexString(a, b string) int -- confirm against the Go side.
//
// Register setup is identical to ·Index; only the frame offsets differ.
TEXT ·IndexString(SB),NOSPLIT,$0-40
	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
	MOVD	a_len+8(FP), R1		// R1 = haystack length
	MOVD	b_base+16(FP), R2	// R2 = needle pointer
	MOVD	b_len+24(FP), R3	// R3 = needle length
	MOVD	$ret+32(FP), R9		// R9 = address of the result slot
	B	indexbody<>(SB)

// indexbody is the shared search routine behind both entry points.
//
// On entry:
//   R0: haystack pointer
//   R1: haystack length
//   R2: needle pointer
//   R3: needle length, 2 <= len <= 32
//   R9: address where the result is stored
//
// On exit, the word at (R9) holds the byte offset of the first match,
// or -1 when there is none.
//
// Strategy: the needle is loaded into registers once, up front, so each
// candidate position in the haystack costs only haystack loads and
// register compares. There is one dedicated scan loop per needle length
// (2..8) or per length class (9-16, 17-24, 25-32).
//
// Every scan loop reads the leading chunk with a post-incrementing load
// that advances R0 by one byte. Consequently, inside a loop body after
// that load, and at the match label, R0 points one byte past the start of
// the candidate window.
TEXT indexbody<>(SB),NOSPLIT,$0-56
	// R8 = haystack + 1. Because R0 is one past the window start on a
	// hit, R0 - R8 is the match offset.
	ADD	$1, R0, R8
	// R4 = haystack + (len(haystack) - len(needle)): the last address at
	// which a candidate window may start.
	SUB	R3, R1, R4
	ADD	R0, R4, R4

	// Dispatch on the needle length. For lengths <= 8 the individual
	// bits of R3 are tested instead of comparing against each value.
	CMP	$8, R3
	BHI	needle_9_32
	TBZ	$3, R3, needle_2_7	// bit 3 clear => length is 2..7

needle_8:
	// R5 = all 8 needle bytes.
	MOVD	(R2), R5
scan_8:
	CMP	R4, R0
	BHI	no_match		// cursor ran past the last valid start
	MOVD.P	1(R0), R6		// R6 = 8 haystack bytes at cursor; cursor++
	CMP	R5, R6
	BNE	scan_8
	B	match

needle_2_7:
	TBZ	$2, R3, needle_2_3	// bit 2 clear => length is 2 or 3
	TBZ	$1, R3, needle_4_5	// bit 1 clear => length is 4 or 5
	TBZ	$0, R3, needle_6	// bit 0 clear => length is 6, else 7

needle_7:
	// R5 = needle bytes 0..3, R6 = needle bytes 3..6. The two words
	// overlap by one byte so that two 4-byte loads cover 7 bytes.
	MOVWU	(R2), R5
	MOVWU	3(R2), R6
scan_7:
	CMP	R4, R0
	BHI	no_match
	// R3 (needle length) is not needed after dispatch; reuse it as scratch.
	MOVWU.P	1(R0), R3		// window bytes 0..3; cursor++
	CMP	R5, R3
	BNE	scan_7
	MOVWU	2(R0), R3		// window bytes 3..6 (R0 is already +1)
	CMP	R6, R3
	BNE	scan_7
	B	match

needle_6:
	// R5 = needle bytes 0..3, R6 = needle bytes 4..5.
	MOVWU	(R2), R5
	MOVHU	4(R2), R6
scan_6:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3		// window bytes 0..3; cursor++
	CMP	R5, R3
	BNE	scan_6
	MOVHU	3(R0), R3		// window bytes 4..5 (R0 is already +1)
	CMP	R6, R3
	BNE	scan_6
	B	match

needle_4_5:
	TBZ	$0, R3, needle_4	// bit 0 clear => length is 4, else 5

needle_5:
	// R5 = needle bytes 0..3, R7 = needle byte 4.
	MOVWU	(R2), R5
	MOVBU	4(R2), R7
scan_5:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3		// window bytes 0..3; cursor++
	CMP	R5, R3
	BNE	scan_5
	MOVBU	3(R0), R3		// window byte 4 (R0 is already +1)
	CMP	R7, R3
	BNE	scan_5
	B	match

needle_4:
	// R5 = all 4 needle bytes.
	MOVWU	(R2), R5
scan_4:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R6		// 4 haystack bytes at cursor; cursor++
	CMP	R5, R6
	BNE	scan_4
	B	match

needle_2_3:
	TBZ	$0, R3, needle_2	// bit 0 clear => length is 2, else 3

needle_3:
	// R6 = needle bytes 0..1, R7 = needle byte 2.
	MOVHU	(R2), R6
	MOVBU	2(R2), R7
scan_3:
	CMP	R4, R0
	BHI	no_match
	MOVHU.P	1(R0), R3		// window bytes 0..1; cursor++
	CMP	R6, R3
	BNE	scan_3
	MOVBU	1(R0), R3		// window byte 2 (R0 is already +1)
	CMP	R7, R3
	BNE	scan_3
	B	match

needle_2:
	// R5 = both needle bytes.
	MOVHU	(R2), R5
scan_2:
	CMP	R4, R0
	BHI	no_match
	MOVHU.P	1(R0), R6		// 2 haystack bytes at cursor; cursor++
	CMP	R5, R6
	BNE	scan_2
	// A hit falls straight through into match.

match:
	// R0 is one past the window start and R8 is haystack + 1, so the
	// difference is the offset of the match within the haystack.
	SUB	R8, R0, R0
	MOVD	R0, (R9)
	RET

no_match:
	MOVD	$-1, R0
	MOVD	R0, (R9)
	RET

needle_9_32:
	// R11 = len(needle) - 9. Inside the loops below R0 is window start + 1,
	// so (R0)(R11) addresses the final 8 bytes of the candidate window.
	// That tail load may overlap bytes already compared; this is how a
	// length that is not a multiple of 8 is covered with 8-byte loads.
	SUB	$9, R3, R11
	CMP	$16, R3
	BHI	needle_17_32

needle_9_16:
	MOVD.P	8(R2), R5		// R5 = needle bytes 0..7; R2 += 8
	SUB	$16, R3, R7		// R7 = len - 16, so R2 + R7 = needle + len - 8
	MOVD	(R2)(R7), R6		// R6 = last 8 needle bytes
scan_9_16:
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R7		// window bytes 0..7; cursor++
	CMP	R5, R7
	BNE	scan_9_16
	MOVD	(R0)(R11), R7		// last 8 window bytes
	CMP	R6, R7
	BNE	scan_9_16
	B	match

needle_17_32:
	CMP	$24, R3
	BHI	needle_25_32

needle_17_24:
	LDP.P	16(R2), (R5, R6)	// R5, R6 = needle bytes 0..15; R2 += 16
	SUB	$24, R3, R10		// R10 = len - 24, so R2 + R10 = needle + len - 8
	MOVD	(R2)(R10), R7		// R7 = last 8 needle bytes
scan_17_24:
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R10		// window bytes 0..7; cursor++
	CMP	R5, R10
	BNE	scan_17_24
	MOVD	7(R0), R10		// window bytes 8..15 (R0 is already +1)
	CMP	R6, R10
	BNE	scan_17_24
	MOVD	(R0)(R11), R10		// last 8 window bytes
	CMP	R7, R10
	BNE	scan_17_24
	B	match

needle_25_32:
	LDP.P	16(R2), (R5, R6)	// R5, R6 = needle bytes 0..15; R2 += 16
	MOVD.P	8(R2), R7		// R7 = needle bytes 16..23; R2 += 8
	SUB	$32, R3, R12		// R12 = len - 32, so R2 + R12 = needle + len - 8
	MOVD	(R2)(R12), R10		// R10 = last 8 needle bytes
scan_25_32:
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R12		// window bytes 0..7; cursor++
	CMP	R5, R12
	BNE	scan_25_32
	MOVD	7(R0), R12		// window bytes 8..15 (R0 is already +1)
	CMP	R6, R12
	BNE	scan_25_32
	MOVD	15(R0), R12		// window bytes 16..23
	CMP	R7, R12
	BNE	scan_25_32
	MOVD	(R0)(R11), R12		// last 8 window bytes
	CMP	R10, R12
	BNE	scan_25_32
	B	match