// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include "go_asm.h" #include "textflag.h" // Caller must confirm availability of vx facility before calling. TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56 LMG a_base+0(FP), R1, R2 // R1=&s[0], R2=len(s) LMG b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep) MOVD $ret+48(FP), R5 BR indexbody<>(SB) // Caller must confirm availability of vx facility before calling. TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40 LMG a_base+0(FP), R1, R2 // R1=&s[0], R2=len(s) LMG b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep) MOVD $ret+32(FP), R5 BR indexbody<>(SB) // s: string we are searching // sep: string to search for // R1=&s[0], R2=len(s) // R3=&sep[0], R4=len(sep) // R5=&ret (int) // Caller must confirm availability of vx facility before calling. TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0 CMPBGT R4, R2, notfound ADD R1, R2 SUB R4, R2 // R2=&s[len(s)-len(sep)] (last valid index) CMPBEQ R4, $0, notfound SUB $1, R4 // R4=len(sep)-1 for use as VLL index VLL R4, (R3), V0 // contains first 16 bytes of sep MOVD R1, R7 index2plus: CMPBNE R4, $1, index3plus MOVD $15(R7), R9 CMPBGE R9, R2, index2to16 VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00... VONE V16 VREPH $0, V0, V1 CMPBGE R9, R2, index2to16 index2loop: VL 0(R7), V2 // 16 bytes, even indices VL 1(R7), V4 // 16 bytes, odd indices VCEQH V1, V2, V5 // compare even indices VCEQH V1, V4, V6 // compare odd indices VSEL V5, V6, V31, V7 // merge even and odd indices VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found BLT foundV17 MOVD $16(R7), R7 // R7+=16 ADD $15, R7, R9 CMPBLE R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search) CMPBLE R7, R2, index2to16 BR notfound index3plus: CMPBNE R4, $2, index4plus ADD $15, R7, R9 CMPBGE R9, R2, index2to16 MOVD $1, R0 VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00... VONE V16 VREPH $0, V0, V1 VREPB $2, V0, V8 index3loop: VL (R7), V2 // load 16-bytes into V2 VLL R0, 16(R7), V3 // load 2-bytes into V3 VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1 VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2 VCEQH V1, V2, V5 // compare 2-byte even indices VCEQH V1, V4, V6 // compare 2-byte odd indices VCEQB V8, V9, V10 // compare last bytes VSEL V5, V6, V31, V7 // merge even and odd indices VN V7, V10, V7 // AND indices with last byte VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found BLT foundV17 MOVD $16(R7), R7 // R7+=16 ADD $15, R7, R9 CMPBLE R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search) CMPBLE R7, R2, index2to16 BR notfound index4plus: CMPBNE R4, $3, index5plus ADD $15, R7, R9 CMPBGE R9, R2, index2to16 MOVD $2, R0 VGBM $0x8888, V29 // 0xff000000ff000000... VGBM $0x2222, V30 // 0x0000ff000000ff00... VGBM $0xcccc, V31 // 0xffff0000ffff0000... VONE V16 VREPF $0, V0, V1 index4loop: VL (R7), V2 // load 16-bytes into V2 VLL R0, 16(R7), V3 // load 3-bytes into V3 VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1 VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<1 VSLDB $3, V2, V3, V10 // V10=(V2:V3)<<1 VCEQF V1, V2, V5 // compare index 0, 4, ... VCEQF V1, V4, V6 // compare index 1, 5, ... VCEQF V1, V9, V11 // compare index 2, 6, ... VCEQF V1, V10, V12 // compare index 3, 7, ... VSEL V5, V6, V29, V13 // merge index 0, 1, 4, 5, ... VSEL V11, V12, V30, V14 // merge index 2, 3, 6, 7, ... VSEL V13, V14, V31, V7 // final merge VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found BLT foundV17 MOVD $16(R7), R7 // R7+=16 ADD $15, R7, R9 CMPBLE R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search) CMPBLE R7, R2, index2to16 BR notfound index5plus: CMPBGT R4, $15, index17plus index2to16: CMPBGT R7, R2, notfound MOVD $1(R7), R8 CMPBGT R8, R2, index2to16tail index2to16loop: // unrolled 2x VLL R4, (R7), V1 VLL R4, 1(R7), V2 VCEQGS V0, V1, V3 BEQ found MOVD $1(R7), R7 VCEQGS V0, V2, V4 BEQ found MOVD $1(R7), R7 CMPBLT R7, R2, index2to16loop CMPBGT R7, R2, notfound index2to16tail: VLL R4, (R7), V1 VCEQGS V0, V1, V2 BEQ found BR notfound index17plus: CMPBGT R4, $31, index33plus SUB $16, R4, R0 VLL R0, 16(R3), V1 VONE V7 index17to32loop: VL (R7), V2 VLL R0, 16(R7), V3 VCEQG V0, V2, V4 VCEQG V1, V3, V5 VN V4, V5, V6 VCEQGS V6, V7, V8 BEQ found MOVD $1(R7), R7 CMPBLE R7, R2, index17to32loop BR notfound index33plus: CMPBGT R4, $47, index49plus SUB $32, R4, R0 VL 16(R3), V1 VLL R0, 32(R3), V2 VONE V11 index33to48loop: VL (R7), V3 VL 16(R7), V4 VLL R0, 32(R7), V5 VCEQG V0, V3, V6 VCEQG V1, V4, V7 VCEQG V2, V5, V8 VN V6, V7, V9 VN V8, V9, V10 VCEQGS V10, V11, V12 BEQ found MOVD $1(R7), R7 CMPBLE R7, R2, index33to48loop BR notfound index49plus: CMPBGT R4, $63, index65plus SUB $48, R4, R0 VL 16(R3), V1 VL 32(R3), V2 VLL R0, 48(R3), V3 VONE V15 index49to64loop: VL (R7), V4 VL 16(R7), V5 VL 32(R7), V6 VLL R0, 48(R7), V7 VCEQG V0, V4, V8 VCEQG V1, V5, V9 VCEQG V2, V6, V10 VCEQG V3, V7, V11 VN V8, V9, V12 VN V10, V11, V13 VN V12, V13, V14 VCEQGS V14, V15, V16 BEQ found MOVD $1(R7), R7 CMPBLE R7, R2, index49to64loop notfound: MOVD $-1, (R5) RET index65plus: // not implemented MOVD $0, (R0) RET foundV17: // index is in doubleword V17[0] VLGVG $0, V17, R8 ADD R8, R7 found: SUB R1, R7 MOVD R7, (R5) RET