1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
//
// On entry (register arguments):
//	R0 = a
//	R1 = b
//	R2 = size
// On exit:
//	R0 = 1 if the two regions match, 0 otherwise
//
// The general case tail-branches into memeqbody, which produces the
// result in R0 and returns directly to our caller.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// short path to handle 0-byte case: empty regions always match
	CBZ	R2, equal
	// short path to handle equal pointers: a region always matches itself,
	// so there is no need to read it (same check as memequal_varlen)
	CMP	R0, R1
	BEQ	equal
	B	memeqbody<>(SB)
equal:
	MOVD	$1, R0
	RET
16
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Variant of memequal whose length is not passed as an argument: it is
// read from the closure referenced by R26.
//
// On entry:
//	R0  = a
//	R1  = b
//	R26 = closure; the compiler stores the size at offset 8
// On exit:
//	R0 = 1 if the two regions match, 0 otherwise
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	// Identical pointers: nothing to compare.
	CMP	R0, R1
	BEQ	trivially_equal
	// Fetch the length from the closure; a zero length also matches trivially.
	MOVD	8(R26), R2
	CBZ	R2, trivially_equal
	// R0, R1, R2 are now laid out as memeqbody expects; it returns for us.
	B	memeqbody<>(SB)
trivially_equal:
	MOVD	$1, R0
	RET
27
// memeqbody is the shared comparison kernel behind memequal and
// memequal_varlen.
//
// On entry:
//	R0 = pointer a
//	R1 = pointer b
//	R2 = length in bytes (both entry points in this file filter out 0 first)
// On exit:
//	R0 = 1 if every byte matches, 0 otherwise
// Overwritten: R0-R7, R9, V0-V11 and the condition flags.
//
// Strategy: 64-byte vector blocks, then 16-byte register-pair blocks,
// then a tail of fewer than 16 bytes.
TEXT memeqbody<>(SB),NOSPLIT,$0
	// A length of exactly one byte gets a dedicated fast path.
	CMP	$1, R2
	BEQ	single_byte
	// Anything shorter than 16 bytes is handled entirely by the tail code.
	CMP	$16, R2
	BLO	short_tail
	// R3 = length rounded down to a multiple of 64.
	BIC	$0x3f, R2, R3
	CBZ	R3, blocks16
	// R6 = value R0 will have once all 64-byte blocks are consumed.
	ADD	R3, R0, R6
loop64:
	// Load 64 bytes from each side; both pointers post-increment.
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	// Each 64-bit lane becomes all ones where the inputs match, zero otherwise.
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	// Fold the four results: a lane of V8 stays non-zero only if that
	// lane matched in all four vector pairs.
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// Set the loop-exit flags now; the VMOV and CBZ instructions below
	// leave them intact for the BNE.
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, mismatch
	CBZ	R5, mismatch
	BNE	loop64
	// Keep only the bytes left over after the 64-byte blocks.
	AND	$0x3f, R2, R2
	CBZ	R2, match
blocks16:
	// R3 = remaining length rounded down to a multiple of 16.
	BIC	$0xf, R2, R3
	CBZ	R3, short_tail
	// R6 = value R0 will have once all 16-byte blocks are consumed.
	ADD	R3, R0, R6
loop16:
	// Two 8-byte words from each side per iteration, post-incrementing.
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	// XOR is non-zero exactly when the words differ.
	EOR	R4, R7
	CBNZ	R7, mismatch
	EOR	R5, R9
	CBNZ	R9, mismatch
	CMP	R0, R6
	BNE	loop16
	// Keep only the bytes left over after the 16-byte blocks.
	AND	$0xf, R2, R2
	CBZ	R2, match
short_tail:
	// Here R2 < 16. Bit 3 set means 8..15 bytes remain: compare the first
	// 8 and the last 8 bytes; the two windows may overlap, which is harmless.
	TBZ	$3, R2, under8
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5
	CBNZ	R5, mismatch
	SUB	$8, R2, R6	// R6 = offset of the final 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, mismatch
	B	match
under8:
	// Bit 2 set means 4..7 bytes remain: same overlapping trick with
	// 4-byte loads.
	TBZ	$2, R2, under4
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5
	CBNZ	R5, mismatch
	SUB	$4, R2, R6	// R6 = offset of the final 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, mismatch
	B	match
under4:
	// Bit 1 set means 2 or 3 bytes remain: compare one halfword and advance
	// both pointers so a possible trailing byte is addressed next.
	TBZ	$1, R2, under2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	CMP	R4, R5
	BNE	mismatch
under2:
	// Bit 0 clear means nothing is left to compare.
	TBZ	$0, R2, match
single_byte:
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	CMP	R4, R5
	BNE	mismatch
match:
	MOVD	$1, R0
	RET
mismatch:
	MOVB	ZR, R0
	RET
122
View as plain text