1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "asm_amd64.h"
7 #include "textflag.h"
8
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes starting at a and the size bytes
// starting at b are identical.
//
// On entry (register arguments):
//	AX = a
//	BX = b
//	CX = size
// The result is returned in AX.
//
// Identical pointers are answered immediately without touching memory.
// Otherwise the arguments are shuffled into the registers memeqbody
// expects (SI = a, DI = b, BX = size) and control is transferred there.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
	CMPQ	AX, BX
	JEQ	same			// a == b: same memory, trivially equal
	MOVQ	BX, DI			// b -> DI (must be saved before BX is overwritten)
	MOVQ	CX, BX			// size -> BX
	MOVQ	AX, SI			// a -> SI
	JMP	memeqbody<>(SB)		// tail jump: memeqbody's RET returns to our caller
same:
	MOVQ	$1, AX			// return 1
	RET
23
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Reports whether two memory regions are byte-for-byte identical when the
// length is not passed as an argument but read from the closure that DX
// points to.
//
// On entry:
//	AX    = a
//	BX    = b
//	8(DX) = size
// The result is returned in AX.
//
// Identical pointers are answered immediately, without loading the size.
// Otherwise the values are placed where memeqbody expects them
// (SI = a, DI = b, BX = size) and control is transferred there.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	CMPQ	AX, BX
	JEQ	same			// a == b: same memory, trivially equal
	MOVQ	BX, DI			// b -> DI (must be saved before BX is overwritten)
	MOVQ	8(DX), BX		// compiler stores size at offset 8 in the closure
	MOVQ	AX, SI			// a -> SI
	JMP	memeqbody<>(SB)		// tail jump: memeqbody's RET returns to our caller
same:
	MOVQ	$1, AX			// return 1
	RET
38
// memeqbody compares count bytes at a with count bytes at b.
// It is the shared tail of memequal and memequal_varlen, which jump here
// after moving their arguments into SI, DI and BX.
// It only reads memory. It overwrites SI, DI, BX, CX, DX, the flags and
// the X/Y vector registers used by whichever 64-byte loop runs.
//
// Strategy by size:
//   count < 8   -> small:    one (possibly shifted) 8-byte load per side
//   count < 64  -> bigloop:  8 bytes per iteration, then an overlapping tail
//   count >= 64 -> hugeloop / hugeloop_avx2: 64 bytes per iteration,
//                  then bigloop for what is left
39 // Input:
40 // a in SI
41 // b in DI
42 // count in BX (number of bytes to compare)
43 // Output:
44 // result in AX: 1 if all count bytes match, 0 otherwise (the SETEQ exits write only the low byte)
45 TEXT memeqbody<>(SB),NOSPLIT,$0-0
46 CMPQ BX, $8
47 JB small // fewer than 8 bytes: handled without a loop
48 CMPQ BX, $64
49 JB bigloop // 8..63 bytes: go straight to the 8-byte loop
// If the hasAVX2 macro is defined, the run-time check and the SSE loop
// below are compiled out and control falls directly into hugeloop_avx2.
50 #ifndef hasAVX2
51 CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
52 JE hugeloop_avx2 // CPU reports AVX2: use the ymm loop instead
53
54 // 64 bytes at a time using xmm registers
55 hugeloop:
56 CMPQ BX, $64
57 JB bigloop // fewer than 64 bytes left: finish in the 8-byte loop
58 MOVOU (SI), X0
59 MOVOU (DI), X1
60 MOVOU 16(SI), X2
61 MOVOU 16(DI), X3
62 MOVOU 32(SI), X4
63 MOVOU 32(DI), X5
64 MOVOU 48(SI), X6
65 MOVOU 48(DI), X7
66 PCMPEQB X1, X0 // each byte of X0 becomes 0xff where a and b agree, 0x00 where they differ
67 PCMPEQB X3, X2
68 PCMPEQB X5, X4
69 PCMPEQB X7, X6
70 PAND X2, X0 // AND the four per-byte masks together into X0
71 PAND X6, X4
72 PAND X4, X0
73 PMOVMSKB X0, DX // DX = one bit per byte of X0 (16 bits)
74 ADDQ $64, SI // advance both pointers and shrink the remaining count
75 ADDQ $64, DI
76 SUBQ $64, BX
77 CMPL DX, $0xffff // all 16 bits set <=> all 64 bytes matched
78 JEQ hugeloop
79 XORQ AX, AX // return 0
80 RET
81 #endif
82
83 // 64 bytes at a time using ymm registers
84 hugeloop_avx2:
85 CMPQ BX, $64
86 JB bigloop_avx2 // fewer than 64 bytes left: leave via the VZEROUPPER stub
87 VMOVDQU (SI), Y0
88 VMOVDQU (DI), Y1
89 VMOVDQU 32(SI), Y2
90 VMOVDQU 32(DI), Y3
91 VPCMPEQB Y1, Y0, Y4 // Y4 = per-byte equality mask for bytes 0-31
92 VPCMPEQB Y2, Y3, Y5 // Y5 = per-byte equality mask for bytes 32-63
93 VPAND Y4, Y5, Y6 // Y6 byte is 0xff only where both halves matched
94 VPMOVMSKB Y6, DX // DX = one bit per byte of Y6 (32 bits)
95 ADDQ $64, SI // advance both pointers and shrink the remaining count
96 ADDQ $64, DI
97 SUBQ $64, BX
98 CMPL DX, $0xffffffff // all 32 bits set <=> all 64 bytes matched
99 JEQ hugeloop_avx2
100 VZEROUPPER // clear the upper ymm halves before returning to non-AVX code
101 XORQ AX, AX // return 0
102 RET
103
104 bigloop_avx2:
105 VZEROUPPER // same cleanup as above, then fall through into bigloop
106
107 // 8 bytes at a time using 64-bit register
108 bigloop:
109 CMPQ BX, $8
110 JBE leftover // 8 or fewer bytes left: finish with one final load per side
111 MOVQ (SI), CX
112 MOVQ (DI), DX
113 ADDQ $8, SI
114 ADDQ $8, DI
115 SUBQ $8, BX
116 CMPQ CX, DX
117 JEQ bigloop
118 XORQ AX, AX // return 0
119 RET
120
121 // remaining 0-8 bytes
// The loads below fetch the 8 bytes that END at SI+BX / DI+BX, so they may
// overlap bytes that were already compared (and found equal). This path is
// only reached after at least 8 bytes of each input lie at or before that
// end, so the load never starts before the beginning of the input.
122 leftover:
123 MOVQ -8(SI)(BX*1), CX
124 MOVQ -8(DI)(BX*1), DX
125 CMPQ CX, DX
126 SETEQ AX // low byte of AX = 1 if the final words match, else 0
127 RET
128
// count is 0..7 here.
129 small:
130 CMPQ BX, $0
131 JEQ equal // zero length: ZF is set by the CMPQ above, so SETEQ at equal yields 1
132
133 LEAQ 0(BX*8), CX // CX = 8*count (number of wanted bits)
134 NEGQ CX // CX = -8*count; a 64-bit shift uses only the low 6 bits, i.e. 64-8*count
135
136 CMPB SI, $0xf8 // looks only at the low byte of the address
137 JA si_high
138
139 // load at SI won't cross a page boundary.
140 MOVQ (SI), SI // wanted bytes end up in the low count bytes; higher bytes are discarded by SHLQ below
141 JMP si_finish
142 si_high:
143 // address ends in 11111xxx. Load up to bytes we want, move to correct position.
144 MOVQ -8(SI)(BX*1), SI // the 8 bytes ending at a+count; wanted bytes are in the high end
145 SHRQ CX, SI // shift right by 64-8*count so the wanted bytes sit in the low end
146 si_finish:
147
148 // same for DI.
149 CMPB DI, $0xf8
150 JA di_high
151 MOVQ (DI), DI
152 JMP di_finish
153 di_high:
154 MOVQ -8(DI)(BX*1), DI
155 SHRQ CX, DI
156 di_finish:
157
158 SUBQ SI, DI // low 8*count bits of the difference are zero iff the wanted bytes are equal
159 SHLQ CX, DI // shift out the unwanted high bits; the shift count is nonzero here, so ZF reflects the result
160 equal:
161 SETEQ AX // reached with ZF from SHLQ above or from CMPQ BX, $0
162 RET
163
View as plain text