Text file
src/runtime/memmove_ppc64x.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6
7 #include "textflag.h"
8
9 // See memmove Go doc for important implementation constraints.
10
11 // func memmove(to, from unsafe.Pointer, n uintptr)
12
13 // target address (advanced as bytes are copied)
14 #define TGT R3
15 // source address (advanced as bytes are copied)
16 #define SRC R4
17 // length to move, in bytes
18 #define LEN R5
19 // number of doublewords (LEN >> 3)
20 #define DWORDS R6
21 // number of bytes < 8 (LEN & 7)
22 #define BYTES R7
23 // const 16 used as index
24 #define IDX16 R8
25 // temp used for copies, etc.
26 #define TMP R9
27 // number of 32 byte blocks (used by the backward copy loop)
28 #define QWORDS R10
29 // index values (const 32 and 48); OCTWORDS is the number of 64 byte chunks (forward copy loop)
30 #define IDX32 R14
31 #define IDX48 R15
32 #define OCTWORDS R16
33
34 TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24 // move LEN bytes from SRC to TGT, copying forward or backward depending on the overlap test below
35 // R3 = TGT = to
36 // R4 = SRC = from
37 // R5 = LEN = n
38 // Condition register use: CR0 = scratch compares, CR1 = DWORDS vs 0, CR2 = overlap test, CR3 = saved BYTES vs 0.
39 // Determine if there are doublewords to
40 // copy so a more efficient move can be done
41 check:
42 ANDCC $7, LEN, BYTES // R7: bytes to copy (LEN & 7); record form sets CR0
43 SRD $3, LEN, DWORDS // R6: double words to copy (LEN >> 3)
44 MOVFL CR0, CR3 // save CR from ANDCC; CR3[EQ] (no tail bytes) is tested at checkbytes
45 CMP DWORDS, $0, CR1 // CR1[EQ] set if no double words to copy
46
47 // Determine overlap by subtracting dest - src and comparing against the
48 // length. This catches the cases where src and dest are in different types
49 // of storage such as stack and static to avoid doing backward move when not
50 // necessary.
51 // The compare is unsigned: the backward path is taken only when (dest - src), treated as unsigned, is below len.
52 SUB SRC, TGT, TMP // dest - src
53 CMPU TMP, LEN, CR2 // < len?
54 BC 12, 8, backward // BLT CR2 backward
55
56 // Copying forward if no overlap.
57
58 BC 12, 6, checkbytes // BEQ CR1, checkbytes (no doublewords, only tail bytes)
59 SRDCC $3, DWORDS, OCTWORDS // 64 byte chunks? (DWORDS >> 3); record form overwrites CR0, hence the CR3 copy above
60 MOVD $16, IDX16 // index of the second 16-byte vector
61 BEQ lt64gt8 // < 64 bytes (OCTWORDS == 0)
62
63 // Prepare for moves of 64 bytes at a time.
64
65 forward64setup:
66 DCBTST (TGT) // prepare data cache: touch-for-store hint on the destination
67 DCBT (SRC) // touch hint on the source
68 MOVD OCTWORDS, CTR // Number of 64 byte chunks
69 MOVD $32, IDX32 // index of the third 16-byte vector
70 MOVD $48, IDX48 // index of the fourth 16-byte vector
71 PCALIGN $32 // align the loop entry
72
73 forward64:
74 LXVD2X (R0)(SRC), VS32 // load 64 bytes (all four loads happen before any store)
75 LXVD2X (IDX16)(SRC), VS33
76 LXVD2X (IDX32)(SRC), VS34
77 LXVD2X (IDX48)(SRC), VS35
78 ADD $64, SRC
79 STXVD2X VS32, (R0)(TGT) // store 64 bytes
80 STXVD2X VS33, (IDX16)(TGT)
81 STXVD2X VS34, (IDX32)(TGT)
82 STXVD2X VS35, (IDX48)(TGT)
83 ADD $64,TGT // bump up for next set
84 BC 16, 0, forward64 // continue (bdnz: decrement CTR, loop while it is nonzero)
85 ANDCC $7, DWORDS // remaining doublewords (DWORDS & 7); sets CR0
86 BEQ checkbytes // only bytes remain
87
88 lt64gt8: // 1-7 doublewords remain; move 32 bytes if at least 4 of them
89 CMP DWORDS, $4
90 BLT lt32gt8
91 LXVD2X (R0)(SRC), VS32
92 LXVD2X (IDX16)(SRC), VS33
93 ADD $-4, DWORDS
94 STXVD2X VS32, (R0)(TGT)
95 STXVD2X VS33, (IDX16)(TGT)
96 ADD $32, SRC
97 ADD $32, TGT
98
99 lt32gt8:
100 // At this point 0-3 doublewords (< 32 bytes) remain
101 // Move 16 bytes if possible
102 CMP DWORDS, $2
103 BLT lt16
104 LXVD2X (R0)(SRC), VS32
105 ADD $-2, DWORDS
106 STXVD2X VS32, (R0)(TGT)
107 ADD $16, SRC
108 ADD $16, TGT
109
110 lt16: // Move 8 bytes if possible
111 CMP DWORDS, $1
112 BLT checkbytes
113 MOVD 0(SRC), TMP
114 ADD $8, SRC
115 MOVD TMP, 0(TGT)
116 ADD $8, TGT
117 checkbytes:
118 BC 12, 14, LR // BEQ CR3, lr: return if BYTES == 0 (CR3 was saved at check)
119 lt8: // Move word if possible (BYTES is 1-7 here)
120 CMP BYTES, $4
121 BLT lt4
122 MOVWZ 0(SRC), TMP
123 ADD $-4, BYTES
124 MOVW TMP, 0(TGT)
125 ADD $4, SRC
126 ADD $4, TGT
127 lt4: // Move halfword if possible
128 CMP BYTES, $2
129 BLT lt2
130 MOVHZ 0(SRC), TMP
131 ADD $-2, BYTES
132 MOVH TMP, 0(TGT)
133 ADD $2, SRC
134 ADD $2, TGT
135 lt2: // Move last byte if 1 left
136 CMP BYTES, $1
137 BC 12, 0, LR // bltlr, return if BYTES == 0
138 MOVBZ 0(SRC), TMP
139 MOVBZ TMP, 0(TGT)
140 RET
141
142 backward:
143 // Copying backwards proceeds by copying R7 bytes then copying R6 double words.
144 // R3 and R4 are advanced to the end of the destination/source buffers
145 // respectively and moved back as we copy.
146
147 ADD LEN, SRC, SRC // end of source
148 ADD TGT, LEN, TGT // end of dest
149
150 BEQ nobackwardtail // CR0 still holds the ANDCC result from check: skip the byte loop if BYTES == 0
151
152 MOVD BYTES, CTR // bytes to move
153
154 backwardtailloop:
155 MOVBZ -1(SRC), TMP // load the last uncopied byte
156 SUB $1,SRC
157 MOVBZ TMP, -1(TGT) // store it at the end of the destination
158 SUB $1,TGT
159 BDNZ backwardtailloop
160
161 nobackwardtail:
162 BC 4, 5, LR // blelr cr1, return if DWORDS == 0
163 SRDCC $2,DWORDS,QWORDS // Compute number of 32B blocks and compare to 0
164 BNE backward32setup // If QWORDS != 0, start the 32B copy loop.
165
166 backward24:
167 // DWORDS is a value between 1-3.
168 CMP DWORDS, $2
169 // The single compare above feeds both conditional returns below; SRC and TGT are not updated here, fixed negative offsets are used.
170 MOVD -8(SRC), TMP
171 MOVD TMP, -8(TGT)
172 BC 12, 0, LR // bltlr, return if DWORDS == 1
173
174 MOVD -16(SRC), TMP
175 MOVD TMP, -16(TGT)
176 BC 12, 2, LR // beqlr, return if DWORDS == 2
177
178 MOVD -24(SRC), TMP
179 MOVD TMP, -24(TGT)
180 RET
181
182 backward32setup:
183 ANDCC $3,DWORDS // Compute remaining DWORDS and compare to 0; the CR0 result is consumed after the loop
184 MOVD QWORDS, CTR // set up loop ctr
185 MOVD $16, IDX16 // index of the second 16-byte vector (32 bytes at a time)
186
187 backward32loop:
188 SUB $32, TGT
189 SUB $32, SRC
190 LXVD2X (R0)(SRC), VS32 // load 16x2 bytes (both loads happen before any store)
191 LXVD2X (IDX16)(SRC), VS33
192 STXVD2X VS32, (R0)(TGT) // store 16x2 bytes
193 STXVD2X VS33, (IDX16)(TGT)
194 BDNZ backward32loop
195 BC 12, 2, LR // beqlr, return if DWORDS == 0
196 BR backward24 // 1-3 doublewords remain
197
View as plain text