src/runtime/asm_arm64.s
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "tls_arm64.h"
8 #include "funcdata.h"
9 #include "textflag.h"
10
11 TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
12 // SP = stack; R0 = argc; R1 = argv
13
14 SUB $32, RSP
15 MOVW R0, 8(RSP) // argc
16 MOVD R1, 16(RSP) // argv
17
18 #ifdef TLS_darwin
19 // Initialize TLS.
20 MOVD ZR, g // clear g, make sure it's not junk.
21 SUB $32, RSP
22 MRS_TPIDR_R0
23 AND $~7, R0
24 MOVD R0, 16(RSP) // arg2: TLS base
25 MOVD $runtime·tls_g(SB), R2
26 MOVD R2, 8(RSP) // arg1: &tlsg
27 BL ·tlsinit(SB)
28 ADD $32, RSP
29 #endif
30
31 // create istack out of the given (operating system) stack.
32 // _cgo_init may update stackguard.
33 MOVD $runtime·g0(SB), g
34 MOVD RSP, R7
35 MOVD $(-64*1024)(R7), R0
36 MOVD R0, g_stackguard0(g)
37 MOVD R0, g_stackguard1(g)
38 MOVD R0, (g_stack+stack_lo)(g)
39 MOVD R7, (g_stack+stack_hi)(g)
40
41 // if there is a _cgo_init, call it using the gcc ABI.
42 MOVD _cgo_init(SB), R12
43 CBZ R12, nocgo
44
45 #ifdef GOOS_android
46 MRS_TPIDR_R0 // load TLS base pointer
47 MOVD R0, R3 // arg 3: TLS base pointer
48 MOVD $runtime·tls_g(SB), R2 // arg 2: &tls_g
49 #else
50 MOVD $0, R2 // arg 2: not used when using platform's TLS
51 #endif
52 MOVD $setg_gcc<>(SB), R1 // arg 1: setg
53 MOVD g, R0 // arg 0: G
54 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
55 BL (R12)
56 ADD $16, RSP
57
58 nocgo:
59 BL runtime·save_g(SB)
60 // update stackguard after _cgo_init
61 MOVD (g_stack+stack_lo)(g), R0
62 ADD $const__StackGuard, R0
63 MOVD R0, g_stackguard0(g)
64 MOVD R0, g_stackguard1(g)
65
66 // set the per-goroutine and per-mach "registers"
67 MOVD $runtime·m0(SB), R0
68
69 // save m->g0 = g0
70 MOVD g, m_g0(R0)
71 // save m0 to g0->m
72 MOVD R0, g_m(g)
73
74 BL runtime·check(SB)
75
76 #ifdef GOOS_windows
77 BL runtime·wintls(SB)
78 #endif
79
80 MOVW 8(RSP), R0 // copy argc
81 MOVW R0, -8(RSP)
82 MOVD 16(RSP), R0 // copy argv
83 MOVD R0, 0(RSP)
84 BL runtime·args(SB)
85 BL runtime·osinit(SB)
86 BL runtime·schedinit(SB)
87
88 // create a new goroutine to start program
89 MOVD $runtime·mainPC(SB), R0 // entry
90 SUB $16, RSP
91 MOVD R0, 8(RSP) // arg
92 MOVD $0, 0(RSP) // dummy LR
93 BL runtime·newproc(SB)
94 ADD $16, RSP
95
96 // start this M
97 BL runtime·mstart(SB)
98
99 // Prevent dead-code elimination of debugCallV2, which is
100 // intended to be called by debuggers.
101 MOVD $runtime·debugCallV2<ABIInternal>(SB), R0
102
103 MOVD $0, R0
104 MOVD R0, (R0) // boom
105 UNDEF
106
107 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
108 GLOBL runtime·mainPC(SB),RODATA,$8
109
110 // Windows ARM64 needs an immediate 0xf000 argument.
111 // See go.dev/issues/53837.
112 #define BREAK \
113 #ifdef GOOS_windows \
114 BRK $0xf000 \
115 #else \
116 BRK \
117 #endif \
118
119
120 TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
121 BREAK
122 RET
123
124 TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
125 RET
126
127 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
128 BL runtime·mstart0(SB)
129 RET // not reached
130
131 /*
132 * go-routine
133 */
134
135 // void gogo(Gobuf*)
136 // restore state from Gobuf; longjmp
137 TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
138 MOVD buf+0(FP), R5
139 MOVD gobuf_g(R5), R6
140 MOVD 0(R6), R4 // make sure g != nil
141 B gogo<>(SB)
142
143 TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
144 MOVD R6, g
145 BL runtime·save_g(SB)
146
147 MOVD gobuf_sp(R5), R0
148 MOVD R0, RSP
149 MOVD gobuf_bp(R5), R29
150 MOVD gobuf_lr(R5), LR
151 MOVD gobuf_ret(R5), R0
152 MOVD gobuf_ctxt(R5), R26
153 MOVD $0, gobuf_sp(R5)
154 MOVD $0, gobuf_bp(R5)
155 MOVD $0, gobuf_ret(R5)
156 MOVD $0, gobuf_lr(R5)
157 MOVD $0, gobuf_ctxt(R5)
158 CMP ZR, ZR // set condition codes for == test, needed by stack split
159 MOVD gobuf_pc(R5), R6
160 B (R6)
161
162 // void mcall(fn func(*g))
163 // Switch to m->g0's stack, call fn(g).
164 // Fn must never return. It should gogo(&g->sched)
165 // to keep running g.
166 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
167 MOVD R0, R26 // context
168
169 // Save caller state in g->sched
170 MOVD RSP, R0
171 MOVD R0, (g_sched+gobuf_sp)(g)
172 MOVD R29, (g_sched+gobuf_bp)(g)
173 MOVD LR, (g_sched+gobuf_pc)(g)
174 MOVD $0, (g_sched+gobuf_lr)(g)
175
176 // Switch to m->g0 & its stack, call fn.
177 MOVD g, R3
178 MOVD g_m(g), R8
179 MOVD m_g0(R8), g
180 BL runtime·save_g(SB)
181 CMP g, R3
182 BNE 2(PC)
183 B runtime·badmcall(SB)
184
185 MOVD (g_sched+gobuf_sp)(g), R0
186 MOVD R0, RSP // sp = m->g0->sched.sp
187 MOVD (g_sched+gobuf_bp)(g), R29
188 MOVD R3, R0 // arg = g
189 MOVD $0, -16(RSP) // dummy LR
190 SUB $16, RSP
191 MOVD 0(R26), R4 // code pointer
192 BL (R4)
193 B runtime·badmcall2(SB)
194
195 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
196 // of the G stack. We need to distinguish the routine that
197 // lives at the bottom of the G stack from the one that lives
198 // at the top of the system stack because the one at the top of
199 // the system stack terminates the stack walk (see topofstack()).
200 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
201 UNDEF
202 BL (LR) // make sure this function is not leaf
203 RET
204
205 // func systemstack(fn func())
206 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
207 MOVD fn+0(FP), R3 // R3 = fn
208 MOVD R3, R26 // context
209 MOVD g_m(g), R4 // R4 = m
210
211 MOVD m_gsignal(R4), R5 // R5 = gsignal
212 CMP g, R5
213 BEQ noswitch
214
215 MOVD m_g0(R4), R5 // R5 = g0
216 CMP g, R5
217 BEQ noswitch
218
219 MOVD m_curg(R4), R6
220 CMP g, R6
221 BEQ switch
222
223 // Bad: g is not gsignal, not g0, not curg. What is it?
224 // Hide call from linker nosplit analysis.
225 MOVD $runtime·badsystemstack(SB), R3
226 BL (R3)
227 B runtime·abort(SB)
228
229 switch:
230 // save our state in g->sched. Pretend to
231 // be systemstack_switch if the G stack is scanned.
232 BL gosave_systemstack_switch<>(SB)
233
234 // switch to g0
235 MOVD R5, g
236 BL runtime·save_g(SB)
237 MOVD (g_sched+gobuf_sp)(g), R3
238 MOVD R3, RSP
239 MOVD (g_sched+gobuf_bp)(g), R29
240
241 // call target function
242 MOVD 0(R26), R3 // code pointer
243 BL (R3)
244
245 // switch back to g
246 MOVD g_m(g), R3
247 MOVD m_curg(R3), g
248 BL runtime·save_g(SB)
249 MOVD (g_sched+gobuf_sp)(g), R0
250 MOVD R0, RSP
251 MOVD (g_sched+gobuf_bp)(g), R29
252 MOVD $0, (g_sched+gobuf_sp)(g)
253 MOVD $0, (g_sched+gobuf_bp)(g)
254 RET
255
256 noswitch:
257 // already on m stack, just call directly
258 // Using a tail call here cleans up tracebacks since we won't stop
259 // at an intermediate systemstack.
260 MOVD 0(R26), R3 // code pointer
261 MOVD.P 16(RSP), R30 // restore LR
262 SUB $8, RSP, R29 // restore FP
263 B (R3)
264
265 /*
266 * support for morestack
267 */
268
269 // Called during function prolog when more stack is needed.
270 // Caller has already loaded:
271 // R3 prolog's LR (R30)
272 //
273 // The traceback routines see morestack on a g0 as being
274 // the top of a stack (for example, morestack calling newstack
275 // calling the scheduler calling newm calling gc), so we must
276 // record an argument size. For that purpose, it has no arguments.
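// In outline: morestack records f's context in g->sched and f's caller in
// m->morebuf, switches to m->g0's stack, and calls newstack, which grows the
// stack and restarts f there; control never returns to this function.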
277 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
278 // Cannot grow scheduler stack (m->g0).
279 MOVD g_m(g), R8
280 MOVD m_g0(R8), R4
281 CMP g, R4
282 BNE 3(PC)
283 BL runtime·badmorestackg0(SB)
284 B runtime·abort(SB)
285
286 // Cannot grow signal stack (m->gsignal).
287 MOVD m_gsignal(R8), R4
288 CMP g, R4
289 BNE 3(PC)
290 BL runtime·badmorestackgsignal(SB)
291 B runtime·abort(SB)
292
293 // Called from f.
294 // Set g->sched to context in f
295 MOVD RSP, R0
296 MOVD R0, (g_sched+gobuf_sp)(g)
297 MOVD R29, (g_sched+gobuf_bp)(g)
298 MOVD LR, (g_sched+gobuf_pc)(g)
299 MOVD R3, (g_sched+gobuf_lr)(g)
300 MOVD R26, (g_sched+gobuf_ctxt)(g)
301
302 // Called from f.
303 // Set m->morebuf to f's callers.
304 MOVD R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
305 MOVD RSP, R0
306 MOVD R0, (m_morebuf+gobuf_sp)(R8) // f's caller's RSP
307 MOVD g, (m_morebuf+gobuf_g)(R8)
308
309 // Call newstack on m->g0's stack.
310 MOVD m_g0(R8), g
311 BL runtime·save_g(SB)
312 MOVD (g_sched+gobuf_sp)(g), R0
313 MOVD R0, RSP
314 MOVD (g_sched+gobuf_bp)(g), R29
315 MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned)
316 BL runtime·newstack(SB)
317
318 // Not reached, but make sure the return PC from the call to newstack
319 // is still in this function, and not the beginning of the next.
320 UNDEF
321
322 TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
323 // Force SPWRITE. This function doesn't actually write SP,
324 // but it is called with a special calling convention where
325 // the caller doesn't save LR on stack but passes it as a
327 // register (R3), and the unwinder doesn't currently understand that convention.
327 // Make it SPWRITE to stop unwinding. (See issue 54332)
328 MOVD RSP, RSP
329
330 MOVW $0, R26
331 B runtime·morestack(SB)
332
333 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
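// The registers spilled here are the ABIInternal argument/result registers
// for arm64: integer R0-R15 and floating-point F0-F15.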
334 TEXT ·spillArgs(SB),NOSPLIT,$0-0
335 STP (R0, R1), (0*8)(R20)
336 STP (R2, R3), (2*8)(R20)
337 STP (R4, R5), (4*8)(R20)
338 STP (R6, R7), (6*8)(R20)
339 STP (R8, R9), (8*8)(R20)
340 STP (R10, R11), (10*8)(R20)
341 STP (R12, R13), (12*8)(R20)
342 STP (R14, R15), (14*8)(R20)
343 FSTPD (F0, F1), (16*8)(R20)
344 FSTPD (F2, F3), (18*8)(R20)
345 FSTPD (F4, F5), (20*8)(R20)
346 FSTPD (F6, F7), (22*8)(R20)
347 FSTPD (F8, F9), (24*8)(R20)
348 FSTPD (F10, F11), (26*8)(R20)
349 FSTPD (F12, F13), (28*8)(R20)
350 FSTPD (F14, F15), (30*8)(R20)
351 RET
352
353 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
354 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
355 LDP (0*8)(R20), (R0, R1)
356 LDP (2*8)(R20), (R2, R3)
357 LDP (4*8)(R20), (R4, R5)
358 LDP (6*8)(R20), (R6, R7)
359 LDP (8*8)(R20), (R8, R9)
360 LDP (10*8)(R20), (R10, R11)
361 LDP (12*8)(R20), (R12, R13)
362 LDP (14*8)(R20), (R14, R15)
363 FLDPD (16*8)(R20), (F0, F1)
364 FLDPD (18*8)(R20), (F2, F3)
365 FLDPD (20*8)(R20), (F4, F5)
366 FLDPD (22*8)(R20), (F6, F7)
367 FLDPD (24*8)(R20), (F8, F9)
368 FLDPD (26*8)(R20), (F10, F11)
369 FLDPD (28*8)(R20), (F12, F13)
370 FLDPD (30*8)(R20), (F14, F15)
371 RET
372
373 // reflectcall: call a function with the given argument list
374 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
375 // we don't have variable-sized frames, so we use a small number
376 // of constant-sized-frame functions to encode a few bits of size in the pc.
377 // Caution: ugly multiline assembly macros in your future!
378
379 #define DISPATCH(NAME,MAXSIZE) \
380 MOVD $MAXSIZE, R27; \
381 CMP R27, R16; \
382 BGT 3(PC); \
383 MOVD $NAME(SB), R27; \
384 B (R27)
385 // Note: can't just "B NAME(SB)" - bad inlining results.
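// Each DISPATCH line below tail-calls the first runtime·call<N> whose frame
// size N is at least the requested frame size in R16.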
386
387 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
388 MOVWU frameSize+32(FP), R16
389 DISPATCH(runtime·call16, 16)
390 DISPATCH(runtime·call32, 32)
391 DISPATCH(runtime·call64, 64)
392 DISPATCH(runtime·call128, 128)
393 DISPATCH(runtime·call256, 256)
394 DISPATCH(runtime·call512, 512)
395 DISPATCH(runtime·call1024, 1024)
396 DISPATCH(runtime·call2048, 2048)
397 DISPATCH(runtime·call4096, 4096)
398 DISPATCH(runtime·call8192, 8192)
399 DISPATCH(runtime·call16384, 16384)
400 DISPATCH(runtime·call32768, 32768)
401 DISPATCH(runtime·call65536, 65536)
402 DISPATCH(runtime·call131072, 131072)
403 DISPATCH(runtime·call262144, 262144)
404 DISPATCH(runtime·call524288, 524288)
405 DISPATCH(runtime·call1048576, 1048576)
406 DISPATCH(runtime·call2097152, 2097152)
407 DISPATCH(runtime·call4194304, 4194304)
408 DISPATCH(runtime·call8388608, 8388608)
409 DISPATCH(runtime·call16777216, 16777216)
410 DISPATCH(runtime·call33554432, 33554432)
411 DISPATCH(runtime·call67108864, 67108864)
412 DISPATCH(runtime·call134217728, 134217728)
413 DISPATCH(runtime·call268435456, 268435456)
414 DISPATCH(runtime·call536870912, 536870912)
415 DISPATCH(runtime·call1073741824, 1073741824)
416 MOVD $runtime·badreflectcall(SB), R0
417 B (R0)
418
419 #define CALLFN(NAME,MAXSIZE) \
420 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
421 NO_LOCAL_POINTERS; \
422 /* copy arguments to stack */ \
423 MOVD stackArgs+16(FP), R3; \
424 MOVWU stackArgsSize+24(FP), R4; \
425 ADD $8, RSP, R5; \
426 BIC $0xf, R4, R6; \
427 CBZ R6, 6(PC); \
428 /* if R6=(argsize&~15) != 0 */ \
429 ADD R6, R5, R6; \
430 /* copy 16 bytes at a time */ \
431 LDP.P 16(R3), (R7, R8); \
432 STP.P (R7, R8), 16(R5); \
433 CMP R5, R6; \
434 BNE -3(PC); \
435 AND $0xf, R4, R6; \
436 CBZ R6, 6(PC); \
437 /* if R6=(argsize&15) != 0 */ \
438 ADD R6, R5, R6; \
439 /* copy 1 byte at a time for the rest */ \
440 MOVBU.P 1(R3), R7; \
441 MOVBU.P R7, 1(R5); \
442 CMP R5, R6; \
443 BNE -3(PC); \
444 /* set up argument registers */ \
445 MOVD regArgs+40(FP), R20; \
446 CALL ·unspillArgs(SB); \
447 /* call function */ \
448 MOVD f+8(FP), R26; \
449 MOVD (R26), R20; \
450 PCDATA $PCDATA_StackMapIndex, $0; \
451 BL (R20); \
452 /* copy return values back */ \
453 MOVD regArgs+40(FP), R20; \
454 CALL ·spillArgs(SB); \
455 MOVD stackArgsType+0(FP), R7; \
456 MOVD stackArgs+16(FP), R3; \
457 MOVWU stackArgsSize+24(FP), R4; \
458 MOVWU stackRetOffset+28(FP), R6; \
459 ADD $8, RSP, R5; \
460 ADD R6, R5; \
461 ADD R6, R3; \
462 SUB R6, R4; \
463 BL callRet<>(SB); \
464 RET
465
466 // callRet copies return values back at the end of call*. This is a
467 // separate function so it can allocate stack space for the arguments
468 // to reflectcallmove. It does not follow the Go ABI; it expects its
469 // arguments in registers.
470 TEXT callRet<>(SB), NOSPLIT, $48-0
471 NO_LOCAL_POINTERS
472 STP (R7, R3), 8(RSP)
473 STP (R5, R4), 24(RSP)
474 MOVD R20, 40(RSP)
475 BL runtime·reflectcallmove(SB)
476 RET
477
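// Each CALLFN instantiation below defines a runtime·call<N> that copies up to
// N bytes of stack arguments into its frame, loads register arguments via
// unspillArgs, calls the target, spills result registers, and uses callRet to
// copy stack results back through reflectcallmove.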
478 CALLFN(·call16, 16)
479 CALLFN(·call32, 32)
480 CALLFN(·call64, 64)
481 CALLFN(·call128, 128)
482 CALLFN(·call256, 256)
483 CALLFN(·call512, 512)
484 CALLFN(·call1024, 1024)
485 CALLFN(·call2048, 2048)
486 CALLFN(·call4096, 4096)
487 CALLFN(·call8192, 8192)
488 CALLFN(·call16384, 16384)
489 CALLFN(·call32768, 32768)
490 CALLFN(·call65536, 65536)
491 CALLFN(·call131072, 131072)
492 CALLFN(·call262144, 262144)
493 CALLFN(·call524288, 524288)
494 CALLFN(·call1048576, 1048576)
495 CALLFN(·call2097152, 2097152)
496 CALLFN(·call4194304, 4194304)
497 CALLFN(·call8388608, 8388608)
498 CALLFN(·call16777216, 16777216)
499 CALLFN(·call33554432, 33554432)
500 CALLFN(·call67108864, 67108864)
501 CALLFN(·call134217728, 134217728)
502 CALLFN(·call268435456, 268435456)
503 CALLFN(·call536870912, 536870912)
504 CALLFN(·call1073741824, 1073741824)
505
506 // func memhash32(p unsafe.Pointer, h uintptr) uintptr
507 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
508 MOVB runtime·useAeshash(SB), R10
509 CBZ R10, noaes
510 MOVD $runtime·aeskeysched+0(SB), R3
511
512 VEOR V0.B16, V0.B16, V0.B16
513 VLD1 (R3), [V2.B16]
514 VLD1 (R0), V0.S[1]
515 VMOV R1, V0.S[0]
516
517 AESE V2.B16, V0.B16
518 AESMC V0.B16, V0.B16
519 AESE V2.B16, V0.B16
520 AESMC V0.B16, V0.B16
521 AESE V2.B16, V0.B16
522
523 VMOV V0.D[0], R0
524 RET
525 noaes:
526 B runtime·memhash32Fallback<ABIInternal>(SB)
527
528 // func memhash64(p unsafe.Pointer, h uintptr) uintptr
529 TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
530 MOVB runtime·useAeshash(SB), R10
531 CBZ R10, noaes
532 MOVD $runtime·aeskeysched+0(SB), R3
533
534 VEOR V0.B16, V0.B16, V0.B16
535 VLD1 (R3), [V2.B16]
536 VLD1 (R0), V0.D[1]
537 VMOV R1, V0.D[0]
538
539 AESE V2.B16, V0.B16
540 AESMC V0.B16, V0.B16
541 AESE V2.B16, V0.B16
542 AESMC V0.B16, V0.B16
543 AESE V2.B16, V0.B16
544
545 VMOV V0.D[0], R0
546 RET
547 noaes:
548 B runtime·memhash64Fallback<ABIInternal>(SB)
549
550 // func memhash(p unsafe.Pointer, h, size uintptr) uintptr
551 TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
552 MOVB runtime·useAeshash(SB), R10
553 CBZ R10, noaes
554 B aeshashbody<>(SB)
555 noaes:
556 B runtime·memhashFallback<ABIInternal>(SB)
557
558 // func strhash(p unsafe.Pointer, h uintptr) uintptr
559 TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
560 MOVB runtime·useAeshash(SB), R10
561 CBZ R10, noaes
562 LDP (R0), (R0, R2) // string data / length
563 B aeshashbody<>(SB)
564 noaes:
565 B runtime·strhashFallback<ABIInternal>(SB)
566
567 // R0: data
568 // R1: seed data
569 // R2: length
570 // At return, R0 = return value
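// The seed and length are folded into V30, which perturbs the AES round keys;
// the code then branches on length, assembling 0-15 byte inputs with partial
// loads, hashing mid-size inputs with overlapping 16-byte loads, and running
// a pipelined 128-byte loop for larger inputs.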
571 TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
572 VEOR V30.B16, V30.B16, V30.B16
573 VMOV R1, V30.D[0]
574 VMOV R2, V30.D[1] // load length into seed
575
576 MOVD $runtime·aeskeysched+0(SB), R4
577 VLD1.P 16(R4), [V0.B16]
578 AESE V30.B16, V0.B16
579 AESMC V0.B16, V0.B16
580 CMP $16, R2
581 BLO aes0to15
582 BEQ aes16
583 CMP $32, R2
584 BLS aes17to32
585 CMP $64, R2
586 BLS aes33to64
587 CMP $128, R2
588 BLS aes65to128
589 B aes129plus
590
591 aes0to15:
592 CBZ R2, aes0
593 VEOR V2.B16, V2.B16, V2.B16
594 TBZ $3, R2, less_than_8
595 VLD1.P 8(R0), V2.D[0]
596
597 less_than_8:
598 TBZ $2, R2, less_than_4
599 VLD1.P 4(R0), V2.S[2]
600
601 less_than_4:
602 TBZ $1, R2, less_than_2
603 VLD1.P 2(R0), V2.H[6]
604
605 less_than_2:
606 TBZ $0, R2, done
607 VLD1 (R0), V2.B[14]
608 done:
609 AESE V0.B16, V2.B16
610 AESMC V2.B16, V2.B16
611 AESE V0.B16, V2.B16
612 AESMC V2.B16, V2.B16
613 AESE V0.B16, V2.B16
614
615 VMOV V2.D[0], R0
616 RET
617
618 aes0:
619 VMOV V0.D[0], R0
620 RET
621
622 aes16:
623 VLD1 (R0), [V2.B16]
624 B done
625
626 aes17to32:
627 // make second seed
628 VLD1 (R4), [V1.B16]
629 AESE V30.B16, V1.B16
630 AESMC V1.B16, V1.B16
631 SUB $16, R2, R10
632 VLD1.P (R0)(R10), [V2.B16]
633 VLD1 (R0), [V3.B16]
634
635 AESE V0.B16, V2.B16
636 AESMC V2.B16, V2.B16
637 AESE V1.B16, V3.B16
638 AESMC V3.B16, V3.B16
639
640 AESE V0.B16, V2.B16
641 AESMC V2.B16, V2.B16
642 AESE V1.B16, V3.B16
643 AESMC V3.B16, V3.B16
644
645 AESE V0.B16, V2.B16
646 AESE V1.B16, V3.B16
647
648 VEOR V3.B16, V2.B16, V2.B16
649
650 VMOV V2.D[0], R0
651 RET
652
653 aes33to64:
654 VLD1 (R4), [V1.B16, V2.B16, V3.B16]
655 AESE V30.B16, V1.B16
656 AESMC V1.B16, V1.B16
657 AESE V30.B16, V2.B16
658 AESMC V2.B16, V2.B16
659 AESE V30.B16, V3.B16
660 AESMC V3.B16, V3.B16
661 SUB $32, R2, R10
662
663 VLD1.P (R0)(R10), [V4.B16, V5.B16]
664 VLD1 (R0), [V6.B16, V7.B16]
665
666 AESE V0.B16, V4.B16
667 AESMC V4.B16, V4.B16
668 AESE V1.B16, V5.B16
669 AESMC V5.B16, V5.B16
670 AESE V2.B16, V6.B16
671 AESMC V6.B16, V6.B16
672 AESE V3.B16, V7.B16
673 AESMC V7.B16, V7.B16
674
675 AESE V0.B16, V4.B16
676 AESMC V4.B16, V4.B16
677 AESE V1.B16, V5.B16
678 AESMC V5.B16, V5.B16
679 AESE V2.B16, V6.B16
680 AESMC V6.B16, V6.B16
681 AESE V3.B16, V7.B16
682 AESMC V7.B16, V7.B16
683
684 AESE V0.B16, V4.B16
685 AESE V1.B16, V5.B16
686 AESE V2.B16, V6.B16
687 AESE V3.B16, V7.B16
688
689 VEOR V6.B16, V4.B16, V4.B16
690 VEOR V7.B16, V5.B16, V5.B16
691 VEOR V5.B16, V4.B16, V4.B16
692
693 VMOV V4.D[0], R0
694 RET
695
696 aes65to128:
697 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
698 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
699 AESE V30.B16, V1.B16
700 AESMC V1.B16, V1.B16
701 AESE V30.B16, V2.B16
702 AESMC V2.B16, V2.B16
703 AESE V30.B16, V3.B16
704 AESMC V3.B16, V3.B16
705 AESE V30.B16, V4.B16
706 AESMC V4.B16, V4.B16
707 AESE V30.B16, V5.B16
708 AESMC V5.B16, V5.B16
709 AESE V30.B16, V6.B16
710 AESMC V6.B16, V6.B16
711 AESE V30.B16, V7.B16
712 AESMC V7.B16, V7.B16
713
714 SUB $64, R2, R10
715 VLD1.P (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
716 VLD1 (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
717 AESE V0.B16, V8.B16
718 AESMC V8.B16, V8.B16
719 AESE V1.B16, V9.B16
720 AESMC V9.B16, V9.B16
721 AESE V2.B16, V10.B16
722 AESMC V10.B16, V10.B16
723 AESE V3.B16, V11.B16
724 AESMC V11.B16, V11.B16
725 AESE V4.B16, V12.B16
726 AESMC V12.B16, V12.B16
727 AESE V5.B16, V13.B16
728 AESMC V13.B16, V13.B16
729 AESE V6.B16, V14.B16
730 AESMC V14.B16, V14.B16
731 AESE V7.B16, V15.B16
732 AESMC V15.B16, V15.B16
733
734 AESE V0.B16, V8.B16
735 AESMC V8.B16, V8.B16
736 AESE V1.B16, V9.B16
737 AESMC V9.B16, V9.B16
738 AESE V2.B16, V10.B16
739 AESMC V10.B16, V10.B16
740 AESE V3.B16, V11.B16
741 AESMC V11.B16, V11.B16
742 AESE V4.B16, V12.B16
743 AESMC V12.B16, V12.B16
744 AESE V5.B16, V13.B16
745 AESMC V13.B16, V13.B16
746 AESE V6.B16, V14.B16
747 AESMC V14.B16, V14.B16
748 AESE V7.B16, V15.B16
749 AESMC V15.B16, V15.B16
750
751 AESE V0.B16, V8.B16
752 AESE V1.B16, V9.B16
753 AESE V2.B16, V10.B16
754 AESE V3.B16, V11.B16
755 AESE V4.B16, V12.B16
756 AESE V5.B16, V13.B16
757 AESE V6.B16, V14.B16
758 AESE V7.B16, V15.B16
759
760 VEOR V12.B16, V8.B16, V8.B16
761 VEOR V13.B16, V9.B16, V9.B16
762 VEOR V14.B16, V10.B16, V10.B16
763 VEOR V15.B16, V11.B16, V11.B16
764 VEOR V10.B16, V8.B16, V8.B16
765 VEOR V11.B16, V9.B16, V9.B16
766 VEOR V9.B16, V8.B16, V8.B16
767
768 VMOV V8.D[0], R0
769 RET
770
771 aes129plus:
772 PRFM (R0), PLDL1KEEP
773 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
774 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
775 AESE V30.B16, V1.B16
776 AESMC V1.B16, V1.B16
777 AESE V30.B16, V2.B16
778 AESMC V2.B16, V2.B16
779 AESE V30.B16, V3.B16
780 AESMC V3.B16, V3.B16
781 AESE V30.B16, V4.B16
782 AESMC V4.B16, V4.B16
783 AESE V30.B16, V5.B16
784 AESMC V5.B16, V5.B16
785 AESE V30.B16, V6.B16
786 AESMC V6.B16, V6.B16
787 AESE V30.B16, V7.B16
788 AESMC V7.B16, V7.B16
789 ADD R0, R2, R10
790 SUB $128, R10, R10
791 VLD1.P 64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
792 VLD1 (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
793 SUB $1, R2, R2
794 LSR $7, R2, R2
795
796 aesloop:
797 AESE V8.B16, V0.B16
798 AESMC V0.B16, V0.B16
799 AESE V9.B16, V1.B16
800 AESMC V1.B16, V1.B16
801 AESE V10.B16, V2.B16
802 AESMC V2.B16, V2.B16
803 AESE V11.B16, V3.B16
804 AESMC V3.B16, V3.B16
805 AESE V12.B16, V4.B16
806 AESMC V4.B16, V4.B16
807 AESE V13.B16, V5.B16
808 AESMC V5.B16, V5.B16
809 AESE V14.B16, V6.B16
810 AESMC V6.B16, V6.B16
811 AESE V15.B16, V7.B16
812 AESMC V7.B16, V7.B16
813
814 VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
815 AESE V8.B16, V0.B16
816 AESMC V0.B16, V0.B16
817 AESE V9.B16, V1.B16
818 AESMC V1.B16, V1.B16
819 AESE V10.B16, V2.B16
820 AESMC V2.B16, V2.B16
821 AESE V11.B16, V3.B16
822 AESMC V3.B16, V3.B16
823
824 VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
825 AESE V12.B16, V4.B16
826 AESMC V4.B16, V4.B16
827 AESE V13.B16, V5.B16
828 AESMC V5.B16, V5.B16
829 AESE V14.B16, V6.B16
830 AESMC V6.B16, V6.B16
831 AESE V15.B16, V7.B16
832 AESMC V7.B16, V7.B16
833 SUB $1, R2, R2
834 CBNZ R2, aesloop
835
836 AESE V8.B16, V0.B16
837 AESMC V0.B16, V0.B16
838 AESE V9.B16, V1.B16
839 AESMC V1.B16, V1.B16
840 AESE V10.B16, V2.B16
841 AESMC V2.B16, V2.B16
842 AESE V11.B16, V3.B16
843 AESMC V3.B16, V3.B16
844 AESE V12.B16, V4.B16
845 AESMC V4.B16, V4.B16
846 AESE V13.B16, V5.B16
847 AESMC V5.B16, V5.B16
848 AESE V14.B16, V6.B16
849 AESMC V6.B16, V6.B16
850 AESE V15.B16, V7.B16
851 AESMC V7.B16, V7.B16
852
853 AESE V8.B16, V0.B16
854 AESMC V0.B16, V0.B16
855 AESE V9.B16, V1.B16
856 AESMC V1.B16, V1.B16
857 AESE V10.B16, V2.B16
858 AESMC V2.B16, V2.B16
859 AESE V11.B16, V3.B16
860 AESMC V3.B16, V3.B16
861 AESE V12.B16, V4.B16
862 AESMC V4.B16, V4.B16
863 AESE V13.B16, V5.B16
864 AESMC V5.B16, V5.B16
865 AESE V14.B16, V6.B16
866 AESMC V6.B16, V6.B16
867 AESE V15.B16, V7.B16
868 AESMC V7.B16, V7.B16
869
870 AESE V8.B16, V0.B16
871 AESE V9.B16, V1.B16
872 AESE V10.B16, V2.B16
873 AESE V11.B16, V3.B16
874 AESE V12.B16, V4.B16
875 AESE V13.B16, V5.B16
876 AESE V14.B16, V6.B16
877 AESE V15.B16, V7.B16
878
879 VEOR V0.B16, V1.B16, V0.B16
880 VEOR V2.B16, V3.B16, V2.B16
881 VEOR V4.B16, V5.B16, V4.B16
882 VEOR V6.B16, V7.B16, V6.B16
883 VEOR V0.B16, V2.B16, V0.B16
884 VEOR V4.B16, V6.B16, V4.B16
885 VEOR V4.B16, V0.B16, V0.B16
886
887 VMOV V0.D[0], R0
888 RET
889
890 TEXT runtime·procyield(SB),NOSPLIT,$0-0
891 MOVWU cycles+0(FP), R0
892 again:
893 YIELD
894 SUBW $1, R0
895 CBNZ R0, again
896 RET
897
898 // Save state of caller into g->sched,
899 // but using fake PC from systemstack_switch.
900 // Must only be called from functions with no locals ($0)
901 // or else unwinding from systemstack_switch is incorrect.
902 // Smashes R0.
903 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
904 MOVD $runtime·systemstack_switch(SB), R0
905 ADD $8, R0 // get past prologue
906 MOVD R0, (g_sched+gobuf_pc)(g)
907 MOVD RSP, R0
908 MOVD R0, (g_sched+gobuf_sp)(g)
909 MOVD R29, (g_sched+gobuf_bp)(g)
910 MOVD $0, (g_sched+gobuf_lr)(g)
911 MOVD $0, (g_sched+gobuf_ret)(g)
912 // Assert ctxt is zero. See func save.
913 MOVD (g_sched+gobuf_ctxt)(g), R0
914 CBZ R0, 2(PC)
915 CALL runtime·abort(SB)
916 RET
917
918 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
919 // Call fn(arg) aligned appropriately for the gcc ABI.
920 // Called on a system stack, and there may be no g yet (during needm).
921 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
922 MOVD fn+0(FP), R1
923 MOVD arg+8(FP), R0
924 SUB $16, RSP // skip over saved frame pointer below RSP
925 BL (R1)
926 ADD $16, RSP // skip over saved frame pointer below RSP
927 RET
928
929 // func asmcgocall(fn, arg unsafe.Pointer) int32
930 // Call fn(arg) on the scheduler stack,
931 // aligned appropriately for the gcc ABI.
932 // See cgocall.go for more details.
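// The goroutine stack may be copied while the C call is in progress (e.g.
// during a callback), so the code below saves the old g and its depth below
// stack.hi, rather than a raw SP, and recomputes RSP from them on return.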
933 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
934 MOVD fn+0(FP), R1
935 MOVD arg+8(FP), R0
936
937 MOVD RSP, R2 // save original stack pointer
938 CBZ g, nosave
939 MOVD g, R4
940
941 // Figure out if we need to switch to m->g0 stack.
942 // We get called to create new OS threads too, and those
943 // come in on the m->g0 stack already. Or we might already
944 // be on the m->gsignal stack.
945 MOVD g_m(g), R8
946 MOVD m_gsignal(R8), R3
947 CMP R3, g
948 BEQ nosave
949 MOVD m_g0(R8), R3
950 CMP R3, g
951 BEQ nosave
952
953 // Switch to system stack.
954 MOVD R0, R9 // gosave_systemstack_switch<> and save_g might clobber R0
955 BL gosave_systemstack_switch<>(SB)
956 MOVD R3, g
957 BL runtime·save_g(SB)
958 MOVD (g_sched+gobuf_sp)(g), R0
959 MOVD R0, RSP
960 MOVD (g_sched+gobuf_bp)(g), R29
961 MOVD R9, R0
962
963 // Now on a scheduling stack (a pthread-created stack).
964 // Save room for two of our pointers /*, plus 32 bytes of callee
965 // save area that lives on the caller stack. */
966 MOVD RSP, R13
967 SUB $16, R13
968 MOVD R13, RSP
969 MOVD R4, 0(RSP) // save old g on stack
970 MOVD (g_stack+stack_hi)(R4), R4
971 SUB R2, R4
972 MOVD R4, 8(RSP) // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
973 BL (R1)
974 MOVD R0, R9
975
976 // Restore g, stack pointer. R0 is errno, so don't touch it
977 MOVD 0(RSP), g
978 BL runtime·save_g(SB)
979 MOVD (g_stack+stack_hi)(g), R5
980 MOVD 8(RSP), R6
981 SUB R6, R5
982 MOVD R9, R0
983 MOVD R5, RSP
984
985 MOVW R0, ret+16(FP)
986 RET
987
988 nosave:
989 // Running on a system stack, perhaps even without a g.
990 // Having no g can happen during thread creation or thread teardown
991 // (see needm/dropm on Solaris, for example).
992 // This code is like the above sequence but without saving/restoring g
993 // and without worrying about the stack moving out from under us
994 // (because we're on a system stack, not a goroutine stack).
995 // The above code could be used directly if already on a system stack,
996 // but then the only path through this code would be a rare case on Solaris.
997 // Using this code for all "already on system stack" calls exercises it more,
998 // which should help keep it correct.
999 MOVD RSP, R13
1000 SUB $16, R13
1001 MOVD R13, RSP
1002 MOVD $0, R4
1003 MOVD R4, 0(RSP) // Where above code stores g, in case someone looks during debugging.
1004 MOVD R2, 8(RSP) // Save original stack pointer.
1005 BL (R1)
1006 // Restore stack pointer.
1007 MOVD 8(RSP), R2
1008 MOVD R2, RSP
1009 MOVD R0, ret+16(FP)
1010 RET
1011
1012 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
1013 // See cgocall.go for more details.
1014 TEXT ·cgocallback(SB),NOSPLIT,$24-24
1015 NO_LOCAL_POINTERS
1016
1017 // Load g from thread-local storage.
1018 BL runtime·load_g(SB)
1019
1020 // If g is nil, Go did not create the current thread.
1021 // Call needm to obtain one for temporary use.
1022 // In this case, we're running on the thread stack, so there's
1023 // lots of space, but the linker doesn't know. Hide the call from
1024 // the linker analysis by using an indirect call.
1025 CBZ g, needm
1026
1027 MOVD g_m(g), R8
1028 MOVD R8, savedm-8(SP)
1029 B havem
1030
1031 needm:
1032 MOVD g, savedm-8(SP) // g is zero, so is m.
1033 MOVD $runtime·needm(SB), R0
1034 BL (R0)
1035
1036 // Set m->g0->sched.sp = SP, so that if a panic happens
1037 // during the function we are about to execute, it will
1038 // have a valid SP to run on the g0 stack.
1039 // The next few lines (after the havem label)
1040 // will save this SP onto the stack and then write
1041 // the same SP back to m->sched.sp. That seems redundant,
1042 // but if an unrecovered panic happens, unwindm will
1043 // restore the g->sched.sp from the stack location
1044 // and then systemstack will try to use it. If we don't set it here,
1045 // that restored SP will be uninitialized (typically 0) and
1046 // will not be usable.
1047 MOVD g_m(g), R8
1048 MOVD m_g0(R8), R3
1049 MOVD RSP, R0
1050 MOVD R0, (g_sched+gobuf_sp)(R3)
1051 MOVD R29, (g_sched+gobuf_bp)(R3)
1052
1053 havem:
1054 // Now there's a valid m, and we're running on its m->g0.
1055 // Save current m->g0->sched.sp on stack and then set it to SP.
1056 // Save current sp in m->g0->sched.sp in preparation for
1057 // switch back to m->curg stack.
1058 // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
1059 // Beware that the frame size is actually 32+16.
1060 MOVD m_g0(R8), R3
1061 MOVD (g_sched+gobuf_sp)(R3), R4
1062 MOVD R4, savedsp-16(SP)
1063 MOVD RSP, R0
1064 MOVD R0, (g_sched+gobuf_sp)(R3)
1065
1066 // Switch to m->curg stack and call runtime.cgocallbackg.
1067 // Because we are taking over the execution of m->curg
1068 // but *not* resuming what had been running, we need to
1069 // save that information (m->curg->sched) so we can restore it.
1070 // We can restore m->curg->sched.sp easily, because calling
1071 // runtime.cgocallbackg leaves SP unchanged upon return.
1072 // To save m->curg->sched.pc, we push it onto the curg stack and
1073 // open a frame the same size as cgocallback's g0 frame.
1074 // Once we switch to the curg stack, the pushed PC will appear
1075 // to be the return PC of cgocallback, so that the traceback
1076 // will seamlessly trace back into the earlier calls.
1077 MOVD m_curg(R8), g
1078 BL runtime·save_g(SB)
1079 MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
1080 MOVD (g_sched+gobuf_pc)(g), R5
1081 MOVD R5, -48(R4)
1082 MOVD (g_sched+gobuf_bp)(g), R5
1083 MOVD R5, -56(R4)
1084 // Gather our arguments into registers.
1085 MOVD fn+0(FP), R1
1086 MOVD frame+8(FP), R2
1087 MOVD ctxt+16(FP), R3
1088 MOVD $-48(R4), R0 // maintain 16-byte SP alignment
1089 MOVD R0, RSP // switch stack
1090 MOVD R1, 8(RSP)
1091 MOVD R2, 16(RSP)
1092 MOVD R3, 24(RSP)
1093 MOVD $runtime·cgocallbackg(SB), R0
1094 CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now.
1095
1096 // Restore g->sched (== m->curg->sched) from saved values.
1097 MOVD 0(RSP), R5
1098 MOVD R5, (g_sched+gobuf_pc)(g)
1099 MOVD RSP, R4
1100 ADD $48, R4, R4
1101 MOVD R4, (g_sched+gobuf_sp)(g)
1102
1103 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1104 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1105 // so we do not have to restore it.)
1106 MOVD g_m(g), R8
1107 MOVD m_g0(R8), g
1108 BL runtime·save_g(SB)
1109 MOVD (g_sched+gobuf_sp)(g), R0
1110 MOVD R0, RSP
1111 MOVD savedsp-16(SP), R4
1112 MOVD R4, (g_sched+gobuf_sp)(g)
1113
1114 // If the m on entry was nil, we called needm above to borrow an m
1115 // for the duration of the call. Since the call is over, return it with dropm.
1116 MOVD savedm-8(SP), R6
1117 CBNZ R6, droppedm
1118 MOVD $runtime·dropm(SB), R0
1119 BL (R0)
1120 droppedm:
1121
1122 // Done!
1123 RET
1124
1125 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1126 // Must obey the gcc calling convention.
1127 TEXT _cgo_topofstack(SB),NOSPLIT,$24
1128 // g (R28) and REGTMP (R27) might be clobbered by load_g. They
1129 // are callee-save in the gcc calling convention, so save them.
1130 MOVD R27, savedR27-8(SP)
1131 MOVD g, saveG-16(SP)
1132
1133 BL runtime·load_g(SB)
1134 MOVD g_m(g), R0
1135 MOVD m_curg(R0), R0
1136 MOVD (g_stack+stack_hi)(R0), R0
1137
1138 MOVD saveG-16(SP), g
1139 MOVD savedR27-8(SP), R27
1140 RET
1141
1142 // void setg(G*); set g. For use by needm.
1143 TEXT runtime·setg(SB), NOSPLIT, $0-8
1144 MOVD gg+0(FP), g
1145 // This only happens if iscgo, so jump straight to save_g
1146 BL runtime·save_g(SB)
1147 RET
1148
1149 // void setg_gcc(G*); set g, called from gcc.
1150 TEXT setg_gcc<>(SB),NOSPLIT,$8
1151 MOVD R0, g
1152 MOVD R27, savedR27-8(SP)
1153 BL runtime·save_g(SB)
1154 MOVD savedR27-8(SP), R27
1155 RET
1156
1157 TEXT runtime·emptyfunc(SB),0,$0-0
1158 RET
1159
1160 TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
1161 MOVD ZR, R0
1162 MOVD (R0), R0
1163 UNDEF
1164
1165 TEXT runtime·return0(SB), NOSPLIT, $0
1166 MOVW $0, R0
1167 RET
1168
1169 // The top-most function running on a goroutine
1170 // returns to goexit+PCQuantum.
1171 TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
1172 MOVD R0, R0 // NOP
1173 BL runtime·goexit1(SB) // does not return
1174
1175 // This is called from .init_array and follows the platform, not Go, ABI.
1176 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1177 SUB $0x10, RSP
1178 MOVD R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
1179 MOVD runtime·lastmoduledatap(SB), R1
1180 MOVD R0, moduledata_next(R1)
1181 MOVD R0, runtime·lastmoduledatap(SB)
1182 MOVD 8(RSP), R27
1183 ADD $0x10, RSP
1184 RET
1185
1186 TEXT ·checkASM(SB),NOSPLIT,$0-1
1187 MOVW $1, R3
1188 MOVB R3, ret+0(FP)
1189 RET
1190
1191 // gcWriteBarrier performs a heap pointer write and informs the GC.
1192 //
1193 // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
1194 // - R2 is the destination of the write
1195 // - R3 is the value being written at R2
1196 // It clobbers condition codes.
1197 // It does not clobber any general-purpose registers,
1198 // but may clobber others (e.g., floating point registers).
1199 // The act of CALLing gcWriteBarrier will clobber R30 (LR).
1200 //
1201 // Defined as ABIInternal since the compiler generates ABIInternal
1202 // calls to it directly and it does not use the stack-based Go ABI.
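// Fast path: record the (value, *slot) pair in this P's write barrier buffer
// and then do the store; when the buffer becomes full, the flush path saves
// the remaining registers and calls wbBufFlush to drain it.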
1203 TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$200
1204 // Save the registers clobbered by the fast path.
1205 STP (R0, R1), 184(RSP)
1206 MOVD g_m(g), R0
1207 MOVD m_p(R0), R0
1208 MOVD (p_wbBuf+wbBuf_next)(R0), R1
1209 // Increment wbBuf.next position.
1210 ADD $16, R1
1211 MOVD R1, (p_wbBuf+wbBuf_next)(R0)
1212 MOVD (p_wbBuf+wbBuf_end)(R0), R0
1213 CMP R1, R0
1214 // Record the write.
1215 MOVD R3, -16(R1) // Record value
1216 MOVD (R2), R0 // TODO: This turns bad writes into bad reads.
1217 MOVD R0, -8(R1) // Record *slot
1218 // Is the buffer full? (flags set in CMP above)
1219 BEQ flush
1220 ret:
1221 LDP 184(RSP), (R0, R1)
1222 // Do the write.
1223 MOVD R3, (R2)
1224 RET
1225
1226 flush:
1227 // Save all general purpose registers since these could be
1228 // clobbered by wbBufFlush and were not saved by the caller.
1229 // R0 and R1 already saved
1230 STP (R2, R3), 1*8(RSP) // Also first and second arguments to wbBufFlush
1231 STP (R4, R5), 3*8(RSP)
1232 STP (R6, R7), 5*8(RSP)
1233 STP (R8, R9), 7*8(RSP)
1234 STP (R10, R11), 9*8(RSP)
1235 STP (R12, R13), 11*8(RSP)
1236 STP (R14, R15), 13*8(RSP)
1237 // R16, R17 may be clobbered by linker trampoline
1238 // R18 is unused.
1239 STP (R19, R20), 15*8(RSP)
1240 STP (R21, R22), 17*8(RSP)
1241 STP (R23, R24), 19*8(RSP)
1242 STP (R25, R26), 21*8(RSP)
1243 // R27 is temp register.
1244 // R28 is g.
1245 // R29 is frame pointer (unused).
1246 // R30 is LR, which was saved by the prologue.
1247 // R31 is SP.
1248
1249 // This takes arguments R2 and R3.
1250 CALL runtime·wbBufFlush(SB)
1251 LDP 1*8(RSP), (R2, R3)
1252 LDP 3*8(RSP), (R4, R5)
1253 LDP 5*8(RSP), (R6, R7)
1254 LDP 7*8(RSP), (R8, R9)
1255 LDP 9*8(RSP), (R10, R11)
1256 LDP 11*8(RSP), (R12, R13)
1257 LDP 13*8(RSP), (R14, R15)
1258 LDP 15*8(RSP), (R19, R20)
1259 LDP 17*8(RSP), (R21, R22)
1260 LDP 19*8(RSP), (R23, R24)
1261 LDP 21*8(RSP), (R25, R26)
1262 JMP ret
1263
1264 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1265 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1266
1267 // debugCallV2 is the entry point for debugger-injected function
1268 // calls on running goroutines. It informs the runtime that a
1269 // debug call has been injected and creates a call frame for the
1270 // debugger to fill in.
1271 //
1272 // To inject a function call, a debugger should:
1273 // 1. Check that the goroutine is in state _Grunning and that
1274 // there are at least 288 bytes free on the stack.
1275 // 2. Set SP as SP-16.
1276 // 3. Store the current LR in (SP) (using the SP after step 2).
1277 // 4. Store the current PC in the LR register.
1278 // 5. Write the desired argument frame size at SP-16
1279 // 6. Save all machine registers (including flags and fpsimd registers)
1280 // so they can be restored later by the debugger.
1281 // 7. Set the PC to debugCallV2 and resume execution.
1282 //
1283 // If the goroutine is in state _Grunnable, then it's not generally
1284 // safe to inject a call because it may return out via other runtime
1285 // operations. Instead, the debugger should unwind the stack to find
1286 // the return to non-runtime code, add a temporary breakpoint there,
1287 // and inject the call once that breakpoint is hit.
1288 //
1289 // If the goroutine is in any other state, it's not safe to inject a call.
1290 //
1291 // This function communicates back to the debugger by setting R20 and
1292 // invoking BRK to raise a breakpoint signal. Note that the signal PC of
1293 // the signal triggered by the BRK instruction is the PC where the signal
1294 // is trapped, not the next PC, so to resume execution, the debugger needs
1295 // to set the signal PC to PC+4. See the comments in the implementation for
1296 // the protocol the debugger is expected to follow. InjectDebugCall in the
1297 // runtime tests demonstrates this protocol.
1298 //
1299 // The debugger must ensure that any pointers passed to the function
1300 // obey escape analysis requirements. Specifically, it must not pass
1301 // a stack pointer to an escaping argument. debugCallV2 cannot check
1302 // this invariant.
1303 //
1304 // This is ABIInternal because Go code injects its PC directly into new
1305 // goroutine stacks.
1306 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
1307 STP (R29, R30), -280(RSP)
1308 SUB $272, RSP, RSP
1309 SUB $8, RSP, R29
1310 // Save all registers that may contain pointers so they can be
1311 // conservatively scanned.
1312 //
1313 // We can't do anything that might clobber any of these
1314 // registers before this.
1315 STP (R27, g), (30*8)(RSP)
1316 STP (R25, R26), (28*8)(RSP)
1317 STP (R23, R24), (26*8)(RSP)
1318 STP (R21, R22), (24*8)(RSP)
1319 STP (R19, R20), (22*8)(RSP)
1320 STP (R16, R17), (20*8)(RSP)
1321 STP (R14, R15), (18*8)(RSP)
1322 STP (R12, R13), (16*8)(RSP)
1323 STP (R10, R11), (14*8)(RSP)
1324 STP (R8, R9), (12*8)(RSP)
1325 STP (R6, R7), (10*8)(RSP)
1326 STP (R4, R5), (8*8)(RSP)
1327 STP (R2, R3), (6*8)(RSP)
1328 STP (R0, R1), (4*8)(RSP)
1329
1330 // Perform a safe-point check.
1331 MOVD R30, 8(RSP) // Caller's PC
1332 CALL runtime·debugCallCheck(SB)
1333 MOVD 16(RSP), R0
1334 CBZ R0, good
1335
1336 // The safety check failed. Put the reason string at the top
1337 // of the stack.
1338 MOVD R0, 8(RSP)
1339 MOVD 24(RSP), R0
1340 MOVD R0, 16(RSP)
1341
1342 // Set R20 to 8 and invoke BRK. The debugger should get the
1343 // reason a call can't be injected from SP+8 and resume execution.
1344 MOVD $8, R20
1345 BREAK
1346 JMP restore
1347
1348 good:
1349 // Registers are saved and it's safe to make a call.
1350 // Open up a call frame, moving the stack if necessary.
1351 //
1352 // Once the frame is allocated, this will set R20 to 0 and
1353 // invoke BRK. The debugger should write the argument
1354 // frame for the call at SP+8, set up argument registers,
1355 // set the LR as the signal PC + 4, set the PC to the function
1356 // to call, set R26 to point to the closure (if a closure call),
1357 // and resume execution.
1358 //
1359 // If the function returns, this will set R20 to 1 and invoke
1360 // BRK. The debugger can then inspect any return value saved
1361 // on the stack at SP+8 and in registers. To resume execution,
1362 // the debugger should restore the LR from (SP).
1363 //
1364 // If the function panics, this will set R20 to 2 and invoke BRK.
1365 // The interface{} value of the panic will be at SP+8. The debugger
1366 // can inspect the panic value and resume execution again.
1367 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1368 CMP $MAXSIZE, R0; \
1369 BGT 5(PC); \
1370 MOVD $NAME(SB), R0; \
1371 MOVD R0, 8(RSP); \
1372 CALL runtime·debugCallWrap(SB); \
1373 JMP restore
1374
1375 MOVD 256(RSP), R0 // the argument frame size
1376 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1377 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1378 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1379 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1380 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1381 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1382 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1383 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1384 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1385 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1386 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1387 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1388 // The frame size is too large. Report the error.
1389 MOVD $debugCallFrameTooLarge<>(SB), R0
1390 MOVD R0, 8(RSP)
1391 MOVD $20, R0
1392 MOVD R0, 16(RSP) // length of debugCallFrameTooLarge string
1393 MOVD $8, R20
1394 BREAK
1395 JMP restore
1396
1397 restore:
1398 // Calls and failures resume here.
1399 //
1400 // Set R20 to 16 and invoke BRK. The debugger should restore
1401 // all registers except for PC and RSP and resume execution.
1402 MOVD $16, R20
1403 BREAK
1404 // We must not modify flags after this point.
1405
1406 // Restore pointer-containing registers, which may have been
1407 // modified from the debugger's copy by stack copying.
1408 LDP (30*8)(RSP), (R27, g)
1409 LDP (28*8)(RSP), (R25, R26)
1410 LDP (26*8)(RSP), (R23, R24)
1411 LDP (24*8)(RSP), (R21, R22)
1412 LDP (22*8)(RSP), (R19, R20)
1413 LDP (20*8)(RSP), (R16, R17)
1414 LDP (18*8)(RSP), (R14, R15)
1415 LDP (16*8)(RSP), (R12, R13)
1416 LDP (14*8)(RSP), (R10, R11)
1417 LDP (12*8)(RSP), (R8, R9)
1418 LDP (10*8)(RSP), (R6, R7)
1419 LDP (8*8)(RSP), (R4, R5)
1420 LDP (6*8)(RSP), (R2, R3)
1421 LDP (4*8)(RSP), (R0, R1)
1422
1423 LDP -8(RSP), (R29, R27)
1424 ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext
1425 MOVD -16(RSP), R30 // restore old lr
1426 JMP (R27)
1427
1428 // runtime.debugCallCheck assumes that functions defined with the
1429 // DEBUG_CALL_FN macro are safe points to inject calls.
1430 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
1431 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1432 NO_LOCAL_POINTERS; \
1433 MOVD $0, R20; \
1434 BREAK; \
1435 MOVD $1, R20; \
1436 BREAK; \
1437 RET
1438 DEBUG_CALL_FN(debugCall32<>, 32)
1439 DEBUG_CALL_FN(debugCall64<>, 64)
1440 DEBUG_CALL_FN(debugCall128<>, 128)
1441 DEBUG_CALL_FN(debugCall256<>, 256)
1442 DEBUG_CALL_FN(debugCall512<>, 512)
1443 DEBUG_CALL_FN(debugCall1024<>, 1024)
1444 DEBUG_CALL_FN(debugCall2048<>, 2048)
1445 DEBUG_CALL_FN(debugCall4096<>, 4096)
1446 DEBUG_CALL_FN(debugCall8192<>, 8192)
1447 DEBUG_CALL_FN(debugCall16384<>, 16384)
1448 DEBUG_CALL_FN(debugCall32768<>, 32768)
1449 DEBUG_CALL_FN(debugCall65536<>, 65536)
1450
1451 // func debugCallPanicked(val interface{})
1452 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1453 // Copy the panic value to the top of stack at SP+8.
1454 MOVD val_type+0(FP), R0
1455 MOVD R0, 8(RSP)
1456 MOVD val_data+8(FP), R0
1457 MOVD R0, 16(RSP)
1458 MOVD $2, R20
1459 BREAK
1460 RET
1461
1462 // Note: these functions use a special calling convention to save generated code space.
1463 // Arguments are passed in registers, but the space for those arguments is allocated
1464 // in the caller's stack frame. These stubs write the args into that stack space and
1465 // then tail call to the corresponding runtime handler.
1466 // The tail call makes these stubs disappear in backtraces.
1467 //
1468 // Defined as ABIInternal since the compiler generates ABIInternal
1469 // calls to it directly and it does not use the stack-based Go ABI.
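// For example, panicSliceAlen below receives its two operands in R1 and R2
// and moves them into R0 and R1, the argument registers expected by
// goPanicSliceAlen.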
1470 TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
1471 JMP runtime·goPanicIndex<ABIInternal>(SB)
1472 TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
1473 JMP runtime·goPanicIndexU<ABIInternal>(SB)
1474 TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
1475 MOVD R1, R0
1476 MOVD R2, R1
1477 JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
1478 TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
1479 MOVD R1, R0
1480 MOVD R2, R1
1481 JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
1482 TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
1483 MOVD R1, R0
1484 MOVD R2, R1
1485 JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
1486 TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
1487 MOVD R1, R0
1488 MOVD R2, R1
1489 JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
1490 TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
1491 JMP runtime·goPanicSliceB<ABIInternal>(SB)
1492 TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
1493 JMP runtime·goPanicSliceBU<ABIInternal>(SB)
1494 TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
1495 MOVD R2, R0
1496 MOVD R3, R1
1497 JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
1498 TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
1499 MOVD R2, R0
1500 MOVD R3, R1
1501 JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
1502 TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
1503 MOVD R2, R0
1504 MOVD R3, R1
1505 JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
1506 TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
1507 MOVD R2, R0
1508 MOVD R3, R1
1509 JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
1510 TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
1511 MOVD R1, R0
1512 MOVD R2, R1
1513 JMP runtime·goPanicSlice3B<ABIInternal>(SB)
1514 TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
1515 MOVD R1, R0
1516 MOVD R2, R1
1517 JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
1518 TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
1519 JMP runtime·goPanicSlice3C<ABIInternal>(SB)
1520 TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
1521 JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
1522 TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
1523 MOVD R2, R0
1524 MOVD R3, R1
1525 JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
1526