src/runtime/asm_arm64.s
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "tls_arm64.h"
8 #include "funcdata.h"
9 #include "textflag.h"
10
11 TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
12 // SP = stack; R0 = argc; R1 = argv
13
14 SUB $32, RSP
15 MOVW R0, 8(RSP) // argc
16 MOVD R1, 16(RSP) // argv
17
18 #ifdef TLS_darwin
19 // Initialize TLS.
20 MOVD ZR, g // clear g, make sure it's not junk.
21 SUB $32, RSP
22 MRS_TPIDR_R0
23 AND $~7, R0
24 MOVD R0, 16(RSP) // arg2: TLS base
25 MOVD $runtime·tls_g(SB), R2
26 MOVD R2, 8(RSP) // arg1: &tlsg
27 BL ·tlsinit(SB)
28 ADD $32, RSP
29 #endif
30
31 // create istack out of the given (operating system) stack.
32 // _cgo_init may update stackguard.
33 MOVD $runtime·g0(SB), g
34 MOVD RSP, R7
35 MOVD $(-64*1024)(R7), R0
36 MOVD R0, g_stackguard0(g)
37 MOVD R0, g_stackguard1(g)
38 MOVD R0, (g_stack+stack_lo)(g)
39 MOVD R7, (g_stack+stack_hi)(g)
40
41 // if there is a _cgo_init, call it using the gcc ABI.
42 MOVD _cgo_init(SB), R12
43 CBZ R12, nocgo
44
45 #ifdef GOOS_android
46 MRS_TPIDR_R0 // load TLS base pointer
47 MOVD R0, R3 // arg 3: TLS base pointer
48 MOVD $runtime·tls_g(SB), R2 // arg 2: &tls_g
49 #else
50 MOVD $0, R2 // arg 2: not used when using platform's TLS
51 #endif
52 MOVD $setg_gcc<>(SB), R1 // arg 1: setg
53 MOVD g, R0 // arg 0: G
54 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
55 BL (R12)
56 ADD $16, RSP
57
58 nocgo:
59 BL runtime·save_g(SB)
60 // update stackguard after _cgo_init
61 MOVD (g_stack+stack_lo)(g), R0
62 ADD $const_stackGuard, R0
63 MOVD R0, g_stackguard0(g)
64 MOVD R0, g_stackguard1(g)
65
66 // set the per-goroutine and per-mach "registers"
67 MOVD $runtime·m0(SB), R0
68
69 // save m->g0 = g0
70 MOVD g, m_g0(R0)
71 // save m0 to g0->m
72 MOVD R0, g_m(g)
73
74 BL runtime·check(SB)
75
76 #ifdef GOOS_windows
77 BL runtime·wintls(SB)
78 #endif
79
80 MOVW 8(RSP), R0 // copy argc
81 MOVW R0, -8(RSP)
82 MOVD 16(RSP), R0 // copy argv
83 MOVD R0, 0(RSP)
84 BL runtime·args(SB)
85 BL runtime·osinit(SB)
86 BL runtime·schedinit(SB)
87
88 // create a new goroutine to start program
89 MOVD $runtime·mainPC(SB), R0 // entry
90 SUB $16, RSP
91 MOVD R0, 8(RSP) // arg
92 MOVD $0, 0(RSP) // dummy LR
93 BL runtime·newproc(SB)
94 ADD $16, RSP
95
96 // start this M
97 BL runtime·mstart(SB)
98
99 // Prevent dead-code elimination of debugCallV2, which is
100 // intended to be called by debuggers.
101 MOVD $runtime·debugCallV2<ABIInternal>(SB), R0
102
103 MOVD $0, R0
104 MOVD R0, (R0) // boom
105 UNDEF
106
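107 // mainPC is a function value for runtime.main, to be passed to newproc
108 // above. The reference to runtime.main is made via ABIInternal, since the
109 // actual function (not the ABI0 wrapper) is what newproc needs.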
107 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
108 GLOBL runtime·mainPC(SB),RODATA,$8
109
110 // Windows ARM64 needs an immediate 0xf000 argument.
111 // See go.dev/issues/53837.
112 #define BREAK \
113 #ifdef GOOS_windows \
114 BRK $0xf000 \
115 #else \
116 BRK \
117 #endif \
118
119
120 TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
121 BREAK
122 RET
123
124 TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
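	// Nothing to set up on arm64.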
125 RET
126
127 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
128 BL runtime·mstart0(SB)
129 RET // not reached
130
131 /*
132 * go-routine
133 */
134
135 // void gogo(Gobuf*)
136 // restore state from Gobuf; longjmp
137 TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
138 MOVD buf+0(FP), R5
139 MOVD gobuf_g(R5), R6
140 MOVD 0(R6), R4 // make sure g != nil
141 B gogo<>(SB)
142
143 TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
144 MOVD R6, g
145 BL runtime·save_g(SB)
146
147 MOVD gobuf_sp(R5), R0
148 MOVD R0, RSP
149 MOVD gobuf_bp(R5), R29
150 MOVD gobuf_lr(R5), LR
151 MOVD gobuf_ret(R5), R0
152 MOVD gobuf_ctxt(R5), R26
153 MOVD $0, gobuf_sp(R5)
154 MOVD $0, gobuf_bp(R5)
155 MOVD $0, gobuf_ret(R5)
156 MOVD $0, gobuf_lr(R5)
157 MOVD $0, gobuf_ctxt(R5)
158 CMP ZR, ZR // set condition codes for == test, needed by stack split
159 MOVD gobuf_pc(R5), R6
160 B (R6)
161
162 // void mcall(fn func(*g))
163 // Switch to m->g0's stack, call fn(g).
164 // Fn must never return. It should gogo(&g->sched)
165 // to keep running g.
166 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
167 MOVD R0, R26 // context
168
169 // Save caller state in g->sched
170 MOVD RSP, R0
171 MOVD R0, (g_sched+gobuf_sp)(g)
172 MOVD R29, (g_sched+gobuf_bp)(g)
173 MOVD LR, (g_sched+gobuf_pc)(g)
174 MOVD $0, (g_sched+gobuf_lr)(g)
175
176 // Switch to m->g0 & its stack, call fn.
177 MOVD g, R3
178 MOVD g_m(g), R8
179 MOVD m_g0(R8), g
180 BL runtime·save_g(SB)
181 CMP g, R3
182 BNE 2(PC)
183 B runtime·badmcall(SB)
184
185 MOVD (g_sched+gobuf_sp)(g), R0
186 MOVD R0, RSP // sp = m->g0->sched.sp
187 MOVD (g_sched+gobuf_bp)(g), R29
188 MOVD R3, R0 // arg = g
189 MOVD $0, -16(RSP) // dummy LR
190 SUB $16, RSP
191 MOVD 0(R26), R4 // code pointer
192 BL (R4)
193 B runtime·badmcall2(SB)
194
195 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
196 // of the G stack. We need to distinguish the routine that
197 // lives at the bottom of the G stack from the one that lives
198 // at the top of the system stack because the one at the top of
199 // the system stack terminates the stack walk (see topofstack()).
200 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
201 UNDEF
202 BL (LR) // make sure this function is not leaf
203 RET
204
205 // func systemstack(fn func())
206 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
207 MOVD fn+0(FP), R3 // R3 = fn
208 MOVD R3, R26 // context
209 MOVD g_m(g), R4 // R4 = m
210
211 MOVD m_gsignal(R4), R5 // R5 = gsignal
212 CMP g, R5
213 BEQ noswitch
214
215 MOVD m_g0(R4), R5 // R5 = g0
216 CMP g, R5
217 BEQ noswitch
218
219 MOVD m_curg(R4), R6
220 CMP g, R6
221 BEQ switch
222
223 // Bad: g is not gsignal, not g0, not curg. What is it?
224 // Hide call from linker nosplit analysis.
225 MOVD $runtime·badsystemstack(SB), R3
226 BL (R3)
227 B runtime·abort(SB)
228
229 switch:
230 // save our state in g->sched. Pretend to
231 // be systemstack_switch if the G stack is scanned.
232 BL gosave_systemstack_switch<>(SB)
233
234 // switch to g0
235 MOVD R5, g
236 BL runtime·save_g(SB)
237 MOVD (g_sched+gobuf_sp)(g), R3
238 MOVD R3, RSP
239 MOVD (g_sched+gobuf_bp)(g), R29
240
241 // call target function
242 MOVD 0(R26), R3 // code pointer
243 BL (R3)
244
245 // switch back to g
246 MOVD g_m(g), R3
247 MOVD m_curg(R3), g
248 BL runtime·save_g(SB)
249 MOVD (g_sched+gobuf_sp)(g), R0
250 MOVD R0, RSP
251 MOVD (g_sched+gobuf_bp)(g), R29
252 MOVD $0, (g_sched+gobuf_sp)(g)
253 MOVD $0, (g_sched+gobuf_bp)(g)
254 RET
255
256 noswitch:
257 // already on m stack, just call directly
258 // Using a tail call here cleans up tracebacks since we won't stop
259 // at an intermediate systemstack.
260 MOVD 0(R26), R3 // code pointer
261 MOVD.P 16(RSP), R30 // restore LR
262 SUB $8, RSP, R29 // restore FP
263 B (R3)
264
265 /*
266 * support for morestack
267 */
268
269 // Called during function prolog when more stack is needed.
270 // Caller has already loaded:
271 // R3 prolog's LR (R30)
272 //
273 // The traceback routines see morestack on a g0 as being
274 // the top of a stack (for example, morestack calling newstack
275 // calling the scheduler calling newm calling gc), so we must
276 // record an argument size. For that purpose, it has no arguments.
277 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
278 // Cannot grow scheduler stack (m->g0).
279 MOVD g_m(g), R8
280 MOVD m_g0(R8), R4
281 CMP g, R4
282 BNE 3(PC)
283 BL runtime·badmorestackg0(SB)
284 B runtime·abort(SB)
285
286 // Cannot grow signal stack (m->gsignal).
287 MOVD m_gsignal(R8), R4
288 CMP g, R4
289 BNE 3(PC)
290 BL runtime·badmorestackgsignal(SB)
291 B runtime·abort(SB)
292
293 // Called from f.
294 // Set g->sched to context in f
295 MOVD RSP, R0
296 MOVD R0, (g_sched+gobuf_sp)(g)
297 MOVD R29, (g_sched+gobuf_bp)(g)
298 MOVD LR, (g_sched+gobuf_pc)(g)
299 MOVD R3, (g_sched+gobuf_lr)(g)
300 MOVD R26, (g_sched+gobuf_ctxt)(g)
301
302 // Called from f.
303 // Set m->morebuf to f's callers.
304 MOVD R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
305 MOVD RSP, R0
306 MOVD R0, (m_morebuf+gobuf_sp)(R8) // f's caller's RSP
307 MOVD g, (m_morebuf+gobuf_g)(R8)
308
309 // Call newstack on m->g0's stack.
310 MOVD m_g0(R8), g
311 BL runtime·save_g(SB)
312 MOVD (g_sched+gobuf_sp)(g), R0
313 MOVD R0, RSP
314 MOVD (g_sched+gobuf_bp)(g), R29
315 MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned)
316 BL runtime·newstack(SB)
317
318 // Not reached, but make sure the return PC from the call to newstack
319 // is still in this function, and not the beginning of the next.
320 UNDEF
321
322 TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
323 // Force SPWRITE. This function doesn't actually write SP,
324 // but it is called with a special calling convention where
325 // the caller doesn't save LR on stack but passes it as a
326 // register (R3), which the unwinder currently doesn't understand.
327 // Make it SPWRITE to stop unwinding. (See issue 54332)
328 MOVD RSP, RSP
329
330 MOVW $0, R26
331 B runtime·morestack(SB)
332
333 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
334 TEXT ·spillArgs(SB),NOSPLIT,$0-0
335 STP (R0, R1), (0*8)(R20)
336 STP (R2, R3), (2*8)(R20)
337 STP (R4, R5), (4*8)(R20)
338 STP (R6, R7), (6*8)(R20)
339 STP (R8, R9), (8*8)(R20)
340 STP (R10, R11), (10*8)(R20)
341 STP (R12, R13), (12*8)(R20)
342 STP (R14, R15), (14*8)(R20)
343 FSTPD (F0, F1), (16*8)(R20)
344 FSTPD (F2, F3), (18*8)(R20)
345 FSTPD (F4, F5), (20*8)(R20)
346 FSTPD (F6, F7), (22*8)(R20)
347 FSTPD (F8, F9), (24*8)(R20)
348 FSTPD (F10, F11), (26*8)(R20)
349 FSTPD (F12, F13), (28*8)(R20)
350 FSTPD (F14, F15), (30*8)(R20)
351 RET
352
353 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
354 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
355 LDP (0*8)(R20), (R0, R1)
356 LDP (2*8)(R20), (R2, R3)
357 LDP (4*8)(R20), (R4, R5)
358 LDP (6*8)(R20), (R6, R7)
359 LDP (8*8)(R20), (R8, R9)
360 LDP (10*8)(R20), (R10, R11)
361 LDP (12*8)(R20), (R12, R13)
362 LDP (14*8)(R20), (R14, R15)
363 FLDPD (16*8)(R20), (F0, F1)
364 FLDPD (18*8)(R20), (F2, F3)
365 FLDPD (20*8)(R20), (F4, F5)
366 FLDPD (22*8)(R20), (F6, F7)
367 FLDPD (24*8)(R20), (F8, F9)
368 FLDPD (26*8)(R20), (F10, F11)
369 FLDPD (28*8)(R20), (F12, F13)
370 FLDPD (30*8)(R20), (F14, F15)
371 RET
372
373 // reflectcall: call a function with the given argument list
374 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
375 // we don't have variable-sized frames, so we use a small number
376 // of constant-sized-frame functions to encode a few bits of size in the pc.
377 // Caution: ugly multiline assembly macros in your future!
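378 // For example, a frameSize of 24 is greater than 16 but at most 32, so
379 // reflectcall dispatches to runtime·call32, which has a fixed 32-byte local frame.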
378
379 #define DISPATCH(NAME,MAXSIZE) \
380 MOVD $MAXSIZE, R27; \
381 CMP R27, R16; \
382 BGT 3(PC); \
383 MOVD $NAME(SB), R27; \
384 B (R27)
385 // Note: can't just "B NAME(SB)" - bad inlining results.
386
387 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
388 MOVWU frameSize+32(FP), R16
389 DISPATCH(runtime·call16, 16)
390 DISPATCH(runtime·call32, 32)
391 DISPATCH(runtime·call64, 64)
392 DISPATCH(runtime·call128, 128)
393 DISPATCH(runtime·call256, 256)
394 DISPATCH(runtime·call512, 512)
395 DISPATCH(runtime·call1024, 1024)
396 DISPATCH(runtime·call2048, 2048)
397 DISPATCH(runtime·call4096, 4096)
398 DISPATCH(runtime·call8192, 8192)
399 DISPATCH(runtime·call16384, 16384)
400 DISPATCH(runtime·call32768, 32768)
401 DISPATCH(runtime·call65536, 65536)
402 DISPATCH(runtime·call131072, 131072)
403 DISPATCH(runtime·call262144, 262144)
404 DISPATCH(runtime·call524288, 524288)
405 DISPATCH(runtime·call1048576, 1048576)
406 DISPATCH(runtime·call2097152, 2097152)
407 DISPATCH(runtime·call4194304, 4194304)
408 DISPATCH(runtime·call8388608, 8388608)
409 DISPATCH(runtime·call16777216, 16777216)
410 DISPATCH(runtime·call33554432, 33554432)
411 DISPATCH(runtime·call67108864, 67108864)
412 DISPATCH(runtime·call134217728, 134217728)
413 DISPATCH(runtime·call268435456, 268435456)
414 DISPATCH(runtime·call536870912, 536870912)
415 DISPATCH(runtime·call1073741824, 1073741824)
416 MOVD $runtime·badreflectcall(SB), R0
417 B (R0)
418
419 #define CALLFN(NAME,MAXSIZE) \
420 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
421 NO_LOCAL_POINTERS; \
422 /* copy arguments to stack */ \
423 MOVD stackArgs+16(FP), R3; \
424 MOVWU stackArgsSize+24(FP), R4; \
425 ADD $8, RSP, R5; \
426 BIC $0xf, R4, R6; \
427 CBZ R6, 6(PC); \
428 /* if R6=(argsize&~15) != 0 */ \
429 ADD R6, R5, R6; \
430 /* copy 16 bytes a time */ \
431 LDP.P 16(R3), (R7, R8); \
432 STP.P (R7, R8), 16(R5); \
433 CMP R5, R6; \
434 BNE -3(PC); \
435 AND $0xf, R4, R6; \
436 CBZ R6, 6(PC); \
437 /* if R6=(argsize&15) != 0 */ \
438 ADD R6, R5, R6; \
439 /* copy 1 byte a time for the rest */ \
440 MOVBU.P 1(R3), R7; \
441 MOVBU.P R7, 1(R5); \
442 CMP R5, R6; \
443 BNE -3(PC); \
444 /* set up argument registers */ \
445 MOVD regArgs+40(FP), R20; \
446 CALL ·unspillArgs(SB); \
447 /* call function */ \
448 MOVD f+8(FP), R26; \
449 MOVD (R26), R20; \
450 PCDATA $PCDATA_StackMapIndex, $0; \
451 BL (R20); \
452 /* copy return values back */ \
453 MOVD regArgs+40(FP), R20; \
454 CALL ·spillArgs(SB); \
455 MOVD stackArgsType+0(FP), R7; \
456 MOVD stackArgs+16(FP), R3; \
457 MOVWU stackArgsSize+24(FP), R4; \
458 MOVWU stackRetOffset+28(FP), R6; \
459 ADD $8, RSP, R5; \
460 ADD R6, R5; \
461 ADD R6, R3; \
462 SUB R6, R4; \
463 BL callRet<>(SB); \
464 RET
465
466 // callRet copies return values back at the end of call*. This is a
467 // separate function so it can allocate stack space for the arguments
468 // to reflectcallmove. It does not follow the Go ABI; it expects its
469 // arguments in registers.
470 TEXT callRet<>(SB), NOSPLIT, $48-0
471 NO_LOCAL_POINTERS
472 STP (R7, R3), 8(RSP)
473 STP (R5, R4), 24(RSP)
474 MOVD R20, 40(RSP)
475 BL runtime·reflectcallmove(SB)
476 RET
477
478 CALLFN(·call16, 16)
479 CALLFN(·call32, 32)
480 CALLFN(·call64, 64)
481 CALLFN(·call128, 128)
482 CALLFN(·call256, 256)
483 CALLFN(·call512, 512)
484 CALLFN(·call1024, 1024)
485 CALLFN(·call2048, 2048)
486 CALLFN(·call4096, 4096)
487 CALLFN(·call8192, 8192)
488 CALLFN(·call16384, 16384)
489 CALLFN(·call32768, 32768)
490 CALLFN(·call65536, 65536)
491 CALLFN(·call131072, 131072)
492 CALLFN(·call262144, 262144)
493 CALLFN(·call524288, 524288)
494 CALLFN(·call1048576, 1048576)
495 CALLFN(·call2097152, 2097152)
496 CALLFN(·call4194304, 4194304)
497 CALLFN(·call8388608, 8388608)
498 CALLFN(·call16777216, 16777216)
499 CALLFN(·call33554432, 33554432)
500 CALLFN(·call67108864, 67108864)
501 CALLFN(·call134217728, 134217728)
502 CALLFN(·call268435456, 268435456)
503 CALLFN(·call536870912, 536870912)
504 CALLFN(·call1073741824, 1073741824)
505
506 // func memhash32(p unsafe.Pointer, h uintptr) uintptr
507 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
508 MOVB runtime·useAeshash(SB), R10
509 CBZ R10, noaes
510 MOVD $runtime·aeskeysched+0(SB), R3
511
512 VEOR V0.B16, V0.B16, V0.B16
513 VLD1 (R3), [V2.B16]
514 VLD1 (R0), V0.S[1]
515 VMOV R1, V0.S[0]
516
517 AESE V2.B16, V0.B16
518 AESMC V0.B16, V0.B16
519 AESE V2.B16, V0.B16
520 AESMC V0.B16, V0.B16
521 AESE V2.B16, V0.B16
522
523 VMOV V0.D[0], R0
524 RET
525 noaes:
526 B runtime·memhash32Fallback<ABIInternal>(SB)
527
528 // func memhash64(p unsafe.Pointer, h uintptr) uintptr
529 TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
530 MOVB runtime·useAeshash(SB), R10
531 CBZ R10, noaes
532 MOVD $runtime·aeskeysched+0(SB), R3
533
534 VEOR V0.B16, V0.B16, V0.B16
535 VLD1 (R3), [V2.B16]
536 VLD1 (R0), V0.D[1]
537 VMOV R1, V0.D[0]
538
539 AESE V2.B16, V0.B16
540 AESMC V0.B16, V0.B16
541 AESE V2.B16, V0.B16
542 AESMC V0.B16, V0.B16
543 AESE V2.B16, V0.B16
544
545 VMOV V0.D[0], R0
546 RET
547 noaes:
548 B runtime·memhash64Fallback<ABIInternal>(SB)
549
550 // func memhash(p unsafe.Pointer, h, size uintptr) uintptr
551 TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
552 MOVB runtime·useAeshash(SB), R10
553 CBZ R10, noaes
554 B aeshashbody<>(SB)
555 noaes:
556 B runtime·memhashFallback<ABIInternal>(SB)
557
558 // func strhash(p unsafe.Pointer, h uintptr) uintptr
559 TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
560 MOVB runtime·useAeshash(SB), R10
561 CBZ R10, noaes
562 LDP (R0), (R0, R2) // string data / length
563 B aeshashbody<>(SB)
564 noaes:
565 B runtime·strhashFallback<ABIInternal>(SB)
566
567 // R0: data
568 // R1: seed data
569 // R2: length
570 // At return, R0 = return value
571 TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
572 VEOR V30.B16, V30.B16, V30.B16
573 VMOV R1, V30.D[0]
574 VMOV R2, V30.D[1] // load length into seed
575
576 MOVD $runtime·aeskeysched+0(SB), R4
577 VLD1.P 16(R4), [V0.B16]
578 AESE V30.B16, V0.B16
579 AESMC V0.B16, V0.B16
580 CMP $16, R2
581 BLO aes0to15
582 BEQ aes16
583 CMP $32, R2
584 BLS aes17to32
585 CMP $64, R2
586 BLS aes33to64
587 CMP $128, R2
588 BLS aes65to128
589 B aes129plus
590
591 aes0to15:
592 CBZ R2, aes0
593 VEOR V2.B16, V2.B16, V2.B16
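	// Gather the 1-15 input bytes into V2 one power-of-two chunk at a time:
	// each bit of the length (8, 4, 2, 1) selects whether that chunk is loaded.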
594 TBZ $3, R2, less_than_8
595 VLD1.P 8(R0), V2.D[0]
596
597 less_than_8:
598 TBZ $2, R2, less_than_4
599 VLD1.P 4(R0), V2.S[2]
600
601 less_than_4:
602 TBZ $1, R2, less_than_2
603 VLD1.P 2(R0), V2.H[6]
604
605 less_than_2:
606 TBZ $0, R2, done
607 VLD1 (R0), V2.B[14]
608 done:
609 AESE V0.B16, V2.B16
610 AESMC V2.B16, V2.B16
611 AESE V0.B16, V2.B16
612 AESMC V2.B16, V2.B16
613 AESE V0.B16, V2.B16
614 AESMC V2.B16, V2.B16
615
616 VMOV V2.D[0], R0
617 RET
618
619 aes0:
620 VMOV V0.D[0], R0
621 RET
622
623 aes16:
624 VLD1 (R0), [V2.B16]
625 B done
626
627 aes17to32:
628 // make second seed
629 VLD1 (R4), [V1.B16]
630 AESE V30.B16, V1.B16
631 AESMC V1.B16, V1.B16
632 SUB $16, R2, R10
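	// Load the first and the last 16 bytes of the input; for lengths 17-31
	// the two blocks overlap, but together they cover every input byte.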
633 VLD1.P (R0)(R10), [V2.B16]
634 VLD1 (R0), [V3.B16]
635
636 AESE V0.B16, V2.B16
637 AESMC V2.B16, V2.B16
638 AESE V1.B16, V3.B16
639 AESMC V3.B16, V3.B16
640
641 AESE V0.B16, V2.B16
642 AESMC V2.B16, V2.B16
643 AESE V1.B16, V3.B16
644 AESMC V3.B16, V3.B16
645
646 AESE V0.B16, V2.B16
647 AESE V1.B16, V3.B16
648
649 VEOR V3.B16, V2.B16, V2.B16
650
651 VMOV V2.D[0], R0
652 RET
653
654 aes33to64:
655 VLD1 (R4), [V1.B16, V2.B16, V3.B16]
656 AESE V30.B16, V1.B16
657 AESMC V1.B16, V1.B16
658 AESE V30.B16, V2.B16
659 AESMC V2.B16, V2.B16
660 AESE V30.B16, V3.B16
661 AESMC V3.B16, V3.B16
662 SUB $32, R2, R10
663
664 VLD1.P (R0)(R10), [V4.B16, V5.B16]
665 VLD1 (R0), [V6.B16, V7.B16]
666
667 AESE V0.B16, V4.B16
668 AESMC V4.B16, V4.B16
669 AESE V1.B16, V5.B16
670 AESMC V5.B16, V5.B16
671 AESE V2.B16, V6.B16
672 AESMC V6.B16, V6.B16
673 AESE V3.B16, V7.B16
674 AESMC V7.B16, V7.B16
675
676 AESE V0.B16, V4.B16
677 AESMC V4.B16, V4.B16
678 AESE V1.B16, V5.B16
679 AESMC V5.B16, V5.B16
680 AESE V2.B16, V6.B16
681 AESMC V6.B16, V6.B16
682 AESE V3.B16, V7.B16
683 AESMC V7.B16, V7.B16
684
685 AESE V0.B16, V4.B16
686 AESE V1.B16, V5.B16
687 AESE V2.B16, V6.B16
688 AESE V3.B16, V7.B16
689
690 VEOR V6.B16, V4.B16, V4.B16
691 VEOR V7.B16, V5.B16, V5.B16
692 VEOR V5.B16, V4.B16, V4.B16
693
694 VMOV V4.D[0], R0
695 RET
696
697 aes65to128:
698 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
699 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
700 AESE V30.B16, V1.B16
701 AESMC V1.B16, V1.B16
702 AESE V30.B16, V2.B16
703 AESMC V2.B16, V2.B16
704 AESE V30.B16, V3.B16
705 AESMC V3.B16, V3.B16
706 AESE V30.B16, V4.B16
707 AESMC V4.B16, V4.B16
708 AESE V30.B16, V5.B16
709 AESMC V5.B16, V5.B16
710 AESE V30.B16, V6.B16
711 AESMC V6.B16, V6.B16
712 AESE V30.B16, V7.B16
713 AESMC V7.B16, V7.B16
714
715 SUB $64, R2, R10
716 VLD1.P (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
717 VLD1 (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
718 AESE V0.B16, V8.B16
719 AESMC V8.B16, V8.B16
720 AESE V1.B16, V9.B16
721 AESMC V9.B16, V9.B16
722 AESE V2.B16, V10.B16
723 AESMC V10.B16, V10.B16
724 AESE V3.B16, V11.B16
725 AESMC V11.B16, V11.B16
726 AESE V4.B16, V12.B16
727 AESMC V12.B16, V12.B16
728 AESE V5.B16, V13.B16
729 AESMC V13.B16, V13.B16
730 AESE V6.B16, V14.B16
731 AESMC V14.B16, V14.B16
732 AESE V7.B16, V15.B16
733 AESMC V15.B16, V15.B16
734
735 AESE V0.B16, V8.B16
736 AESMC V8.B16, V8.B16
737 AESE V1.B16, V9.B16
738 AESMC V9.B16, V9.B16
739 AESE V2.B16, V10.B16
740 AESMC V10.B16, V10.B16
741 AESE V3.B16, V11.B16
742 AESMC V11.B16, V11.B16
743 AESE V4.B16, V12.B16
744 AESMC V12.B16, V12.B16
745 AESE V5.B16, V13.B16
746 AESMC V13.B16, V13.B16
747 AESE V6.B16, V14.B16
748 AESMC V14.B16, V14.B16
749 AESE V7.B16, V15.B16
750 AESMC V15.B16, V15.B16
751
752 AESE V0.B16, V8.B16
753 AESE V1.B16, V9.B16
754 AESE V2.B16, V10.B16
755 AESE V3.B16, V11.B16
756 AESE V4.B16, V12.B16
757 AESE V5.B16, V13.B16
758 AESE V6.B16, V14.B16
759 AESE V7.B16, V15.B16
760
761 VEOR V12.B16, V8.B16, V8.B16
762 VEOR V13.B16, V9.B16, V9.B16
763 VEOR V14.B16, V10.B16, V10.B16
764 VEOR V15.B16, V11.B16, V11.B16
765 VEOR V10.B16, V8.B16, V8.B16
766 VEOR V11.B16, V9.B16, V9.B16
767 VEOR V9.B16, V8.B16, V8.B16
768
769 VMOV V8.D[0], R0
770 RET
771
772 aes129plus:
773 PRFM (R0), PLDL1KEEP
774 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
775 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
776 AESE V30.B16, V1.B16
777 AESMC V1.B16, V1.B16
778 AESE V30.B16, V2.B16
779 AESMC V2.B16, V2.B16
780 AESE V30.B16, V3.B16
781 AESMC V3.B16, V3.B16
782 AESE V30.B16, V4.B16
783 AESMC V4.B16, V4.B16
784 AESE V30.B16, V5.B16
785 AESMC V5.B16, V5.B16
786 AESE V30.B16, V6.B16
787 AESMC V6.B16, V6.B16
788 AESE V30.B16, V7.B16
789 AESMC V7.B16, V7.B16
790 ADD R0, R2, R10
791 SUB $128, R10, R10
792 VLD1.P 64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
793 VLD1 (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
794 SUB $1, R2, R2
795 LSR $7, R2, R2
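	// The final 128 bytes of the input were preloaded into V8-V15 above; the
	// loop below folds 128 bytes into the accumulators per iteration, for
	// R2 = (length-1)/128 iterations in total.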
796
797 aesloop:
798 AESE V8.B16, V0.B16
799 AESMC V0.B16, V0.B16
800 AESE V9.B16, V1.B16
801 AESMC V1.B16, V1.B16
802 AESE V10.B16, V2.B16
803 AESMC V2.B16, V2.B16
804 AESE V11.B16, V3.B16
805 AESMC V3.B16, V3.B16
806 AESE V12.B16, V4.B16
807 AESMC V4.B16, V4.B16
808 AESE V13.B16, V5.B16
809 AESMC V5.B16, V5.B16
810 AESE V14.B16, V6.B16
811 AESMC V6.B16, V6.B16
812 AESE V15.B16, V7.B16
813 AESMC V7.B16, V7.B16
814
815 VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
816 AESE V8.B16, V0.B16
817 AESMC V0.B16, V0.B16
818 AESE V9.B16, V1.B16
819 AESMC V1.B16, V1.B16
820 AESE V10.B16, V2.B16
821 AESMC V2.B16, V2.B16
822 AESE V11.B16, V3.B16
823 AESMC V3.B16, V3.B16
824
825 VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
826 AESE V12.B16, V4.B16
827 AESMC V4.B16, V4.B16
828 AESE V13.B16, V5.B16
829 AESMC V5.B16, V5.B16
830 AESE V14.B16, V6.B16
831 AESMC V6.B16, V6.B16
832 AESE V15.B16, V7.B16
833 AESMC V7.B16, V7.B16
834 SUB $1, R2, R2
835 CBNZ R2, aesloop
836
837 AESE V8.B16, V0.B16
838 AESMC V0.B16, V0.B16
839 AESE V9.B16, V1.B16
840 AESMC V1.B16, V1.B16
841 AESE V10.B16, V2.B16
842 AESMC V2.B16, V2.B16
843 AESE V11.B16, V3.B16
844 AESMC V3.B16, V3.B16
845 AESE V12.B16, V4.B16
846 AESMC V4.B16, V4.B16
847 AESE V13.B16, V5.B16
848 AESMC V5.B16, V5.B16
849 AESE V14.B16, V6.B16
850 AESMC V6.B16, V6.B16
851 AESE V15.B16, V7.B16
852 AESMC V7.B16, V7.B16
853
854 AESE V8.B16, V0.B16
855 AESMC V0.B16, V0.B16
856 AESE V9.B16, V1.B16
857 AESMC V1.B16, V1.B16
858 AESE V10.B16, V2.B16
859 AESMC V2.B16, V2.B16
860 AESE V11.B16, V3.B16
861 AESMC V3.B16, V3.B16
862 AESE V12.B16, V4.B16
863 AESMC V4.B16, V4.B16
864 AESE V13.B16, V5.B16
865 AESMC V5.B16, V5.B16
866 AESE V14.B16, V6.B16
867 AESMC V6.B16, V6.B16
868 AESE V15.B16, V7.B16
869 AESMC V7.B16, V7.B16
870
871 AESE V8.B16, V0.B16
872 AESE V9.B16, V1.B16
873 AESE V10.B16, V2.B16
874 AESE V11.B16, V3.B16
875 AESE V12.B16, V4.B16
876 AESE V13.B16, V5.B16
877 AESE V14.B16, V6.B16
878 AESE V15.B16, V7.B16
879
880 VEOR V0.B16, V1.B16, V0.B16
881 VEOR V2.B16, V3.B16, V2.B16
882 VEOR V4.B16, V5.B16, V4.B16
883 VEOR V6.B16, V7.B16, V6.B16
884 VEOR V0.B16, V2.B16, V0.B16
885 VEOR V4.B16, V6.B16, V4.B16
886 VEOR V4.B16, V0.B16, V0.B16
887
888 VMOV V0.D[0], R0
889 RET
890
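// func procyield(cycles uint32)
// Spin for the given number of iterations, executing a YIELD hint each time
// to tell the CPU this is a spin-wait loop.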
891 TEXT runtime·procyield(SB),NOSPLIT,$0-0
892 MOVWU cycles+0(FP), R0
893 again:
894 YIELD
895 SUBW $1, R0
896 CBNZ R0, again
897 RET
898
899 // Save state of caller into g->sched,
900 // but using fake PC from systemstack_switch.
901 // Must only be called from functions with no locals ($0)
902 // or else unwinding from systemstack_switch is incorrect.
903 // Smashes R0.
904 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
905 MOVD $runtime·systemstack_switch(SB), R0
906 ADD $8, R0 // get past prologue
907 MOVD R0, (g_sched+gobuf_pc)(g)
908 MOVD RSP, R0
909 MOVD R0, (g_sched+gobuf_sp)(g)
910 MOVD R29, (g_sched+gobuf_bp)(g)
911 MOVD $0, (g_sched+gobuf_lr)(g)
912 MOVD $0, (g_sched+gobuf_ret)(g)
913 // Assert ctxt is zero. See func save.
914 MOVD (g_sched+gobuf_ctxt)(g), R0
915 CBZ R0, 2(PC)
916 CALL runtime·abort(SB)
917 RET
918
919 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
920 // Call fn(arg) aligned appropriately for the gcc ABI.
921 // Called on a system stack, and there may be no g yet (during needm).
922 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
923 MOVD fn+0(FP), R1
924 MOVD arg+8(FP), R0
925 SUB $16, RSP // skip over saved frame pointer below RSP
926 BL (R1)
927 ADD $16, RSP // skip over saved frame pointer below RSP
928 RET
929
930 // func asmcgocall(fn, arg unsafe.Pointer) int32
931 // Call fn(arg) on the scheduler stack,
932 // aligned appropriately for the gcc ABI.
933 // See cgocall.go for more details.
934 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
935 MOVD fn+0(FP), R1
936 MOVD arg+8(FP), R0
937
938 MOVD RSP, R2 // save original stack pointer
939 CBZ g, nosave
940 MOVD g, R4
941
942 // Figure out if we need to switch to m->g0 stack.
943 // We get called to create new OS threads too, and those
944 // come in on the m->g0 stack already. Or we might already
945 // be on the m->gsignal stack.
946 MOVD g_m(g), R8
947 MOVD m_gsignal(R8), R3
948 CMP R3, g
949 BEQ nosave
950 MOVD m_g0(R8), R3
951 CMP R3, g
952 BEQ nosave
953
954 // Switch to system stack.
955 MOVD R0, R9 // gosave_systemstack_switch<> and save_g might clobber R0
956 BL gosave_systemstack_switch<>(SB)
957 MOVD R3, g
958 BL runtime·save_g(SB)
959 MOVD (g_sched+gobuf_sp)(g), R0
960 MOVD R0, RSP
961 MOVD (g_sched+gobuf_bp)(g), R29
962 MOVD R9, R0
963
964 // Now on a scheduling stack (a pthread-created stack).
965 // Save room for two of our pointers /*, plus 32 bytes of callee
966 // save area that lives on the caller stack. */
967 MOVD RSP, R13
968 SUB $16, R13
969 MOVD R13, RSP
970 MOVD R4, 0(RSP) // save old g on stack
971 MOVD (g_stack+stack_hi)(R4), R4
972 SUB R2, R4
973 MOVD R4, 8(RSP) // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
974 BL (R1)
975 MOVD R0, R9
976
977 // Restore g, stack pointer. R0 is errno, so don't touch it
978 MOVD 0(RSP), g
979 BL runtime·save_g(SB)
980 MOVD (g_stack+stack_hi)(g), R5
981 MOVD 8(RSP), R6
982 SUB R6, R5
983 MOVD R9, R0
984 MOVD R5, RSP
985
986 MOVW R0, ret+16(FP)
987 RET
988
989 nosave:
990 // Running on a system stack, perhaps even without a g.
991 // Having no g can happen during thread creation or thread teardown
992 // (see needm/dropm on Solaris, for example).
993 // This code is like the above sequence but without saving/restoring g
994 // and without worrying about the stack moving out from under us
995 // (because we're on a system stack, not a goroutine stack).
996 // The above code could be used directly if already on a system stack,
997 // but then the only path through this code would be a rare case on Solaris.
998 // Using this code for all "already on system stack" calls exercises it more,
999 // which should help keep it correct.
1000 MOVD RSP, R13
1001 SUB $16, R13
1002 MOVD R13, RSP
1003 MOVD $0, R4
1004 MOVD R4, 0(RSP) // Where above code stores g, in case someone looks during debugging.
1005 MOVD R2, 8(RSP) // Save original stack pointer.
1006 BL (R1)
1007 // Restore stack pointer.
1008 MOVD 8(RSP), R2
1009 MOVD R2, RSP
1010 MOVD R0, ret+16(FP)
1011 RET
1012
1013 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
1014 // See cgocall.go for more details.
1015 TEXT ·cgocallback(SB),NOSPLIT,$24-24
1016 NO_LOCAL_POINTERS
1017
1018 // Skip cgocallbackg and just dropm when fn is nil; in that case frame is the saved g.
1019 // This path is used to drop the m while the thread is exiting.
1020 MOVD fn+0(FP), R1
1021 CBNZ R1, loadg
1022 // Restore the g from frame.
1023 MOVD frame+8(FP), g
1024 B dropm
1025
1026 loadg:
1027 // Load g from thread-local storage.
1028 BL runtime·load_g(SB)
1029
1030 // g is nil if Go did not create the current thread,
1031 // or if this thread has never called into Go on pthread platforms.
1032 // Call needm to obtain one for temporary use.
1033 // In this case, we're running on the thread stack, so there's
1034 // lots of space, but the linker doesn't know. Hide the call from
1035 // the linker analysis by using an indirect call.
1036 CBZ g, needm
1037
1038 MOVD g_m(g), R8
1039 MOVD R8, savedm-8(SP)
1040 B havem
1041
1042 needm:
1043 MOVD g, savedm-8(SP) // g is zero, so is m.
1044 MOVD $runtime·needAndBindM(SB), R0
1045 BL (R0)
1046
1047 // Set m->g0->sched.sp = SP, so that if a panic happens
1048 // during the function we are about to execute, it will
1049 // have a valid SP to run on the g0 stack.
1050 // The next few lines (after the havem label)
1051 // will save this SP onto the stack and then write
1052 // the same SP back to m->sched.sp. That seems redundant,
1053 // but if an unrecovered panic happens, unwindm will
1054 // restore the g->sched.sp from the stack location
1055 // and then systemstack will try to use it. If we don't set it here,
1056 // that restored SP will be uninitialized (typically 0) and
1057 // will not be usable.
1058 MOVD g_m(g), R8
1059 MOVD m_g0(R8), R3
1060 MOVD RSP, R0
1061 MOVD R0, (g_sched+gobuf_sp)(R3)
1062 MOVD R29, (g_sched+gobuf_bp)(R3)
1063
1064 havem:
1065 // Now there's a valid m, and we're running on its m->g0.
1066 // Save current m->g0->sched.sp on stack and then set it to SP.
1067 // Save current sp in m->g0->sched.sp in preparation for
1068 // switch back to m->curg stack.
1069 // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
1070 // Beware that the frame size is actually 32+16.
1071 MOVD m_g0(R8), R3
1072 MOVD (g_sched+gobuf_sp)(R3), R4
1073 MOVD R4, savedsp-16(SP)
1074 MOVD RSP, R0
1075 MOVD R0, (g_sched+gobuf_sp)(R3)
1076
1077 // Switch to m->curg stack and call runtime.cgocallbackg.
1078 // Because we are taking over the execution of m->curg
1079 // but *not* resuming what had been running, we need to
1080 // save that information (m->curg->sched) so we can restore it.
1081 // We can restore m->curg->sched.sp easily, because calling
1082 // runtime.cgocallbackg leaves SP unchanged upon return.
1083 // To save m->curg->sched.pc, we push it onto the curg stack and
1084 // open a frame the same size as cgocallback's g0 frame.
1085 // Once we switch to the curg stack, the pushed PC will appear
1086 // to be the return PC of cgocallback, so that the traceback
1087 // will seamlessly trace back into the earlier calls.
1088 MOVD m_curg(R8), g
1089 BL runtime·save_g(SB)
1090 MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
1091 MOVD (g_sched+gobuf_pc)(g), R5
1092 MOVD R5, -48(R4)
1093 MOVD (g_sched+gobuf_bp)(g), R5
1094 MOVD R5, -56(R4)
1095 // Gather our arguments into registers.
1096 MOVD fn+0(FP), R1
1097 MOVD frame+8(FP), R2
1098 MOVD ctxt+16(FP), R3
1099 MOVD $-48(R4), R0 // maintain 16-byte SP alignment
1100 MOVD R0, RSP // switch stack
1101 MOVD R1, 8(RSP)
1102 MOVD R2, 16(RSP)
1103 MOVD R3, 24(RSP)
1104 MOVD $runtime·cgocallbackg(SB), R0
1105 CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now.
1106
1107 // Restore g->sched (== m->curg->sched) from saved values.
1108 MOVD 0(RSP), R5
1109 MOVD R5, (g_sched+gobuf_pc)(g)
1110 MOVD RSP, R4
1111 ADD $48, R4, R4
1112 MOVD R4, (g_sched+gobuf_sp)(g)
1113
1114 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1115 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1116 // so we do not have to restore it.)
1117 MOVD g_m(g), R8
1118 MOVD m_g0(R8), g
1119 BL runtime·save_g(SB)
1120 MOVD (g_sched+gobuf_sp)(g), R0
1121 MOVD R0, RSP
1122 MOVD savedsp-16(SP), R4
1123 MOVD R4, (g_sched+gobuf_sp)(g)
1124
1125 // If the m on entry was nil, we called needm above to borrow an m,
1126 // 1. for the duration of the call on non-pthread platforms,
1127 // 2. or for as long as the C thread stays alive on pthread platforms.
1128 // If the m on entry wasn't nil,
1129 // 1. the thread might be a Go thread,
1130 // 2. or this wasn't the first call from a C thread on pthread platforms,
1131 //    in which case we skip dropm so the m borrowed by the first call is reused.
1132 MOVD savedm-8(SP), R6
1133 CBNZ R6, droppedm
1134
1135 // Skip dropm to reuse it in the next call, when a pthread key has been created.
1136 MOVD _cgo_pthread_key_created(SB), R6
1137 // A nil _cgo_pthread_key_created means cgo is disabled, so we still need dropm.
1138 CBZ R6, dropm
1139 MOVD (R6), R6
1140 CBNZ R6, droppedm
1141
1142 dropm:
1143 MOVD $runtime·dropm(SB), R0
1144 BL (R0)
1145 droppedm:
1146
1147 // Done!
1148 RET
1149
1150 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1151 // Must obey the gcc calling convention.
1152 TEXT _cgo_topofstack(SB),NOSPLIT,$24
1153 // g (R28) and REGTMP (R27) might be clobbered by load_g. They
1154 // are callee-save in the gcc calling convention, so save them.
1155 MOVD R27, savedR27-8(SP)
1156 MOVD g, saveG-16(SP)
1157
1158 BL runtime·load_g(SB)
1159 MOVD g_m(g), R0
1160 MOVD m_curg(R0), R0
1161 MOVD (g_stack+stack_hi)(R0), R0
1162
1163 MOVD saveG-16(SP), g
1164 MOVD savedR27-8(SP), R27
1165 RET
1166
1167 // void setg(G*); set g. for use by needm.
1168 TEXT runtime·setg(SB), NOSPLIT, $0-8
1169 MOVD gg+0(FP), g
1170 // This only happens if iscgo, so jump straight to save_g
1171 BL runtime·save_g(SB)
1172 RET
1173
1174 // void setg_gcc(G*); set g called from gcc
1175 TEXT setg_gcc<>(SB),NOSPLIT,$8
1176 MOVD R0, g
1177 MOVD R27, savedR27-8(SP)
1178 BL runtime·save_g(SB)
1179 MOVD savedR27-8(SP), R27
1180 RET
1181
1182 TEXT runtime·emptyfunc(SB),0,$0-0
1183 RET
1184
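// abort crashes the process with a nil-pointer load, with UNDEF as a
// backstop in case the faulting load does not stop execution.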
1185 TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
1186 MOVD ZR, R0
1187 MOVD (R0), R0
1188 UNDEF
1189
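// func return0(): set the return register R0 to 0; called at the end of
// deferproc so that it returns 0 on the normal path.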
1190 TEXT runtime·return0(SB), NOSPLIT, $0
1191 MOVW $0, R0
1192 RET
1193
1194 // The top-most function running on a goroutine
1195 // returns to goexit+PCQuantum.
1196 TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
1197 MOVD R0, R0 // NOP
1198 BL runtime·goexit1(SB) // does not return
1199
1200 // This is called from .init_array and follows the platform, not Go, ABI.
1201 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
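	// R0 = pointer to the new moduledata to append (platform ABI argument register).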
1202 SUB $0x10, RSP
1203 MOVD R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
1204 MOVD runtime·lastmoduledatap(SB), R1
1205 MOVD R0, moduledata_next(R1)
1206 MOVD R0, runtime·lastmoduledatap(SB)
1207 MOVD 8(RSP), R27
1208 ADD $0x10, RSP
1209 RET
1210
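// func checkASM() bool
// Reports whether assembly-level invariants hold; nothing needs checking on
// arm64, so it always returns true.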
1211 TEXT ·checkASM(SB),NOSPLIT,$0-1
1212 MOVW $1, R3
1213 MOVB R3, ret+0(FP)
1214 RET
1215
1216 // gcWriteBarrier informs the GC about heap pointer writes.
1217 //
1218 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1219 // number of bytes of buffer needed in R25, and returns a pointer
1220 // to the buffer space in R25.
1221 // It clobbers condition codes.
1222 // It does not clobber any general-purpose registers except R27,
1223 // but may clobber others (e.g., floating point registers).
1224 // The act of CALLing gcWriteBarrier will clobber R30 (LR).
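// The $200 frame below is spill space for the general-purpose registers saved
// on the slow (flush) path before calling wbBufFlush.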
1225 TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
1226 // Save the registers clobbered by the fast path.
1227 STP (R0, R1), 184(RSP)
1228 retry:
1229 MOVD g_m(g), R0
1230 MOVD m_p(R0), R0
1231 MOVD (p_wbBuf+wbBuf_next)(R0), R1
1232 MOVD (p_wbBuf+wbBuf_end)(R0), R27
1233 // Increment wbBuf.next position.
1234 ADD R25, R1
1235 // Is the buffer full?
1236 CMP R27, R1
1237 BHI flush
1238 // Commit to the larger buffer.
1239 MOVD R1, (p_wbBuf+wbBuf_next)(R0)
1240 // Make return value (the original next position)
1241 SUB R25, R1, R25
1242 // Restore registers.
1243 LDP 184(RSP), (R0, R1)
1244 RET
1245
1246 flush:
1247 // Save all general purpose registers since these could be
1248 // clobbered by wbBufFlush and were not saved by the caller.
1249 // R0 and R1 already saved
1250 STP (R2, R3), 1*8(RSP)
1251 STP (R4, R5), 3*8(RSP)
1252 STP (R6, R7), 5*8(RSP)
1253 STP (R8, R9), 7*8(RSP)
1254 STP (R10, R11), 9*8(RSP)
1255 STP (R12, R13), 11*8(RSP)
1256 STP (R14, R15), 13*8(RSP)
1257 // R16, R17 may be clobbered by linker trampoline
1258 // R18 is unused.
1259 STP (R19, R20), 15*8(RSP)
1260 STP (R21, R22), 17*8(RSP)
1261 STP (R23, R24), 19*8(RSP)
1262 STP (R25, R26), 21*8(RSP)
1263 // R27 is temp register.
1264 // R28 is g.
1265 // R29 is frame pointer (unused).
1266 // R30 is LR, which was saved by the prologue.
1267 // R31 is SP.
1268
1269 CALL runtime·wbBufFlush(SB)
1270 LDP 1*8(RSP), (R2, R3)
1271 LDP 3*8(RSP), (R4, R5)
1272 LDP 5*8(RSP), (R6, R7)
1273 LDP 7*8(RSP), (R8, R9)
1274 LDP 9*8(RSP), (R10, R11)
1275 LDP 11*8(RSP), (R12, R13)
1276 LDP 13*8(RSP), (R14, R15)
1277 LDP 15*8(RSP), (R19, R20)
1278 LDP 17*8(RSP), (R21, R22)
1279 LDP 19*8(RSP), (R23, R24)
1280 LDP 21*8(RSP), (R25, R26)
1281 JMP retry
1282
1283 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
1284 MOVD $8, R25
1285 JMP gcWriteBarrier<>(SB)
1286 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
1287 MOVD $16, R25
1288 JMP gcWriteBarrier<>(SB)
1289 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
1290 MOVD $24, R25
1291 JMP gcWriteBarrier<>(SB)
1292 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
1293 MOVD $32, R25
1294 JMP gcWriteBarrier<>(SB)
1295 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
1296 MOVD $40, R25
1297 JMP gcWriteBarrier<>(SB)
1298 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
1299 MOVD $48, R25
1300 JMP gcWriteBarrier<>(SB)
1301 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
1302 MOVD $56, R25
1303 JMP gcWriteBarrier<>(SB)
1304 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
1305 MOVD $64, R25
1306 JMP gcWriteBarrier<>(SB)
1307
1308 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1309 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1310
1311 // debugCallV2 is the entry point for debugger-injected function
1312 // calls on running goroutines. It informs the runtime that a
1313 // debug call has been injected and creates a call frame for the
1314 // debugger to fill in.
1315 //
1316 // To inject a function call, a debugger should:
1317 // 1. Check that the goroutine is in state _Grunning and that
1318 // there are at least 288 bytes free on the stack.
1319 // 2. Set SP as SP-16.
1320 // 3. Store the current LR in (SP) (using the SP after step 2).
1321 // 4. Store the current PC in the LR register.
1322 // 5. Write the desired argument frame size at SP-16
1323 // 6. Save all machine registers (including flags and fpsimd registers)
1324 // so they can be restored later by the debugger.
1325 // 7. Set the PC to debugCallV2 and resume execution.
1326 //
1327 // If the goroutine is in state _Grunnable, then it's not generally
1328 // safe to inject a call because it may return out via other runtime
1329 // operations. Instead, the debugger should unwind the stack to find
1330 // the return to non-runtime code, add a temporary breakpoint there,
1331 // and inject the call once that breakpoint is hit.
1332 //
1333 // If the goroutine is in any other state, it's not safe to inject a call.
1334 //
1335 // This function communicates back to the debugger by setting R20 and
1336 // invoking BRK to raise a breakpoint signal. Note that the signal PC of
1337 // the signal triggered by the BRK instruction is the PC where the signal
1338 // is trapped, not the next PC, so to resume execution, the debugger needs
1339 // to set the signal PC to PC+4. See the comments in the implementation for
1340 // the protocol the debugger is expected to follow. InjectDebugCall in the
1341 // runtime tests demonstrates this protocol.
1342 //
1343 // The debugger must ensure that any pointers passed to the function
1344 // obey escape analysis requirements. Specifically, it must not pass
1345 // a stack pointer to an escaping argument. debugCallV2 cannot check
1346 // this invariant.
1347 //
1348 // This is ABIInternal because Go code injects its PC directly into new
1349 // goroutine stacks.
1350 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
1351 STP (R29, R30), -280(RSP)
1352 SUB $272, RSP, RSP
1353 SUB $8, RSP, R29
1354 // Save all registers that may contain pointers so they can be
1355 // conservatively scanned.
1356 //
1357 // We can't do anything that might clobber any of these
1358 // registers before this.
1359 STP (R27, g), (30*8)(RSP)
1360 STP (R25, R26), (28*8)(RSP)
1361 STP (R23, R24), (26*8)(RSP)
1362 STP (R21, R22), (24*8)(RSP)
1363 STP (R19, R20), (22*8)(RSP)
1364 STP (R16, R17), (20*8)(RSP)
1365 STP (R14, R15), (18*8)(RSP)
1366 STP (R12, R13), (16*8)(RSP)
1367 STP (R10, R11), (14*8)(RSP)
1368 STP (R8, R9), (12*8)(RSP)
1369 STP (R6, R7), (10*8)(RSP)
1370 STP (R4, R5), (8*8)(RSP)
1371 STP (R2, R3), (6*8)(RSP)
1372 STP (R0, R1), (4*8)(RSP)
1373
1374 // Perform a safe-point check.
1375 MOVD R30, 8(RSP) // Caller's PC
1376 CALL runtime·debugCallCheck(SB)
1377 MOVD 16(RSP), R0
1378 CBZ R0, good
1379
1380 // The safety check failed. Put the reason string at the top
1381 // of the stack.
1382 MOVD R0, 8(RSP)
1383 MOVD 24(RSP), R0
1384 MOVD R0, 16(RSP)
1385
1386 // Set R20 to 8 and invoke BRK. The debugger should get the
1387 // reason a call can't be injected from SP+8 and resume execution.
1388 MOVD $8, R20
1389 BREAK
1390 JMP restore
1391
1392 good:
1393 // Registers are saved and it's safe to make a call.
1394 // Open up a call frame, moving the stack if necessary.
1395 //
1396 // Once the frame is allocated, this will set R20 to 0 and
1397 // invoke BRK. The debugger should write the argument
1398 // frame for the call at SP+8, set up argument registers,
1399 // set the LR as the signal PC + 4, set the PC to the function
1400 // to call, set R26 to point to the closure (if a closure call),
1401 // and resume execution.
1402 //
1403 // If the function returns, this will set R20 to 1 and invoke
1404 // BRK. The debugger can then inspect any return value saved
1405 // on the stack at SP+8 and in registers. To resume execution,
1406 // the debugger should restore the LR from (SP).
1407 //
1408 // If the function panics, this will set R20 to 2 and invoke BRK.
1409 // The interface{} value of the panic will be at SP+8. The debugger
1410 // can inspect the panic value and resume execution again.
1411 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1412 CMP $MAXSIZE, R0; \
1413 BGT 5(PC); \
1414 MOVD $NAME(SB), R0; \
1415 MOVD R0, 8(RSP); \
1416 CALL runtime·debugCallWrap(SB); \
1417 JMP restore
1418
1419 MOVD 256(RSP), R0 // the argument frame size
1420 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1421 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1422 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1423 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1424 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1425 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1426 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1427 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1428 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1429 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1430 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1431 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1432 // The frame size is too large. Report the error.
1433 MOVD $debugCallFrameTooLarge<>(SB), R0
1434 MOVD R0, 8(RSP)
1435 MOVD $20, R0
1436 MOVD R0, 16(RSP) // length of debugCallFrameTooLarge string
1437 MOVD $8, R20
1438 BREAK
1439 JMP restore
1440
1441 restore:
1442 // Calls and failures resume here.
1443 //
1444 // Set R20 to 16 and invoke BRK. The debugger should restore
1445 // all registers except for PC and RSP and resume execution.
1446 MOVD $16, R20
1447 BREAK
1448 // We must not modify flags after this point.
1449
1450 // Restore pointer-containing registers, which may have been
1451 // modified from the debugger's copy by stack copying.
1452 LDP (30*8)(RSP), (R27, g)
1453 LDP (28*8)(RSP), (R25, R26)
1454 LDP (26*8)(RSP), (R23, R24)
1455 LDP (24*8)(RSP), (R21, R22)
1456 LDP (22*8)(RSP), (R19, R20)
1457 LDP (20*8)(RSP), (R16, R17)
1458 LDP (18*8)(RSP), (R14, R15)
1459 LDP (16*8)(RSP), (R12, R13)
1460 LDP (14*8)(RSP), (R10, R11)
1461 LDP (12*8)(RSP), (R8, R9)
1462 LDP (10*8)(RSP), (R6, R7)
1463 LDP (8*8)(RSP), (R4, R5)
1464 LDP (6*8)(RSP), (R2, R3)
1465 LDP (4*8)(RSP), (R0, R1)
1466
1467 LDP -8(RSP), (R29, R27)
1468 ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext
1469 MOVD -16(RSP), R30 // restore old lr
1470 JMP (R27)
1471
1472 // runtime.debugCallCheck assumes that functions defined with the
1473 // DEBUG_CALL_FN macro are safe points to inject calls.
1474 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
1475 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1476 NO_LOCAL_POINTERS; \
1477 MOVD $0, R20; \
1478 BREAK; \
1479 MOVD $1, R20; \
1480 BREAK; \
1481 RET
1482 DEBUG_CALL_FN(debugCall32<>, 32)
1483 DEBUG_CALL_FN(debugCall64<>, 64)
1484 DEBUG_CALL_FN(debugCall128<>, 128)
1485 DEBUG_CALL_FN(debugCall256<>, 256)
1486 DEBUG_CALL_FN(debugCall512<>, 512)
1487 DEBUG_CALL_FN(debugCall1024<>, 1024)
1488 DEBUG_CALL_FN(debugCall2048<>, 2048)
1489 DEBUG_CALL_FN(debugCall4096<>, 4096)
1490 DEBUG_CALL_FN(debugCall8192<>, 8192)
1491 DEBUG_CALL_FN(debugCall16384<>, 16384)
1492 DEBUG_CALL_FN(debugCall32768<>, 32768)
1493 DEBUG_CALL_FN(debugCall65536<>, 65536)
1494
1495 // func debugCallPanicked(val interface{})
1496 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1497 // Copy the panic value to the top of stack at SP+8.
1498 MOVD val_type+0(FP), R0
1499 MOVD R0, 8(RSP)
1500 MOVD val_data+8(FP), R0
1501 MOVD R0, 16(RSP)
1502 MOVD $2, R20
1503 BREAK
1504 RET
1505
1506 // Note: these functions use a special calling convention to save generated code space.
1507 // Arguments are passed in registers, but the space for those arguments is allocated
1508 // in the caller's stack frame. These stubs write the args into that stack space and
1509 // then tail call to the corresponding runtime handler.
1510 // The tail call makes these stubs disappear in backtraces.
1511 //
1512 // Defined as ABIInternal since the compiler generates ABIInternal
1513 // calls to it directly and it does not use the stack-based Go ABI.
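1514 // For example, panicSliceAlen receives its operands in R1 and R2 and shifts
1515 // them into R0 and R1, the ABIInternal argument registers expected by
1516 // goPanicSliceAlen.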
1514 TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
1515 JMP runtime·goPanicIndex<ABIInternal>(SB)
1516 TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
1517 JMP runtime·goPanicIndexU<ABIInternal>(SB)
1518 TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
1519 MOVD R1, R0
1520 MOVD R2, R1
1521 JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
1522 TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
1523 MOVD R1, R0
1524 MOVD R2, R1
1525 JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
1526 TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
1527 MOVD R1, R0
1528 MOVD R2, R1
1529 JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
1530 TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
1531 MOVD R1, R0
1532 MOVD R2, R1
1533 JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
1534 TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
1535 JMP runtime·goPanicSliceB<ABIInternal>(SB)
1536 TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
1537 JMP runtime·goPanicSliceBU<ABIInternal>(SB)
1538 TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
1539 MOVD R2, R0
1540 MOVD R3, R1
1541 JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
1542 TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
1543 MOVD R2, R0
1544 MOVD R3, R1
1545 JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
1546 TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
1547 MOVD R2, R0
1548 MOVD R3, R1
1549 JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
1550 TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
1551 MOVD R2, R0
1552 MOVD R3, R1
1553 JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
1554 TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
1555 MOVD R1, R0
1556 MOVD R2, R1
1557 JMP runtime·goPanicSlice3B<ABIInternal>(SB)
1558 TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
1559 MOVD R1, R0
1560 MOVD R2, R1
1561 JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
1562 TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
1563 JMP runtime·goPanicSlice3C<ABIInternal>(SB)
1564 TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
1565 JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
1566 TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
1567 MOVD R2, R0
1568 MOVD R3, R1
1569 JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
1570
1571 TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1572 MOVD R29, R0
1573 RET
1574