src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
11 // _rt0_amd64 is common startup code for most amd64 systems when using
12 // internal linking. This is the entry point for the program from the
13 // kernel for an ordinary -buildmode=exe program. The stack holds the
14 // number of arguments and the C-style argv.
15 TEXT _rt0_amd64(SB),NOSPLIT,$-8
16 MOVQ 0(SP), DI // argc
17 LEAQ 8(SP), SI // argv
18 JMP runtime·rt0_go(SB)
19
20 // main is common startup code for most amd64 systems when using
21 // external linking. The C startup code will call the symbol "main"
22 // passing argc and argv in the usual C ABI registers DI and SI.
23 TEXT main(SB),NOSPLIT,$-8
24 JMP runtime·rt0_go(SB)
25
26 // _rt0_amd64_lib is common startup code for most amd64 systems when
27 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
28 // arrange to invoke this function as a global constructor (for
29 // c-archive) or when the shared library is loaded (for c-shared).
30 // We expect argc and argv to be passed in the usual C ABI registers
31 // DI and SI.
32 TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
33 // Transition from C ABI to Go ABI.
34 PUSH_REGS_HOST_TO_ABI0()
35
36 MOVQ DI, _rt0_amd64_lib_argc<>(SB)
37 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
41
42 // Create a new thread to finish Go runtime initialization.
43 MOVQ _cgo_sys_thread_create(SB), AX
44 TESTQ AX, AX
45 JZ nocgo
46
47 // We're calling back to C.
48 // Align stack per ELF ABI requirements.
49 MOVQ SP, BX // Callee-save in C ABI
50 ANDQ $~15, SP
51 MOVQ $_rt0_amd64_lib_go(SB), DI
52 MOVQ $0, SI
53 CALL AX
54 MOVQ BX, SP
55 JMP restore
56
57 nocgo:
58 ADJSP $16
59 MOVQ $0x800000, 0(SP) // stacksize
60 MOVQ $_rt0_amd64_lib_go(SB), AX
61 MOVQ AX, 8(SP) // fn
62 CALL runtime·newosproc0(SB)
63 ADJSP $-16
64
65 restore:
66 POP_REGS_HOST_TO_ABI0()
67 RET
68
69 // _rt0_amd64_lib_go initializes the Go runtime.
70 // This is started in a separate thread by _rt0_amd64_lib.
71 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
72 MOVQ _rt0_amd64_lib_argc<>(SB), DI
73 MOVQ _rt0_amd64_lib_argv<>(SB), SI
74 JMP runtime·rt0_go(SB)
75
76 DATA _rt0_amd64_lib_argc<>(SB)/8, $0
77 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
78 DATA _rt0_amd64_lib_argv<>(SB)/8, $0
79 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
80
81 #ifdef GOAMD64_v2
82 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
83 #endif
84
85 #ifdef GOAMD64_v3
86 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
87 #endif
88
89 #ifdef GOAMD64_v4
90 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
91 #endif
92
93 GLOBL bad_cpu_msg<>(SB), RODATA, $84
94
95 // Define a list of AMD64 microarchitecture level features
96 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
97
98 // SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT
99 #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
100 // LAHF/SAHF
101 #define V2_EXT_FEATURES_CX (1 << 0)
102 // FMA MOVBE OSXSAVE AVX F16C
103 #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
104 // ABM (for LZCNT)
105 #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
106 // BMI1 AVX2 BMI2
107 #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
108 // XMM YMM
109 #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
110
111 #define V4_FEATURES_CX V3_FEATURES_CX
112
113 #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
114 // AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
115 #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
116 // OPMASK ZMM
117 #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
118
119 #ifdef GOAMD64_v2
120 #define NEED_MAX_CPUID 0x80000001
121 #define NEED_FEATURES_CX V2_FEATURES_CX
122 #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
123 #endif
124
125 #ifdef GOAMD64_v3
126 #define NEED_MAX_CPUID 0x80000001
127 #define NEED_FEATURES_CX V3_FEATURES_CX
128 #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
129 #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
130 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
131 #endif
132
133 #ifdef GOAMD64_v4
134 #define NEED_MAX_CPUID 0x80000001
135 #define NEED_FEATURES_CX V4_FEATURES_CX
136 #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
137 #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
138
139 // Darwin requires a different approach to check AVX512 support, see CL 285572.
140 #ifdef GOOS_darwin
141 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
142 // These values are from:
143 // https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
144 #define commpage64_base_address 0x00007fffffe00000
145 #define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
146 #define commpage64_version (commpage64_base_address+0x01E)
147 #define hasAVX512F 0x0000004000000000
148 #define hasAVX512CD 0x0000008000000000
149 #define hasAVX512DQ 0x0000010000000000
150 #define hasAVX512BW 0x0000020000000000
151 #define hasAVX512VL 0x0000100000000000
152 #define NEED_DARWIN_SUPPORT (hasAVX512F | hasAVX512DQ | hasAVX512CD | hasAVX512BW | hasAVX512VL)
153 #else
154 #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
155 #endif
156
157 #endif
158
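// A rough Go-level sketch of the GOAMD64 checks performed in rt0_go below
// (illustrative only; cpuid and xgetbv stand in for the CPUID and XGETBV
// instructions and are not real runtime functions):
//
//	_, _, ecx, _ := cpuid(1, 0)
//	if ecx&NEED_FEATURES_CX != NEED_FEATURES_CX {
//		// bad_cpu: print bad_cpu_msg and exit(1)
//	}
//	eax, _ := xgetbv(0)
//	if eax&NEED_OS_SUPPORT_AX != NEED_OS_SUPPORT_AX {
//		// bad_cpu: the OS does not save the required register state
//	}
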
159 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
160 // copy arguments forward on an even stack
161 MOVQ DI, AX // argc
162 MOVQ SI, BX // argv
163 SUBQ $(5*8), SP // 3 args, 2 auto
164 ANDQ $~15, SP
165 MOVQ AX, 24(SP)
166 MOVQ BX, 32(SP)
167
168 // create istack out of the given (operating system) stack.
169 // _cgo_init may update stackguard.
170 MOVQ $runtime·g0(SB), DI
171 LEAQ (-64*1024)(SP), BX
172 MOVQ BX, g_stackguard0(DI)
173 MOVQ BX, g_stackguard1(DI)
174 MOVQ BX, (g_stack+stack_lo)(DI)
175 MOVQ SP, (g_stack+stack_hi)(DI)
176
177 // find out information about the processor we're on
178 MOVL $0, AX
179 CPUID
180 CMPL AX, $0
181 JE nocpuinfo
182
183 CMPL BX, $0x756E6547 // "Genu"
184 JNE notintel
185 CMPL DX, $0x49656E69 // "ineI"
186 JNE notintel
187 CMPL CX, $0x6C65746E // "ntel"
188 JNE notintel
189 MOVB $1, runtime·isIntel(SB)
190
191 notintel:
192 // Load EAX=1 cpuid flags
193 MOVL $1, AX
194 CPUID
195 MOVL AX, runtime·processorVersionInfo(SB)
196
197 nocpuinfo:
198 // if there is an _cgo_init, call it.
199 MOVQ _cgo_init(SB), AX
200 TESTQ AX, AX
201 JZ needtls
202 // arg 1: g0, already in DI
203 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
204 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
205 MOVQ $0, CX
206 #ifdef GOOS_android
207 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
208 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
209 // Compensate for tls_g (+16).
210 MOVQ -16(TLS), CX
211 #endif
212 #ifdef GOOS_windows
213 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
214 // Adjust for the Win64 calling convention.
215 MOVQ CX, R9 // arg 4
216 MOVQ DX, R8 // arg 3
217 MOVQ SI, DX // arg 2
218 MOVQ DI, CX // arg 1
219 #endif
220 CALL AX
221
222 // update stackguard after _cgo_init
223 MOVQ $runtime·g0(SB), CX
224 MOVQ (g_stack+stack_lo)(CX), AX
225 ADDQ $const_stackGuard, AX
226 MOVQ AX, g_stackguard0(CX)
227 MOVQ AX, g_stackguard1(CX)
228
229 #ifndef GOOS_windows
230 JMP ok
231 #endif
232 needtls:
233 #ifdef GOOS_plan9
234 // skip TLS setup on Plan 9
235 JMP ok
236 #endif
237 #ifdef GOOS_solaris
238 // skip TLS setup on Solaris
239 JMP ok
240 #endif
241 #ifdef GOOS_illumos
242 // skip TLS setup on illumos
243 JMP ok
244 #endif
245 #ifdef GOOS_darwin
246 // skip TLS setup on Darwin
247 JMP ok
248 #endif
249 #ifdef GOOS_openbsd
250 // skip TLS setup on OpenBSD
251 JMP ok
252 #endif
253
254 #ifdef GOOS_windows
255 CALL runtime·wintls(SB)
256 #endif
257
258 LEAQ runtime·m0+m_tls(SB), DI
259 CALL runtime·settls(SB)
260
261 // store through it, to make sure it works
262 get_tls(BX)
263 MOVQ $0x123, g(BX)
264 MOVQ runtime·m0+m_tls(SB), AX
265 CMPQ AX, $0x123
266 JEQ 2(PC)
267 CALL runtime·abort(SB)
268 ok:
269 // set the per-goroutine and per-mach "registers"
270 get_tls(BX)
271 LEAQ runtime·g0(SB), CX
272 MOVQ CX, g(BX)
273 LEAQ runtime·m0(SB), AX
274
275 // save m->g0 = g0
276 MOVQ CX, m_g0(AX)
277 // save m0 to g0->m
278 MOVQ AX, g_m(CX)
279
280 CLD // convention is D is always left cleared
281
282 // Check GOAMD64 requirements
283 // We need to do this after setting up TLS, so that
284 // we can report an error if there is a failure. See issue 49586.
285 #ifdef NEED_FEATURES_CX
286 MOVL $0, AX
287 CPUID
288 CMPL AX, $0
289 JE bad_cpu
290 MOVL $1, AX
291 CPUID
292 ANDL $NEED_FEATURES_CX, CX
293 CMPL CX, $NEED_FEATURES_CX
294 JNE bad_cpu
295 #endif
296
297 #ifdef NEED_MAX_CPUID
298 MOVL $0x80000000, AX
299 CPUID
300 CMPL AX, $NEED_MAX_CPUID
301 JL bad_cpu
302 #endif
303
304 #ifdef NEED_EXT_FEATURES_BX
305 MOVL $7, AX
306 MOVL $0, CX
307 CPUID
308 ANDL $NEED_EXT_FEATURES_BX, BX
309 CMPL BX, $NEED_EXT_FEATURES_BX
310 JNE bad_cpu
311 #endif
312
313 #ifdef NEED_EXT_FEATURES_CX
314 MOVL $0x80000001, AX
315 CPUID
316 ANDL $NEED_EXT_FEATURES_CX, CX
317 CMPL CX, $NEED_EXT_FEATURES_CX
318 JNE bad_cpu
319 #endif
320
321 #ifdef NEED_OS_SUPPORT_AX
322 XORL CX, CX
323 XGETBV
324 ANDL $NEED_OS_SUPPORT_AX, AX
325 CMPL AX, $NEED_OS_SUPPORT_AX
326 JNE bad_cpu
327 #endif
328
329 #ifdef NEED_DARWIN_SUPPORT
330 MOVQ $commpage64_version, BX
331 CMPW (BX), $13 // cpu_capabilities64 undefined in versions < 13
332 JL bad_cpu
333 MOVQ $commpage64_cpu_capabilities64, BX
334 MOVQ (BX), BX
335 MOVQ $NEED_DARWIN_SUPPORT, CX
336 ANDQ CX, BX
337 CMPQ BX, CX
338 JNE bad_cpu
339 #endif
340
341 CALL runtime·check(SB)
342
343 MOVL 24(SP), AX // copy argc
344 MOVL AX, 0(SP)
345 MOVQ 32(SP), AX // copy argv
346 MOVQ AX, 8(SP)
347 CALL runtime·args(SB)
348 CALL runtime·osinit(SB)
349 CALL runtime·schedinit(SB)
350
351 // create a new goroutine to start program
352 MOVQ $runtime·mainPC(SB), AX // entry
353 PUSHQ AX
354 CALL runtime·newproc(SB)
355 POPQ AX
356
357 // start this M
358 CALL runtime·mstart(SB)
359
360 CALL runtime·abort(SB) // mstart should never return
361 RET
362
363 bad_cpu: // show that the program requires a certain microarchitecture level.
364 MOVQ $2, 0(SP)
365 MOVQ $bad_cpu_msg<>(SB), AX
366 MOVQ AX, 8(SP)
367 MOVQ $84, 16(SP)
368 CALL runtime·write(SB)
369 MOVQ $1, 0(SP)
370 CALL runtime·exit(SB)
371 CALL runtime·abort(SB)
372 RET
373
374 // Prevent dead-code elimination of debugCallV2, which is
375 // intended to be called by debuggers.
376 MOVQ $runtime·debugCallV2<ABIInternal>(SB), AX
377 RET
378
379 // mainPC is a function value for runtime.main, to be passed to newproc.
380 // The reference to runtime.main is made via ABIInternal, since the
381 // actual function (not the ABI0 wrapper) is needed by newproc.
382 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
383 GLOBL runtime·mainPC(SB),RODATA,$8
384
385 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
386 BYTE $0xcc
387 RET
388
389 TEXT runtime·asminit(SB),NOSPLIT,$0-0
390 // No per-thread init.
391 RET
392
393 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
394 CALL runtime·mstart0(SB)
395 RET // not reached
396
397 /*
398 * go-routine
399 */
400
401 // func gogo(buf *gobuf)
402 // restore state from Gobuf; longjmp
403 TEXT runtime·gogo(SB), NOSPLIT, $0-8
404 MOVQ buf+0(FP), BX // gobuf
405 MOVQ gobuf_g(BX), DX
406 MOVQ 0(DX), CX // make sure g != nil
407 JMP gogo<>(SB)
408
409 TEXT gogo<>(SB), NOSPLIT, $0
410 get_tls(CX)
411 MOVQ DX, g(CX)
412 MOVQ DX, R14 // set the g register
413 MOVQ gobuf_sp(BX), SP // restore SP
414 MOVQ gobuf_ret(BX), AX
415 MOVQ gobuf_ctxt(BX), DX
416 MOVQ gobuf_bp(BX), BP
417 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
418 MOVQ $0, gobuf_ret(BX)
419 MOVQ $0, gobuf_ctxt(BX)
420 MOVQ $0, gobuf_bp(BX)
421 MOVQ gobuf_pc(BX), BX
422 JMP BX
423
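// For reference, the gobuf fields restored by gogo above correspond roughly
// to the following Go struct (an abridged sketch; see runtime2.go for the
// authoritative definition):
//
//	type gobuf struct {
//		sp   uintptr
//		pc   uintptr
//		g    guintptr
//		ctxt unsafe.Pointer
//		ret  uintptr
//		lr   uintptr
//		bp   uintptr
//	}
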
424 // func mcall(fn func(*g))
425 // Switch to m->g0's stack, call fn(g).
426 // Fn must never return. It should gogo(&g->sched)
427 // to keep running g.
428 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
429 MOVQ AX, DX // DX = fn
430
431 // Save state in g->sched. The caller's SP and PC are restored by gogo to
432 // resume execution in the caller's frame (implicit return). The caller's BP
433 // is also restored to support frame pointer unwinding.
434 MOVQ SP, BX // hide (SP) reads from vet
435 MOVQ 8(BX), BX // caller's PC
436 MOVQ BX, (g_sched+gobuf_pc)(R14)
437 LEAQ fn+0(FP), BX // caller's SP
438 MOVQ BX, (g_sched+gobuf_sp)(R14)
439 // Get the caller's frame pointer by dereferencing BP. Storing BP as it is
440 // can cause a frame pointer cycle, see CL 476235.
441 MOVQ (BP), BX // caller's BP
442 MOVQ BX, (g_sched+gobuf_bp)(R14)
443
444 // switch to m->g0 & its stack, call fn
445 MOVQ g_m(R14), BX
446 MOVQ m_g0(BX), SI // SI = g.m.g0
447 CMPQ SI, R14 // if g == m->g0 call badmcall
448 JNE goodm
449 JMP runtime·badmcall(SB)
450 goodm:
451 MOVQ R14, AX // AX (and arg 0) = g
452 MOVQ SI, R14 // g = g.m.g0
453 get_tls(CX) // Set G in TLS
454 MOVQ R14, g(CX)
455 MOVQ (g_sched+gobuf_sp)(R14), SP // sp = g0.sched.sp
456 PUSHQ AX // open up space for fn's arg spill slot
457 MOVQ 0(DX), R12
458 CALL R12 // fn(g)
459 POPQ AX
460 JMP runtime·badmcall2(SB)
461 RET
462
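// Typical Go-level usage of mcall inside the runtime (a sketch, not new API;
// fn must not return and usually ends by calling schedule or gogo):
//
//	mcall(func(gp *g) {
//		// now running on g0's stack; gp is the goroutine that called mcall
//	})
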
463 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
464 // of the G stack. We need to distinguish the routine that
465 // lives at the bottom of the G stack from the one that lives
466 // at the top of the system stack because the one at the top of
467 // the system stack terminates the stack walk (see topofstack()).
468 // The frame layout needs to match systemstack
469 // so that it can pretend to be systemstack_switch.
470 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
471 UNDEF
472 // Make sure this function is not leaf,
473 // so the frame is saved.
474 CALL runtime·abort(SB)
475 RET
476
477 // func systemstack(fn func())
478 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
479 MOVQ fn+0(FP), DI // DI = fn
480 get_tls(CX)
481 MOVQ g(CX), AX // AX = g
482 MOVQ g_m(AX), BX // BX = m
483
484 CMPQ AX, m_gsignal(BX)
485 JEQ noswitch
486
487 MOVQ m_g0(BX), DX // DX = g0
488 CMPQ AX, DX
489 JEQ noswitch
490
491 CMPQ AX, m_curg(BX)
492 JNE bad
493
494 // Switch stacks.
495 // The original frame pointer is stored in BP,
496 // which is useful for stack unwinding.
497 // Save our state in g->sched. Pretend to
498 // be systemstack_switch if the G stack is scanned.
499 CALL gosave_systemstack_switch<>(SB)
500
501 // switch to g0
502 MOVQ DX, g(CX)
503 MOVQ DX, R14 // set the g register
504 MOVQ (g_sched+gobuf_sp)(DX), SP
505
506 // call target function
507 MOVQ DI, DX
508 MOVQ 0(DI), DI
509 CALL DI
510
511 // switch back to g
512 get_tls(CX)
513 MOVQ g(CX), AX
514 MOVQ g_m(AX), BX
515 MOVQ m_curg(BX), AX
516 MOVQ AX, g(CX)
517 MOVQ (g_sched+gobuf_sp)(AX), SP
518 MOVQ (g_sched+gobuf_bp)(AX), BP
519 MOVQ $0, (g_sched+gobuf_sp)(AX)
520 MOVQ $0, (g_sched+gobuf_bp)(AX)
521 RET
522
523 noswitch:
524 // already on m stack; tail call the function
525 // Using a tail call here cleans up tracebacks since we won't stop
526 // at an intermediate systemstack.
527 MOVQ DI, DX
528 MOVQ 0(DI), DI
529 // The function epilogue is not called on a tail call.
530 // Pop BP from the stack to simulate it.
531 POPQ BP
532 JMP DI
533
534 bad:
535 // Bad: g is not gsignal, not g0, not curg. What is it?
536 MOVQ $runtime·badsystemstack(SB), AX
537 CALL AX
538 INT $3
539
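// Typical Go-level usage of systemstack (a sketch of existing runtime usage,
// not new API):
//
//	systemstack(func() {
//		// runs on the m->g0 stack; must not grow the goroutine stack
//		// and may run code that is not allowed on a user stack.
//	})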
540
541 /*
542 * support for morestack
543 */
544
545 // Called during function prolog when more stack is needed.
546 //
547 // The traceback routines see morestack on a g0 as being
548 // the top of a stack (for example, morestack calling newstack
549 // calling the scheduler calling newm calling gc), so we must
550 // record an argument size. For that purpose, it has no arguments.
551 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
552 // Cannot grow scheduler stack (m->g0).
553 get_tls(CX)
554 MOVQ g(CX), BX
555 MOVQ g_m(BX), BX
556 MOVQ m_g0(BX), SI
557 CMPQ g(CX), SI
558 JNE 3(PC)
559 CALL runtime·badmorestackg0(SB)
560 CALL runtime·abort(SB)
561
562 // Cannot grow signal stack (m->gsignal).
563 MOVQ m_gsignal(BX), SI
564 CMPQ g(CX), SI
565 JNE 3(PC)
566 CALL runtime·badmorestackgsignal(SB)
567 CALL runtime·abort(SB)
568
569 // Called from f.
570 // Set m->morebuf to f's caller.
571 NOP SP // tell vet SP changed - stop checking offsets
572 MOVQ 8(SP), AX // f's caller's PC
573 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
574 LEAQ 16(SP), AX // f's caller's SP
575 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
576 get_tls(CX)
577 MOVQ g(CX), SI
578 MOVQ SI, (m_morebuf+gobuf_g)(BX)
579
580 // Set g->sched to context in f.
581 MOVQ 0(SP), AX // f's PC
582 MOVQ AX, (g_sched+gobuf_pc)(SI)
583 LEAQ 8(SP), AX // f's SP
584 MOVQ AX, (g_sched+gobuf_sp)(SI)
585 MOVQ BP, (g_sched+gobuf_bp)(SI)
586 MOVQ DX, (g_sched+gobuf_ctxt)(SI)
587
588 // Call newstack on m->g0's stack.
589 MOVQ m_g0(BX), BX
590 MOVQ BX, g(CX)
591 MOVQ (g_sched+gobuf_sp)(BX), SP
592 MOVQ (g_sched+gobuf_bp)(BX), BP
593 CALL runtime·newstack(SB)
594 CALL runtime·abort(SB) // crash if newstack returns
595 RET
596
597 // morestack but not preserving ctxt.
598 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
599 MOVL $0, DX
600 JMP runtime·morestack(SB)
601
602 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
603 TEXT ·spillArgs(SB),NOSPLIT,$0-0
604 MOVQ AX, 0(R12)
605 MOVQ BX, 8(R12)
606 MOVQ CX, 16(R12)
607 MOVQ DI, 24(R12)
608 MOVQ SI, 32(R12)
609 MOVQ R8, 40(R12)
610 MOVQ R9, 48(R12)
611 MOVQ R10, 56(R12)
612 MOVQ R11, 64(R12)
613 MOVQ X0, 72(R12)
614 MOVQ X1, 80(R12)
615 MOVQ X2, 88(R12)
616 MOVQ X3, 96(R12)
617 MOVQ X4, 104(R12)
618 MOVQ X5, 112(R12)
619 MOVQ X6, 120(R12)
620 MOVQ X7, 128(R12)
621 MOVQ X8, 136(R12)
622 MOVQ X9, 144(R12)
623 MOVQ X10, 152(R12)
624 MOVQ X11, 160(R12)
625 MOVQ X12, 168(R12)
626 MOVQ X13, 176(R12)
627 MOVQ X14, 184(R12)
628 RET
629
630 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
631 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
632 MOVQ 0(R12), AX
633 MOVQ 8(R12), BX
634 MOVQ 16(R12), CX
635 MOVQ 24(R12), DI
636 MOVQ 32(R12), SI
637 MOVQ 40(R12), R8
638 MOVQ 48(R12), R9
639 MOVQ 56(R12), R10
640 MOVQ 64(R12), R11
641 MOVQ 72(R12), X0
642 MOVQ 80(R12), X1
643 MOVQ 88(R12), X2
644 MOVQ 96(R12), X3
645 MOVQ 104(R12), X4
646 MOVQ 112(R12), X5
647 MOVQ 120(R12), X6
648 MOVQ 128(R12), X7
649 MOVQ 136(R12), X8
650 MOVQ 144(R12), X9
651 MOVQ 152(R12), X10
652 MOVQ 160(R12), X11
653 MOVQ 168(R12), X12
654 MOVQ 176(R12), X13
655 MOVQ 184(R12), X14
656 RET
657
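// The offsets used by spillArgs/unspillArgs above mirror the layout of
// internal/abi.RegArgs: nine integer argument registers (AX, BX, CX, DI, SI,
// R8-R11) followed by fifteen floating-point registers (X0-X14). An abridged
// Go-level sketch (the real struct has additional fields):
//
//	type RegArgs struct {
//		Ints   [9]uintptr // offsets 0..64
//		Floats [15]uint64 // offsets 72..184
//	}
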
658 // reflectcall: call a function with the given argument list
659 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
660 // We don't have variable-sized frames, so we use a small number
661 // of constant-sized-frame functions to encode a few bits of size in the pc.
662 // Caution: ugly multiline assembly macros in your future!
663
664 #define DISPATCH(NAME,MAXSIZE) \
665 CMPQ CX, $MAXSIZE; \
666 JA 3(PC); \
667 MOVQ $NAME(SB), AX; \
668 JMP AX
669 // Note: can't just "JMP NAME(SB)" - bad inlining results.
670
671 TEXT ·reflectcall(SB), NOSPLIT, $0-48
672 MOVLQZX frameSize+32(FP), CX
673 DISPATCH(runtime·call16, 16)
674 DISPATCH(runtime·call32, 32)
675 DISPATCH(runtime·call64, 64)
676 DISPATCH(runtime·call128, 128)
677 DISPATCH(runtime·call256, 256)
678 DISPATCH(runtime·call512, 512)
679 DISPATCH(runtime·call1024, 1024)
680 DISPATCH(runtime·call2048, 2048)
681 DISPATCH(runtime·call4096, 4096)
682 DISPATCH(runtime·call8192, 8192)
683 DISPATCH(runtime·call16384, 16384)
684 DISPATCH(runtime·call32768, 32768)
685 DISPATCH(runtime·call65536, 65536)
686 DISPATCH(runtime·call131072, 131072)
687 DISPATCH(runtime·call262144, 262144)
688 DISPATCH(runtime·call524288, 524288)
689 DISPATCH(runtime·call1048576, 1048576)
690 DISPATCH(runtime·call2097152, 2097152)
691 DISPATCH(runtime·call4194304, 4194304)
692 DISPATCH(runtime·call8388608, 8388608)
693 DISPATCH(runtime·call16777216, 16777216)
694 DISPATCH(runtime·call33554432, 33554432)
695 DISPATCH(runtime·call67108864, 67108864)
696 DISPATCH(runtime·call134217728, 134217728)
697 DISPATCH(runtime·call268435456, 268435456)
698 DISPATCH(runtime·call536870912, 536870912)
699 DISPATCH(runtime·call1073741824, 1073741824)
700 MOVQ $runtime·badreflectcall(SB), AX
701 JMP AX
702
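// Worked example: a frameSize of 24 is greater than 16, so the call16 check
// falls through, and it is at most 32, so control transfers to runtime·call32,
// which reserves a constant 32-byte frame for the copied stack arguments and
// results.
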
703 #define CALLFN(NAME,MAXSIZE) \
704 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
705 NO_LOCAL_POINTERS; \
706 /* copy arguments to stack */ \
707 MOVQ stackArgs+16(FP), SI; \
708 MOVLQZX stackArgsSize+24(FP), CX; \
709 MOVQ SP, DI; \
710 REP;MOVSB; \
711 /* set up argument registers */ \
712 MOVQ regArgs+40(FP), R12; \
713 CALL ·unspillArgs(SB); \
714 /* call function */ \
715 MOVQ f+8(FP), DX; \
716 PCDATA $PCDATA_StackMapIndex, $0; \
717 MOVQ (DX), R12; \
718 CALL R12; \
719 /* copy register return values back */ \
720 MOVQ regArgs+40(FP), R12; \
721 CALL ·spillArgs(SB); \
722 MOVLQZX stackArgsSize+24(FP), CX; \
723 MOVLQZX stackRetOffset+28(FP), BX; \
724 MOVQ stackArgs+16(FP), DI; \
725 MOVQ stackArgsType+0(FP), DX; \
726 MOVQ SP, SI; \
727 ADDQ BX, DI; \
728 ADDQ BX, SI; \
729 SUBQ BX, CX; \
730 CALL callRet<>(SB); \
731 RET
732
733 // callRet copies return values back at the end of call*. This is a
734 // separate function so it can allocate stack space for the arguments
735 // to reflectcallmove. It does not follow the Go ABI; it expects its
736 // arguments in registers.
737 TEXT callRet<>(SB), NOSPLIT, $40-0
738 NO_LOCAL_POINTERS
739 MOVQ DX, 0(SP)
740 MOVQ DI, 8(SP)
741 MOVQ SI, 16(SP)
742 MOVQ CX, 24(SP)
743 MOVQ R12, 32(SP)
744 CALL runtime·reflectcallmove(SB)
745 RET
746
747 CALLFN(·call16, 16)
748 CALLFN(·call32, 32)
749 CALLFN(·call64, 64)
750 CALLFN(·call128, 128)
751 CALLFN(·call256, 256)
752 CALLFN(·call512, 512)
753 CALLFN(·call1024, 1024)
754 CALLFN(·call2048, 2048)
755 CALLFN(·call4096, 4096)
756 CALLFN(·call8192, 8192)
757 CALLFN(·call16384, 16384)
758 CALLFN(·call32768, 32768)
759 CALLFN(·call65536, 65536)
760 CALLFN(·call131072, 131072)
761 CALLFN(·call262144, 262144)
762 CALLFN(·call524288, 524288)
763 CALLFN(·call1048576, 1048576)
764 CALLFN(·call2097152, 2097152)
765 CALLFN(·call4194304, 4194304)
766 CALLFN(·call8388608, 8388608)
767 CALLFN(·call16777216, 16777216)
768 CALLFN(·call33554432, 33554432)
769 CALLFN(·call67108864, 67108864)
770 CALLFN(·call134217728, 134217728)
771 CALLFN(·call268435456, 268435456)
772 CALLFN(·call536870912, 536870912)
773 CALLFN(·call1073741824, 1073741824)
774
775 TEXT runtime·procyield(SB),NOSPLIT,$0-0
776 MOVL cycles+0(FP), AX
777 again:
778 PAUSE
779 SUBL $1, AX
780 JNZ again
781 RET
782
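// Rough Go-level equivalent of procyield (illustrative only; PAUSE has no
// direct Go counterpart):
//
//	func procyield(cycles uint32) {
//		for i := uint32(0); i < cycles; i++ {
//			// PAUSE: hint to the CPU that this is a spin-wait loop
//		}
//	}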
783
784 TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
785 // Stores are already ordered on x86, so this is just a
786 // compile barrier.
787 RET
788
789 // Save state of caller into g->sched,
790 // but using fake PC from systemstack_switch.
791 // Must only be called from functions with a frame pointer
792 // and without locals ($0) or else unwinding from
793 // systemstack_switch is incorrect.
794 // Smashes R9.
795 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
796 // Take systemstack_switch PC and add 8 bytes to skip
797 // the prologue. The final location does not matter
798 // as long as we are between the prologue and the epilogue.
799 MOVQ $runtime·systemstack_switch+8(SB), R9
800 MOVQ R9, (g_sched+gobuf_pc)(R14)
801 LEAQ 8(SP), R9
802 MOVQ R9, (g_sched+gobuf_sp)(R14)
803 MOVQ $0, (g_sched+gobuf_ret)(R14)
804 MOVQ BP, (g_sched+gobuf_bp)(R14)
805 // Assert ctxt is zero. See func save.
806 MOVQ (g_sched+gobuf_ctxt)(R14), R9
807 TESTQ R9, R9
808 JZ 2(PC)
809 CALL runtime·abort(SB)
810 RET
811
812 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
813 // Call fn(arg) aligned appropriately for the gcc ABI.
814 // Called on a system stack, and there may be no g yet (during needm).
815 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
816 MOVQ fn+0(FP), AX
817 MOVQ arg+8(FP), BX
818 MOVQ SP, DX
819 ANDQ $~15, SP // alignment
820 MOVQ DX, 8(SP)
821 MOVQ BX, DI // DI = first argument in AMD64 ABI
822 MOVQ BX, CX // CX = first argument in Win64
823 CALL AX
824 MOVQ 8(SP), DX
825 MOVQ DX, SP
826 RET
827
828 // func asmcgocall(fn, arg unsafe.Pointer) int32
829 // Call fn(arg) on the scheduler stack,
830 // aligned appropriately for the gcc ABI.
831 // See cgocall.go for more details.
832 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
833 MOVQ fn+0(FP), AX
834 MOVQ arg+8(FP), BX
835
836 MOVQ SP, DX
837
838 // Figure out if we need to switch to m->g0 stack.
839 // We get called to create new OS threads too, and those
840 // come in on the m->g0 stack already. Or we might already
841 // be on the m->gsignal stack.
842 get_tls(CX)
843 MOVQ g(CX), DI
844 CMPQ DI, $0
845 JEQ nosave
846 MOVQ g_m(DI), R8
847 MOVQ m_gsignal(R8), SI
848 CMPQ DI, SI
849 JEQ nosave
850 MOVQ m_g0(R8), SI
851 CMPQ DI, SI
852 JEQ nosave
853
854 // Switch to system stack.
855 // The original frame pointer is stored in BP,
856 // which is useful for stack unwinding.
857 CALL gosave_systemstack_switch<>(SB)
858 MOVQ SI, g(CX)
859 MOVQ (g_sched+gobuf_sp)(SI), SP
860
861 // Now on a scheduling stack (a pthread-created stack).
862 // Make sure we have enough room for 4 stack-backed fast-call
863 // registers as per windows amd64 calling convention.
864 SUBQ $64, SP
865 ANDQ $~15, SP // alignment for gcc ABI
866 MOVQ DI, 48(SP) // save g
867 MOVQ (g_stack+stack_hi)(DI), DI
868 SUBQ DX, DI
869 MOVQ DI, 40(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
870 MOVQ BX, DI // DI = first argument in AMD64 ABI
871 MOVQ BX, CX // CX = first argument in Win64
872 CALL AX
873
874 // Restore registers, g, stack pointer.
875 get_tls(CX)
876 MOVQ 48(SP), DI
877 MOVQ (g_stack+stack_hi)(DI), SI
878 SUBQ 40(SP), SI
879 MOVQ DI, g(CX)
880 MOVQ SI, SP
881
882 MOVL AX, ret+16(FP)
883 RET
884
885 nosave:
886 // Running on a system stack, perhaps even without a g.
887 // Having no g can happen during thread creation or thread teardown
888 // (see needm/dropm on Solaris, for example).
889 // This code is like the above sequence but without saving/restoring g
890 // and without worrying about the stack moving out from under us
891 // (because we're on a system stack, not a goroutine stack).
892 // The above code could be used directly if already on a system stack,
893 // but then the only path through this code would be a rare case on Solaris.
894 // Using this code for all "already on system stack" calls exercises it more,
895 // which should help keep it correct.
896 SUBQ $64, SP
897 ANDQ $~15, SP
898 MOVQ $0, 48(SP) // where above code stores g, in case someone looks during debugging
899 MOVQ DX, 40(SP) // save original stack pointer
900 MOVQ BX, DI // DI = first argument in AMD64 ABI
901 MOVQ BX, CX // CX = first argument in Win64
902 CALL AX
903 MOVQ 40(SP), SI // restore original stack pointer
904 MOVQ SI, SP
905 MOVL AX, ret+16(FP)
906 RET
907
908 #ifdef GOOS_windows
909 // Dummy TLS that's used on Windows so that we don't crash trying
910 // to restore the G register in needm. needm and its callees are
911 // very careful never to actually use the G; the TLS just can't be
912 // unset since we're in Go code.
913 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
914 #endif
915
916 // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
917 // See cgocall.go for more details.
918 TEXT ·cgocallback(SB),NOSPLIT,$24-24
919 NO_LOCAL_POINTERS
920
921 // Skip cgocallbackg and just dropm when fn is nil; in that case frame is the saved g.
922 // This path is used to dropm while the thread is exiting.
923 MOVQ fn+0(FP), AX
924 CMPQ AX, $0
925 JNE loadg
926 // Restore the g from frame.
927 get_tls(CX)
928 MOVQ frame+8(FP), BX
929 MOVQ BX, g(CX)
930 JMP dropm
931
932 loadg:
933 // If g is nil, then either Go did not create the current thread,
934 // or this thread has never called into Go on pthread platforms.
935 // Call needm to obtain one m for temporary use.
936 // In this case, we're running on the thread stack, so there's
937 // lots of space, but the linker doesn't know. Hide the call from
938 // the linker analysis by using an indirect call through AX.
939 get_tls(CX)
940 #ifdef GOOS_windows
941 MOVL $0, BX
942 CMPQ CX, $0
943 JEQ 2(PC)
944 #endif
945 MOVQ g(CX), BX
946 CMPQ BX, $0
947 JEQ needm
948 MOVQ g_m(BX), BX
949 MOVQ BX, savedm-8(SP) // saved copy of oldm
950 JMP havem
951 needm:
952 #ifdef GOOS_windows
953 // Set up a dummy TLS value. needm is careful not to use it,
954 // but it needs to be there to prevent autogenerated code from
955 // crashing when it loads from it.
956 // We don't need to clear it or anything later because needm
957 // will set up TLS properly.
958 MOVQ $zeroTLS<>(SB), DI
959 CALL runtime·settls(SB)
960 #endif
961 // On some platforms (Windows) we cannot call needm through
962 // an ABI wrapper because there's no TLS set up, and the ABI
963 // wrapper will try to restore the G register (R14) from TLS.
964 // Clear X15 because Go expects it and we're not calling
965 // through a wrapper, but otherwise avoid setting the G
966 // register in the wrapper and call needm directly. It
967 // takes no arguments and doesn't return any values so
968 // there's no need to handle that. Clear R14 so that there's
969 // a bad value in there, in case needm tries to use it.
970 XORPS X15, X15
971 XORQ R14, R14
972 MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
973 CALL AX
974 MOVQ $0, savedm-8(SP)
975 get_tls(CX)
976 MOVQ g(CX), BX
977 MOVQ g_m(BX), BX
978
979 // Set m->sched.sp = SP, so that if a panic happens
980 // during the function we are about to execute, it will
981 // have a valid SP to run on the g0 stack.
982 // The next few lines (after the havem label)
983 // will save this SP onto the stack and then write
984 // the same SP back to m->sched.sp. That seems redundant,
985 // but if an unrecovered panic happens, unwindm will
986 // restore the g->sched.sp from the stack location
987 // and then systemstack will try to use it. If we don't set it here,
988 // that restored SP will be uninitialized (typically 0) and
989 // will not be usable.
990 MOVQ m_g0(BX), SI
991 MOVQ SP, (g_sched+gobuf_sp)(SI)
992
993 havem:
994 // Now there's a valid m, and we're running on its m->g0.
995 // Save current m->g0->sched.sp on stack and then set it to SP.
996 // Save current sp in m->g0->sched.sp in preparation for
997 // switch back to m->curg stack.
998 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
999 MOVQ m_g0(BX), SI
1000 MOVQ (g_sched+gobuf_sp)(SI), AX
1001 MOVQ AX, 0(SP)
1002 MOVQ SP, (g_sched+gobuf_sp)(SI)
1003
1004 // Switch to m->curg stack and call runtime.cgocallbackg.
1005 // Because we are taking over the execution of m->curg
1006 // but *not* resuming what had been running, we need to
1007 // save that information (m->curg->sched) so we can restore it.
1008 // We can restore m->curg->sched.sp easily, because calling
1009 // runtime.cgocallbackg leaves SP unchanged upon return.
1010 // To save m->curg->sched.pc, we push it onto the curg stack and
1011 // open a frame the same size as cgocallback's g0 frame.
1012 // Once we switch to the curg stack, the pushed PC will appear
1013 // to be the return PC of cgocallback, so that the traceback
1014 // will seamlessly trace back into the earlier calls.
1015 MOVQ m_curg(BX), SI
1016 MOVQ SI, g(CX)
1017 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
1018 MOVQ (g_sched+gobuf_pc)(SI), BX
1019 MOVQ BX, -8(DI) // "push" return PC on the g stack
1020 // Gather our arguments into registers.
1021 MOVQ fn+0(FP), BX
1022 MOVQ frame+8(FP), CX
1023 MOVQ ctxt+16(FP), DX
1024 // Compute the size of the frame, including return PC and, if
1025 // GOEXPERIMENT=framepointer, the saved base pointer
1026 LEAQ fn+0(FP), AX
1027 SUBQ SP, AX // AX is our actual frame size
1028 SUBQ AX, DI // Allocate the same frame size on the g stack
1029 MOVQ DI, SP
1030
1031 MOVQ BX, 0(SP)
1032 MOVQ CX, 8(SP)
1033 MOVQ DX, 16(SP)
1034 MOVQ $runtime·cgocallbackg(SB), AX
1035 CALL AX // indirect call to bypass nosplit check. We're on a different stack now.
1036
1037 // Compute the size of the frame again. FP and SP have
1038 // completely different values here than they did above,
1039 // but only their difference matters.
1040 LEAQ fn+0(FP), AX
1041 SUBQ SP, AX
1042
1043 // Restore g->sched (== m->curg->sched) from saved values.
1044 get_tls(CX)
1045 MOVQ g(CX), SI
1046 MOVQ SP, DI
1047 ADDQ AX, DI
1048 MOVQ -8(DI), BX
1049 MOVQ BX, (g_sched+gobuf_pc)(SI)
1050 MOVQ DI, (g_sched+gobuf_sp)(SI)
1051
1052 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1053 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1054 // so we do not have to restore it.)
1055 MOVQ g(CX), BX
1056 MOVQ g_m(BX), BX
1057 MOVQ m_g0(BX), SI
1058 MOVQ SI, g(CX)
1059 MOVQ (g_sched+gobuf_sp)(SI), SP
1060 MOVQ 0(SP), AX
1061 MOVQ AX, (g_sched+gobuf_sp)(SI)
1062
1063 // If the m on entry was nil, we called needm above to borrow an m,
1064 // 1. for the duration of the call on non-pthread platforms,
1065 // 2. or for the lifetime of the C thread on pthread platforms.
1066 // If the m on entry wasn't nil,
1067 // 1. the thread might be a Go thread,
1068 // 2. or it wasn't the first call from a C thread on pthread platforms,
1069 // in which case we skip dropm so the m from the first call can be reused.
1070 MOVQ savedm-8(SP), BX
1071 CMPQ BX, $0
1072 JNE done
1073
1074 // When a pthread key has been created, skip dropm so the m can be reused in the next call.
1075 MOVQ _cgo_pthread_key_created(SB), AX
1076 // A nil _cgo_pthread_key_created pointer means cgo is disabled, so we need dropm.
1077 CMPQ AX, $0
1078 JEQ dropm
1079 CMPQ (AX), $0
1080 JNE done
1081
1082 dropm:
1083 MOVQ $runtime·dropm(SB), AX
1084 CALL AX
1085 #ifdef GOOS_windows
1086 // We need to clear the TLS pointer in case the next
1087 // thread that comes into Go tries to reuse that space
1088 // but uses the same M.
1089 XORQ DI, DI
1090 CALL runtime·settls(SB)
1091 #endif
1092 done:
1093
1094 // Done!
1095 RET
1096
1097 // func setg(gg *g)
1098 // set g. for use by needm.
1099 TEXT runtime·setg(SB), NOSPLIT, $0-8
1100 MOVQ gg+0(FP), BX
1101 get_tls(CX)
1102 MOVQ BX, g(CX)
1103 RET
1104
1105 // void setg_gcc(G*); set g called from gcc.
1106 TEXT setg_gcc<>(SB),NOSPLIT,$0
1107 get_tls(AX)
1108 MOVQ DI, g(AX)
1109 MOVQ DI, R14 // set the g register
1110 RET
1111
1112 TEXT runtime·abort(SB),NOSPLIT,$0-0
1113 INT $3
1114 loop:
1115 JMP loop
1116
1117 // check that SP is in range [g->stack.lo, g->stack.hi)
1118 TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
1119 get_tls(CX)
1120 MOVQ g(CX), AX
1121 CMPQ (g_stack+stack_hi)(AX), SP
1122 JHI 2(PC)
1123 CALL runtime·abort(SB)
1124 CMPQ SP, (g_stack+stack_lo)(AX)
1125 JHI 2(PC)
1126 CALL runtime·abort(SB)
1127 RET
1128
1129 // func cputicks() int64
1130 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
1131 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
1132 JNE fences
1133 // The instruction-stream-serializing RDTSCP instruction is supported.
1134 // RDTSCP is supported by Intel Nehalem (2008) and
1135 // AMD K8 Rev. F (2006) and newer.
1136 RDTSCP
1137 done:
1138 SHLQ $32, DX
1139 ADDQ DX, AX
1140 MOVQ AX, ret+0(FP)
1141 RET
1142 fences:
1143 // MFENCE is instruction stream serializing and flushes the
1144 // store buffers on AMD. The serialization semantics of LFENCE on AMD
1145 // are dependent on MSR C001_1029 and CPU generation.
1146 // LFENCE on Intel does wait for all previous instructions to have executed.
1147 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC so that all
1148 // previous instructions have executed and all previous loads and stores are globally visible.
1149 // Using MFENCE;LFENCE here aligns the serializing properties without
1150 // runtime detection of CPU manufacturer.
1151 MFENCE
1152 LFENCE
1153 RDTSC
1154 JMP done
1155
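// The done block above combines the two 32-bit halves returned in DX:AX by
// RDTSC/RDTSCP into a 64-bit tick count, roughly:
//
//	ticks := int64(dx)<<32 + int64(ax)
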
1156 // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
1157 // hash function using AES hardware instructions
1158 TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
1159 // AX = ptr to data
1160 // BX = seed
1161 // CX = size
1162 CMPB runtime·useAeshash(SB), $0
1163 JEQ noaes
1164 JMP aeshashbody<>(SB)
1165 noaes:
1166 JMP runtime·memhashFallback<ABIInternal>(SB)
1167
1168 // func strhash(p unsafe.Pointer, h uintptr) uintptr
1169 TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
1170 // AX = ptr to string struct
1171 // BX = seed
1172 CMPB runtime·useAeshash(SB), $0
1173 JEQ noaes
1174 MOVQ 8(AX), CX // length of string
1175 MOVQ (AX), AX // string data
1176 JMP aeshashbody<>(SB)
1177 noaes:
1178 JMP runtime·strhashFallback<ABIInternal>(SB)
1179
1180 // AX: data
1181 // BX: hash seed
1182 // CX: length
1183 // At return: AX = return value
1184 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
1185 // Fill an SSE register with our seeds.
1186 MOVQ BX, X0 // 64 bits of per-table hash seed
1187 PINSRW $4, CX, X0 // 16 bits of length
1188 PSHUFHW $0, X0, X0 // repeat length 4 times total
1189 MOVO X0, X1 // save unscrambled seed
1190 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
1191 AESENC X0, X0 // scramble seed
1192
1193 CMPQ CX, $16
1194 JB aes0to15
1195 JE aes16
1196 CMPQ CX, $32
1197 JBE aes17to32
1198 CMPQ CX, $64
1199 JBE aes33to64
1200 CMPQ CX, $128
1201 JBE aes65to128
1202 JMP aes129plus
1203
1204 aes0to15:
1205 TESTQ CX, CX
1206 JE aes0
1207
1208 ADDQ $16, AX
1209 TESTW $0xff0, AX
1210 JE endofpage
1211
1212 // 16 bytes loaded at this address won't cross
1213 // a page boundary, so we can load it directly.
1214 MOVOU -16(AX), X1
1215 ADDQ CX, CX
1216 MOVQ $masks<>(SB), AX
1217 PAND (AX)(CX*8), X1
1218 final1:
1219 PXOR X0, X1 // xor data with seed
1220 AESENC X1, X1 // scramble combo 3 times
1221 AESENC X1, X1
1222 AESENC X1, X1
1223 MOVQ X1, AX // return X1
1224 RET
1225
1226 endofpage:
1227 // address ends in 1111xxxx. Might be up against
1228 // a page boundary, so load ending at last byte.
1229 // Then shift bytes down using pshufb.
1230 MOVOU -32(AX)(CX*1), X1
1231 ADDQ CX, CX
1232 MOVQ $shifts<>(SB), AX
1233 PSHUFB (AX)(CX*8), X1
1234 JMP final1
1235
1236 aes0:
1237 // Return scrambled input seed
1238 AESENC X0, X0
1239 MOVQ X0, AX // return X0
1240 RET
1241
1242 aes16:
1243 MOVOU (AX), X1
1244 JMP final1
1245
1246 aes17to32:
1247 // make second starting seed
1248 PXOR runtime·aeskeysched+16(SB), X1
1249 AESENC X1, X1
1250
1251 // load data to be hashed
1252 MOVOU (AX), X2
1253 MOVOU -16(AX)(CX*1), X3
1254
1255 // xor with seed
1256 PXOR X0, X2
1257 PXOR X1, X3
1258
1259 // scramble 3 times
1260 AESENC X2, X2
1261 AESENC X3, X3
1262 AESENC X2, X2
1263 AESENC X3, X3
1264 AESENC X2, X2
1265 AESENC X3, X3
1266
1267 // combine results
1268 PXOR X3, X2
1269 MOVQ X2, AX // return X2
1270 RET
1271
1272 aes33to64:
1273 // make 3 more starting seeds
1274 MOVO X1, X2
1275 MOVO X1, X3
1276 PXOR runtime·aeskeysched+16(SB), X1
1277 PXOR runtime·aeskeysched+32(SB), X2
1278 PXOR runtime·aeskeysched+48(SB), X3
1279 AESENC X1, X1
1280 AESENC X2, X2
1281 AESENC X3, X3
1282
1283 MOVOU (AX), X4
1284 MOVOU 16(AX), X5
1285 MOVOU -32(AX)(CX*1), X6
1286 MOVOU -16(AX)(CX*1), X7
1287
1288 PXOR X0, X4
1289 PXOR X1, X5
1290 PXOR X2, X6
1291 PXOR X3, X7
1292
1293 AESENC X4, X4
1294 AESENC X5, X5
1295 AESENC X6, X6
1296 AESENC X7, X7
1297
1298 AESENC X4, X4
1299 AESENC X5, X5
1300 AESENC X6, X6
1301 AESENC X7, X7
1302
1303 AESENC X4, X4
1304 AESENC X5, X5
1305 AESENC X6, X6
1306 AESENC X7, X7
1307
1308 PXOR X6, X4
1309 PXOR X7, X5
1310 PXOR X5, X4
1311 MOVQ X4, AX // return X4
1312 RET
1313
1314 aes65to128:
1315 // make 7 more starting seeds
1316 MOVO X1, X2
1317 MOVO X1, X3
1318 MOVO X1, X4
1319 MOVO X1, X5
1320 MOVO X1, X6
1321 MOVO X1, X7
1322 PXOR runtime·aeskeysched+16(SB), X1
1323 PXOR runtime·aeskeysched+32(SB), X2
1324 PXOR runtime·aeskeysched+48(SB), X3
1325 PXOR runtime·aeskeysched+64(SB), X4
1326 PXOR runtime·aeskeysched+80(SB), X5
1327 PXOR runtime·aeskeysched+96(SB), X6
1328 PXOR runtime·aeskeysched+112(SB), X7
1329 AESENC X1, X1
1330 AESENC X2, X2
1331 AESENC X3, X3
1332 AESENC X4, X4
1333 AESENC X5, X5
1334 AESENC X6, X6
1335 AESENC X7, X7
1336
1337 // load data
1338 MOVOU (AX), X8
1339 MOVOU 16(AX), X9
1340 MOVOU 32(AX), X10
1341 MOVOU 48(AX), X11
1342 MOVOU -64(AX)(CX*1), X12
1343 MOVOU -48(AX)(CX*1), X13
1344 MOVOU -32(AX)(CX*1), X14
1345 MOVOU -16(AX)(CX*1), X15
1346
1347 // xor with seed
1348 PXOR X0, X8
1349 PXOR X1, X9
1350 PXOR X2, X10
1351 PXOR X3, X11
1352 PXOR X4, X12
1353 PXOR X5, X13
1354 PXOR X6, X14
1355 PXOR X7, X15
1356
1357 // scramble 3 times
1358 AESENC X8, X8
1359 AESENC X9, X9
1360 AESENC X10, X10
1361 AESENC X11, X11
1362 AESENC X12, X12
1363 AESENC X13, X13
1364 AESENC X14, X14
1365 AESENC X15, X15
1366
1367 AESENC X8, X8
1368 AESENC X9, X9
1369 AESENC X10, X10
1370 AESENC X11, X11
1371 AESENC X12, X12
1372 AESENC X13, X13
1373 AESENC X14, X14
1374 AESENC X15, X15
1375
1376 AESENC X8, X8
1377 AESENC X9, X9
1378 AESENC X10, X10
1379 AESENC X11, X11
1380 AESENC X12, X12
1381 AESENC X13, X13
1382 AESENC X14, X14
1383 AESENC X15, X15
1384
1385 // combine results
1386 PXOR X12, X8
1387 PXOR X13, X9
1388 PXOR X14, X10
1389 PXOR X15, X11
1390 PXOR X10, X8
1391 PXOR X11, X9
1392 PXOR X9, X8
1393 // X15 must be zero on return
1394 PXOR X15, X15
1395 MOVQ X8, AX // return X8
1396 RET
1397
1398 aes129plus:
1399 // make 7 more starting seeds
1400 MOVO X1, X2
1401 MOVO X1, X3
1402 MOVO X1, X4
1403 MOVO X1, X5
1404 MOVO X1, X6
1405 MOVO X1, X7
1406 PXOR runtime·aeskeysched+16(SB), X1
1407 PXOR runtime·aeskeysched+32(SB), X2
1408 PXOR runtime·aeskeysched+48(SB), X3
1409 PXOR runtime·aeskeysched+64(SB), X4
1410 PXOR runtime·aeskeysched+80(SB), X5
1411 PXOR runtime·aeskeysched+96(SB), X6
1412 PXOR runtime·aeskeysched+112(SB), X7
1413 AESENC X1, X1
1414 AESENC X2, X2
1415 AESENC X3, X3
1416 AESENC X4, X4
1417 AESENC X5, X5
1418 AESENC X6, X6
1419 AESENC X7, X7
1420
1421 // start with last (possibly overlapping) block
1422 MOVOU -128(AX)(CX*1), X8
1423 MOVOU -112(AX)(CX*1), X9
1424 MOVOU -96(AX)(CX*1), X10
1425 MOVOU -80(AX)(CX*1), X11
1426 MOVOU -64(AX)(CX*1), X12
1427 MOVOU -48(AX)(CX*1), X13
1428 MOVOU -32(AX)(CX*1), X14
1429 MOVOU -16(AX)(CX*1), X15
1430
1431 // xor in seed
1432 PXOR X0, X8
1433 PXOR X1, X9
1434 PXOR X2, X10
1435 PXOR X3, X11
1436 PXOR X4, X12
1437 PXOR X5, X13
1438 PXOR X6, X14
1439 PXOR X7, X15
1440
1441 // compute number of remaining 128-byte blocks
1442 DECQ CX
1443 SHRQ $7, CX
1444
1445 aesloop:
1446 // scramble state
1447 AESENC X8, X8
1448 AESENC X9, X9
1449 AESENC X10, X10
1450 AESENC X11, X11
1451 AESENC X12, X12
1452 AESENC X13, X13
1453 AESENC X14, X14
1454 AESENC X15, X15
1455
1456 // scramble state, xor in a block
1457 MOVOU (AX), X0
1458 MOVOU 16(AX), X1
1459 MOVOU 32(AX), X2
1460 MOVOU 48(AX), X3
1461 AESENC X0, X8
1462 AESENC X1, X9
1463 AESENC X2, X10
1464 AESENC X3, X11
1465 MOVOU 64(AX), X4
1466 MOVOU 80(AX), X5
1467 MOVOU 96(AX), X6
1468 MOVOU 112(AX), X7
1469 AESENC X4, X12
1470 AESENC X5, X13
1471 AESENC X6, X14
1472 AESENC X7, X15
1473
1474 ADDQ $128, AX
1475 DECQ CX
1476 JNE aesloop
1477
1478 // 3 more scrambles to finish
1479 AESENC X8, X8
1480 AESENC X9, X9
1481 AESENC X10, X10
1482 AESENC X11, X11
1483 AESENC X12, X12
1484 AESENC X13, X13
1485 AESENC X14, X14
1486 AESENC X15, X15
1487 AESENC X8, X8
1488 AESENC X9, X9
1489 AESENC X10, X10
1490 AESENC X11, X11
1491 AESENC X12, X12
1492 AESENC X13, X13
1493 AESENC X14, X14
1494 AESENC X15, X15
1495 AESENC X8, X8
1496 AESENC X9, X9
1497 AESENC X10, X10
1498 AESENC X11, X11
1499 AESENC X12, X12
1500 AESENC X13, X13
1501 AESENC X14, X14
1502 AESENC X15, X15
1503
1504 PXOR X12, X8
1505 PXOR X13, X9
1506 PXOR X14, X10
1507 PXOR X15, X11
1508 PXOR X10, X8
1509 PXOR X11, X9
1510 PXOR X9, X8
1511 // X15 must be zero on return
1512 PXOR X15, X15
1513 MOVQ X8, AX // return X8
1514 RET
1515
1516 // func memhash32(p unsafe.Pointer, h uintptr) uintptr
1517 // ABIInternal for performance.
1518 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
1519 // AX = ptr to data
1520 // BX = seed
1521 CMPB runtime·useAeshash(SB), $0
1522 JEQ noaes
1523 MOVQ BX, X0 // X0 = seed
1524 PINSRD $2, (AX), X0 // data
1525 AESENC runtime·aeskeysched+0(SB), X0
1526 AESENC runtime·aeskeysched+16(SB), X0
1527 AESENC runtime·aeskeysched+32(SB), X0
1528 MOVQ X0, AX // return X0
1529 RET
1530 noaes:
1531 JMP runtime·memhash32Fallback<ABIInternal>(SB)
1532
1533 // func memhash64(p unsafe.Pointer, h uintptr) uintptr
1534 // ABIInternal for performance.
1535 TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
1536 // AX = ptr to data
1537 // BX = seed
1538 CMPB runtime·useAeshash(SB), $0
1539 JEQ noaes
1540 MOVQ BX, X0 // X0 = seed
1541 PINSRQ $1, (AX), X0 // data
1542 AESENC runtime·aeskeysched+0(SB), X0
1543 AESENC runtime·aeskeysched+16(SB), X0
1544 AESENC runtime·aeskeysched+32(SB), X0
1545 MOVQ X0, AX // return X0
1546 RET
1547 noaes:
1548 JMP runtime·memhash64Fallback<ABIInternal>(SB)
1549
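// A rough sketch of the AES fast path in memhash32/memhash64 above
// (illustrative pseudocode, not a real runtime function):
//
//	state := 128 bits holding the seed (low 64 bits) and the loaded data
//	state = aesenc(state, aeskeysched[0:16])
//	state = aesenc(state, aeskeysched[16:32])
//	state = aesenc(state, aeskeysched[32:48])
//	return the low 64 bits of state
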
1550 // Simple masks to get rid of data in the high part of the register.
1551 DATA masks<>+0x00(SB)/8, $0x0000000000000000
1552 DATA masks<>+0x08(SB)/8, $0x0000000000000000
1553 DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1554 DATA masks<>+0x18(SB)/8, $0x0000000000000000
1555 DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1556 DATA masks<>+0x28(SB)/8, $0x0000000000000000
1557 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1558 DATA masks<>+0x38(SB)/8, $0x0000000000000000
1559 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1560 DATA masks<>+0x48(SB)/8, $0x0000000000000000
1561 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1562 DATA masks<>+0x58(SB)/8, $0x0000000000000000
1563 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1564 DATA masks<>+0x68(SB)/8, $0x0000000000000000
1565 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1566 DATA masks<>+0x78(SB)/8, $0x0000000000000000
1567 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1568 DATA masks<>+0x88(SB)/8, $0x0000000000000000
1569 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1570 DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1571 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1572 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1573 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1574 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1575 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1576 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1577 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1578 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1579 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1580 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1581 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1582 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
1583 GLOBL masks<>(SB),RODATA,$256
1584
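// Worked example: for a 3-byte input, aes0to15 doubles the length and indexes
// masks<> with a scale of 8, i.e. masks<>+16*3 = $0x0000000000ffffff / $0, so
// the PAND keeps only the low 3 bytes of the 16-byte load.
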
1585 // func checkASM() bool
1586 TEXT ·checkASM(SB),NOSPLIT,$0-1
1587 // Check that masks<>(SB) and shifts<>(SB) are 16-byte aligned.
1588 MOVQ $masks<>(SB), AX
1589 MOVQ $shifts<>(SB), BX
1590 ORQ BX, AX
1591 TESTQ $15, AX
1592 SETEQ ret+0(FP)
1593 RET
1594
1595 // These are arguments to pshufb. They move data down from
1596 // the high bytes of the register to the low bytes of the register.
1597 // The index is how many bytes to move.
1598 DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1599 DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1600 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1601 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1602 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1603 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1604 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1605 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1606 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1607 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1608 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1609 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1610 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1611 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1612 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1613 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1614 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1615 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1616 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1617 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1618 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1619 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1620 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1621 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1622 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1623 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1624 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1625 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1626 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1627 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1628 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1629 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1630 GLOBL shifts<>(SB),RODATA,$256
1631
1632 TEXT runtime·return0(SB), NOSPLIT, $0
1633 MOVL $0, AX
1634 RET
1635
1636
1637 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1638 // Must obey the gcc calling convention.
1639 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1640 get_tls(CX)
1641 MOVQ g(CX), AX
1642 MOVQ g_m(AX), AX
1643 MOVQ m_curg(AX), AX
1644 MOVQ (g_stack+stack_hi)(AX), AX
1645 RET
1646
1647 // The top-most function running on a goroutine
1648 // returns to goexit+PCQuantum.
1649 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
1650 BYTE $0x90 // NOP
1651 CALL runtime·goexit1(SB) // does not return
1652 // traceback from goexit1 must hit code range of goexit
1653 BYTE $0x90 // NOP
1654
1655 // This is called from .init_array and follows the platform, not Go, ABI.
1656 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1657 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1658 MOVQ runtime·lastmoduledatap(SB), AX
1659 MOVQ DI, moduledata_next(AX)
1660 MOVQ DI, runtime·lastmoduledatap(SB)
1661 POPQ R15
1662 RET
1663
1664 // Initialize special registers then jump to sigpanic.
1665 // This function is injected from the signal handler for panicking
1666 // signals. It is quite painful to set X15 in the signal context,
1667 // so we do it here.
1668 TEXT ·sigpanic0(SB),NOSPLIT,$0-0
1669 get_tls(R14)
1670 MOVQ g(R14), R14
1671 #ifndef GOOS_plan9
1672 XORPS X15, X15
1673 #endif
1674 JMP ·sigpanic<ABIInternal>(SB)
1675
1676 // gcWriteBarrier informs the GC about heap pointer writes.
1677 //
1678 // gcWriteBarrier returns space in a write barrier buffer which
1679 // should be filled in by the caller.
1680 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1681 // number of bytes of buffer needed in R11, and returns a pointer
1682 // to the buffer space in R11.
1683 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1684 // but may clobber others (e.g., SSE registers).
1685 // Typical use would be, when doing *(CX+88) = AX
1686 // CMPL $0, runtime.writeBarrier(SB)
1687 // JEQ dowrite
1688 // CALL runtime.gcBatchBarrier2(SB)
1689 // MOVQ AX, (R11)
1690 // MOVQ 88(CX), DX
1691 // MOVQ DX, 8(R11)
1692 // dowrite:
1693 // MOVQ AX, 88(CX)
1694 TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
1695 // Save the registers clobbered by the fast path. This is slightly
1696 // faster than having the caller spill these.
1697 MOVQ R12, 96(SP)
1698 MOVQ R13, 104(SP)
1699 retry:
1700 // TODO: Consider passing g.m.p in as an argument so they can be shared
1701 // across a sequence of write barriers.
1702 MOVQ g_m(R14), R13
1703 MOVQ m_p(R13), R13
1704 // Get current buffer write position.
1705 MOVQ (p_wbBuf+wbBuf_next)(R13), R12 // original next position
1706 ADDQ R11, R12 // new next position
1707 // Is the buffer full?
1708 CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
1709 JA flush
1710 // Commit to the larger buffer.
1711 MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
1712 // Make return value (the original next position)
1713 SUBQ R11, R12
1714 MOVQ R12, R11
1715 // Restore registers.
1716 MOVQ 96(SP), R12
1717 MOVQ 104(SP), R13
1718 RET
1719
1720 flush:
1721 // Save all general purpose registers since these could be
1722 // clobbered by wbBufFlush and were not saved by the caller.
1723 // It is possible for wbBufFlush to clobber other registers
1724 // (e.g., SSE registers), but the compiler takes care of saving
1725 // those in the caller if necessary. This strikes a balance
1726 // with registers that are likely to be used.
1727 //
1728 // We don't have type information for these, but all code under
1729 // here is NOSPLIT, so nothing will observe these.
1730 //
1731 // TODO: We could strike a different balance; e.g., saving X0
1732 // and not saving GP registers that are less likely to be used.
1733 MOVQ DI, 0(SP)
1734 MOVQ AX, 8(SP)
1735 MOVQ BX, 16(SP)
1736 MOVQ CX, 24(SP)
1737 MOVQ DX, 32(SP)
1738 // DI already saved
1739 MOVQ SI, 40(SP)
1740 MOVQ BP, 48(SP)
1741 MOVQ R8, 56(SP)
1742 MOVQ R9, 64(SP)
1743 MOVQ R10, 72(SP)
1744 MOVQ R11, 80(SP)
1745 // R12 already saved
1746 // R13 already saved
1747 // R14 is g
1748 MOVQ R15, 88(SP)
1749
1750 CALL runtime·wbBufFlush(SB)
1751
1752 MOVQ 0(SP), DI
1753 MOVQ 8(SP), AX
1754 MOVQ 16(SP), BX
1755 MOVQ 24(SP), CX
1756 MOVQ 32(SP), DX
1757 MOVQ 40(SP), SI
1758 MOVQ 48(SP), BP
1759 MOVQ 56(SP), R8
1760 MOVQ 64(SP), R9
1761 MOVQ 72(SP), R10
1762 MOVQ 80(SP), R11
1763 MOVQ 88(SP), R15
1764 JMP retry
1765
1766 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1767 MOVL $8, R11
1768 JMP gcWriteBarrier<>(SB)
1769 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1770 MOVL $16, R11
1771 JMP gcWriteBarrier<>(SB)
1772 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1773 MOVL $24, R11
1774 JMP gcWriteBarrier<>(SB)
1775 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1776 MOVL $32, R11
1777 JMP gcWriteBarrier<>(SB)
1778 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1779 MOVL $40, R11
1780 JMP gcWriteBarrier<>(SB)
1781 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1782 MOVL $48, R11
1783 JMP gcWriteBarrier<>(SB)
1784 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1785 MOVL $56, R11
1786 JMP gcWriteBarrier<>(SB)
1787 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1788 MOVL $64, R11
1789 JMP gcWriteBarrier<>(SB)
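
// Each gcWriteBarrierN variant requests N*8 bytes of write barrier buffer
// space (room for N pointer slots) by loading N*8 into R11 before jumping to
// gcWriteBarrier<>; for example, gcWriteBarrier2 sets R11 to 16 so the caller
// can store two pointers at (R11) and 8(R11), as in the usage example in the
// comment above gcWriteBarrier<>.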
1790
1791 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1792 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1793
1794 // debugCallV2 is the entry point for debugger-injected function
1795 // calls on running goroutines. It informs the runtime that a
1796 // debug call has been injected and creates a call frame for the
1797 // debugger to fill in.
1798 //
1799 // To inject a function call, a debugger should:
1800 // 1. Check that the goroutine is in state _Grunning and that
1801 // there are at least 256 bytes free on the stack.
1802 // 2. Push the current PC on the stack (updating SP).
1803 // 3. Write the desired argument frame size at SP-16 (using the SP
1804 // after step 2).
1805 // 4. Save all machine registers (including flags and XMM registers)
1806 // so they can be restored later by the debugger.
1807 // 5. Set the PC to debugCallV2 and resume execution.
1808 //
1809 // If the goroutine is in state _Grunnable, then it's not generally
1810 // safe to inject a call because it may return out via other runtime
1811 // operations. Instead, the debugger should unwind the stack to find
1812 // the return to non-runtime code, add a temporary breakpoint there,
1813 // and inject the call once that breakpoint is hit.
1814 //
1815 // If the goroutine is in any other state, it's not safe to inject a call.
1816 //
1817 // This function communicates back to the debugger by setting R12 and
1818 // invoking INT3 to raise a breakpoint signal. See the comments in the
1819 // implementation for the protocol the debugger is expected to
1820 // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1821 //
1822 // The debugger must ensure that any pointers passed to the function
1823 // obey escape analysis requirements. Specifically, it must not pass
1824 // a stack pointer to an escaping argument. debugCallV2 cannot check
1825 // this invariant.
1826 //
1827 // This is ABIInternal because Go code injects its PC directly into new
1828 // goroutine stacks.
1829 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
1830 // Save all registers that may contain pointers so they can be
1831 // conservatively scanned.
1832 //
1833 // We can't do anything that might clobber any of these
1834 // registers before this.
1835 MOVQ R15, r15-(14*8+8)(SP)
1836 MOVQ R14, r14-(13*8+8)(SP)
1837 MOVQ R13, r13-(12*8+8)(SP)
1838 MOVQ R12, r12-(11*8+8)(SP)
1839 MOVQ R11, r11-(10*8+8)(SP)
1840 MOVQ R10, r10-(9*8+8)(SP)
1841 MOVQ R9, r9-(8*8+8)(SP)
1842 MOVQ R8, r8-(7*8+8)(SP)
1843 MOVQ DI, di-(6*8+8)(SP)
1844 MOVQ SI, si-(5*8+8)(SP)
1845 MOVQ BP, bp-(4*8+8)(SP)
1846 MOVQ BX, bx-(3*8+8)(SP)
1847 MOVQ DX, dx-(2*8+8)(SP)
1848 	// Save the frame size before we clobber it. Either of the last two
1849 	// saves (of CX and AX) could clobber this depending on whether there's a saved BP.
1850 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1851 MOVQ CX, cx-(1*8+8)(SP)
1852 MOVQ AX, ax-(0*8+8)(SP)
1853
1854 // Save the argument frame size.
1855 MOVQ DX, frameSize-128(SP)
1856
1857 // Perform a safe-point check.
1858 MOVQ retpc-8(FP), AX // Caller's PC
1859 MOVQ AX, 0(SP)
1860 CALL runtime·debugCallCheck(SB)
1861 MOVQ 8(SP), AX
1862 TESTQ AX, AX
1863 JZ good
1864 // The safety check failed. Put the reason string at the top
1865 // of the stack.
1866 MOVQ AX, 0(SP)
1867 MOVQ 16(SP), AX
1868 MOVQ AX, 8(SP)
1869 // Set R12 to 8 and invoke INT3. The debugger should get the
1870 // reason a call can't be injected from the top of the stack
1871 // and resume execution.
1872 MOVQ $8, R12
1873 BYTE $0xcc
1874 JMP restore
1875
1876 good:
1877 // Registers are saved and it's safe to make a call.
1878 // Open up a call frame, moving the stack if necessary.
1879 //
1880 // Once the frame is allocated, this will set R12 to 0 and
1881 // invoke INT3. The debugger should write the argument
1882 // frame for the call at SP, set up argument registers, push
1883 // the trapping PC on the stack, set the PC to the function to
1884 // call, set RDX to point to the closure (if a closure call),
1885 // and resume execution.
1886 //
1887 // If the function returns, this will set R12 to 1 and invoke
1888 // INT3. The debugger can then inspect any return value saved
1889 // on the stack at SP and in registers and resume execution again.
1890 //
1891 // If the function panics, this will set R12 to 2 and invoke INT3.
1892 // The interface{} value of the panic will be at SP. The debugger
1893 // can inspect the panic value and resume execution again.
1894 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1895 CMPQ AX, $MAXSIZE; \
1896 JA 5(PC); \
1897 MOVQ $NAME(SB), AX; \
1898 MOVQ AX, 0(SP); \
1899 CALL runtime·debugCallWrap(SB); \
1900 JMP restore
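// For example, DEBUG_CALL_DISPATCH(debugCall32<>, 32) expands to (sketch):
//	CMPQ	AX, $32
//	JA	5(PC)                       // frame too large for this size; jump to the next dispatch entry
//	MOVQ	$debugCall32<>(SB), AX
//	MOVQ	AX, 0(SP)
//	CALL	runtime·debugCallWrap(SB)
//	JMP	restore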
1901
1902 MOVQ frameSize-128(SP), AX
1903 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1904 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1905 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1906 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1907 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1908 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1909 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1910 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1911 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1912 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1913 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1914 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1915 // The frame size is too large. Report the error.
1916 MOVQ $debugCallFrameTooLarge<>(SB), AX
1917 MOVQ AX, 0(SP)
1918 MOVQ $20, 8(SP) // length of debugCallFrameTooLarge string
1919 MOVQ $8, R12
1920 BYTE $0xcc
1921 JMP restore
1922
1923 restore:
1924 // Calls and failures resume here.
1925 //
1926 // Set R12 to 16 and invoke INT3. The debugger should restore
1927 // all registers except RIP and RSP and resume execution.
1928 MOVQ $16, R12
1929 BYTE $0xcc
1930 // We must not modify flags after this point.
1931
1932 // Restore pointer-containing registers, which may have been
1933 // modified from the debugger's copy by stack copying.
1934 MOVQ ax-(0*8+8)(SP), AX
1935 MOVQ cx-(1*8+8)(SP), CX
1936 MOVQ dx-(2*8+8)(SP), DX
1937 MOVQ bx-(3*8+8)(SP), BX
1938 MOVQ bp-(4*8+8)(SP), BP
1939 MOVQ si-(5*8+8)(SP), SI
1940 MOVQ di-(6*8+8)(SP), DI
1941 MOVQ r8-(7*8+8)(SP), R8
1942 MOVQ r9-(8*8+8)(SP), R9
1943 MOVQ r10-(9*8+8)(SP), R10
1944 MOVQ r11-(10*8+8)(SP), R11
1945 MOVQ r12-(11*8+8)(SP), R12
1946 MOVQ r13-(12*8+8)(SP), R13
1947 MOVQ r14-(13*8+8)(SP), R14
1948 MOVQ r15-(14*8+8)(SP), R15
1949
1950 RET
1951
1952 // runtime.debugCallCheck assumes that functions defined with the
1953 // DEBUG_CALL_FN macro are safe points to inject calls.
1954 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
1955 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1956 NO_LOCAL_POINTERS; \
1957 MOVQ $0, R12; \
1958 BYTE $0xcc; \
1959 MOVQ $1, R12; \
1960 BYTE $0xcc; \
1961 RET
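// Each debugCallNN stub raises a breakpoint with R12 == 0 once its frame is
// ready for the debugger to fill in, and again with R12 == 1 after the
// injected call returns (see the protocol described in debugCallV2 above).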
1962 DEBUG_CALL_FN(debugCall32<>, 32)
1963 DEBUG_CALL_FN(debugCall64<>, 64)
1964 DEBUG_CALL_FN(debugCall128<>, 128)
1965 DEBUG_CALL_FN(debugCall256<>, 256)
1966 DEBUG_CALL_FN(debugCall512<>, 512)
1967 DEBUG_CALL_FN(debugCall1024<>, 1024)
1968 DEBUG_CALL_FN(debugCall2048<>, 2048)
1969 DEBUG_CALL_FN(debugCall4096<>, 4096)
1970 DEBUG_CALL_FN(debugCall8192<>, 8192)
1971 DEBUG_CALL_FN(debugCall16384<>, 16384)
1972 DEBUG_CALL_FN(debugCall32768<>, 32768)
1973 DEBUG_CALL_FN(debugCall65536<>, 65536)
1974
1975 // func debugCallPanicked(val interface{})
1976 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1977 // Copy the panic value to the top of stack.
1978 MOVQ val_type+0(FP), AX
1979 MOVQ AX, 0(SP)
1980 MOVQ val_data+8(FP), AX
1981 MOVQ AX, 8(SP)
1982 MOVQ $2, R12
1983 BYTE $0xcc
1984 RET
1985
1986 // Note: these functions use a special calling convention to save generated code space.
1987 // Arguments are passed in registers, but the space for those arguments is allocated
1988 // in the caller's stack frame. These stubs write the args into that stack space and
1989 // then tail call to the corresponding runtime handler.
1990 // The tail call makes these stubs disappear in backtraces.
1991 // Defined as ABIInternal since they do not use the stack-based Go ABI.
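// For example (a sketch, not actual compiler output), a bounds check that
// reports its failure via runtime·panicIndex below might look like:
//	// failing index in AX, length in CX (the registers the stub expects)
//	CMPQ	AX, CX
//	JAE	oob                     // unsigned compare also catches negative indexes
//	... in-bounds code ...
// oob:
//	CALL	runtime·panicIndex(SB)  // copies CX into BX and tail calls goPanicIndex(index, length)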
1992 TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
1993 MOVQ CX, BX
1994 JMP runtime·goPanicIndex<ABIInternal>(SB)
1995 TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
1996 MOVQ CX, BX
1997 JMP runtime·goPanicIndexU<ABIInternal>(SB)
1998 TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
1999 MOVQ CX, AX
2000 MOVQ DX, BX
2001 JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
2002 TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
2003 MOVQ CX, AX
2004 MOVQ DX, BX
2005 JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
2006 TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
2007 MOVQ CX, AX
2008 MOVQ DX, BX
2009 JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
2010 TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
2011 MOVQ CX, AX
2012 MOVQ DX, BX
2013 JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
2014 TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
2015 MOVQ CX, BX
2016 JMP runtime·goPanicSliceB<ABIInternal>(SB)
2017 TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
2018 MOVQ CX, BX
2019 JMP runtime·goPanicSliceBU<ABIInternal>(SB)
2020 TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
2021 MOVQ DX, AX
2022 JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
2023 TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
2024 MOVQ DX, AX
2025 JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
2026 TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
2027 MOVQ DX, AX
2028 JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
2029 TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
2030 MOVQ DX, AX
2031 JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
2032 TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
2033 MOVQ CX, AX
2034 MOVQ DX, BX
2035 JMP runtime·goPanicSlice3B<ABIInternal>(SB)
2036 TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
2037 MOVQ CX, AX
2038 MOVQ DX, BX
2039 JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
2040 TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
2041 MOVQ CX, BX
2042 JMP runtime·goPanicSlice3C<ABIInternal>(SB)
2043 TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
2044 MOVQ CX, BX
2045 JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
2046 TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
2047 MOVQ DX, AX
2048 JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
2049
2050 #ifdef GOOS_android
2051 // Use the free TLS_SLOT_APP slot #2 on Android Q.
2052 // Earlier Android versions are set up in gcc_android.c.
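// (Slot #2 at 8 bytes per slot gives the byte offset 16 stored in tls_g below;
// this reading of the constant is an assumption based on the comment above.)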
2053 DATA runtime·tls_g+0(SB)/8, $16
2054 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2055 #endif
2056 #ifdef GOOS_windows
2057 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2058 #endif
2059
2060 // The compiler and assembler's -spectre=ret mode rewrites
2061 // all indirect CALL AX / JMP AX instructions to be
2062 // CALL retpolineAX / JMP retpolineAX.
2063 // See https://support.google.com/faqs/answer/7625886.
2064 #define RETPOLINE(reg) \
2065 /* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
2066 /* nospec: */ \
2067 /* PAUSE */ BYTE $0xF3; BYTE $0x90; \
2068 /* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
2069 /* setup: */ \
2070 /* MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
2071 BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
2072 /* RET */ BYTE $0xC3
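// Decoded, RETPOLINE(0) (runtime·retpolineAX below) is roughly:
//	CALL	setup       // pushes the address of the PAUSE loop, jumps to setup
// nospec:
//	PAUSE               // any speculated "return" lands here and spins harmlessly
//	JMP	nospec
// setup:
//	MOVQ	AX, 0(SP)   // overwrite the pushed return address with the real target
//	RET                 // transfer to the address in AX without an indirect branch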
2073
2074 TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
2075 TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
2076 TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
2077 TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
2078 /* register 4 is SP: an indirect CALL/JMP through SP can't happen, and reg 4 has magic (SIB) encodings */
2079 TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
2080 TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
2081 TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
2082 TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
2083 TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
2084 TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
2085 TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
2086 TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
2087 TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
2088 TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
2089 TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2090
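// getfp returns the frame pointer register (BP) of its caller.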
2091 TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
2092 MOVQ BP, AX
2093 RET
2094