// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
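//
// For illustration only (a sketch, not actual compiler output): under -race
// the compiler instruments memory accesses roughly like
//
//	runtime.raceread(unsafe.Pointer(p))
//	x = *p
//
// so these thunks sit on the hot path of every instrumented access.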

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

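// For illustration only: the C prototypes quoted throughout this file map
// onto those registers directly. Given
//
//	void __tsan_read(ThreadState *thr, void *addr, void *pc);
//
// thr arrives in DI (RARG0), addr in SI (RARG1) and pc in DX (RARG2) on
// non-Windows systems, or in CX, DX and R8 on Windows; the RARG macros
// below abstract over exactly this difference.
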
#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

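// A note on the ABIInternal entry points in this file: under the
// register-based Go ABI the first two integer arguments arrive in AX and BX,
// and the caller's return address sits at 0(SP), which is why raceread can
// assemble the whole __tsan_read argument list with two MOVQs and no frame.
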
// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

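// Note on the ADDQ $1 above (repeated in the other *pc thunks): tsan expects
// return addresses and, like most symbolizers, steps back from them before
// resolving a source location, so a function-start pc is nudged forward to
// land strictly inside the function; any offset into the first instruction
// will do.
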
// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other
// arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

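// For illustration only, racecalladdr in Go-like pseudocode (a sketch):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(fn, racectx, ...)
//	}
//
// Addresses outside both ranges have no shadow memory, so they are silently
// ignored rather than handed to tsan.
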
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	DX, BX		// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
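//
// Each wrapper below loads the matching __tsan_go_atomic* entry point into
// AX and defers to racecallatomic, which forwards the caller's Go argument
// frame so that tsan can both model the operation and perform it.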

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

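// The Uint, Uintptr and Pointer variants here and below tail-call the Int
// versions of the same width: the race runtime models atomics on raw bit
// patterns, so only the operand size matters, not the Go type.
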
// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

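// For illustration only: __tsan_go_atomic*_fetch_add returns the old value,
// while sync/atomic.AddInt32 and friends must return the new one, so the
// wrappers patch the result slot in place, roughly
//
//	*ret = fetch_add(addr, delta) + delta
//
// which is what the MOVL/ADDL (or MOVQ/ADDQ) pair after racecallatomic does.
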
TEXT	sync∕atomic·AddInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT|NOFRAME, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT|NOFRAME, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

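// The odd frame sizes above ($0-17, $0-25) fall out of the Go signatures:
// e.g. CompareAndSwapInt32(addr *int32, old, new int32) bool packs an 8-byte
// pointer, two 4-byte operands and a 1-byte result, leaving tsan's
// compare_exchange to write the success byte into the result slot.
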
// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT|NOFRAME, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVBLZX	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

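// At entry to racecallatomic, 0(SP) holds its own return address (inside one
// of the sync/atomic wrappers above), 8(SP) holds the wrapper's return
// address (the user call site), and the caller's Go argument frame starts at
// 16(SP); that frame pointer is passed to tsan in RARG3, which reads the
// operands from it and writes any result back into it.
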
// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to the Go world: restore special registers.
	// The g register (R14) is callee-saved in C, so it survives the call.
	XORPS	X15, X15
	RET

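// racecall is the moral equivalent of systemstack for C calls: SP is saved
// in callee-saved R12, switched to g0's stack when needed, and rounded down
// to 16-byte alignment for the C ABI. X15 is re-zeroed on return because
// Go's internal ABI treats it as a fixed zero register, which C code is free
// to clobber.
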
// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call only switches SP, so finish the g->g0 switch by
// setting the correct g. The overall effect of the Go->C->Go call chain is
// similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

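// The raceGetProcCmd fast path above chases g->m->p->raceprocctx straight
// off TLS and stores the result through RARG1, avoiding the full
// C-ABI-to-Go-ABI transition below for the most frequent callback.
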
rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	CMPQ	R14, R15	// compare g, not m, against g0
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret

