src/runtime/race_arm64.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are R19...R28.
// Temporary registers are R9...R15.
// SP must be 16-byte aligned.
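// For example, a call like __tsan_read(thr, addr, pc) below puts thr in R0,
// addr in R1 and pc in R2; none of these thunks need stack arguments.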

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif
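
// For example, AND $~7 clears the low three bits: a thread pointer of
// 0x1f007 becomes 0x1f000, restoring the 8-byte alignment load_g relies on.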

// Load g from TLS. (See tls_arm64.s)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD    runtime·tls_g(SB), R11 \
	MOVD    (R0)(R11), g
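
// In effect, load_g performs g = *(uintptr *)(TPIDR_EL0 + runtime·tls_g),
// clobbering R0 and R11 along the way (MRS_TPIDR_R0 comes from tls_arm64.h).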

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would obscure the caller's PC.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would obscure the caller's PC.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would obscure the caller's PC.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
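	// (arm64 instructions are 4 bytes, so pc+4 looks like the return
	// address of a call at pc.)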
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would obscure the caller's PC.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
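// Pseudocode of the check below (a sketch):
//	if racearenastart <= addr && addr < racearenaend { goto call }
//	if racedatastart <= addr && addr < racedataend { goto call }
//	return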
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// half-open interval: addr == racedataend is out of range
call:
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
// R3 = address of the arguments passed to this function; it can be fetched
// at 40(RSP) in racecallatomic after the two BL calls.
// R0, R1, R2 are set in racecallatomic.
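//
// All the __tsan_go_atomic* entry points share one C-side shape, sketched
// here from the register setup in racecallatomic:
//	void __tsan_go_atomic32_load(ThreadState *thr, void *callpc, void *pc, void *args);
// where args points at the Go argument area of the sync/atomic function.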

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET
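
// tsan's fetch_add returns the value before the addition, while Go's Add*
// returns the value after it; hence the fix-up above:
//	*ret = __tsan_go_atomic*_fetch_add(addr, delta) + delta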

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after the two BLs it sits at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
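	// (Faulting here, before entering the C race runtime, presumably keeps
	// the crash in Go-visible code where the runtime can report it cleanly.)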
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
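// The corresponding Go-side declaration (cf. race.go) is, sketched:
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)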
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls (R9). Arguments already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)
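
// In effect (a sketch, Go-flavored pseudocode):
//	sp, lr := RSP, LR
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp
//	}
//	fn(R0, R1, R2, R3)	// BL (R9)
//	RSP = sp; jump to lr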

// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call only switches SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
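// The Go handler it dispatches to is, sketched (cf. race.go):
//	func racecallback(cmd uintptr, ctx unsafe.Pointer)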
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it benefits
	// from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g clobbers R0, so save R0 first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g	// g = m->g0

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code; reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif
