Text file src/runtime/race_s390x.s

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build race
     6  // +build race
     7  
     8  #include "go_asm.h"
     9  #include "funcdata.h"
    10  #include "textflag.h"
    11  
    12  // The following thunks allow calling the gcc-compiled race runtime directly
    13  // from Go code without going all the way through cgo.
    14  // First, it's much faster (up to 50% speedup for real Go programs).
    15  // Second, it eliminates race-related special cases from cgocall and the scheduler.
    16  // Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
    17  
    18  // A brief recap of the s390x C calling convention.
    19  // Arguments are passed in R2...R6; the rest are passed on the stack.
    20  // Callee-saved registers are: R6...R13, R15.
    21  // Temporary registers are: R0...R5, R14.
    22  
    23  // When calling racecalladdr, R1 is the call target address.
    24  
    25  // The race ctx (ThreadState *thr in the C prototypes below) is passed in R2; racecalladdr loads it from g.
    26  
    27  // func runtime·raceread(addr uintptr)
    28  // Called from instrumented code. Hands addr and the caller pc to __tsan_read via racecalladdr.
    29  TEXT	runtime·raceread(SB), NOSPLIT, $0-8
    30  	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
    31  	MOVD	$__tsan_read(SB), R1		// R1 = call target (see racecalladdr).
    32  	MOVD	addr+0(FP), R3			// R3 = addr.
    33  	MOVD	R14, R4				// R4 = pc, taken from R14 (caller pc).
    34  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
    35  
    36  // func runtime·RaceRead(addr uintptr)
    37  TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
    38  	// This needs to be a tail call (JMP, not BL), because raceread reads caller pc from R14.
    39  	JMP	runtime·raceread(SB)		// Argument frame ($0-8) is shared unchanged.
    40  
    41  // func runtime·racereadpc(void *addr, void *callpc, void *pc)
    42  TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
    43  	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    44  	MOVD	$__tsan_read_pc(SB), R1		// R1 = call target (see racecalladdr).
    45  	LMG	addr+0(FP), R3, R5		// R3 = addr, R4 = callpc, R5 = pc (three consecutive 8-byte args).
    46  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
    47  
    48  // func runtime·racewrite(addr uintptr)
    49  // Called from instrumented code. Hands addr and the caller pc to __tsan_write via racecalladdr.
    50  TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
    51  	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
    52  	MOVD	$__tsan_write(SB), R1		// R1 = call target (see racecalladdr).
    53  	MOVD	addr+0(FP), R3			// R3 = addr.
    54  	MOVD	R14, R4				// R4 = pc, taken from R14 (caller pc).
    55  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
    56  
    57  // func runtime·RaceWrite(addr uintptr)
    58  TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
    59  	// This needs to be a tail call (JMP, not BL), because racewrite reads caller pc from R14.
    60  	JMP	runtime·racewrite(SB)		// Argument frame ($0-8) is shared unchanged.
    61  
    62  // func runtime·racewritepc(void *addr, void *callpc, void *pc)
    63  TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
    64  	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    65  	MOVD	$__tsan_write_pc(SB), R1	// R1 = call target (see racecalladdr).
    66  	LMG	addr+0(FP), R3, R5		// R3 = addr, R4 = callpc, R5 = pc (three consecutive 8-byte args).
    67  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
    68  
    69  // func runtime·racereadrange(addr, size uintptr)
    70  // Called from instrumented code. Hands addr, size and the caller pc to __tsan_read_range.
    71  TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
    72  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    73  	MOVD	$__tsan_read_range(SB), R1	// R1 = call target (see racecalladdr).
    74  	LMG	addr+0(FP), R3, R4		// R3 = addr, R4 = size.
    75  	MOVD	R14, R5				// R5 = pc, taken from R14 (caller pc).
    76  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
    77  
    78  // func runtime·RaceReadRange(addr, size uintptr)
    79  TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
    80  	// This needs to be a tail call (JMP, not BL), because racereadrange reads caller pc from R14.
    81  	JMP	runtime·racereadrange(SB)	// Argument frame ($0-16) is shared unchanged.
    82  
    83  // func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
    84  TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
    85  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    86  	MOVD	$__tsan_read_range(SB), R1	// R1 = call target (see racecalladdr).
    87  	LMG	addr+0(FP), R3, R5		// R3 = addr, R4 = sz, R5 = pc.
    88  	// pc is an interceptor address, but TSan expects it to point to the
    89  	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
    90  	ADD	$2, R5				// R5 = pc + 2, so it no longer points at the entry itself.
    91  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
    92  
    93  // func runtime·racewriterange(addr, size uintptr)
    94  // Called from instrumented code. Hands addr, size and the caller pc to __tsan_write_range.
    95  TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
    96  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    97  	MOVD	$__tsan_write_range(SB), R1	// R1 = call target (see racecalladdr).
    98  	LMG	addr+0(FP), R3, R4		// R3 = addr, R4 = size.
    99  	MOVD	R14, R5				// R5 = pc, taken from R14 (caller pc).
   100  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
   101  
   102  // func runtime·RaceWriteRange(addr, size uintptr)
   103  TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
   104  	// This needs to be a tail call (JMP, not BL), because racewriterange reads caller pc from R14.
   105  	JMP	runtime·racewriterange(SB)	// Argument frame ($0-16) is shared unchanged.
   106  
   107  // func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
   108  TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
   109  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   110  	MOVD	$__tsan_write_range(SB), R1	// R1 = call target (see racecalladdr).
   111  	LMG	addr+0(FP), R3, R5		// R3 = addr, R4 = sz, R5 = pc.
   112  	// pc is an interceptor address, but TSan expects it to point to the
   113  	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
   114  	ADD	$2, R5				// R5 = pc + 2, so it no longer points at the entry itself.
   115  	JMP	racecalladdr<>(SB)		// Tail call; thr (R2) is loaded there.
   116  
   117  // If R3 is outside both [racearenastart, racearenaend) and [racedatastart, racedataend), do
   118  // nothing. Otherwise, load the race ctx into R2 and tail-call racecall. R1 (target) and R3... are already set.
   119  TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
   120  	MOVD	runtime·racearenastart(SB), R0
   121  	CMPUBLT	R3, R0, data			// Before racearena start?
   122  	MOVD	runtime·racearenaend(SB), R0
   123  	CMPUBLT	R3, R0, call			// Before racearena end?
   124  data:
   125  	MOVD	runtime·racedatastart(SB), R0
   126  	CMPUBLT	R3, R0, ret			// Before racedata start?
   127  	MOVD	runtime·racedataend(SB), R0
   128  	CMPUBGE	R3, R0, ret			// At or after racedata end?
   129  call:
   130  	MOVD	g_racectx(g), R2		// R2 = ThreadState * of the current g.
   131  	JMP	racecall<>(SB)			// Tail call: racecall returns straight to our caller.
   132  ret:
   133  	RET					// Address not tracked: skip the C call entirely.
   134  
   135  // func runtime·racefuncenter(pc uintptr)
   136  // Called from instrumented code. Loads the argument into R3 for the shared racefuncenter<> code.
   137  TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
   138  	MOVD	callpc+0(FP), R3		// R3 = pc argument (named callpc in the frame).
   139  	JMP	racefuncenter<>(SB)		// Tail call into the common code.
   140  
   141  // Common code for racefuncenter: calls __tsan_func_enter(thr, R3) through racecall.
   142  // R3 = caller's return address
   143  TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
   144  	// void __tsan_func_enter(ThreadState *thr, void *pc);
   145  	MOVD	$__tsan_func_enter(SB), R1	// R1 = call target for racecall.
   146  	MOVD	g_racectx(g), R2		// R2 = ThreadState * of the current g.
   147  	BL	racecall<>(SB)			// Regular call (not a tail call), followed by our own RET.
   148  	RET
   149  
   150  // func runtime·racefuncexit()
   151  // Called from instrumented code. Calls __tsan_func_exit(thr) through racecall.
   152  TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
   153  	// void __tsan_func_exit(ThreadState *thr);
   154  	MOVD	$__tsan_func_exit(SB), R1	// R1 = call target for racecall.
   155  	MOVD	g_racectx(g), R2		// R2 = ThreadState * of the current g.
   156  	JMP	racecall<>(SB)			// Tail call; racecall returns to our caller.
   157  
   158  // Atomic operations for sync/atomic package.
   159  
   160  // Load
   161  
   162  TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12	// Routed to __tsan_go_atomic32_load.
   163  	GO_ARGS
   164  	MOVD	$__tsan_go_atomic32_load(SB), R1	// R1 = function racecallatomic will call.
   165  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   166  	RET
   167  
   168  TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16	// Routed to __tsan_go_atomic64_load.
   169  	GO_ARGS
   170  	MOVD	$__tsan_go_atomic64_load(SB), R1	// R1 = function racecallatomic will call.
   171  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   172  	RET
   173  
   174  TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12	// Forwards to LoadInt32.
   175  	GO_ARGS
   176  	JMP	sync∕atomic·LoadInt32(SB)	// Tail jump; both declare the same $0-12 frame.
   177  
   178  TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16	// Forwards to LoadInt64.
   179  	GO_ARGS
   180  	JMP	sync∕atomic·LoadInt64(SB)	// Tail jump; both declare the same $0-16 frame.
   181  
   182  TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16	// Forwards to LoadInt64.
   183  	GO_ARGS
   184  	JMP	sync∕atomic·LoadInt64(SB)	// Tail jump; both declare the same $0-16 frame.
   185  
   186  TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16	// Forwards to LoadInt64.
   187  	GO_ARGS
   188  	JMP	sync∕atomic·LoadInt64(SB)	// Tail jump; both declare the same $0-16 frame.
   189  
   190  // Store
   191  
   192  TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12	// Routed to __tsan_go_atomic32_store.
   193  	GO_ARGS
   194  	MOVD	$__tsan_go_atomic32_store(SB), R1	// R1 = function racecallatomic will call.
   195  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   196  	RET
   197  
   198  TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16	// Routed to __tsan_go_atomic64_store.
   199  	GO_ARGS
   200  	MOVD	$__tsan_go_atomic64_store(SB), R1	// R1 = function racecallatomic will call.
   201  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   202  	RET
   203  
   204  TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12	// Forwards to StoreInt32.
   205  	GO_ARGS
   206  	JMP	sync∕atomic·StoreInt32(SB)	// Tail jump; both declare the same $0-12 frame.
   207  
   208  TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16	// Forwards to StoreInt64.
   209  	GO_ARGS
   210  	JMP	sync∕atomic·StoreInt64(SB)	// Tail jump; both declare the same $0-16 frame.
   211  
   212  TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16	// Forwards to StoreInt64.
   213  	GO_ARGS
   214  	JMP	sync∕atomic·StoreInt64(SB)	// Tail jump; both declare the same $0-16 frame.
   215  
   216  // Swap
   217  
   218  TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20	// Routed to __tsan_go_atomic32_exchange.
   219  	GO_ARGS
   220  	MOVD	$__tsan_go_atomic32_exchange(SB), R1	// R1 = function racecallatomic will call.
   221  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   222  	RET
   223  
   224  TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24	// Routed to __tsan_go_atomic64_exchange.
   225  	GO_ARGS
   226  	MOVD	$__tsan_go_atomic64_exchange(SB), R1	// R1 = function racecallatomic will call.
   227  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   228  	RET
   229  
   230  TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20	// Forwards to SwapInt32.
   231  	GO_ARGS
   232  	JMP	sync∕atomic·SwapInt32(SB)	// Tail jump; both declare the same $0-20 frame.
   233  
   234  TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24	// Forwards to SwapInt64.
   235  	GO_ARGS
   236  	JMP	sync∕atomic·SwapInt64(SB)	// Tail jump; both declare the same $0-24 frame.
   237  
   238  TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24	// Forwards to SwapInt64.
   239  	GO_ARGS
   240  	JMP	sync∕atomic·SwapInt64(SB)	// Tail jump; both declare the same $0-24 frame.
   241  
   242  // Add
   243  
   244  TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20	// Routed to __tsan_go_atomic32_fetch_add, then fixed up.
   245  	GO_ARGS
   246  	MOVD	$__tsan_go_atomic32_fetch_add(SB), R1	// R1 = function racecallatomic will call.
   247  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   248  	// TSan performed fetch_add, but Go needs add_fetch: the result slot holds the old value.
   249  	MOVW	add+8(FP), R0			// R0 = delta argument.
   250  	MOVW	ret+16(FP), R1			// R1 = value TSan left in the result slot.
   251  	ADD	R0, R1, R0			// R0 = old value + delta.
   252  	MOVW	R0, ret+16(FP)			// Overwrite the result with the new value.
   253  	RET
   254  
   255  TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24	// Routed to __tsan_go_atomic64_fetch_add, then fixed up.
   256  	GO_ARGS
   257  	MOVD	$__tsan_go_atomic64_fetch_add(SB), R1	// R1 = function racecallatomic will call.
   258  	BL	racecallatomic<>(SB)			// It reads our Go arguments relative to R15.
   259  	// TSan performed fetch_add, but Go needs add_fetch: the result slot holds the old value.
   260  	MOVD	add+8(FP), R0			// R0 = delta argument.
   261  	MOVD	ret+16(FP), R1			// R1 = value TSan left in the result slot.
   262  	ADD	R0, R1, R0			// R0 = old value + delta.
   263  	MOVD	R0, ret+16(FP)			// Overwrite the result with the new value.
   264  	RET
   265  
   266  TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20	// Forwards to AddInt32.
   267  	GO_ARGS
   268  	JMP	sync∕atomic·AddInt32(SB)	// Tail jump; both declare the same $0-20 frame.
   269  
   270  TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24	// Forwards to AddInt64.
   271  	GO_ARGS
   272  	JMP	sync∕atomic·AddInt64(SB)	// Tail jump; both declare the same $0-24 frame.
   273  
   274  TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24	// Forwards to AddInt64.
   275  	GO_ARGS
   276  	JMP	sync∕atomic·AddInt64(SB)	// Tail jump; both declare the same $0-24 frame.
   277  
   278  // CompareAndSwap
   279  
   280  TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17	// Routed to __tsan_go_atomic32_compare_exchange.
   281  	GO_ARGS
   282  	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R1	// R1 = function racecallatomic will call.
   283  	BL	racecallatomic<>(SB)				// It reads our Go arguments relative to R15.
   284  	RET
   285  
   286  TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25	// Routed to __tsan_go_atomic64_compare_exchange.
   287  	GO_ARGS
   288  	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R1	// R1 = function racecallatomic will call.
   289  	BL	racecallatomic<>(SB)				// It reads our Go arguments relative to R15.
   290  	RET
   291  
   292  TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17	// Forwards to CompareAndSwapInt32.
   293  	GO_ARGS
   294  	JMP	sync∕atomic·CompareAndSwapInt32(SB)	// Tail jump; both declare the same $0-17 frame.
   295  
   296  TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25	// Forwards to CompareAndSwapInt64.
   297  	GO_ARGS
   298  	JMP	sync∕atomic·CompareAndSwapInt64(SB)	// Tail jump; both declare the same $0-25 frame.
   299  
   300  TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25	// Forwards to CompareAndSwapInt64.
   301  	GO_ARGS
   302  	JMP	sync∕atomic·CompareAndSwapInt64(SB)	// Tail jump; both declare the same $0-25 frame.
   303  
   304  // Common code for atomic operations. Calls R1 with (thr, caller pc, pc, pointer to Go args) via racecall.
   305  TEXT	racecallatomic<>(SB), NOSPLIT, $0
   306  	MOVD	24(R15), R5			// Address (arg1, after 2xBL).
   307  	// If we pass an invalid pointer to the TSan runtime, it will cause a
   308  	// "fatal error: unknown caller pc". So trigger a SEGV here instead.
   309  	MOVB	(R5), R0			// Probe load; the value is discarded (R0 is overwritten next).
   310  	MOVD	runtime·racearenastart(SB), R0
   311  	CMPUBLT	R5, R0, racecallatomic_data	// Before racearena start?
   312  	MOVD	runtime·racearenaend(SB), R0
   313  	CMPUBLT	R5, R0, racecallatomic_ok	// Before racearena end?
   314  racecallatomic_data:
   315  	MOVD	runtime·racedatastart(SB), R0
   316  	CMPUBLT	R5, R0, racecallatomic_ignore	// Before racedata start?
   317  	MOVD	runtime·racedataend(SB), R0
   318  	CMPUBGE	R5, R0,	racecallatomic_ignore	// At or after racedata end?
   319  racecallatomic_ok:
   320  	MOVD	g_racectx(g), R2		// ThreadState *.
   321  	MOVD	8(R15), R3			// Caller PC.
   322  	MOVD	R14, R4				// PC.
   323  	ADD	$24, R15, R5			// Arguments.
   324  	// Tail call fails to restore R15, so use a normal one.
   325  	BL	racecall<>(SB)
   326  	RET
   327  racecallatomic_ignore:
   328  	// Call __tsan_go_ignore_sync_begin to ignore synchronization during
   329  	// the atomic op. An attempt to synchronize on the address would cause
   330  	// a crash. Values kept live across racecall must avoid R7 and R8, which racecall overwrites.
   331  	MOVD	R1, R6				// Save target function.
   332  	MOVD	R14, R9				// Save PC (not in R7: racecall stores SP there).
   333  	MOVD	$__tsan_go_ignore_sync_begin(SB), R1
   334  	MOVD	g_racectx(g), R2		// ThreadState *.
   335  	BL	racecall<>(SB)
   336  	MOVD	R6, R1				// Restore target function.
   337  	MOVD	g_racectx(g), R2		// ThreadState *.
   338  	MOVD	8(R15), R3			// Caller PC.
   339  	MOVD	R9, R4				// PC.
   340  	ADD	$24, R15, R5			// Arguments.
   341  	BL	racecall<>(SB)
   342  	MOVD	$__tsan_go_ignore_sync_end(SB), R1
   343  	MOVD	g_racectx(g), R2		// ThreadState *.
   344  	BL	racecall<>(SB)
   345  	RET
   346  
   347  // func runtime·racecall(void(*f)(...), ...)
   348  // Calls C function f from race runtime and passes up to 4 arguments to it.
   349  // The arguments are never heap-object-preserving pointers, so we pretend there
   350  // are no arguments (hence the $0-0 frame declaration below).
   351  TEXT	runtime·racecall(SB), NOSPLIT, $0-0
   352  	MOVD	fn+0(FP), R1			// R1 = f, the call target for racecall<>.
   353  	MOVD	arg0+8(FP), R2			// R2...R5 = the four 8-byte argument slots after fn.
   354  	MOVD	arg1+16(FP), R3
   355  	MOVD	arg2+24(FP), R4
   356  	MOVD	arg3+32(FP), R5
   357  	JMP	racecall<>(SB)			// Tail call; racecall<> returns to our caller.
   358  
   359  // Switches SP to g0 stack (unless already on g0) and calls R1. Arguments are already set. Overwrites R7 and R8.
   360  TEXT	racecall<>(SB), NOSPLIT, $0-0
   361  	BL	runtime·save_g(SB)		// Save g for callbacks.
   362  	MOVD	R15, R7				// Save SP.
   363  	MOVD	g_m(g), R8			// R8 = thread.
   364  	MOVD	m_g0(R8), R8			// R8 = g0.
   365  	CMPBEQ	R8, g, call			// Already on g0?
   366  	MOVD	(g_sched+gobuf_sp)(R8), R15	// Switch SP to g0.
   367  call:	SUB	$160, R15			// Allocate C frame.
   368  	BL	R1				// Call C code.
   369  	MOVD	R7, R15				// Restore SP (R7 is callee-saved in the C convention).
   370  	RET					// Return to Go.
   371  
   372  // C->Go callback thunk that allows calling runtime·racesymbolize from C
   373  // code. racecall has only switched SP, finish g->g0 switch by setting correct
   374  // g. R2 contains command code, R3 contains command-specific context. See
   375  // racecallback for command codes. Command 0 is answered here without calling into Go.
   376  TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
   377  	STMG	R6, R15, 48(R15)		// Save non-volatile regs.
   378  	BL	runtime·load_g(SB)		// Saved by racecall.
   379  	CMPBNE	R2, $0, rest			// raceGetProcCmd?
   380  	MOVD	g_m(g), R2			// R2 = thread.
   381  	MOVD	m_p(R2), R2			// R2 = processor.
   382  	MVC	$8, p_raceprocctx(R2), (R3)	// *R3 = ThreadState *.
   383  	LMG	48(R15), R6, R15		// Restore non-volatile regs.
   384  	BR	R14				// Return to C.
   385  rest:	MOVD	g_m(g), R4			// R4 = current thread.
   386  	MOVD	m_g0(R4), g			// Switch to g0.
   387  	SUB	$24, R15			// Allocate Go argument slots.
   388  	STMG	R2, R3, 8(R15)			// Fill Go frame: command at 8(R15), context at 16(R15).
   389  	BL	runtime·racecallback(SB)	// Call Go code.
   390  	LMG	72(R15), R6, R15		// Restore non-volatile regs (72 = 48 + the 24 bytes allocated above).
   391  	BR	R14				// Return to C.
   392  

View as plain text