From: Joel Sing
Date: Fri, 25 Jul 2025 10:41:51 +0000 (+1000)
Subject: runtime/race: add race detector support for linux/riscv64
X-Git-Tag: go1.26rc1~987
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8bcda6c79d40f49363cd04cdaf5bd7909de8f94f;p=gostls13.git

runtime/race: add race detector support for linux/riscv64

This enables support for the race detector on linux/riscv64.

Fixes #64345

Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64
Change-Id: I98962827e91455404858549b0f9691ee438f104b
Reviewed-on: https://go-review.googlesource.com/c/go/+/690497
Reviewed-by: Cherry Mui
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Meng Zhuo
Reviewed-by: Carlos Amedee
---

diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go
index 6200363ce2..dfa4ffb522 100644
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -1698,7 +1698,7 @@ func (t *tester) makeGOROOTUnwritable() (undo func()) {
 func raceDetectorSupported(goos, goarch string) bool {
 	switch goos {
 	case "linux":
-		return goarch == "amd64" || goarch == "ppc64le" || goarch == "arm64" || goarch == "s390x" || goarch == "loong64"
+		return goarch == "amd64" || goarch == "arm64" || goarch == "loong64" || goarch == "ppc64le" || goarch == "riscv64" || goarch == "s390x"
 	case "darwin":
 		return goarch == "amd64" || goarch == "arm64"
 	case "freebsd", "netbsd", "windows":
diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go
index fd23d829f5..f1e1b1c333 100644
--- a/src/cmd/go/alldocs.go
+++ b/src/cmd/go/alldocs.go
@@ -121,8 +121,9 @@
 //		The default is GOMAXPROCS, normally the number of CPUs available.
 //	-race
 //		enable data race detection.
-//		Supported only on linux/amd64, freebsd/amd64, darwin/amd64, darwin/arm64, windows/amd64,
-//		linux/ppc64le and linux/arm64 (only for 48-bit VMA).
+//		Supported only on darwin/amd64, darwin/arm64, freebsd/amd64, linux/amd64,
+//		linux/arm64 (only for 48-bit VMA), linux/ppc64le, linux/riscv64 and
+//		windows/amd64.
 //	-msan
 //		enable interoperation with memory sanitizer.
 //		Supported only on linux/amd64, linux/arm64, linux/loong64, freebsd/amd64
diff --git a/src/cmd/go/internal/work/build.go b/src/cmd/go/internal/work/build.go
index 53f5b04a77..742efe6490 100644
--- a/src/cmd/go/internal/work/build.go
+++ b/src/cmd/go/internal/work/build.go
@@ -77,8 +77,9 @@ and test commands:
 		The default is GOMAXPROCS, normally the number of CPUs available.
 	-race
 		enable data race detection.
-		Supported only on linux/amd64, freebsd/amd64, darwin/amd64, darwin/arm64, windows/amd64,
-		linux/ppc64le and linux/arm64 (only for 48-bit VMA).
+		Supported only on darwin/amd64, darwin/arm64, freebsd/amd64, linux/amd64,
+		linux/arm64 (only for 48-bit VMA), linux/ppc64le, linux/riscv64 and
+		windows/amd64.
 	-msan
 		enable interoperation with memory sanitizer.
 		Supported only on linux/amd64, linux/arm64, linux/loong64, freebsd/amd64
diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go
index a07b66d394..f9706a6988 100644
--- a/src/internal/platform/supported.go
+++ b/src/internal/platform/supported.go
@@ -23,7 +23,7 @@ func (p OSArch) String() string {
 func RaceDetectorSupported(goos, goarch string) bool {
 	switch goos {
 	case "linux":
-		return goarch == "amd64" || goarch == "ppc64le" || goarch == "arm64" || goarch == "s390x" || goarch == "loong64"
+		return goarch == "amd64" || goarch == "arm64" || goarch == "loong64" || goarch == "ppc64le" || goarch == "riscv64" || goarch == "s390x"
 	case "darwin":
 		return goarch == "amd64" || goarch == "arm64"
 	case "freebsd", "netbsd", "windows":
diff --git a/src/race.bash b/src/race.bash
index ae9f57ffd7..3ce91a345f 100755
--- a/src/race.bash
+++ b/src/race.bash
@@ -9,7 +9,7 @@
 set -e
 
 function usage {
-	echo 'race detector is only supported on linux/amd64, linux/ppc64le, linux/arm64, linux/loong64, linux/s390x, freebsd/amd64, netbsd/amd64, openbsd/amd64, darwin/amd64, and darwin/arm64' 1>&2
+	echo 'race detector is only supported on linux/amd64, linux/ppc64le, linux/arm64, linux/loong64, linux/riscv64, linux/s390x, freebsd/amd64, netbsd/amd64, openbsd/amd64, darwin/amd64, and darwin/arm64' 1>&2
 	exit 1
 }
 
@@ -20,6 +20,7 @@ case $(uname -s -m) in
   "Linux ppc64le") ;;
   "Linux aarch64") ;;
   "Linux loongarch64") ;;
+  "Linux riscv64") ;;
   "Linux s390x") ;;
   "FreeBSD amd64") ;;
   "NetBSD amd64") ;;
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 29e41147f0..ec5f0765ba 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -588,11 +588,20 @@ func mallocinit() {
 		// possible to use RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS at some
 		// point in the future - for now use the system stack address.
 		vmaSize = sys.Len64(uint64(getg().m.g0.stack.hi)) + 1
+		if raceenabled && vmaSize != 39 && vmaSize != 48 {
+			println("vma size = ", vmaSize)
+			throw("riscv64 vma size is unknown and race mode is enabled")
+		}
 	}
 
 	for i := 0x7f; i >= 0; i-- {
 		var p uintptr
 		switch {
+		case raceenabled && GOARCH == "riscv64" && vmaSize == 39:
+			p = uintptr(i)<<28 | uintptrMask&(0x0013<<28)
+			if p >= uintptrMask&0x000f00000000 {
+				continue
+			}
 		case raceenabled:
 			// The TSAN runtime requires the heap
 			// to be in the range [0x00c000000000,
diff --git a/src/runtime/race/README b/src/runtime/race/README
index def7bfec86..a65d463e10 100644
--- a/src/runtime/race/README
+++ b/src/runtime/race/README
@@ -15,4 +15,5 @@ race_darwin_arm64.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04
 race_linux_arm64.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5.
 race_linux_loong64.syso built with LLVM 83fe85115da9dc25fa270d2ea8140113c8d49670 and Go 037112464b4439571b45536de9ebe4bc9e10ecb7.
 race_linux_ppc64le.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5.
+race_linux_riscv64.syso built with LLVM c3c24be13f7928460ca1e2fe613a1146c868854e and Go a21249436b6e1fd47356361d53dc053bbc074f90.
 race_linux_s390x.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5.
diff --git a/src/runtime/race/race.go b/src/runtime/race/race.go
index 9fd75424ca..c2c5913966 100644
--- a/src/runtime/race/race.go
+++ b/src/runtime/race/race.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build race && ((linux && (amd64 || arm64 || loong64 || ppc64le || s390x)) || ((freebsd || netbsd || openbsd || windows) && amd64))
+//go:build race && ((linux && (amd64 || arm64 || loong64 || ppc64le || riscv64 || s390x)) || ((freebsd || netbsd || openbsd || windows) && amd64))
 
 package race
 
diff --git a/src/runtime/race/race_linux_riscv64.syso b/src/runtime/race/race_linux_riscv64.syso
new file mode 100644
index 0000000000..e5c5b88498
Binary files /dev/null and b/src/runtime/race/race_linux_riscv64.syso differ
diff --git a/src/runtime/race_riscv64.s b/src/runtime/race_riscv64.s
new file mode 100644
index 0000000000..9992a519eb
--- /dev/null
+++ b/src/runtime/race_riscv64.s
@@ -0,0 +1,551 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build race
+
+#include "go_asm.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// The following thunks allow calling the gcc-compiled race runtime directly
+// from Go code without going all the way through cgo.
+// First, it's much faster (up to 50% speedup for real Go programs).
+// Second, it eliminates race-related special cases from cgocall and scheduler.
+// Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
+
+// A brief recap of the riscv C calling convention.
+// Arguments are passed in X10...X17
+// Callee-saved registers are: X8, X9, X18..X27
+// Temporary registers are: X5..X7, X28..X31
+
+// When calling racecalladdr, X11 is the call target address.
+
+// The race ctx, ThreadState *thr below, is passed in X10 and loaded in racecalladdr.
+
+// func runtime·raceread(addr uintptr)
+// Called from instrumented code.
+TEXT runtime·raceread(SB), NOSPLIT, $0-8
+	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
+	MOV $__tsan_read(SB), X5
+	MOV X10, X11
+	MOV X1, X12
+	JMP racecalladdr<>(SB)
+
+// func runtime·RaceRead(addr uintptr)
+TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
+	// This needs to be a tail call, because raceread reads caller pc.
+	JMP runtime·raceread(SB)
+
+// func runtime·racereadpc(void *addr, void *callpc, void *pc)
+TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
+	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
+	MOV $__tsan_read_pc(SB), X5
+	MOV addr+0(FP), X11
+	MOV callpc+8(FP), X12
+	MOV pc+16(FP), X13
+	JMP racecalladdr<>(SB)
+
+// func runtime·racewrite(addr uintptr)
+// Called from instrumented code.
+TEXT runtime·racewrite(SB), NOSPLIT, $0-8
+	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
+	MOV $__tsan_write(SB), X5
+	MOV X10, X11
+	MOV X1, X12
+	JMP racecalladdr<>(SB)
+
+// func runtime·RaceWrite(addr uintptr)
+TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
+	// This needs to be a tail call, because racewrite reads caller pc.
+	JMP runtime·racewrite(SB)
+
+// func runtime·racewritepc(void *addr, void *callpc, void *pc)
+TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
+	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
+	MOV $__tsan_write_pc(SB), X5
+	MOV addr+0(FP), X11
+	MOV callpc+8(FP), X12
+	MOV pc+16(FP), X13
+	JMP racecalladdr<>(SB)
+
+// func runtime·racereadrange(addr, size uintptr)
+// Called from instrumented code.
+TEXT runtime·racereadrange(SB), NOSPLIT, $0-16
+	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+	MOV $__tsan_read_range(SB), X5
+	MOV X11, X12
+	MOV X10, X11
+	MOV X1, X13
+	JMP racecalladdr<>(SB)
+
+// func runtime·RaceReadRange(addr, size uintptr)
+TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
+	// This needs to be a tail call, because racereadrange reads caller pc.
+	JMP runtime·racereadrange(SB)
+
+// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
+TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
+	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+	MOV $__tsan_read_range(SB), X5
+	MOV addr+0(FP), X11
+	MOV size+8(FP), X12
+	MOV pc+16(FP), X13
+
+	// pc is an interceptor address, but TSan expects it to point to the
+	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
+	ADD $4, X13
+	JMP racecalladdr<>(SB)
+
+// func runtime·racewriterange(addr, size uintptr)
+// Called from instrumented code.
+TEXT runtime·racewriterange(SB), NOSPLIT, $0-16
+	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+	MOV $__tsan_write_range(SB), X5
+	MOV X11, X12
+	MOV X10, X11
+	MOV X1, X13
+	JMP racecalladdr<>(SB)
+
+// func runtime·RaceWriteRange(addr, size uintptr)
+TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
+	// This needs to be a tail call, because racewriterange reads caller pc.
+	JMP runtime·racewriterange(SB)
+
+// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
+TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
+	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+	MOV $__tsan_write_range(SB), X5
+	MOV addr+0(FP), X11
+	MOV size+8(FP), X12
+	MOV pc+16(FP), X13
+	// pc is an interceptor address, but TSan expects it to point to the
+	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
+	ADD $4, X13
+	JMP racecalladdr<>(SB)
+
+// If addr (X11) is out of range, do nothing. Otherwise, setup goroutine context and
+// invoke racecall. Other arguments are already set.
+TEXT racecalladdr<>(SB), NOSPLIT, $0-0
+	MOV runtime·racearenastart(SB), X7
+	BLT X11, X7, data // Before racearena start?
+	MOV runtime·racearenaend(SB), X7
+	BLT X11, X7, call // Before racearena end?
+data:
+	MOV runtime·racedatastart(SB), X7
+	BLT X11, X7, ret // Before racedata start?
+	MOV runtime·racedataend(SB), X7
+	BGE X11, X7, ret // At or after racedata end?
+call:
+	MOV g_racectx(g), X10
+	JMP racecall<>(SB)
+ret:
+	RET
+
+// func runtime·racefuncenter(pc uintptr)
+// Called from instrumented code.
+TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
+	MOV $__tsan_func_enter(SB), X5
+	MOV X10, X11
+	MOV g_racectx(g), X10
+	JMP racecall<>(SB)
+
+// Common code for racefuncenter
+// X1 = caller's return address
+TEXT racefuncenter<>(SB), NOSPLIT, $0-0
+	// void __tsan_func_enter(ThreadState *thr, void *pc);
+	MOV $__tsan_func_enter(SB), X5
+	MOV g_racectx(g), X10
+	MOV X1, X11
+	JMP racecall<>(SB)
+
+// func runtime·racefuncexit()
+// Called from instrumented code.
+TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
+	// void __tsan_func_exit(ThreadState *thr);
+	MOV $__tsan_func_exit(SB), X5
+	MOV g_racectx(g), X10
+	JMP racecall<>(SB)
+
+// Atomic operations for sync/atomic package.
+
+// Load
+
+TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
+	GO_ARGS
+	MOV $__tsan_go_atomic32_load(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
+	GO_ARGS
+	MOV $__tsan_go_atomic64_load(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
+	GO_ARGS
+	JMP sync∕atomic·LoadInt32(SB)
+
+TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
+	GO_ARGS
+	JMP sync∕atomic·LoadInt64(SB)
+
+TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
+	GO_ARGS
+	JMP sync∕atomic·LoadInt64(SB)
+
+TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
+	GO_ARGS
+	JMP sync∕atomic·LoadInt64(SB)
+
+// Store
+
+TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
+	GO_ARGS
+	MOV $__tsan_go_atomic32_store(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
+	GO_ARGS
+	MOV $__tsan_go_atomic64_store(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
+	GO_ARGS
+	JMP sync∕atomic·StoreInt32(SB)
+
+TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
+	GO_ARGS
+	JMP sync∕atomic·StoreInt64(SB)
+
+TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
+	GO_ARGS
+	JMP sync∕atomic·StoreInt64(SB)
+
+// Swap
+
+TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	MOV $__tsan_go_atomic32_exchange(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	MOV $__tsan_go_atomic64_exchange(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	JMP sync∕atomic·SwapInt32(SB)
+
+TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·SwapInt64(SB)
+
+TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·SwapInt64(SB)
+
+// Add
+
+TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	MOV $__tsan_go_atomic32_fetch_add(SB), X5
+	CALL racecallatomic<>(SB)
+	// TSan performed fetch_add, but Go needs add_fetch.
+	MOVW add+8(FP), X5
+	MOVW ret+16(FP), X6
+	ADD X5, X6, X5
+	MOVW X5, ret+16(FP)
+	RET
+
+TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	MOV $__tsan_go_atomic64_fetch_add(SB), X5
+	CALL racecallatomic<>(SB)
+	// TSan performed fetch_add, but Go needs add_fetch.
+	MOV add+8(FP), X5
+	MOV ret+16(FP), X6
+	ADD X5, X6, X5
+	MOV X5, ret+16(FP)
+	RET
+
+TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	JMP sync∕atomic·AddInt32(SB)
+
+TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·AddInt64(SB)
+
+TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·AddInt64(SB)
+
+// And
+TEXT sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	MOV $__tsan_go_atomic32_fetch_and(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	MOV $__tsan_go_atomic64_fetch_and(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	JMP sync∕atomic·AndInt32(SB)
+
+TEXT sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·AndInt64(SB)
+
+TEXT sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·AndInt64(SB)
+
+// Or
+TEXT sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	MOV $__tsan_go_atomic32_fetch_or(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	MOV $__tsan_go_atomic64_fetch_or(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
+	GO_ARGS
+	JMP sync∕atomic·OrInt32(SB)
+
+TEXT sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·OrInt64(SB)
+
+TEXT sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
+	GO_ARGS
+	JMP sync∕atomic·OrInt64(SB)
+
+// CompareAndSwap
+
+TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
+	GO_ARGS
+	MOV $__tsan_go_atomic32_compare_exchange(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
+	GO_ARGS
+	MOV $__tsan_go_atomic64_compare_exchange(SB), X5
+	CALL racecallatomic<>(SB)
+	RET
+
+TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
+	GO_ARGS
+	JMP sync∕atomic·CompareAndSwapInt32(SB)
+
+TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
+	GO_ARGS
+	JMP sync∕atomic·CompareAndSwapInt64(SB)
+
+TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
+	GO_ARGS
+	JMP sync∕atomic·CompareAndSwapInt64(SB)
+
+// Generic atomic operation implementation.
+// X5 = addr of target function
+TEXT racecallatomic<>(SB), NOSPLIT, $0
+	// Set up these registers
+	// X10 = *ThreadState
+	// X11 = caller pc
+	// X12 = pc
+	// X13 = addr of incoming arg list
+
+	// Trigger SIGSEGV early.
+	MOV 24(X2), X6 // 1st arg is addr. after two times CALL, get it at 24(X2)
+	MOVB (X6), X0 // segv here if addr is bad
+	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
+	MOV runtime·racearenastart(SB), X7
+	BLT X6, X7, racecallatomic_data
+	MOV runtime·racearenaend(SB), X7
+	BLT X6, X7, racecallatomic_ok
+racecallatomic_data:
+	MOV runtime·racedatastart(SB), X7
+	BLT X6, X7, racecallatomic_ignore
+	MOV runtime·racedataend(SB), X7
+	BGE X6, X7, racecallatomic_ignore
+racecallatomic_ok:
+	// Addr is within the good range, call the atomic function.
+	MOV g_racectx(g), X10 // goroutine context
+	MOV 8(X2), X11 // caller pc
+	MOV X1, X12 // pc
+	ADD $24, X2, X13
+	CALL racecall<>(SB)
+	RET
+racecallatomic_ignore:
+	// Addr is outside the good range.
+	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
+	// An attempt to synchronize on the address would cause crash.
+	MOV X1, X20 // save PC
+	MOV X5, X21 // save target function
+	MOV $__tsan_go_ignore_sync_begin(SB), X5
+	MOV g_racectx(g), X10 // goroutine context
+	CALL racecall<>(SB)
+	MOV X21, X5 // restore the target function
+	// Call the atomic function.
+	MOV g_racectx(g), X10 // goroutine context
+	MOV 8(X2), X11 // caller pc
+	MOV X20, X12 // pc
+	ADD $24, X2, X13 // arguments
+	CALL racecall<>(SB)
+	// Call __tsan_go_ignore_sync_end.
+	MOV $__tsan_go_ignore_sync_end(SB), X5
+	MOV g_racectx(g), X10 // goroutine context
+	CALL racecall<>(SB)
+	RET
+
+// func runtime·racecall(void(*f)(...), ...)
+// Calls C function f from race runtime and passes up to 4 arguments to it.
+// The arguments are never heap-object-preserving pointers, so we pretend there
+// are no arguments.
+TEXT runtime·racecall(SB), NOSPLIT, $0-0
+	MOV fn+0(FP), X5
+	MOV arg0+8(FP), X10
+	MOV arg1+16(FP), X11
+	MOV arg2+24(FP), X12
+	MOV arg3+32(FP), X13
+	JMP racecall<>(SB)
+
+// Switches SP to g0 stack and calls X5. Arguments are already set.
+TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0
+	MOV X1, X18 // Save RA in callee save register
+	MOV X2, X19 // Save SP in callee save register
+	CALL runtime·save_g(SB) // Save g for callbacks
+
+	MOV g_m(g), X6
+
+	// Switch to g0 stack if we aren't already on g0 or gsignal.
+	MOV m_gsignal(X6), X7
+	BEQ X7, g, call
+	MOV m_g0(X6), X7
+	BEQ X7, g, call
+
+	MOV (g_sched+gobuf_sp)(X7), X2 // Switch to g0 stack
+call:
+	JALR RA, (X5) // Call C function
+	MOV X19, X2 // Restore SP
+	JMP (X18) // Return to Go.
+
+// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
+// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
+// The overall effect of Go->C->Go call chain is similar to that of mcall.
+// R0 contains command code. R1 contains command-specific context.
+// See racecallback for command codes.
+TEXT runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
+	// Handle command raceGetProcCmd (0) here.
+	// First, code below assumes that we are on curg, while raceGetProcCmd
+	// can be executed on g0. Second, it is called frequently, so will
+	// benefit from this fast path.
+	BNEZ X10, rest
+	MOV X1, X5
+	MOV g, X6
+	CALL runtime·load_g(SB)
+	MOV g_m(g), X7
+	MOV m_p(X7), X7
+	MOV p_raceprocctx(X7), X7
+	MOV X7, (X11)
+	MOV X6, g
+	JMP (X5)
+rest:
+	// Save callee-save registers (X8, X9, X18..X27, F8, F9, F18..F27),
+	// since Go code will not respect this.
+	// 8(X2) and 16(X2) are for args passed to racecallback
+	SUB $(27*8), X2
+	MOV X1, (0*8)(X2)
+	MOV X8, (3*8)(X2)
+	MOV X9, (4*8)(X2)
+	MOV X18, (5*8)(X2)
+	MOV X19, (6*8)(X2)
+	MOV X20, (7*8)(X2)
+	MOV X21, (8*8)(X2)
+	MOV X22, (9*8)(X2)
+	MOV X23, (10*8)(X2)
+	MOV X24, (11*8)(X2)
+	MOV X25, (12*8)(X2)
+	MOV X26, (13*8)(X2)
+	MOV g, (14*8)(X2)
+	MOVD F8, (15*8)(X2)
+	MOVD F9, (16*8)(X2)
+	MOVD F18, (17*8)(X2)
+	MOVD F19, (18*8)(X2)
+	MOVD F20, (19*8)(X2)
+	MOVD F21, (20*8)(X2)
+	MOVD F22, (21*8)(X2)
+	MOVD F23, (22*8)(X2)
+	MOVD F24, (23*8)(X2)
+	MOVD F25, (24*8)(X2)
+	MOVD F26, (25*8)(X2)
+	MOVD F27, (26*8)(X2)
+
+	// Set g = g0.
+	CALL runtime·load_g(SB)
+	MOV g_m(g), X5
+	MOV m_g0(X5), X6
+	BEQ X6, g, noswitch // branch if already on g0
+	MOV X6, g
+
+	MOV X10, 8(X2) // func arg
+	MOV X11, 16(X2) // func arg
+	CALL runtime·racecallback(SB)
+
+	// All registers are smashed after Go code, reload.
+	MOV g_m(g), X5
+	MOV m_curg(X5), g // g = m->curg
+ret:
+	// Restore callee-save registers.
+	MOV (0*8)(X2), X1
+	MOV (3*8)(X2), X8
+	MOV (4*8)(X2), X9
+	MOV (5*8)(X2), X18
+	MOV (6*8)(X2), X19
+	MOV (7*8)(X2), X20
+	MOV (8*8)(X2), X21
+	MOV (9*8)(X2), X22
+	MOV (10*8)(X2), X23
+	MOV (11*8)(X2), X24
+	MOV (12*8)(X2), X25
+	MOV (13*8)(X2), X26
+	MOV (14*8)(X2), g
+	MOVD (15*8)(X2), F8
+	MOVD (16*8)(X2), F9
+	MOVD (17*8)(X2), F18
+	MOVD (18*8)(X2), F19
+	MOVD (19*8)(X2), F20
+	MOVD (20*8)(X2), F21
+	MOVD (21*8)(X2), F22
+	MOVD (22*8)(X2), F23
+	MOVD (23*8)(X2), F24
+	MOVD (24*8)(X2), F25
+	MOVD (25*8)(X2), F26
+	MOVD (26*8)(X2), F27
+
+	ADD $(27*8), X2
+	JMP (X1)
+
+noswitch:
+	// already on g0
+	MOV X10, 8(X2) // func arg
+	MOV X11, 16(X2) // func arg
+	CALL runtime·racecallback(SB)
+	JMP ret
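
[Editor's note: the program below is illustrative only and is not part of this change. It is a minimal sketch of how the newly supported platform can be exercised: on a linux/riscv64 host running a toolchain that includes this CL, building or running it with the standard -race flag (for example, go run -race racy.go; the file name racy.go is arbitrary) should produce a "WARNING: DATA RACE" report for the unsynchronized counter, just as it does on the other supported GOOS/GOARCH pairs.]

// racy.go - deliberately racy program for demonstrating the race detector.
package main

import (
	"fmt"
	"sync"
)

func main() {
	var n int // shared, written by both goroutines without synchronization
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				n++ // data race: concurrent unsynchronized read-modify-write
			}
		}()
	}
	wg.Wait()
	fmt.Println(n)
}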