From a930fede7386fd3583553b523fd6f7fa5fef1244 Mon Sep 17 00:00:00 2001
From: Cherry Zhang
Date: Sat, 26 Oct 2019 22:54:28 -0400
Subject: [PATCH] runtime: add async preemption support on MIPS and MIPS64

This CL adds support for call injection and async preemption on MIPS
and MIPS64.

Like ARM64, we need to clobber one register (REGTMP) for returning
from the injected call. Previous CLs have marked code sequences that
use REGTMP async-nonpreemptible.

It seems on MIPS/MIPS64, a CALL instruction is not "atomic" (!). If a
signal is delivered right at the CALL instruction, we may see an
updated LR with a not-yet-updated PC. In some cases this may lead to
failed stack unwinding. Don't preempt in this case.

Change-Id: I99437b2d05869ded5c0c8cb55265dbfc933aedab
Reviewed-on: https://go-review.googlesource.com/c/go/+/203720
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/ssa/gen/MIPS64Ops.go |   2 +
 src/cmd/compile/internal/ssa/gen/MIPSOps.go   |   2 +
 src/runtime/mkpreempt.go                      |  65 ++++++++-
 src/runtime/preempt.go                        |  13 +-
 src/runtime/preempt_mips64x.s                 | 135 +++++++++++++++++-
 src/runtime/preempt_mipsx.s                   | 135 +++++++++++++++++-
 src/runtime/signal_mips64x.go                 |  14 +-
 src/runtime/signal_mipsx.go                   |  14 +-
 src/runtime/signal_unix.go                    |   2 +-
 9 files changed, 370 insertions(+), 12 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go b/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go
index 10cf4a8b89..404b05b635 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go
@@ -100,6 +100,8 @@ var regNamesMIPS64 = []string{
 	"HI", // high bits of multiplication
 	"LO", // low bits of multiplication
 
+	// If you add registers, update asyncPreempt in runtime.
+
 	// pseudo-registers
 	"SB",
 }
diff --git a/src/cmd/compile/internal/ssa/gen/MIPSOps.go b/src/cmd/compile/internal/ssa/gen/MIPSOps.go
index a19f5b4c38..3b89557b14 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPSOps.go
+++ b/src/cmd/compile/internal/ssa/gen/MIPSOps.go
@@ -84,6 +84,8 @@ var regNamesMIPS = []string{
 	"HI", // high bits of multiplication
 	"LO", // low bits of multiplication
 
+	// If you add registers, update asyncPreempt in runtime.
+
 	// pseudo-registers
 	"SB",
 }
diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
index 2f022971fd..e9e82b8c43 100644
--- a/src/runtime/mkpreempt.go
+++ b/src/runtime/mkpreempt.go
@@ -80,8 +80,8 @@ var arches = map[string]func(){
 	"amd64":   genAMD64,
 	"arm":     genARM,
 	"arm64":   genARM64,
-	"mips64x": notImplemented,
-	"mipsx":   notImplemented,
+	"mips64x": func() { genMIPS(true) },
+	"mipsx":   func() { genMIPS(false) },
 	"ppc64x":  notImplemented,
 	"s390x":   genS390X,
 	"wasm":    genWasm,
@@ -356,6 +356,67 @@ func genARM64() {
 	p("JMP (R27)")
 }
 
+func genMIPS(_64bit bool) {
+	mov := "MOVW"
+	movf := "MOVF"
+	add := "ADD"
+	sub := "SUB"
+	r28 := "R28"
+	regsize := 4
+	if _64bit {
+		mov = "MOVV"
+		movf = "MOVD"
+		add = "ADDV"
+		sub = "SUBV"
+		r28 = "RSB"
+		regsize = 8
+	}
+
+	// Add integer registers R1-R22, R24-R25, R28
+	// R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
+	// and not saved here.
+	// R26 and R27 are reserved by kernel and not used.
+ var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR) + for i := 1; i <= 25; i++ { + if i == 23 { + continue // R23 is REGTMP + } + reg := fmt.Sprintf("R%d", i) + l.add(mov, reg, regsize) + } + l.add(mov, r28, regsize) + l.addSpecial( + mov+" HI, R1\n"+mov+" R1, %d(R29)", + mov+" %d(R29), R1\n"+mov+" R1, HI", + regsize) + l.addSpecial( + mov+" LO, R1\n"+mov+" R1, %d(R29)", + mov+" %d(R29), R1\n"+mov+" R1, LO", + regsize) + // Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant) + l.addSpecial( + mov+" FCR31, R1\n"+mov+" R1, %d(R29)", + mov+" %d(R29), R1\n"+mov+" R1, FCR31", + regsize) + // Add floating point registers F0-F31. + for i := 0; i <= 31; i++ { + reg := fmt.Sprintf("F%d", i) + l.add(movf, reg, regsize) + } + + // allocate frame, save PC of interrupted instruction (in LR) + p(mov+" R31, -%d(R29)", l.stack) + p(sub+" $%d, R29", l.stack) + + l.save() + p("CALL ·asyncPreempt2(SB)") + l.restore() + + p(mov+" %d(R29), R31", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it + p(mov + " (R29), R23") // load PC to REGTMP + p(add+" $%d, R29", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall) + p("JMP (R23)") +} + func genS390X() { // Add integer registers R0-R12 // R13 (g), R14 (LR), R15 (SP) are special, and not saved here. diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index 544c251a9f..f154614913 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -337,7 +337,7 @@ func wantAsyncPreempt(gp *g) bool { // 3. It's generally safe to interact with the runtime, even if we're // in a signal handler stopped here. For example, there are no runtime // locks held, so acquiring a runtime lock won't self-deadlock. -func isAsyncSafePoint(gp *g, pc, sp uintptr) bool { +func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) bool { mp := gp.m // Only user Gs can have safe-points. We check this first @@ -363,6 +363,17 @@ func isAsyncSafePoint(gp *g, pc, sp uintptr) bool { // Not Go code. return false } + if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc, nil) == 0 { + // We probably stopped at a half-executed CALL instruction, + // where the LR is updated but the PC has not. If we preempt + // here we'll see a seemingly self-recursive call, which is in + // fact not. + // This is normally ok, as we use the return address saved on + // stack for unwinding, not the LR value. But if this is a + // call to morestack, we haven't created the frame, and we'll + // use the LR for unwinding, which will be bad. + return false + } smi := pcdatavalue(f, _PCDATA_StackMapIndex, pc, nil) if smi == -2 { // Unsafe-point marked by compiler. 
This includes diff --git a/src/runtime/preempt_mips64x.s b/src/runtime/preempt_mips64x.s index 713c074abf..8048a87cd3 100644 --- a/src/runtime/preempt_mips64x.s +++ b/src/runtime/preempt_mips64x.s @@ -6,5 +6,136 @@ #include "textflag.h" TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 - // Not implemented yet - JMP ·abort(SB) + MOVV R31, -488(R29) + SUBV $488, R29 + MOVV R1, 8(R29) + MOVV R2, 16(R29) + MOVV R3, 24(R29) + MOVV R4, 32(R29) + MOVV R5, 40(R29) + MOVV R6, 48(R29) + MOVV R7, 56(R29) + MOVV R8, 64(R29) + MOVV R9, 72(R29) + MOVV R10, 80(R29) + MOVV R11, 88(R29) + MOVV R12, 96(R29) + MOVV R13, 104(R29) + MOVV R14, 112(R29) + MOVV R15, 120(R29) + MOVV R16, 128(R29) + MOVV R17, 136(R29) + MOVV R18, 144(R29) + MOVV R19, 152(R29) + MOVV R20, 160(R29) + MOVV R21, 168(R29) + MOVV R22, 176(R29) + MOVV R24, 184(R29) + MOVV R25, 192(R29) + MOVV RSB, 200(R29) + MOVV HI, R1 + MOVV R1, 208(R29) + MOVV LO, R1 + MOVV R1, 216(R29) + MOVV FCR31, R1 + MOVV R1, 224(R29) + MOVD F0, 232(R29) + MOVD F1, 240(R29) + MOVD F2, 248(R29) + MOVD F3, 256(R29) + MOVD F4, 264(R29) + MOVD F5, 272(R29) + MOVD F6, 280(R29) + MOVD F7, 288(R29) + MOVD F8, 296(R29) + MOVD F9, 304(R29) + MOVD F10, 312(R29) + MOVD F11, 320(R29) + MOVD F12, 328(R29) + MOVD F13, 336(R29) + MOVD F14, 344(R29) + MOVD F15, 352(R29) + MOVD F16, 360(R29) + MOVD F17, 368(R29) + MOVD F18, 376(R29) + MOVD F19, 384(R29) + MOVD F20, 392(R29) + MOVD F21, 400(R29) + MOVD F22, 408(R29) + MOVD F23, 416(R29) + MOVD F24, 424(R29) + MOVD F25, 432(R29) + MOVD F26, 440(R29) + MOVD F27, 448(R29) + MOVD F28, 456(R29) + MOVD F29, 464(R29) + MOVD F30, 472(R29) + MOVD F31, 480(R29) + CALL ·asyncPreempt2(SB) + MOVD 480(R29), F31 + MOVD 472(R29), F30 + MOVD 464(R29), F29 + MOVD 456(R29), F28 + MOVD 448(R29), F27 + MOVD 440(R29), F26 + MOVD 432(R29), F25 + MOVD 424(R29), F24 + MOVD 416(R29), F23 + MOVD 408(R29), F22 + MOVD 400(R29), F21 + MOVD 392(R29), F20 + MOVD 384(R29), F19 + MOVD 376(R29), F18 + MOVD 368(R29), F17 + MOVD 360(R29), F16 + MOVD 352(R29), F15 + MOVD 344(R29), F14 + MOVD 336(R29), F13 + MOVD 328(R29), F12 + MOVD 320(R29), F11 + MOVD 312(R29), F10 + MOVD 304(R29), F9 + MOVD 296(R29), F8 + MOVD 288(R29), F7 + MOVD 280(R29), F6 + MOVD 272(R29), F5 + MOVD 264(R29), F4 + MOVD 256(R29), F3 + MOVD 248(R29), F2 + MOVD 240(R29), F1 + MOVD 232(R29), F0 + MOVV 224(R29), R1 + MOVV R1, FCR31 + MOVV 216(R29), R1 + MOVV R1, LO + MOVV 208(R29), R1 + MOVV R1, HI + MOVV 200(R29), RSB + MOVV 192(R29), R25 + MOVV 184(R29), R24 + MOVV 176(R29), R22 + MOVV 168(R29), R21 + MOVV 160(R29), R20 + MOVV 152(R29), R19 + MOVV 144(R29), R18 + MOVV 136(R29), R17 + MOVV 128(R29), R16 + MOVV 120(R29), R15 + MOVV 112(R29), R14 + MOVV 104(R29), R13 + MOVV 96(R29), R12 + MOVV 88(R29), R11 + MOVV 80(R29), R10 + MOVV 72(R29), R9 + MOVV 64(R29), R8 + MOVV 56(R29), R7 + MOVV 48(R29), R6 + MOVV 40(R29), R5 + MOVV 32(R29), R4 + MOVV 24(R29), R3 + MOVV 16(R29), R2 + MOVV 8(R29), R1 + MOVV 488(R29), R31 + MOVV (R29), R23 + ADDV $496, R29 + JMP (R23) diff --git a/src/runtime/preempt_mipsx.s b/src/runtime/preempt_mipsx.s index 2538a2ee00..840e861497 100644 --- a/src/runtime/preempt_mipsx.s +++ b/src/runtime/preempt_mipsx.s @@ -6,5 +6,136 @@ #include "textflag.h" TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 - // Not implemented yet - JMP ·abort(SB) + MOVW R31, -244(R29) + SUB $244, R29 + MOVW R1, 4(R29) + MOVW R2, 8(R29) + MOVW R3, 12(R29) + MOVW R4, 16(R29) + MOVW R5, 20(R29) + MOVW R6, 24(R29) + MOVW R7, 28(R29) + MOVW R8, 32(R29) + MOVW R9, 36(R29) + MOVW R10, 40(R29) + MOVW R11, 44(R29) + MOVW 
R12, 48(R29) + MOVW R13, 52(R29) + MOVW R14, 56(R29) + MOVW R15, 60(R29) + MOVW R16, 64(R29) + MOVW R17, 68(R29) + MOVW R18, 72(R29) + MOVW R19, 76(R29) + MOVW R20, 80(R29) + MOVW R21, 84(R29) + MOVW R22, 88(R29) + MOVW R24, 92(R29) + MOVW R25, 96(R29) + MOVW R28, 100(R29) + MOVW HI, R1 + MOVW R1, 104(R29) + MOVW LO, R1 + MOVW R1, 108(R29) + MOVW FCR31, R1 + MOVW R1, 112(R29) + MOVF F0, 116(R29) + MOVF F1, 120(R29) + MOVF F2, 124(R29) + MOVF F3, 128(R29) + MOVF F4, 132(R29) + MOVF F5, 136(R29) + MOVF F6, 140(R29) + MOVF F7, 144(R29) + MOVF F8, 148(R29) + MOVF F9, 152(R29) + MOVF F10, 156(R29) + MOVF F11, 160(R29) + MOVF F12, 164(R29) + MOVF F13, 168(R29) + MOVF F14, 172(R29) + MOVF F15, 176(R29) + MOVF F16, 180(R29) + MOVF F17, 184(R29) + MOVF F18, 188(R29) + MOVF F19, 192(R29) + MOVF F20, 196(R29) + MOVF F21, 200(R29) + MOVF F22, 204(R29) + MOVF F23, 208(R29) + MOVF F24, 212(R29) + MOVF F25, 216(R29) + MOVF F26, 220(R29) + MOVF F27, 224(R29) + MOVF F28, 228(R29) + MOVF F29, 232(R29) + MOVF F30, 236(R29) + MOVF F31, 240(R29) + CALL ·asyncPreempt2(SB) + MOVF 240(R29), F31 + MOVF 236(R29), F30 + MOVF 232(R29), F29 + MOVF 228(R29), F28 + MOVF 224(R29), F27 + MOVF 220(R29), F26 + MOVF 216(R29), F25 + MOVF 212(R29), F24 + MOVF 208(R29), F23 + MOVF 204(R29), F22 + MOVF 200(R29), F21 + MOVF 196(R29), F20 + MOVF 192(R29), F19 + MOVF 188(R29), F18 + MOVF 184(R29), F17 + MOVF 180(R29), F16 + MOVF 176(R29), F15 + MOVF 172(R29), F14 + MOVF 168(R29), F13 + MOVF 164(R29), F12 + MOVF 160(R29), F11 + MOVF 156(R29), F10 + MOVF 152(R29), F9 + MOVF 148(R29), F8 + MOVF 144(R29), F7 + MOVF 140(R29), F6 + MOVF 136(R29), F5 + MOVF 132(R29), F4 + MOVF 128(R29), F3 + MOVF 124(R29), F2 + MOVF 120(R29), F1 + MOVF 116(R29), F0 + MOVW 112(R29), R1 + MOVW R1, FCR31 + MOVW 108(R29), R1 + MOVW R1, LO + MOVW 104(R29), R1 + MOVW R1, HI + MOVW 100(R29), R28 + MOVW 96(R29), R25 + MOVW 92(R29), R24 + MOVW 88(R29), R22 + MOVW 84(R29), R21 + MOVW 80(R29), R20 + MOVW 76(R29), R19 + MOVW 72(R29), R18 + MOVW 68(R29), R17 + MOVW 64(R29), R16 + MOVW 60(R29), R15 + MOVW 56(R29), R14 + MOVW 52(R29), R13 + MOVW 48(R29), R12 + MOVW 44(R29), R11 + MOVW 40(R29), R10 + MOVW 36(R29), R9 + MOVW 32(R29), R8 + MOVW 28(R29), R7 + MOVW 24(R29), R6 + MOVW 20(R29), R5 + MOVW 16(R29), R4 + MOVW 12(R29), R3 + MOVW 8(R29), R2 + MOVW 4(R29), R1 + MOVW 244(R29), R31 + MOVW (R29), R23 + ADD $248, R29 + JMP (R23) diff --git a/src/runtime/signal_mips64x.go b/src/runtime/signal_mips64x.go index 3f1992c711..011db09727 100644 --- a/src/runtime/signal_mips64x.go +++ b/src/runtime/signal_mips64x.go @@ -85,8 +85,18 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) { c.set_pc(sigpanicPC) } -const pushCallSupported = false +const pushCallSupported = true func (c *sigctxt) pushCall(targetPC uintptr) { - throw("not implemented") + // Push the LR to stack, as we'll clobber it in order to + // push the call. The function being pushed is responsible + // for restoring the LR and setting the SP back. + // This extra slot is known to gentraceback. + sp := c.sp() - 8 + c.set_sp(sp) + *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.link() + // Set up PC and LR to pretend the function being signaled + // calls targetPC at the faulting PC. 
+ c.set_link(c.pc()) + c.set_pc(uint64(targetPC)) } diff --git a/src/runtime/signal_mipsx.go b/src/runtime/signal_mipsx.go index 6b5ed2872d..edc38c043f 100644 --- a/src/runtime/signal_mipsx.go +++ b/src/runtime/signal_mipsx.go @@ -80,8 +80,18 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) { c.set_pc(uint32(funcPC(sigpanic))) } -const pushCallSupported = false +const pushCallSupported = true func (c *sigctxt) pushCall(targetPC uintptr) { - throw("not implemented") + // Push the LR to stack, as we'll clobber it in order to + // push the call. The function being pushed is responsible + // for restoring the LR and setting the SP back. + // This extra slot is known to gentraceback. + sp := c.sp() - 4 + c.set_sp(sp) + *(*uint32)(unsafe.Pointer(uintptr(sp))) = c.link() + // Set up PC and LR to pretend the function being signaled + // calls targetPC at the faulting PC. + c.set_link(c.pc()) + c.set_pc(uint32(targetPC)) } diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index fab8574d1c..35e641286b 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -326,7 +326,7 @@ func sigpipe() { func doSigPreempt(gp *g, ctxt *sigctxt) { // Check if this G wants to be preempted and is safe to // preempt. - if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp()) { + if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()) { // Inject a call to asyncPreempt. ctxt.pushCall(funcPC(asyncPreempt)) } -- 2.50.0
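For readers checking the hand-written frame layout in preempt_mips64x.s and preempt_mipsx.s against the generator, the constants follow directly from the register set genMIPS saves: one slot for the PC of the interrupted instruction (the old LR), R1-R22 and R24-R25 (R23 is REGTMP and is skipped), R28, HI, LO, FCR31, and F0-F31. The epilogue pops one extra slot because sigctxt.pushCall pushed the original LR before the injected call. The following standalone Go sketch (illustrative only, not part of the patch) reproduces that arithmetic:

package main

import "fmt"

// frameSize mirrors the slot layout emitted by genMIPS in mkpreempt.go:
// a slot for the interrupted PC plus every register asyncPreempt saves.
// regsize is 4 on mipsx and 8 on mips64x.
func frameSize(regsize int) (frame, pop int) {
	slots := 1  // PC of the interrupted instruction (old LR), at offset 0
	slots += 24 // R1-R22, R24, R25 (R23 is REGTMP and is not saved)
	slots += 1  // R28 (RSB on mips64)
	slots += 3  // HI, LO, FCR31
	slots += 32 // F0-F31
	frame = slots * regsize
	// The epilogue also pops the slot pushed by sigctxt.pushCall.
	pop = frame + regsize
	return
}

func main() {
	fmt.Println(frameSize(4)) // 244 248, matching SUB/ADD in preempt_mipsx.s
	fmt.Println(frameSize(8)) // 488 496, matching SUBV/ADDV in preempt_mips64x.s
}

For reference, the lr == pc+8 test added to isAsyncSafePoint relies on a MIPS detail: JAL/JALR at address pc writes pc+8 (the instruction after the delay slot) into LR, so observing that value suggests the call has updated LR while the PC has not yet advanced; the additional funcspdelta == 0 condition limits the refusal to frames that would be unwound through LR, such as a pending call to morestack.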