From 3a55b92ccffc9211d2aac68802433712f126ec75 Mon Sep 17 00:00:00 2001 From: limeidan Date: Tue, 21 May 2024 19:23:44 +0800 Subject: [PATCH] runtime: add debug call injection support on loong64 Change-Id: Iaf2bd9da0b35c20c5b57db2eb9b2eea2b662140c Reviewed-on: https://go-review.googlesource.com/c/go/+/587055 LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: abner chenc Reviewed-by: Michael Knyszek Auto-Submit: abner chenc --- src/runtime/asm_loong64.s | 227 +++++++++++++++++++++++ src/runtime/debug_test.go | 2 +- src/runtime/debugcall.go | 2 +- src/runtime/export_debug_loong64_test.go | 227 +++++++++++++++++++++++ src/runtime/export_debug_test.go | 2 +- src/runtime/signal_loong64.go | 7 +- 6 files changed, 461 insertions(+), 6 deletions(-) create mode 100644 src/runtime/export_debug_loong64_test.go diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s index c16b27a0f2..c6e46f5f69 100644 --- a/src/runtime/asm_loong64.s +++ b/src/runtime/asm_loong64.s @@ -69,6 +69,10 @@ nocgo: // start this M JAL runtime·mstart(SB) + // Prevent dead-code elimination of debugCallV2, which is + // intended to be called by debuggers. + MOVV $runtime·debugCallV2(SB), R0 + MOVV R0, 1(R0) RET @@ -882,6 +886,229 @@ TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 MOVV $64, R29 JMP gcWriteBarrier<>(SB) +DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large" +GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below + +// debugCallV2 is the entry point for debugger-injected function +// calls on running goroutines. It informs the runtime that a +// debug call has been injected and creates a call frame for the +// debugger to fill in. +// +// To inject a function call, a debugger should: +// 1. Check that the goroutine is in state _Grunning and that +// there are at least 280 bytes free on the stack. +// 2. Set SP as SP-8. +// 3. Store the current LR in (SP) (using the SP after step 2). +// 4. Store the current PC in the LR register. +// 5. Write the desired argument frame size at SP-8 +// 6. Save all machine registers so they can be restored later by the debugger. +// 7. Set the PC to debugCallV2 and resume execution. +// +// If the goroutine is in state _Grunnable, then it's not generally +// safe to inject a call because it may return out via other runtime +// operations. Instead, the debugger should unwind the stack to find +// the return to non-runtime code, add a temporary breakpoint there, +// and inject the call once that breakpoint is hit. +// +// If the goroutine is in any other state, it's not safe to inject a call. +// +// This function communicates back to the debugger by setting R19 and +// invoking BREAK to raise a breakpoint signal. Note that the signal PC of +// the signal triggered by the BREAK instruction is the PC where the signal +// is trapped, not the next PC, so to resume execution, the debugger needs +// to set the signal PC to PC+4. See the comments in the implementation for +// the protocol the debugger is expected to follow. InjectDebugCall in the +// runtime tests demonstrates this protocol. +// +// The debugger must ensure that any pointers passed to the function +// obey escape analysis requirements. Specifically, it must not pass +// a stack pointer to an escaping argument. debugCallV2 cannot check +// this invariant. +// +// This is ABIInternal because Go code injects its PC directly into new +// goroutine stacks. +TEXT runtime·debugCallV2(SB),NOSPLIT|NOFRAME,$0-0 + MOVV R1, -272(R3) + ADDV $-272, R3 + + // We can't do anything that might clobber any of these + // registers before this. + MOVV R2, (4*8)(R3) + MOVV R4, (5*8)(R3) + MOVV R5, (6*8)(R3) + MOVV R6, (7*8)(R3) + MOVV R7, (8*8)(R3) + MOVV R8, (9*8)(R3) + MOVV R9, (10*8)(R3) + MOVV R10, (11*8)(R3) + MOVV R11, (12*8)(R3) + MOVV R12, (13*8)(R3) + MOVV R13, (14*8)(R3) + MOVV R14, (15*8)(R3) + MOVV R15, (16*8)(R3) + MOVV R16, (17*8)(R3) + MOVV R17, (18*8)(R3) + MOVV R18, (19*8)(R3) + MOVV R19, (20*8)(R3) + MOVV R20, (21*8)(R3) + MOVV R21, (22*8)(R3) + MOVV g, (23*8)(R3) + MOVV R23, (24*8)(R3) + MOVV R24, (25*8)(R3) + MOVV R25, (26*8)(R3) + MOVV R26, (27*8)(R3) + MOVV R27, (28*8)(R3) + MOVV R28, (29*8)(R3) + MOVV R29, (30*8)(R3) + MOVV R30, (31*8)(R3) + MOVV R31, (32*8)(R3) + + // Perform a safe-point check. + MOVV R1, 8(R3) + CALL runtime·debugCallCheck(SB) + MOVV 16(R3), R30 + BEQ R30, good + + // The safety check failed. Put the reason string at the top + // of the stack. + MOVV R30, 8(R3) + + MOVV 24(R3), R30 + MOVV R30, 16(R3) + + MOVV $8, R19 + BREAK + JMP restore + +good: + // Registers are saved and it's safe to make a call. + // Open up a call frame, moving the stack if necessary. + // + // Once the frame is allocated, this will set R19 to 0 and + // invoke BREAK. The debugger should write the argument + // frame for the call at SP+8, set up argument registers, + // set the LR as the signal PC + 4, set the PC to the function + // to call, set R29 to point to the closure (if a closure call), + // and resume execution. + // + // If the function returns, this will set R19 to 1 and invoke + // BREAK. The debugger can then inspect any return value saved + // on the stack at SP+8 and in registers. To resume execution, + // the debugger should restore the LR from (SP). + // + // If the function panics, this will set R19 to 2 and invoke BREAK. + // The interface{} value of the panic will be at SP+8. The debugger + // can inspect the panic value and resume execution again. +#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \ + MOVV $MAXSIZE, R27; \ + BLT R27, R30, 5(PC); \ + MOVV $NAME(SB), R28; \ + MOVV R28, 8(R3); \ + CALL runtime·debugCallWrap(SB); \ + JMP restore + + MOVV 264(R3), R30 // the argument frame size + DEBUG_CALL_DISPATCH(debugCall32<>, 32) + DEBUG_CALL_DISPATCH(debugCall64<>, 64) + DEBUG_CALL_DISPATCH(debugCall128<>, 128) + DEBUG_CALL_DISPATCH(debugCall256<>, 256) + DEBUG_CALL_DISPATCH(debugCall512<>, 512) + DEBUG_CALL_DISPATCH(debugCall1024<>, 1024) + DEBUG_CALL_DISPATCH(debugCall2048<>, 2048) + DEBUG_CALL_DISPATCH(debugCall4096<>, 4096) + DEBUG_CALL_DISPATCH(debugCall8192<>, 8192) + DEBUG_CALL_DISPATCH(debugCall16384<>, 16384) + DEBUG_CALL_DISPATCH(debugCall32768<>, 32768) + DEBUG_CALL_DISPATCH(debugCall65536<>, 65536) + // The frame size is too large. Report the error. + MOVV $debugCallFrameTooLarge<>(SB), R30 + MOVV R30, 8(R3) + MOVV $20, R30 + MOVV R30, 16(R3) // length of debugCallFrameTooLarge string + MOVV $8, R19 + BREAK + JMP restore + +restore: + // Calls and failures resume here. + // + // Set R19 to 16 and invoke BREAK. The debugger should restore + // all registers except for PC and SP and resume execution. + MOVV $16, R19 + BREAK + // We must not modify flags after this point. + + // Restore pointer-containing registers, which may have been + // modified from the debugger's copy by stack copying. + MOVV (4*8)(R3), R2 + MOVV (5*8)(R3), R4 + MOVV (6*8)(R3), R5 + MOVV (7*8)(R3), R6 + MOVV (8*8)(R3), R7 + MOVV (9*8)(R3), R8 + MOVV (10*8)(R3), R9 + MOVV (11*8)(R3), R10 + MOVV (12*8)(R3), R11 + MOVV (13*8)(R3), R12 + MOVV (14*8)(R3), R13 + MOVV (15*8)(R3), R14 + MOVV (16*8)(R3), R15 + MOVV (17*8)(R3), R16 + MOVV (18*8)(R3), R17 + MOVV (19*8)(R3), R18 + MOVV (20*8)(R3), R19 + MOVV (21*8)(R3), R20 + MOVV (22*8)(R3), R21 + MOVV (23*8)(R3), g + MOVV (24*8)(R3), R23 + MOVV (25*8)(R3), R24 + MOVV (26*8)(R3), R25 + MOVV (27*8)(R3), R26 + MOVV (28*8)(R3), R27 + MOVV (29*8)(R3), R28 + MOVV (30*8)(R3), R29 + MOVV (31*8)(R3), R30 + MOVV (32*8)(R3), R31 + + MOVV 0(R3), R30 + ADDV $280, R3 // Add 8 more bytes, see saveSigContext + MOVV -8(R3), R1 + JMP (R30) + +// runtime.debugCallCheck assumes that functions defined with the +// DEBUG_CALL_FN macro are safe points to inject calls. +#define DEBUG_CALL_FN(NAME,MAXSIZE) \ +TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \ + NO_LOCAL_POINTERS; \ + MOVV $0, R19; \ + BREAK; \ + MOVV $1, R19; \ + BREAK; \ + RET +DEBUG_CALL_FN(debugCall32<>, 32) +DEBUG_CALL_FN(debugCall64<>, 64) +DEBUG_CALL_FN(debugCall128<>, 128) +DEBUG_CALL_FN(debugCall256<>, 256) +DEBUG_CALL_FN(debugCall512<>, 512) +DEBUG_CALL_FN(debugCall1024<>, 1024) +DEBUG_CALL_FN(debugCall2048<>, 2048) +DEBUG_CALL_FN(debugCall4096<>, 4096) +DEBUG_CALL_FN(debugCall8192<>, 8192) +DEBUG_CALL_FN(debugCall16384<>, 16384) +DEBUG_CALL_FN(debugCall32768<>, 32768) +DEBUG_CALL_FN(debugCall65536<>, 65536) + +// func debugCallPanicked(val interface{}) +TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16 + // Copy the panic value to the top of stack at SP+8. + MOVV val_type+0(FP), R30 + MOVV R30, 8(R3) + MOVV val_data+8(FP), R30 + MOVV R30, 16(R3) + MOVV $2, R19 + BREAK + RET + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. These stubs write the args into that stack space and diff --git a/src/runtime/debug_test.go b/src/runtime/debug_test.go index 1c00d2fb0d..0ee873d43f 100644 --- a/src/runtime/debug_test.go +++ b/src/runtime/debug_test.go @@ -9,7 +9,7 @@ // spends all of its time in the race runtime, which isn't a safe // point. -//go:build (amd64 || arm64 || ppc64le) && linux && !race +//go:build (amd64 || arm64 || loong64 || ppc64le) && linux && !race package runtime_test diff --git a/src/runtime/debugcall.go b/src/runtime/debugcall.go index fee4116aa5..a9be4f4ee3 100644 --- a/src/runtime/debugcall.go +++ b/src/runtime/debugcall.go @@ -5,7 +5,7 @@ // Though the debug call function feature is not enabled on // ppc64, inserted ppc64 to avoid missing Go declaration error // for debugCallPanicked while building runtime.test -//go:build amd64 || arm64 || ppc64le || ppc64 +//go:build amd64 || arm64 || loong64 || ppc64le || ppc64 package runtime diff --git a/src/runtime/export_debug_loong64_test.go b/src/runtime/export_debug_loong64_test.go new file mode 100644 index 0000000000..eaaf359892 --- /dev/null +++ b/src/runtime/export_debug_loong64_test.go @@ -0,0 +1,227 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build loong64 && linux + +package runtime + +import ( + "internal/abi" + "internal/goarch" + "unsafe" +) + +type sigContext struct { + savedRegs sigcontext +} + +func sigctxtSetContextRegister(ctxt *sigctxt, x uint64) { + ctxt.regs().sc_regs[29] = x +} + +func sigctxtAtTrapInstruction(ctxt *sigctxt) bool { + return *(*uint32)(unsafe.Pointer(ctxt.sigpc())) == 0x002a0000 // BREAK 0 +} + +func sigctxtStatus(ctxt *sigctxt) uint64 { + return ctxt.r19() +} + +func (h *debugCallHandler) saveSigContext(ctxt *sigctxt) { + sp := ctxt.sp() + sp -= goarch.PtrSize + ctxt.set_sp(sp) + *(*uint64)(unsafe.Pointer(uintptr(sp))) = ctxt.link() // save the current lr + ctxt.set_link(ctxt.pc()) // set new lr to the current pc + // Write the argument frame size. + *(*uintptr)(unsafe.Pointer(uintptr(sp - 8))) = h.argSize + // Save current registers. + h.sigCtxt.savedRegs = *ctxt.regs() +} + +// case 0 +func (h *debugCallHandler) debugCallRun(ctxt *sigctxt) { + sp := ctxt.sp() + memmove(unsafe.Pointer(uintptr(sp)+8), h.argp, h.argSize) + if h.regArgs != nil { + storeRegArgs(ctxt.regs(), h.regArgs) + } + // Push return PC, which should be the signal PC+4, because + // the signal PC is the PC of the trap instruction itself. + ctxt.set_link(ctxt.pc() + 4) + // Set PC to call and context register. + ctxt.set_pc(uint64(h.fv.fn)) + sigctxtSetContextRegister(ctxt, uint64(uintptr(unsafe.Pointer(h.fv)))) +} + +// case 1 +func (h *debugCallHandler) debugCallReturn(ctxt *sigctxt) { + sp := ctxt.sp() + memmove(h.argp, unsafe.Pointer(uintptr(sp)+8), h.argSize) + if h.regArgs != nil { + loadRegArgs(h.regArgs, ctxt.regs()) + } + // Restore the old lr from *sp + olr := *(*uint64)(unsafe.Pointer(uintptr(sp))) + ctxt.set_link(olr) + pc := ctxt.pc() + ctxt.set_pc(pc + 4) // step to next instruction +} + +// case 2 +func (h *debugCallHandler) debugCallPanicOut(ctxt *sigctxt) { + sp := ctxt.sp() + memmove(unsafe.Pointer(&h.panic), unsafe.Pointer(uintptr(sp)+8), 2*goarch.PtrSize) + ctxt.set_pc(ctxt.pc() + 4) +} + +// case 8 +func (h *debugCallHandler) debugCallUnsafe(ctxt *sigctxt) { + sp := ctxt.sp() + reason := *(*string)(unsafe.Pointer(uintptr(sp) + 8)) + h.err = plainError(reason) + ctxt.set_pc(ctxt.pc() + 4) +} + +// case 16 +func (h *debugCallHandler) restoreSigContext(ctxt *sigctxt) { + // Restore all registers except for pc and sp + pc, sp := ctxt.pc(), ctxt.sp() + *ctxt.regs() = h.sigCtxt.savedRegs + ctxt.set_pc(pc + 4) + ctxt.set_sp(sp) +} + +func getVal32(base uintptr, off uintptr) uint32 { + return *(*uint32)(unsafe.Pointer(base + off)) +} + +func getVal64(base uintptr, off uintptr) uint64 { + return *(*uint64)(unsafe.Pointer(base + off)) +} + +func setVal64(base uintptr, off uintptr, val uint64) { + *(*uint64)(unsafe.Pointer(base + off)) = val +} + +// Layout for sigcontext on linux/loong64: arch/loongarch/include/uapi/asm/sigcontext.h +// +// sc_extcontext | sctx_info +// ------------------------------------------ +// | {fpu,lsx,lasx}_context +// --------------------------- +// | sctx_info +// --------------------------- +// | lbt_context +// + +const ( + INVALID_MAGIC uint32 = 0 + FPU_CTX_MAGIC = 0x46505501 + LSX_CTX_MAGIC = 0x53580001 + LASX_CTX_MAGIC = 0x41535801 + LBT_CTX_MAGIC = 0x42540001 +) + +const ( + SCTX_INFO_SIZE = 4 + 4 + 8 + FPU_CTX_SIZE = 8*32 + 8 + 4 // fpu context size + LSX_CTX_SIZE = 8*64 + 8 + 4 // lsx context size + LASX_CTX_SIZE = 8*128 + 8 + 4 // lasx context size + LBT_CTX_SIZE = 8*4 + 4 + 4 // lbt context size +) + +// storeRegArgs sets up argument registers in the signal context state +// from an abi.RegArgs. +// +// Both src and dst must be non-nil. +func storeRegArgs(dst *sigcontext, src *abi.RegArgs) { + // R4..R19 are used to pass int arguments in registers on loong64 + for i := 0; i < abi.IntArgRegs; i++ { + dst.sc_regs[i+4] = (uint64)(src.Ints[i]) + } + + // F0..F15 are used to pass float arguments in registers on loong64 + offset := (uintptr)(0) + baseAddr := (uintptr)(unsafe.Pointer(&dst.sc_extcontext)) + + for { + magic := getVal32(baseAddr, offset) + size := getVal32(baseAddr, offset+4) + + switch magic { + case INVALID_MAGIC: + return + + case FPU_CTX_MAGIC: + offset += SCTX_INFO_SIZE + for i := 0; i < abi.FloatArgRegs; i++ { + setVal64(baseAddr, ((uintptr)(i*8) + offset), src.Floats[i]) + } + return + + case LSX_CTX_MAGIC: + offset += SCTX_INFO_SIZE + for i := 0; i < abi.FloatArgRegs; i++ { + setVal64(baseAddr, ((uintptr)(i*16) + offset), src.Floats[i]) + } + return + + case LASX_CTX_MAGIC: + offset += SCTX_INFO_SIZE + for i := 0; i < abi.FloatArgRegs; i++ { + setVal64(baseAddr, ((uintptr)(i*32) + offset), src.Floats[i]) + } + return + + case LBT_CTX_MAGIC: + offset += uintptr(size) + } + } +} + +func loadRegArgs(dst *abi.RegArgs, src *sigcontext) { + // R4..R19 are used to pass int arguments in registers on loong64 + for i := 0; i < abi.IntArgRegs; i++ { + dst.Ints[i] = uintptr(src.sc_regs[i+4]) + } + + // F0..F15 are used to pass float arguments in registers on loong64 + offset := (uintptr)(0) + baseAddr := (uintptr)(unsafe.Pointer(&src.sc_extcontext)) + + for { + magic := getVal32(baseAddr, offset) + size := getVal32(baseAddr, (offset + 4)) + + switch magic { + case INVALID_MAGIC: + return + + case FPU_CTX_MAGIC: + offset += SCTX_INFO_SIZE + for i := 0; i < abi.FloatArgRegs; i++ { + dst.Floats[i] = getVal64(baseAddr, (uintptr(i*8) + offset)) + } + return + + case LSX_CTX_MAGIC: + offset += SCTX_INFO_SIZE + for i := 0; i < abi.FloatArgRegs; i++ { + dst.Floats[i] = getVal64(baseAddr, (uintptr(i*16) + offset)) + } + return + + case LASX_CTX_MAGIC: + offset += SCTX_INFO_SIZE + for i := 0; i < abi.FloatArgRegs; i++ { + dst.Floats[i] = getVal64(baseAddr, (uintptr(i*32) + offset)) + } + return + + case LBT_CTX_MAGIC: + offset += uintptr(size) + } + } +} diff --git a/src/runtime/export_debug_test.go b/src/runtime/export_debug_test.go index 4e0a4ef97e..96f6fd9eea 100644 --- a/src/runtime/export_debug_test.go +++ b/src/runtime/export_debug_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (amd64 || arm64 || ppc64le) && linux +//go:build (amd64 || arm64 || loong64 || ppc64le) && linux package runtime diff --git a/src/runtime/signal_loong64.go b/src/runtime/signal_loong64.go index ac842c0c94..970af01cee 100644 --- a/src/runtime/signal_loong64.go +++ b/src/runtime/signal_loong64.go @@ -53,9 +53,10 @@ func dumpregs(c *sigctxt) { //go:nowritebarrierrec func (c *sigctxt) sigpc() uintptr { return uintptr(c.pc()) } -func (c *sigctxt) sigsp() uintptr { return uintptr(c.sp()) } -func (c *sigctxt) siglr() uintptr { return uintptr(c.link()) } -func (c *sigctxt) fault() uintptr { return uintptr(c.sigaddr()) } +func (c *sigctxt) setsigpc(x uint64) { c.set_pc(x) } +func (c *sigctxt) sigsp() uintptr { return uintptr(c.sp()) } +func (c *sigctxt) siglr() uintptr { return uintptr(c.link()) } +func (c *sigctxt) fault() uintptr { return uintptr(c.sigaddr()) } // preparePanic sets up the stack to look like a call to sigpanic. func (c *sigctxt) preparePanic(sig uint32, gp *g) { -- 2.48.1