From: Paul E. Murphy Date: Mon, 13 Mar 2023 20:58:15 +0000 (-0500) Subject: runtime: cleanup PPC64/linux runtime.sigtramp X-Git-Tag: go1.21rc1~1060 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=ac571a388dce20068f6639067789b4fbac9c0e7d;p=gostls13.git runtime: cleanup PPC64/linux runtime.sigtramp Add new helper macros to further simplify the transition from the host's ABI to Go. Fortunately the same one should work for all PPC64 targets. Update the other site which uses these wrappers to further consolidate. Also, update the call to runtime.sigtrampgo to call the ABIInternal version directly. Also, update the SAVE/RESTORE_VR macros to accept R0. Change-Id: I0046176029e1e1b25838688e4b7bf57805b01bd4 Reviewed-on: https://go-review.googlesource.com/c/go/+/476297 Reviewed-by: Archana Ravindar Run-TryBot: Paul Murphy Reviewed-by: Matthew Dempsky Reviewed-by: Lynn Boger Reviewed-by: Dmitri Shuralyov TryBot-Result: Gopher Robot --- diff --git a/src/runtime/cgo/abi_ppc64x.h b/src/runtime/cgo/abi_ppc64x.h index 5982c857b3..245a5266f6 100644 --- a/src/runtime/cgo/abi_ppc64x.h +++ b/src/runtime/cgo/abi_ppc64x.h @@ -104,58 +104,92 @@ // Save and restore VR20-31 (aka VSR56-63). These // macros must point to a 16B aligned offset. #define SAVE_VR_SIZE (12*16) -#define SAVE_VR(offset, rtmp) \ - MOVD $(offset), rtmp \ - STVX V20, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V21, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V22, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V23, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V24, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V25, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V26, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V27, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V28, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V29, (rtmp)(R1) \ - ADD $16, rtmp \ - STVX V30, (rtmp)(R1) \ - ADD $16, rtmp \ +#define SAVE_VR(offset, rtmp) \ + MOVD $(offset+16*0), rtmp \ + STVX V20, (rtmp)(R1) \ + MOVD $(offset+16*1), rtmp \ + STVX V21, (rtmp)(R1) \ + MOVD $(offset+16*2), rtmp \ + STVX V22, (rtmp)(R1) \ + MOVD $(offset+16*3), rtmp \ + STVX V23, (rtmp)(R1) \ + MOVD $(offset+16*4), rtmp \ + STVX V24, (rtmp)(R1) \ + MOVD $(offset+16*5), rtmp \ + STVX V25, (rtmp)(R1) \ + MOVD $(offset+16*6), rtmp \ + STVX V26, (rtmp)(R1) \ + MOVD $(offset+16*7), rtmp \ + STVX V27, (rtmp)(R1) \ + MOVD $(offset+16*8), rtmp \ + STVX V28, (rtmp)(R1) \ + MOVD $(offset+16*9), rtmp \ + STVX V29, (rtmp)(R1) \ + MOVD $(offset+16*10), rtmp \ + STVX V30, (rtmp)(R1) \ + MOVD $(offset+16*11), rtmp \ STVX V31, (rtmp)(R1) -#define RESTORE_VR(offset, rtmp) \ - MOVD $(offset), rtmp \ - LVX (rtmp)(R1), V20 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V21 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V22 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V23 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V24 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V25 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V26 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V27 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V28 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V29 \ - ADD $16, rtmp \ - LVX (rtmp)(R1), V30 \ - ADD $16, rtmp \ +#define RESTORE_VR(offset, rtmp) \ + MOVD $(offset+16*0), rtmp \ + LVX (rtmp)(R1), V20 \ + MOVD $(offset+16*1), rtmp \ + LVX (rtmp)(R1), V21 \ + MOVD $(offset+16*2), rtmp \ + LVX (rtmp)(R1), V22 \ + MOVD $(offset+16*3), rtmp \ + LVX (rtmp)(R1), V23 \ + MOVD $(offset+16*4), rtmp \ + LVX (rtmp)(R1), V24 \ + MOVD $(offset+16*5), rtmp \ + LVX (rtmp)(R1), V25 \ + MOVD $(offset+16*6), rtmp \ + LVX (rtmp)(R1), V26 \ + MOVD $(offset+16*7), rtmp \ + LVX (rtmp)(R1), V27 \ + MOVD $(offset+16*8), rtmp \ + LVX (rtmp)(R1), V28 \ + MOVD $(offset+16*9), rtmp \ + LVX (rtmp)(R1), V29 \ + MOVD $(offset+16*10), rtmp \ + LVX (rtmp)(R1), V30 \ + MOVD $(offset+16*11), rtmp \ LVX (rtmp)(R1), V31 // LR and CR are saved in the caller's frame. The callee must // make space for all other callee-save registers. #define SAVE_ALL_REG_SIZE (SAVE_GPR_SIZE+SAVE_FPR_SIZE+SAVE_VR_SIZE) + +// Stack a frame and save all callee-save registers following the +// host OS's ABI. Fortunately, this is identical for AIX, ELFv1, and +// ELFv2. All host ABIs require the stack pointer to maintain 16 byte +// alignment, and save the callee-save registers in the same places. +// +// To restate, R1 is assumed to be aligned when this macro is used. +// This assumes the caller's frame is compliant with the host ABI. +// CR and LR are saved into the caller's frame per the host ABI. +// R0 is initialized to $0 as expected by Go. +#define STACK_AND_SAVE_HOST_TO_GO_ABI(extra) \ + MOVD LR, R0 \ + MOVD R0, 16(R1) \ + MOVW CR, R0 \ + MOVD R0, 8(R1) \ + MOVDU R1, -(extra)-FIXED_FRAME-SAVE_ALL_REG_SIZE(R1) \ + SAVE_GPR(extra+FIXED_FRAME) \ + SAVE_FPR(extra+FIXED_FRAME+SAVE_GPR_SIZE) \ + SAVE_VR(extra+FIXED_FRAME+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R0) \ + MOVD $0, R0 + +// This unstacks the frame, restoring all callee-save registers +// as saved by STACK_AND_SAVE_HOST_TO_GO_ABI. +// +// R0 is not guaranteed to contain $0 after this macro. +#define UNSTACK_AND_RESTORE_GO_TO_HOST_ABI(extra) \ + RESTORE_GPR(extra+FIXED_FRAME) \ + RESTORE_FPR(extra+FIXED_FRAME+SAVE_GPR_SIZE) \ + RESTORE_VR(extra+FIXED_FRAME+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R0) \ + ADD $(extra+FIXED_FRAME+SAVE_ALL_REG_SIZE), R1 \ + MOVD 16(R1), R0 \ + MOVD R0, LR \ + MOVD 8(R1), R0 \ + MOVW R0, CR diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s index 44cd0e7b3e..d105585b7e 100644 --- a/src/runtime/sys_linux_ppc64x.s +++ b/src/runtime/sys_linux_ppc64x.s @@ -456,90 +456,15 @@ TEXT sigtramp<>(SB),NOSPLIT|NOFRAME|TOPFRAME,$0 // ppc64le doesn't need function descriptors // Save callee-save registers in the case of signal forwarding. // Same as on ARM64 https://golang.org/issue/31827 . +// +// Note, it is assumed this is always called indirectly (e.g via +// a function pointer) as R2 may not be preserved when calling this +// function. In those cases, the caller preserves their R2. TEXT runtime·sigtramp(SB),NOSPLIT|NOFRAME,$0 #endif - // Start with standard C stack frame layout and linkage. - MOVD LR, R0 - MOVD R0, 16(R1) // Save LR in caller's frame. - MOVW CR, R0 // Save CR in caller's frame - MOVD R0, 8(R1) - // The stack must be acquired here and not - // in the automatic way based on stack size - // since that sequence clobbers R31 before it - // gets saved. - // We are being ultra safe here in saving the - // Vregs. The case where they might need to - // be saved is very unlikely. - MOVDU R1, -544(R1) - MOVD R14, 64(R1) - MOVD R15, 72(R1) - MOVD R16, 80(R1) - MOVD R17, 88(R1) - MOVD R18, 96(R1) - MOVD R19, 104(R1) - MOVD R20, 112(R1) - MOVD R21, 120(R1) - MOVD R22, 128(R1) - MOVD R23, 136(R1) - MOVD R24, 144(R1) - MOVD R25, 152(R1) - MOVD R26, 160(R1) - MOVD R27, 168(R1) - MOVD R28, 176(R1) - MOVD R29, 184(R1) - MOVD g, 192(R1) // R30 - MOVD R31, 200(R1) - FMOVD F14, 208(R1) - FMOVD F15, 216(R1) - FMOVD F16, 224(R1) - FMOVD F17, 232(R1) - FMOVD F18, 240(R1) - FMOVD F19, 248(R1) - FMOVD F20, 256(R1) - FMOVD F21, 264(R1) - FMOVD F22, 272(R1) - FMOVD F23, 280(R1) - FMOVD F24, 288(R1) - FMOVD F25, 296(R1) - FMOVD F26, 304(R1) - FMOVD F27, 312(R1) - FMOVD F28, 320(R1) - FMOVD F29, 328(R1) - FMOVD F30, 336(R1) - FMOVD F31, 344(R1) - // Save V regs - // STXVD2X and LXVD2X used since - // we aren't sure of alignment. - // Endianness doesn't matter - // if we are just loading and - // storing values. - MOVD $352, R7 // V20 - STXVD2X VS52, (R7)(R1) - ADD $16, R7 // V21 368 - STXVD2X VS53, (R7)(R1) - ADD $16, R7 // V22 384 - STXVD2X VS54, (R7)(R1) - ADD $16, R7 // V23 400 - STXVD2X VS55, (R7)(R1) - ADD $16, R7 // V24 416 - STXVD2X VS56, (R7)(R1) - ADD $16, R7 // V25 432 - STXVD2X VS57, (R7)(R1) - ADD $16, R7 // V26 448 - STXVD2X VS58, (R7)(R1) - ADD $16, R7 // V27 464 - STXVD2X VS59, (R7)(R1) - ADD $16, R7 // V28 480 - STXVD2X VS60, (R7)(R1) - ADD $16, R7 // V29 496 - STXVD2X VS61, (R7)(R1) - ADD $16, R7 // V30 512 - STXVD2X VS62, (R7)(R1) - ADD $16, R7 // V31 528 - STXVD2X VS63, (R7)(R1) - - // initialize essential registers (just in case) - BL runtime·reginit(SB) + // This is called with ELF calling conventions. Convert to Go. + // Allocate space for argument storage to call runtime.sigtrampgo. + STACK_AND_SAVE_HOST_TO_GO_ABI(32) // this might be called in external code context, // where g is not set. @@ -548,80 +473,19 @@ TEXT runtime·sigtramp(SB),NOSPLIT|NOFRAME,$0 BEQ 2(PC) BL runtime·load_g(SB) - MOVW R3, FIXED_FRAME+0(R1) - MOVD R4, FIXED_FRAME+8(R1) - MOVD R5, FIXED_FRAME+16(R1) - MOVD $runtime·sigtrampgo(SB), R12 + // R3,R4,R5 already hold the arguments. Forward them on. + // TODO: Indirectly call runtime.sigtrampgo to avoid the linker's static NOSPLIT stack + // overflow detection. It thinks this might be called on a small Go stack, but this is only + // called from a larger pthread or sigaltstack stack. Can the checker be improved to not + // flag a direct call here? + MOVD $runtime·sigtrampgo(SB), R12 MOVD R12, CTR BL (CTR) - MOVD 24(R1), R2 // Should this be here? Where is it saved? - // Starts at 64; FIXED_FRAME is 32 - MOVD 64(R1), R14 - MOVD 72(R1), R15 - MOVD 80(R1), R16 - MOVD 88(R1), R17 - MOVD 96(R1), R18 - MOVD 104(R1), R19 - MOVD 112(R1), R20 - MOVD 120(R1), R21 - MOVD 128(R1), R22 - MOVD 136(R1), R23 - MOVD 144(R1), R24 - MOVD 152(R1), R25 - MOVD 160(R1), R26 - MOVD 168(R1), R27 - MOVD 176(R1), R28 - MOVD 184(R1), R29 - MOVD 192(R1), g // R30 - MOVD 200(R1), R31 - FMOVD 208(R1), F14 - FMOVD 216(R1), F15 - FMOVD 224(R1), F16 - FMOVD 232(R1), F17 - FMOVD 240(R1), F18 - FMOVD 248(R1), F19 - FMOVD 256(R1), F20 - FMOVD 264(R1), F21 - FMOVD 272(R1), F22 - FMOVD 280(R1), F23 - FMOVD 288(R1), F24 - FMOVD 292(R1), F25 - FMOVD 300(R1), F26 - FMOVD 308(R1), F27 - FMOVD 316(R1), F28 - FMOVD 328(R1), F29 - FMOVD 336(R1), F30 - FMOVD 344(R1), F31 - MOVD $352, R7 - LXVD2X (R7)(R1), VS52 - ADD $16, R7 // 368 V21 - LXVD2X (R7)(R1), VS53 - ADD $16, R7 // 384 V22 - LXVD2X (R7)(R1), VS54 - ADD $16, R7 // 400 V23 - LXVD2X (R7)(R1), VS55 - ADD $16, R7 // 416 V24 - LXVD2X (R7)(R1), VS56 - ADD $16, R7 // 432 V25 - LXVD2X (R7)(R1), VS57 - ADD $16, R7 // 448 V26 - LXVD2X (R7)(R1), VS58 - ADD $16, R8 // 464 V27 - LXVD2X (R7)(R1), VS59 - ADD $16, R7 // 480 V28 - LXVD2X (R7)(R1), VS60 - ADD $16, R7 // 496 V29 - LXVD2X (R7)(R1), VS61 - ADD $16, R7 // 512 V30 - LXVD2X (R7)(R1), VS62 - ADD $16, R7 // 528 V31 - LXVD2X (R7)(R1), VS63 - ADD $544, R1 - MOVD 8(R1), R0 - MOVFL R0, $0xff - MOVD 16(R1), R0 - MOVD R0, LR + // Restore R2 (TOC pointer) in the event it might be used later in this function. + // If this was not compiled as shared code, R2 is undefined, reloading it is harmless. + MOVD 24(R1), R2 + UNSTACK_AND_RESTORE_GO_TO_HOST_ABI(32) RET #ifdef GOARCH_ppc64le @@ -726,34 +590,12 @@ GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8 TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0 // This is called from C code. Callee save registers must be saved. - // R3,R4,R5 hold arguments. - // Save LR into R0 and stack a big frame. - MOVD LR, R0 - MOVD R0, 16(R1) - MOVW CR, R0 - MOVD R0, 8(R1) - // Don't save a back chain pointer when calling into Go. It will be overwritten. - // Go stores LR where ELF stores a back chain pointer. And, allocate 64B for - // FIXED_FRAME and 24B argument space, rounded up to a 16 byte boundary. - ADD $-(64+SAVE_ALL_REG_SIZE), R1 - - SAVE_GPR(64) - SAVE_FPR(64+SAVE_GPR_SIZE) - SAVE_VR(64+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R6) + // R3,R4,R5 hold arguments, and allocate argument space to call sigprofNonGo. + STACK_AND_SAVE_HOST_TO_GO_ABI(32) - MOVD $0, R0 CALL runtime·sigprofNonGo(SB) - RESTORE_GPR(64) - RESTORE_FPR(64+SAVE_GPR_SIZE) - RESTORE_VR(64+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R6) - - // Clear frame, restore LR, return - ADD $(64+SAVE_ALL_REG_SIZE), R1 - MOVD 16(R1), R0 - MOVD R0, LR - MOVD 8(R1), R0 - MOVW R0, CR + UNSTACK_AND_RESTORE_GO_TO_HOST_ABI(32) RET TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0