// Save and restore VR20-31 (aka VSR56-63). These
// macros must point to a 16B aligned offset.
#define SAVE_VR_SIZE (12*16)
-#define SAVE_VR(offset, rtmp) \
- MOVD $(offset), rtmp \
- STVX V20, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V21, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V22, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V23, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V24, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V25, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V26, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V27, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V28, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V29, (rtmp)(R1) \
- ADD $16, rtmp \
- STVX V30, (rtmp)(R1) \
- ADD $16, rtmp \
+#define SAVE_VR(offset, rtmp) \
+ MOVD $(offset+16*0), rtmp \
+ STVX V20, (rtmp)(R1) \
+ MOVD $(offset+16*1), rtmp \
+ STVX V21, (rtmp)(R1) \
+ MOVD $(offset+16*2), rtmp \
+ STVX V22, (rtmp)(R1) \
+ MOVD $(offset+16*3), rtmp \
+ STVX V23, (rtmp)(R1) \
+ MOVD $(offset+16*4), rtmp \
+ STVX V24, (rtmp)(R1) \
+ MOVD $(offset+16*5), rtmp \
+ STVX V25, (rtmp)(R1) \
+ MOVD $(offset+16*6), rtmp \
+ STVX V26, (rtmp)(R1) \
+ MOVD $(offset+16*7), rtmp \
+ STVX V27, (rtmp)(R1) \
+ MOVD $(offset+16*8), rtmp \
+ STVX V28, (rtmp)(R1) \
+ MOVD $(offset+16*9), rtmp \
+ STVX V29, (rtmp)(R1) \
+ MOVD $(offset+16*10), rtmp \
+ STVX V30, (rtmp)(R1) \
+ MOVD $(offset+16*11), rtmp \
STVX V31, (rtmp)(R1)
-#define RESTORE_VR(offset, rtmp) \
- MOVD $(offset), rtmp \
- LVX (rtmp)(R1), V20 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V21 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V22 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V23 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V24 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V25 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V26 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V27 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V28 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V29 \
- ADD $16, rtmp \
- LVX (rtmp)(R1), V30 \
- ADD $16, rtmp \
+#define RESTORE_VR(offset, rtmp) \
+ MOVD $(offset+16*0), rtmp \
+ LVX (rtmp)(R1), V20 \
+ MOVD $(offset+16*1), rtmp \
+ LVX (rtmp)(R1), V21 \
+ MOVD $(offset+16*2), rtmp \
+ LVX (rtmp)(R1), V22 \
+ MOVD $(offset+16*3), rtmp \
+ LVX (rtmp)(R1), V23 \
+ MOVD $(offset+16*4), rtmp \
+ LVX (rtmp)(R1), V24 \
+ MOVD $(offset+16*5), rtmp \
+ LVX (rtmp)(R1), V25 \
+ MOVD $(offset+16*6), rtmp \
+ LVX (rtmp)(R1), V26 \
+ MOVD $(offset+16*7), rtmp \
+ LVX (rtmp)(R1), V27 \
+ MOVD $(offset+16*8), rtmp \
+ LVX (rtmp)(R1), V28 \
+ MOVD $(offset+16*9), rtmp \
+ LVX (rtmp)(R1), V29 \
+ MOVD $(offset+16*10), rtmp \
+ LVX (rtmp)(R1), V30 \
+ MOVD $(offset+16*11), rtmp \
LVX (rtmp)(R1), V31
// LR and CR are saved in the caller's frame. The callee must
// make space for all other callee-save registers.
#define SAVE_ALL_REG_SIZE (SAVE_GPR_SIZE+SAVE_FPR_SIZE+SAVE_VR_SIZE)
+
+// Stack a frame and save all callee-save registers following the
+// host OS's ABI. Fortunately, this is identical for AIX, ELFv1, and
+// ELFv2. All host ABIs require the stack pointer to maintain 16 byte
+// alignment, and save the callee-save registers in the same places.
+//
+// To restate, R1 is assumed to be aligned when this macro is used.
+// This assumes the caller's frame is compliant with the host ABI.
+// CR and LR are saved into the caller's frame per the host ABI.
+// R0 is initialized to $0 as expected by Go.
+#define STACK_AND_SAVE_HOST_TO_GO_ABI(extra) \
+ MOVD LR, R0 \
+ MOVD R0, 16(R1) \
+ MOVW CR, R0 \
+ MOVD R0, 8(R1) \
+ MOVDU R1, -(extra)-FIXED_FRAME-SAVE_ALL_REG_SIZE(R1) \
+ SAVE_GPR(extra+FIXED_FRAME) \
+ SAVE_FPR(extra+FIXED_FRAME+SAVE_GPR_SIZE) \
+ SAVE_VR(extra+FIXED_FRAME+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R0) \
+ MOVD $0, R0
+
+// This unstacks the frame, restoring all callee-save registers
+// as saved by STACK_AND_SAVE_HOST_TO_GO_ABI.
+//
+// R0 is not guaranteed to contain $0 after this macro.
+#define UNSTACK_AND_RESTORE_GO_TO_HOST_ABI(extra) \
+ RESTORE_GPR(extra+FIXED_FRAME) \
+ RESTORE_FPR(extra+FIXED_FRAME+SAVE_GPR_SIZE) \
+ RESTORE_VR(extra+FIXED_FRAME+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R0) \
+ ADD $(extra+FIXED_FRAME+SAVE_ALL_REG_SIZE), R1 \
+ MOVD 16(R1), R0 \
+ MOVD R0, LR \
+ MOVD 8(R1), R0 \
+ MOVW R0, CR
// ppc64le doesn't need function descriptors
// Save callee-save registers in the case of signal forwarding.
// Same as on ARM64 https://golang.org/issue/31827 .
+//
+// Note, it is assumed this is always called indirectly (e.g via
+// a function pointer) as R2 may not be preserved when calling this
+// function. In those cases, the caller preserves their R2.
TEXT runtime·sigtramp(SB),NOSPLIT|NOFRAME,$0
#endif
- // Start with standard C stack frame layout and linkage.
- MOVD LR, R0
- MOVD R0, 16(R1) // Save LR in caller's frame.
- MOVW CR, R0 // Save CR in caller's frame
- MOVD R0, 8(R1)
- // The stack must be acquired here and not
- // in the automatic way based on stack size
- // since that sequence clobbers R31 before it
- // gets saved.
- // We are being ultra safe here in saving the
- // Vregs. The case where they might need to
- // be saved is very unlikely.
- MOVDU R1, -544(R1)
- MOVD R14, 64(R1)
- MOVD R15, 72(R1)
- MOVD R16, 80(R1)
- MOVD R17, 88(R1)
- MOVD R18, 96(R1)
- MOVD R19, 104(R1)
- MOVD R20, 112(R1)
- MOVD R21, 120(R1)
- MOVD R22, 128(R1)
- MOVD R23, 136(R1)
- MOVD R24, 144(R1)
- MOVD R25, 152(R1)
- MOVD R26, 160(R1)
- MOVD R27, 168(R1)
- MOVD R28, 176(R1)
- MOVD R29, 184(R1)
- MOVD g, 192(R1) // R30
- MOVD R31, 200(R1)
- FMOVD F14, 208(R1)
- FMOVD F15, 216(R1)
- FMOVD F16, 224(R1)
- FMOVD F17, 232(R1)
- FMOVD F18, 240(R1)
- FMOVD F19, 248(R1)
- FMOVD F20, 256(R1)
- FMOVD F21, 264(R1)
- FMOVD F22, 272(R1)
- FMOVD F23, 280(R1)
- FMOVD F24, 288(R1)
- FMOVD F25, 296(R1)
- FMOVD F26, 304(R1)
- FMOVD F27, 312(R1)
- FMOVD F28, 320(R1)
- FMOVD F29, 328(R1)
- FMOVD F30, 336(R1)
- FMOVD F31, 344(R1)
- // Save V regs
- // STXVD2X and LXVD2X used since
- // we aren't sure of alignment.
- // Endianness doesn't matter
- // if we are just loading and
- // storing values.
- MOVD $352, R7 // V20
- STXVD2X VS52, (R7)(R1)
- ADD $16, R7 // V21 368
- STXVD2X VS53, (R7)(R1)
- ADD $16, R7 // V22 384
- STXVD2X VS54, (R7)(R1)
- ADD $16, R7 // V23 400
- STXVD2X VS55, (R7)(R1)
- ADD $16, R7 // V24 416
- STXVD2X VS56, (R7)(R1)
- ADD $16, R7 // V25 432
- STXVD2X VS57, (R7)(R1)
- ADD $16, R7 // V26 448
- STXVD2X VS58, (R7)(R1)
- ADD $16, R7 // V27 464
- STXVD2X VS59, (R7)(R1)
- ADD $16, R7 // V28 480
- STXVD2X VS60, (R7)(R1)
- ADD $16, R7 // V29 496
- STXVD2X VS61, (R7)(R1)
- ADD $16, R7 // V30 512
- STXVD2X VS62, (R7)(R1)
- ADD $16, R7 // V31 528
- STXVD2X VS63, (R7)(R1)
-
- // initialize essential registers (just in case)
- BL runtime·reginit(SB)
+ // This is called with ELF calling conventions. Convert to Go.
+ // Allocate space for argument storage to call runtime.sigtrampgo.
+ STACK_AND_SAVE_HOST_TO_GO_ABI(32)
// this might be called in external code context,
// where g is not set.
BEQ 2(PC)
BL runtime·load_g(SB)
- MOVW R3, FIXED_FRAME+0(R1)
- MOVD R4, FIXED_FRAME+8(R1)
- MOVD R5, FIXED_FRAME+16(R1)
- MOVD $runtime·sigtrampgo(SB), R12
+ // R3,R4,R5 already hold the arguments. Forward them on.
+ // TODO: Indirectly call runtime.sigtrampgo to avoid the linker's static NOSPLIT stack
+ // overflow detection. It thinks this might be called on a small Go stack, but this is only
+ // called from a larger pthread or sigaltstack stack. Can the checker be improved to not
+ // flag a direct call here?
+ MOVD $runtime·sigtrampgo<ABIInternal>(SB), R12
MOVD R12, CTR
BL (CTR)
- MOVD 24(R1), R2 // Should this be here? Where is it saved?
- // Starts at 64; FIXED_FRAME is 32
- MOVD 64(R1), R14
- MOVD 72(R1), R15
- MOVD 80(R1), R16
- MOVD 88(R1), R17
- MOVD 96(R1), R18
- MOVD 104(R1), R19
- MOVD 112(R1), R20
- MOVD 120(R1), R21
- MOVD 128(R1), R22
- MOVD 136(R1), R23
- MOVD 144(R1), R24
- MOVD 152(R1), R25
- MOVD 160(R1), R26
- MOVD 168(R1), R27
- MOVD 176(R1), R28
- MOVD 184(R1), R29
- MOVD 192(R1), g // R30
- MOVD 200(R1), R31
- FMOVD 208(R1), F14
- FMOVD 216(R1), F15
- FMOVD 224(R1), F16
- FMOVD 232(R1), F17
- FMOVD 240(R1), F18
- FMOVD 248(R1), F19
- FMOVD 256(R1), F20
- FMOVD 264(R1), F21
- FMOVD 272(R1), F22
- FMOVD 280(R1), F23
- FMOVD 288(R1), F24
- FMOVD 292(R1), F25
- FMOVD 300(R1), F26
- FMOVD 308(R1), F27
- FMOVD 316(R1), F28
- FMOVD 328(R1), F29
- FMOVD 336(R1), F30
- FMOVD 344(R1), F31
- MOVD $352, R7
- LXVD2X (R7)(R1), VS52
- ADD $16, R7 // 368 V21
- LXVD2X (R7)(R1), VS53
- ADD $16, R7 // 384 V22
- LXVD2X (R7)(R1), VS54
- ADD $16, R7 // 400 V23
- LXVD2X (R7)(R1), VS55
- ADD $16, R7 // 416 V24
- LXVD2X (R7)(R1), VS56
- ADD $16, R7 // 432 V25
- LXVD2X (R7)(R1), VS57
- ADD $16, R7 // 448 V26
- LXVD2X (R7)(R1), VS58
- ADD $16, R8 // 464 V27
- LXVD2X (R7)(R1), VS59
- ADD $16, R7 // 480 V28
- LXVD2X (R7)(R1), VS60
- ADD $16, R7 // 496 V29
- LXVD2X (R7)(R1), VS61
- ADD $16, R7 // 512 V30
- LXVD2X (R7)(R1), VS62
- ADD $16, R7 // 528 V31
- LXVD2X (R7)(R1), VS63
- ADD $544, R1
- MOVD 8(R1), R0
- MOVFL R0, $0xff
- MOVD 16(R1), R0
- MOVD R0, LR
+ // Restore R2 (TOC pointer) in the event it might be used later in this function.
+ // If this was not compiled as shared code, R2 is undefined, reloading it is harmless.
+ MOVD 24(R1), R2
+ UNSTACK_AND_RESTORE_GO_TO_HOST_ABI(32)
RET
#ifdef GOARCH_ppc64le
TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0
// This is called from C code. Callee save registers must be saved.
- // R3,R4,R5 hold arguments.
- // Save LR into R0 and stack a big frame.
- MOVD LR, R0
- MOVD R0, 16(R1)
- MOVW CR, R0
- MOVD R0, 8(R1)
- // Don't save a back chain pointer when calling into Go. It will be overwritten.
- // Go stores LR where ELF stores a back chain pointer. And, allocate 64B for
- // FIXED_FRAME and 24B argument space, rounded up to a 16 byte boundary.
- ADD $-(64+SAVE_ALL_REG_SIZE), R1
-
- SAVE_GPR(64)
- SAVE_FPR(64+SAVE_GPR_SIZE)
- SAVE_VR(64+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R6)
+ // R3,R4,R5 hold arguments, and allocate argument space to call sigprofNonGo.
+ STACK_AND_SAVE_HOST_TO_GO_ABI(32)
- MOVD $0, R0
CALL runtime·sigprofNonGo<ABIInternal>(SB)
- RESTORE_GPR(64)
- RESTORE_FPR(64+SAVE_GPR_SIZE)
- RESTORE_VR(64+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R6)
-
- // Clear frame, restore LR, return
- ADD $(64+SAVE_ALL_REG_SIZE), R1
- MOVD 16(R1), R0
- MOVD R0, LR
- MOVD 8(R1), R0
- MOVW R0, CR
+ UNSTACK_AND_RESTORE_GO_TO_HOST_ABI(32)
RET
TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0