Debugging the Windows breakage I noticed that SEH
only exists on 386, so we can balance the two stacks
a little more on amd64 and reclaim another word.
Now we're down to just one word consumed by
cgocallback_gofunc, having reclaimed 25% of the
overall budget (4 words out of 16).
Separately, fix windows/386 - the SEH must be on the
m0 stack, as must the saved SP, so we are forced to have
a three-word frame for 386. It matters much less for
386, because there 128 bytes gives 32 words to use.
R=dvyukov, alex.brainman
CC=golang-dev
https://golang.org/cl/
11551044
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
-TEXT runtime·cgocallback_gofunc(SB),7,$8-12
+TEXT runtime·cgocallback_gofunc(SB),7,$12-12
// If m is nil, Go did not create the current thread.
// Call needm to obtain one for temporary use.
// In this case, we're running on the thread stack, so there's
JEQ 2(PC)
#endif
MOVL m(CX), BP
- MOVL BP, 4(SP)
+ MOVL BP, DX // saved copy of oldm
CMPL BP, $0
JNE havem
needm:
+ MOVL DX, 0(SP)
MOVL $runtime·needm(SB), AX
CALL AX
+ MOVL 0(SP), DX
get_tls(CX)
MOVL m(CX), BP
// Save current sp in m->g0->sched.sp in preparation for
// switch back to m->curg stack.
// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
+ // On Windows, the SEH is at 4(SP) and 8(SP).
MOVL m_g0(BP), SI
MOVL (g_sched+gobuf_sp)(SI), AX
MOVL AX, 0(SP)
// so that the traceback will seamlessly trace back into
// the earlier calls.
//
- // In the new goroutine, 0(SP) and 4(SP) are unused except
- // on Windows, where they are the SEH block.
+ // In the new goroutine, 0(SP) holds the saved oldm (DX) register.
+ // 4(SP) and 8(SP) are unused.
MOVL m_curg(BP), SI
MOVL SI, g(CX)
MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
MOVL (g_sched+gobuf_pc)(SI), BP
MOVL BP, -4(DI)
- LEAL -(4+8)(DI), SP
+ LEAL -(4+12)(DI), SP
+ MOVL DX, 0(SP)
CALL runtime·cgocallbackg(SB)
+ MOVL 0(SP), DX
// Restore g->sched (== m->curg->sched) from saved values.
get_tls(CX)
MOVL g(CX), SI
- MOVL 8(SP), BP
+ MOVL 12(SP), BP
MOVL BP, (g_sched+gobuf_pc)(SI)
- LEAL (8+4)(SP), DI
+ LEAL (12+4)(SP), DI
MOVL DI, (g_sched+gobuf_sp)(SI)
// Switch back to m->g0's stack and restore m->g0->sched.sp.
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
- MOVL 8(SP), BP
- CMPL BP, $0
+ CMPL DX, $0
JNE 3(PC)
MOVL $runtime·dropm(SB), AX
CALL AX
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
-TEXT runtime·cgocallback_gofunc(SB),7,$16-24
+TEXT runtime·cgocallback_gofunc(SB),7,$8-24
// If m is nil, Go did not create the current thread.
// Call needm to obtain one for temporary use.
// In this case, we're running on the thread stack, so there's
JEQ 2(PC)
#endif
MOVQ m(CX), BP
- MOVQ BP, 8(SP)
+ MOVQ BP, R8 // holds oldm until end of function
CMPQ BP, $0
JNE havem
needm:
+ MOVQ R8, 0(SP)
MOVQ $runtime·needm(SB), AX
CALL AX
+ MOVQ 0(SP), R8
get_tls(CX)
MOVQ m(CX), BP
// so that the traceback will seamlessly trace back into
// the earlier calls.
//
- // In the new goroutine, 0(SP) and 8(SP) are unused except
- // on Windows, where they are the SEH block.
+ // In the new goroutine, 0(SP) holds the saved R8.
MOVQ m_curg(BP), SI
MOVQ SI, g(CX)
MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
MOVQ (g_sched+gobuf_pc)(SI), BP
MOVQ BP, -8(DI)
- LEAQ -(8+16)(DI), SP
+ LEAQ -(8+8)(DI), SP
+ MOVQ R8, 0(SP)
CALL runtime·cgocallbackg(SB)
+ MOVQ 0(SP), R8
// Restore g->sched (== m->curg->sched) from saved values.
get_tls(CX)
MOVQ g(CX), SI
- MOVQ 16(SP), BP
+ MOVQ 8(SP), BP
MOVQ BP, (g_sched+gobuf_pc)(SI)
- LEAQ (16+8)(SP), DI
+ LEAQ (8+8)(SP), DI
MOVQ DI, (g_sched+gobuf_sp)(SI)
// Switch back to m->g0's stack and restore m->g0->sched.sp.
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
- MOVQ 8(SP), BP
- CMPQ BP, $0
+ CMPQ R8, $0
JNE 3(PC)
MOVQ $runtime·dropm(SB), AX
CALL AX
uintptr argsize;
};
-#define CBARGS (CallbackArgs*)((byte*)m->g0->sched.sp+(3+(thechar=='5'))*sizeof(void*))
+// Location of callback arguments depends on stack frame layout
+// and size of stack frame of cgocallback_gofunc.
+
+// On arm, stack frame is two words and there's a saved LR between
+// SP and the stack frame and between the stack frame and the arguments.
+#ifdef GOARCH_arm
+#define CBARGS (CallbackArgs*)((byte*)m->g0->sched.sp+4*sizeof(void*))
+#endif
+
+// On amd64, stack frame is one word, plus caller PC.
+#ifdef GOARCH_amd64
+#define CBARGS (CallbackArgs*)((byte*)m->g0->sched.sp+2*sizeof(void*))
+#endif
+
+// On 386, stack frame is three words, plus caller PC.
+#ifdef GOARCH_386
+#define CBARGS (CallbackArgs*)((byte*)m->g0->sched.sp+4*sizeof(void*))
+#endif
void
runtime·cgocallbackg(void)
runtime·gosave(&m->g0->sched);
m->g0->sched.pc = (uintptr)-1; // make sure it is never used
m->g0->stackguard = m->g0->stackguard0; // cgo sets only stackguard0, copy it to stackguard
+#ifdef GOOS_windows
+#ifdef GOARCH_386
m->seh = &seh;
+#endif
+#endif
runtime·asminit();
runtime·minit();
g->stackguard = (uintptr)(&x - 32*1024);
g->stackguard0 = g->stackguard;
+#ifdef GOOS_windows
+#ifdef GOARCH_386
// On windows/386, we need to put an SEH frame (two words)
// somewhere on the current stack. We are called from cgocallback_gofunc
// and we know that it will leave two unused words below m->curg->sched.sp.
// Use those.
- m->seh = (SEH*)((uintptr*)m->curg->sched.sp - 3);
+ m->seh = (SEH*)((uintptr*)&x + 1);
+#endif
+#endif
// Initialize this thread to use the m.
runtime·asminit();
// Undo whatever initialization minit did during needm.
runtime·unminit();
+
+#ifdef GOOS_windows
+#ifdef GOARCH_386
m->seh = nil; // reset dangling typed pointer
+#endif
+#endif
// Clear m and g, and return m to the extra list.
// After the call to setmg we can only call nosplit functions.